lecrapaud 0.5.1__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lecrapaud might be problematic.
- lecrapaud/api.py +71 -61
- lecrapaud/config.py +5 -1
- lecrapaud/db/alembic/versions/{2025_06_20_1924-1edada319fd7_initial_setup.py → 2025_06_23_1748-f089dfb7e3ba_.py} +20 -20
- lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +30 -0
- lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +34 -0
- lecrapaud/db/models/__init__.py +14 -2
- lecrapaud/db/models/base.py +48 -2
- lecrapaud/db/models/{dataset.py → experiment.py} +23 -25
- lecrapaud/db/models/feature_selection.py +5 -5
- lecrapaud/db/models/model_selection.py +5 -5
- lecrapaud/db/models/score.py +3 -1
- lecrapaud/db/models/target.py +4 -4
- lecrapaud/db/session.py +4 -4
- lecrapaud/directories.py +0 -2
- lecrapaud/experiment.py +25 -18
- lecrapaud/feature_engineering.py +51 -22
- lecrapaud/feature_selection.py +41 -36
- lecrapaud/jobs/tasks.py +3 -3
- lecrapaud/model_selection.py +266 -259
- lecrapaud/search_space.py +23 -4
- lecrapaud/utils.py +2 -2
- {lecrapaud-0.5.1.dist-info → lecrapaud-0.6.2.dist-info}/METADATA +2 -2
- lecrapaud-0.6.2.dist-info/RECORD +43 -0
- lecrapaud/services/__init__.py +0 -0
- lecrapaud/services/embedding_categorical.py +0 -71
- lecrapaud/services/indicators.py +0 -309
- lecrapaud/speed_tests/experiments.py +0 -139
- lecrapaud/speed_tests/trash.py +0 -37
- lecrapaud-0.5.1.dist-info/RECORD +0 -46
- {lecrapaud-0.5.1.dist-info → lecrapaud-0.6.2.dist-info}/LICENSE +0 -0
- {lecrapaud-0.5.1.dist-info → lecrapaud-0.6.2.dist-info}/WHEEL +0 -0
lecrapaud/search_space.py
CHANGED
```diff
@@ -835,14 +835,33 @@ dl_recurrent_models = [
     },
 ]
 
+all_models = ml_models + dl_recurrent_models
 
-def get_models_idx(*model_names):
-    models = ml_models + dl_recurrent_models
 
+def get_models_idx(*model_names):
     matching_idx = [
-        i for i, model in enumerate(models) if model["model_name"] in model_names
+        i for i, model in enumerate(all_models) if model["model_name"] in model_names
     ]
     return matching_idx
 
 
-
+def normalize_models_idx(models_idx: list[int | str]) -> list[int]:
+    """
+    Convert a list of model identifiers (int or str) to a list of model indices (int).
+    If an element is a string, it is resolved using `get_models_idx`.
+
+    Returns:
+        List of model indices (ints).
+    """
+    normalized = []
+    for model_idx in models_idx:
+        if isinstance(model_idx, int):
+            normalized.append(model_idx)
+        elif isinstance(model_idx, str):
+            resolved = get_models_idx(model_idx)
+            if not resolved:
+                raise ValueError(f"No model index found for name: {model_idx}")
+            normalized.append(resolved[0])
+        else:
+            raise TypeError(f"Unsupported type: {type(model_idx)}")
+    return normalized
```
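The new `normalize_models_idx` helper means `models_idx` can now mix integer positions and model names. A minimal usage sketch, assuming the package is installed; the quoted model names are purely illustrative stand-ins for whatever `"model_name"` values `ml_models`/`dl_recurrent_models` actually define:

```python
from lecrapaud.search_space import normalize_models_idx

# Integers pass through unchanged; strings are resolved via get_models_idx().
# "xgboost" and "lstm" are hypothetical names, not guaranteed entries of the search space.
idx = normalize_models_idx([0, "xgboost", "lstm"])
print(idx)  # e.g. [0, 4, 11], depending on the ordering of all_models

# Unknown names raise ValueError; anything that is not int or str raises TypeError.
```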
lecrapaud/utils.py
CHANGED
```diff
@@ -21,7 +21,7 @@ def setup_logger():
     global _LOGGER_ALREADY_CONFIGURED
     if _LOGGER_ALREADY_CONFIGURED:  # ← bail out if done before
 
-        return logging.getLogger("
+        return logging.getLogger("lecrapaud" if PYTHON_ENV != "Worker" else "")
 
     print(
         f"Setting up logger with PYTHON_ENV {PYTHON_ENV} and LOGGING_LEVEL {LOGGING_LEVEL}"
@@ -34,7 +34,7 @@ def setup_logger():
     logging.basicConfig(format=fmt, datefmt=datefmt)  # root format
     formatter = logging.Formatter(fmt, datefmt=datefmt)
 
-    logger = logging.getLogger("" if PYTHON_ENV == "Worker" else "
+    logger = logging.getLogger("" if PYTHON_ENV == "Worker" else "lecrapaud")
 
     log_level = getattr(logging, LOGGING_LEVEL.upper(), logging.INFO)
     logger.setLevel(log_level)
```
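The library logger is now registered under the name "lecrapaud" (the root logger is only used when PYTHON_ENV is "Worker"). A minimal sketch of how an application embedding the package could tune that named logger with the standard-library logging API; the level and handler choices below are illustrative, not part of lecrapaud:

```python
import logging

# Adjust lecrapaud's verbosity without touching the root logger.
lib_logger = logging.getLogger("lecrapaud")
lib_logger.setLevel(logging.WARNING)

# Optionally send the library's output to its own file.
handler = logging.FileHandler("lecrapaud.log")
handler.setFormatter(
    logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s")
)
lib_logger.addHandler(handler)
```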
{lecrapaud-0.5.1.dist-info → lecrapaud-0.6.2.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.5.1
+Version: 0.6.2
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet
@@ -157,6 +157,7 @@ Below are the main arguments you can pass to `create_experiment` (or the `Experi
 | `columns_te_target` | list | Columns for target encoding target | `['target']` |
 | `data` | DataFrame | Your main dataset (required for new experiment) | `your_dataframe` |
 | `date_column` | str | Name of the date column | `'date'` |
+| `experiment_name` | str | Name for the training session | `'my_session'` |
 | `group_column` | str | Name of the group column | `'stock_id'` |
 | `max_timesteps` | int | Max timesteps for time series models | `30` |
 | `models_idx` | list | Indices of models to use for model selection | `[0, 1, 2]` |
@@ -165,7 +166,6 @@ Below are the main arguments you can pass to `create_experiment` (or the `Experi
 | `perform_hyperopt` | bool | Whether to perform hyperparameter optimization | `True`/`False` |
 | `plot` | bool | Whether to plot results | `True`/`False` |
 | `preserve_model` | bool | Whether to preserve the best model | `True`/`False` |
-| `session_name` | str | Name for the training session | `'my_session'` |
 | `target_clf` | list | List of classification target column indices/names | `[1, 2, 3]` |
 | `target_mclf` | list | Multi-class classification targets (not yet implemented) | `[11]` |
 | `target_numbers` | list | List of regression target column indices/names | `[1, 2, 3]` |
```
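The README table now documents `experiment_name` where earlier releases used `session_name`. A sketch of an updated call site, assuming `create_experiment` accepts the arguments exactly as listed in the table above; the import path, DataFrame, and values are placeholders:

```python
import pandas as pd

from lecrapaud import create_experiment  # import path assumed, see the package README

df = pd.read_csv("my_dataset.csv")  # placeholder dataset

experiment = create_experiment(
    data=df,
    experiment_name="my_session",  # was session_name in 0.5.1
    date_column="date",
    group_column="stock_id",
    models_idx=[0, 1, 2],
    target_numbers=[1, 2, 3],
    target_clf=[1, 2, 3],
)
```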
lecrapaud-0.6.2.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,43 @@
+lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
+lecrapaud/api.py,sha256=JFwOCawI9fYXod-Jt2w1Y_UWUoGA-bPqGaN_dtP0-gs,10289
+lecrapaud/config.py,sha256=82JaFv8HWsrwuzOo28kOXEdLaJ8KIzr0P3cXx8CkeMA,936
+lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
+lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
+lecrapaud/db/alembic/env.py,sha256=rseEi8oR_eKXYYW3UwOKiCMuDEwT4lxsT7llySOUpgk,2305
+lecrapaud/db/alembic/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
+lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py,sha256=MNPyqWaQSHNV8zljD1G9f-LzrVz-nOKlgOhHEE0U8Oo,13060
+lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py,sha256=g6aLRV6jAKXkPUEcs9FAeGfsYpe9rMTxfqbNib3U0-U,809
+lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=dl6tfvcqErgJ6NKvjve0euu7l0BWyEAKSS-ychsEAl8,1139
+lecrapaud/db/alembic.ini,sha256=zgvur-5jnLsT66_98FaTOTNgjwObGZCE0HqMwRAeJrs,3587
+lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
+lecrapaud/db/models/base.py,sha256=CYtof_UjFwX3C7XUifequh_UtLHJ25bU7LCwT501uGE,7508
+lecrapaud/db/models/experiment.py,sha256=WNV5gz78JljL0xlAjy121K3cgDqwC0r6zL3bpH6b0dY,3642
+lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
+lecrapaud/db/models/feature_selection.py,sha256=nXy_Lg3uDxid71vYll_qzdo8ajYsJEXskI7vLQ3uyW0,3315
+lecrapaud/db/models/feature_selection_rank.py,sha256=PvEpdv-JJt2wZMtX5TO0wyZ3IONlPkeDaC49i0VA-qU,2074
+lecrapaud/db/models/model.py,sha256=F0hyMjd4FFHCv6_arIWBEmBCGOfG3b6_uzU8ExtFE90,952
+lecrapaud/db/models/model_selection.py,sha256=fkZoUv7fdlBygWsfQyYPoayLomyp-gowiA3fbFPqdqw,1827
+lecrapaud/db/models/model_training.py,sha256=egggSfkW8C2nTadytc5DdjU7d2VEMT6LRRZxO1ZD5To,1600
+lecrapaud/db/models/score.py,sha256=_yaa6yBxugcOZMvLxqqIaMN7QGvzAOzOGCYQO0_gBjw,1601
+lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
+lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
+lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
+lecrapaud/experiment.py,sha256=NwwGDZqUyvvRu5EDK3_Oh0_kF29bNIPDawVFFpzFvZM,2350
+lecrapaud/feature_engineering.py,sha256=jrU9OCKX1UtEoEJQoEwrvVf6NT3nQfGCX4CBy8ceZu4,31168
+lecrapaud/feature_selection.py,sha256=v2-wAmm_U5_aI0NUB5xYhI1KoB60-1-CBAyJgFbyaRk,42713
+lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
+lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
+lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
+lecrapaud/jobs/scheduler.py,sha256=SiYWPxokpKnR8V6btLOO6gbK0PEjSRoeG0kCbQvYPf4,990
+lecrapaud/jobs/tasks.py,sha256=OjI4RZHQQBH64dc0rlIK23wDhcOgE-cPhNZnzOmkgaE,1649
+lecrapaud/model_selection.py,sha256=wX7ON5qIfR-wV6KBDaAvbMHEfa2kqRbZIzwj09KKgVg,62008
+lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
+lecrapaud/speed_tests/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
+lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
+lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
+lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
+lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
+lecrapaud-0.6.2.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
+lecrapaud-0.6.2.dist-info/METADATA,sha256=IX_u-tTi8o_SkBezBF6OJlINiHMBUR3HaWxqVYfs-o0,11623
+lecrapaud-0.6.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+lecrapaud-0.6.2.dist-info/RECORD,,
```
lecrapaud/services/__init__.py
DELETED
File without changes
lecrapaud/services/embedding_categorical.py
DELETED
```diff
@@ -1,71 +0,0 @@
-import pandas as pd
-import numpy as np
-from sklearn.decomposition import PCA
-from sentence_transformers import SentenceTransformer
-
-# Sample DataFrame with categorical features
-data = pd.DataFrame(
-    {
-        "SECTOR": ["Tech", "Finance", "Health", "Education", "Retail"],
-        "SUBINDUSTRY": [
-            "Software",
-            "Banking",
-            "Pharmaceuticals",
-            "Online Education",
-            "E-commerce",
-        ],
-        "LOCATION": ["USA", "UK", "Germany", "India", "Brazil"],
-    }
-)
-
-# Step 1: Load a pre-trained Word2Vec-like model from Hugging Face (Sentence Transformer)
-# This model generates dense vector representations (embeddings) of text
-model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
-
-# Step 2: Use the model to generate embeddings for each categorical feature
-# We'll generate embeddings for each category in SECTOR, SUBINDUSTRY, and LOCATION
-
-
-def get_embeddings(text_column):
-    """Function to generate embeddings for a given text column."""
-    return np.array([model.encode(text) for text in text_column])
-
-
-# Generate embeddings for the categorical features
-sector_embeddings = get_embeddings(data["SECTOR"])
-subindustry_embeddings = get_embeddings(data["SUBINDUSTRY"])
-location_embeddings = get_embeddings(data["LOCATION"])
-
-
-# Step 3: Reduce dimensionality using PCA to k dimensions
-def reduce_dimensionality(embeddings, k):
-    """Function to reduce dimensionality of embeddings using PCA."""
-    pca = PCA(n_components=k)
-    return pca.fit_transform(embeddings)
-
-
-# Set k (number of dimensions after PCA)
-k = 3  # Reduce to 3 dimensions
-
-# Apply PCA to reduce dimensionality of the embeddings
-reduced_sector_embeddings = reduce_dimensionality(sector_embeddings, k)
-reduced_subindustry_embeddings = reduce_dimensionality(subindustry_embeddings, k)
-reduced_location_embeddings = reduce_dimensionality(location_embeddings, k)
-
-# Step 4: Combine the reduced embeddings back into the DataFrame
-# Create new DataFrames for the reduced embeddings
-sector_df = pd.DataFrame(
-    reduced_sector_embeddings, columns=[f"SECTOR_PC{i+1}" for i in range(k)]
-)
-subindustry_df = pd.DataFrame(
-    reduced_subindustry_embeddings, columns=[f"SUBINDUSTRY_PC{i+1}" for i in range(k)]
-)
-location_df = pd.DataFrame(
-    reduced_location_embeddings, columns=[f"LOCATION_PC{i+1}" for i in range(k)]
-)
-
-# Concatenate the reduced embeddings with the original data (if needed)
-encoded_data = pd.concat([sector_df, subindustry_df, location_df], axis=1)
-
-# Display the resulting DataFrame with reduced embeddings
-print(encoded_data)
```
lecrapaud/services/indicators.py
DELETED
```diff
@@ -1,309 +0,0 @@
-import pandas as pd
-import numpy as np
-
-
-def rsi(ohlc: pd.DataFrame, period: int = 14) -> pd.Series:
-    """Implements the RSI indicator
-
-    Args:
-        - ohlc (pd.DataFrame):
-        - period (int):
-
-    Return:
-        an pd.Series with the RSI indicator values
-    """
-    close = ohlc["CLOSE"]
-    delta = close.diff()
-
-    gain = (delta.where(delta > 0, 0)).ewm(alpha=1 / period).mean()
-    loss = (-delta.where(delta < 0, 0)).ewm(alpha=1 / period).mean()
-
-    rs = gain / loss
-    rsi = 100 - (100 / (1 + rs))
-    return pd.Series(rsi, index=ohlc.index)
-
-
-def macd(
-    ohlc: pd.DataFrame,
-    short_period: int = 12,
-    long_period: int = 26,
-    signal_period: int = 9,
-):
-    close = ohlc["CLOSE"]
-    short_ema = close.ewm(span=short_period, adjust=False).mean()
-    long_ema = close.ewm(span=long_period, adjust=False).mean()
-
-    macd_line = short_ema - long_ema
-    signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()
-    return macd_line, signal_line
-
-
-def bollinger_bands(ohlc: pd.DataFrame, period: int = 20, num_std: int = 2):
-    close = ohlc["CLOSE"]
-    sma = close.rolling(window=period).mean()
-    std = close.rolling(window=period).std()
-
-    upper_band = sma + (num_std * std)
-    lower_band = sma - (num_std * std)
-
-    return upper_band, sma, lower_band
-
-
-def adx(ohlc: pd.DataFrame, period: int = 14):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    plus_dm = high.diff().where((high.diff() > low.diff()) & (high.diff() > 0), 0)
-    minus_dm = low.diff().where((low.diff() > high.diff()) & (low.diff() > 0), 0)
-
-    tr = pd.concat(
-        [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
-    ).max(axis=1)
-
-    atr = tr.rolling(window=period).mean()
-    plus_di = 100 * (plus_dm.rolling(window=period).mean() / atr)
-    minus_di = 100 * (minus_dm.rolling(window=period).mean() / atr)
-
-    dx = 100 * abs(plus_di - minus_di) / (plus_di + minus_di)
-    adx = dx.rolling(window=period).mean()
-
-    return adx
-
-
-def sma(ohlc: pd.DataFrame, period: int):
-    return ohlc["CLOSE"].rolling(window=period).mean()
-
-
-def ema(ohlc: pd.DataFrame, period: int):
-    return ohlc["CLOSE"].ewm(span=period, adjust=False).mean()
-
-
-def atr(ohlc: pd.DataFrame, period: int = 14):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    tr = pd.concat(
-        [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
-    ).max(axis=1)
-    atr = tr.rolling(window=period).mean()
-
-    return atr
-
-
-def stochastic(ohlc: pd.DataFrame, period: int = 14, k_slowing_period: int = 3):
-    low_min = ohlc["LOW"].rolling(window=period).min()
-    high_max = ohlc["HIGH"].rolling(window=period).max()
-
-    k_percent = 100 * (ohlc["CLOSE"] - low_min) / (high_max - low_min)
-    d_percent = k_percent.rolling(window=k_slowing_period).mean()  # Smoothed K
-
-    return k_percent, d_percent
-
-
-def mfi(ohlc: pd.DataFrame, period: int = 14):
-    typical_price = (ohlc["HIGH"] + ohlc["LOW"] + ohlc["CLOSE"]) / 3
-    money_flow = typical_price * ohlc["VOLUME"]
-
-    positive_flow = money_flow.where(typical_price > typical_price.shift(), 0)
-    negative_flow = money_flow.where(typical_price < typical_price.shift(), 0)
-
-    positive_mf = positive_flow.rolling(window=period).sum()
-    negative_mf = negative_flow.rolling(window=period).sum()
-
-    mfi = 100 - (100 / (1 + (positive_mf / negative_mf)))
-
-    return mfi
-
-
-def fibonacci_retracement(high: float, low: float):
-    diff = high - low
-    levels = {
-        "23.6%": high - diff * 0.236,
-        "38.2%": high - diff * 0.382,
-        "50.0%": high - diff * 0.5,
-        "61.8%": high - diff * 0.618,
-        "100%": low,
-    }
-    return levels
-
-
-def ichimoku_cloud(ohlc: pd.DataFrame):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-
-    tenkan_sen = (high.rolling(window=9).max() + low.rolling(window=9).min()) / 2
-    kijun_sen = (high.rolling(window=26).max() + low.rolling(window=26).min()) / 2
-    senkou_span_a = ((tenkan_sen + kijun_sen) / 2).shift(26)
-    senkou_span_b = (
-        (high.rolling(window=52).max() + low.rolling(window=52).min()) / 2
-    ).shift(26)
-    chikou_span = ohlc["CLOSE"].shift(26)
-
-    return tenkan_sen, kijun_sen, senkou_span_a, senkou_span_b, chikou_span
-
-
-def parabolic_sar(ohlc: pd.DataFrame, af_step: float = 0.02, af_max: float = 0.2):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    # Initialize the SAR series with the closing prices as a starting point
-    sar = close.copy()
-
-    # Define initial trend and extreme point
-    trend_up = True
-    ep = high.iloc[0] if trend_up else low.iloc[0]  # Extremum Price
-    af = af_step  # Acceleration Factor
-
-    # Iterate over the data points starting from the second row
-    for i in range(1, len(ohlc)):
-        prev_sar = sar.iloc[i - 1]  # Previous SAR value
-
-        if trend_up:
-            # Update SAR for an uptrend
-            sar.iloc[i] = prev_sar + af * (ep - prev_sar)
-            if low.iloc[i] < sar.iloc[i]:
-                # Switch to downtrend if current low breaks the SAR
-                trend_up = False
-                sar.iloc[i] = ep
-                ep = low.iloc[i]
-                af = af_step
-        else:
-            # Update SAR for a downtrend
-            sar.iloc[i] = prev_sar + af * (ep - prev_sar)
-            if high.iloc[i] > sar.iloc[i]:
-                # Switch to uptrend if current high breaks the SAR
-                trend_up = True
-                sar.iloc[i] = ep
-                ep = high.iloc[i]
-                af = af_step
-
-        # Update the extremum price (EP) and acceleration factor (AF) based on the trend
-        if trend_up:
-            if high.iloc[i] > ep:
-                ep = high.iloc[i]
-                af = min(af + af_step, af_max)
-        else:
-            if low.iloc[i] < ep:
-                ep = low.iloc[i]
-                af = min(af + af_step, af_max)
-
-    return sar
-
-
-def chaikin_money_flow(ohlc: pd.DataFrame, period: int = 21):
-    money_flow_multiplier = (
-        (ohlc["CLOSE"] - ohlc["LOW"]) - (ohlc["HIGH"] - ohlc["CLOSE"])
-    ) / (ohlc["HIGH"] - ohlc["LOW"])
-    money_flow_volume = money_flow_multiplier * ohlc["VOLUME"]
-
-    cmf = (
-        money_flow_volume.rolling(window=period).sum()
-        / ohlc["VOLUME"].rolling(window=period).sum()
-    )
-
-    return cmf
-
-
-def pivot_points(ohlc: pd.DataFrame):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    pivot = (high + low + close) / 3
-    r1 = 2 * pivot - low
-    s1 = 2 * pivot - high
-    r2 = pivot + (high - low)
-    s2 = pivot - (high - low)
-
-    return pivot, r1, s1, r2, s2
-
-
-def volatility(
-    ohlc: pd.DataFrame,
-    period: int = 14,
-):
-    """
-    Calculates rolling volatility for each stock based on the rolling standard deviation of returns.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data, including returns (RET) and stock identifier.
-    - period: int, the rolling window period for volatility calculation (default is 14 days).
-
-    Returns:
-    - pd.Series representing the calculated volatility for each row in the DataFrame.
-    """
-
-    # Calculate returns based on CLOSE prices
-    ret = ohlc["CLOSE"].pct_change()
-
-    # Calculate rolling standard deviation of returns
-    rolling_std = ret.rolling(window=period, min_periods=1).std()
-
-    # Multiply by the square root of the period to scale volatility
-    volatility = rolling_std * np.sqrt(period)
-
-    return volatility
-
-
-def cumulative_return(ohlc: pd.DataFrame, period: int = 14):
-    """
-    Calculates cumulative returns over the specified period using the 'CLOSE' price.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data, including 'CLOSE' column.
-    - period: int, the number of days over which to calculate the cumulative return.
-
-    Returns:
-    - pd.Series representing the cumulative returns for each row in the DataFrame.
-    """
-
-    # Calculate cumulative return based on CLOSE prices
-    cumul_ret = ohlc["CLOSE"].pct_change(period - 1)
-
-    return cumul_ret
-
-
-def close_diff(ohlc: pd.DataFrame):
-    """
-    Calculates the difference between consecutive close prices.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data with a 'CLOSE' column.
-
-    Returns:
-    - pd.Series representing the difference in closing prices.
-    """
-    return ohlc["CLOSE"].diff()
-
-
-def obv(ohlc: pd.DataFrame):
-    """
-    Calculates On-Balance Volume (OBV) based on closing price differences and volume.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing 'CLOSE', 'VOLUME' columns.
-
-    Returns:
-    - pd.Series representing the OBV values.
-    """
-    close_diff = ohlc["CLOSE"].diff()
-    obv = (np.sign(close_diff) * ohlc["VOLUME"]).fillna(0).cumsum()
-    return obv
-
-
-def pressure(ohlc: pd.DataFrame):
-    """
-    Calculates both upward and downward pressure based on price movements.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing 'OPEN', 'HIGH', 'LOW', and 'CLOSE' columns.
-
-    Returns:
-    - pd.DataFrame with 'UPWARD_PRESSURE' and 'DOWNWARD_PRESSURE' columns.
-    """
-    upward = (ohlc["LOW"] - ohlc["OPEN"]) / ohlc["OPEN"]
-    downward = (ohlc["HIGH"] - ohlc["CLOSE"]) / ohlc["OPEN"]
-    return upward, downward
```
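All of the removed helpers expected a single OHLCV DataFrame with upper-case column names (OPEN, HIGH, LOW, CLOSE, VOLUME). A sketch of how code that still depends on them could keep calling the deleted implementations after copying the module out of this diff into a local `indicators.py`; the sample data below is synthetic:

```python
import numpy as np
import pandas as pd

from indicators import macd, rsi  # local copy of the module removed in 0.6.2

# Tiny synthetic OHLCV frame using the upper-case columns the helpers expect.
rng = np.random.default_rng(0)
close = pd.Series(100 + rng.normal(0, 1, 60).cumsum())
ohlc = pd.DataFrame(
    {
        "OPEN": close.shift(1).fillna(close.iloc[0]),
        "HIGH": close + 1,
        "LOW": close - 1,
        "CLOSE": close,
        "VOLUME": rng.integers(1_000, 5_000, size=60),
    }
)

print(rsi(ohlc).tail())  # 14-period RSI by default
macd_line, signal_line = macd(ohlc)
print(macd_line.tail(), signal_line.tail())
```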
lecrapaud/speed_tests/experiments.py
DELETED
```diff
@@ -1,139 +0,0 @@
-# Experiments on sharpe ratio to calculate as loss or metric
-class SharpeRatioTFND(tf.keras.metrics.Metric):
-
-    def __init__(self, name="sharpe_ratio_tf_nd", **kwargs):
-        super().__init__(name=name, **kwargs)
-        self.sharpe_ratio = 0
-        self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATE", "TARGET_1"])
-
-    # @tf.numpy_function(Tout=tf.float32)
-    def update_state(self, data, y_pred, sample_weight=None):
-        portfolio_size = 10
-
-        y_true = pd.Series(data[:, 0].numpy(), index=data[:, 1].numpy(), name="TARGET")
-        y_pred = pd.Series(
-            y_pred.numpy().flatten(), index=data[:, 1].numpy(), name="PRED"
-        )
-
-        df = pd.concat(
-            [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-        )
-        self.df = pd.concat([self.df, df], axis=0)
-
-        def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-            return (
-                df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[
-                    :portfolio_size
-                ]
-            ).mean()
-
-        buf = self.df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-        if buf.shape[0] == 1:
-            self.sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
-        else:
-            self.sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-
-    def result(self):
-        return self.sharpe_ratio
-
-    def reset_states(self):
-        self.sharpe_ratio = 0
-        self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATES", "TARGET_1"])
-
-
-@tf.numpy_function(Tout=tf.float32)
-def sharpe_ratio_tf_nd(data, y_pred):
-
-    portfolio_size = 10
-
-    y_true = pd.Series(data[:, 0], index=data[:, 1], name="TARGET")
-    y_pred = pd.Series(y_pred.flatten(), index=data[:, 1], name="PRED")
-
-    df = pd.concat(
-        [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-    )
-
-    print(df)
-
-    def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-        print(
-            df.sort_values("PRED", ascending=False)[
-                ["PRED", "TARGET", "TARGET_1"]
-            ].head(10)
-        )
-        return (
-            df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[:portfolio_size]
-        ).mean()
-
-    buf = df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-    if buf.shape[0] == 1:
-        sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
-    else:
-        sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-    print(buf, sharpe_ratio)
-    return sharpe_ratio
-
-
-def sharpe_ratio_tf(data, y_pred):
-
-    portfolio_size = 10
-    # unscale
-    y_true = data[:, 0]
-    indexes = data[:, 1]
-
-    dates = stock_data[["DATE", "TARGET_1"]].iloc[indexes]
-    dates = tf.convert_to_tensor(dates)
-    dates = tf.dtypes.cast(dates, tf.float32)
-
-    y_true, y_pred = unscale_tf(y_true, y_pred)
-    y_true = tf.dtypes.cast(y_true, tf.float32)
-    y_pred = tf.dtypes.cast(y_pred, tf.float32)
-    y_true = tf.reshape(y_true, y_pred.shape)
-
-    # concat and sort by pred
-    print(y_pred, y_true, dates)
-    tensor = tf.concat([y_pred, y_true, dates], axis=1)
-    tensor_ordered = tf.gather(
-        tensor, tf.argsort(tensor[:, 0], direction="DESCENDING"), axis=0
-    )
-
-    # groupby and reduce with mean of 10 first elements per date groups.
-    def init_func(_):
-        return (0.0, 0.0)
-
-    def reduce_func(state, value):
-        print(state, value)
-        if state[1] < portfolio_size:
-            return (state[0] + value[3], state[1] + 1)
-        else:
-            return state
-
-    def finalize_func(s, n):
-        return s / n
-
-    reducer = tf.data.experimental.Reducer(init_func, reduce_func, finalize_func)
-
-    def key_f(row):
-        print(row)
-        return tf.dtypes.cast(row[2], tf.int64)
-
-    ds_transformation_func = tf.data.experimental.group_by_reducer(
-        key_func=key_f, reducer=reducer
-    )
-    print(tensor_ordered, tensor_ordered.shape)
-    slices = tf.slice(tensor_ordered, [0, 0], [-1, -1])
-    print(slices)
-    ds = tf.data.Dataset.from_tensor_slices(slices)
-    buf = ds.apply(ds_transformation_func)
-    # ds = ds.batch(10)
-
-    # print(ds.as_numpy_iterator())
-    # iterator = iter(ds)
-    # buf = iterator
-    print(buf)
-    # sharpe calcul
-    sharpe_ratio = (K.mean(buf) * 252) / (K.std(buf) * K.sqrt(252))
-    print(sharpe_ratio)
-    return sharpe_ratio
```
lecrapaud/speed_tests/trash.py
DELETED
```diff
@@ -1,37 +0,0 @@
-# def _get_weekly_return(y_true, y_pred):
-# df = pd.concat([y_true, y_pred, stock_data[['YEARWEEK', 'STOCK', 'TARGET_1']]], join='inner', axis=1)
-# df['PRED'] += 1
-# df['TARGET'] += 1
-# return df[['YEARWEEK', 'STOCK', 'PRED', 'TARGET']].groupby(['YEARWEEK', 'STOCK']).prod().reset_index()
-
-# def _calc_spread_return_per_week(df, portfolio_size):
-# return (df.sort_values('PRED', ascending=False)['TARGET_1'][:portfolio_size] - 1).mean()
-
-# def sharpe_ratio_weekly(y_true, y_pred, portfolio_size:int=10):
-# df = _get_weekly_return(y_true, y_pred)
-# buf = df.groupby('YEARWEEK').apply(_calc_spread_return_per_week, portfolio_size)
-# sharpe_ratio = (buf.mean() * 52) / (buf.std() * np.sqrt(52))
-# buf += 1
-# cumulated_roi = buf.prod() - 1
-# cagr = buf.prod() ** (1 / (buf.shape[0]/52) ) - 1
-# return sharpe_ratio, cumulated_roi, cagr
-
-
-def sharpe_ratio_daily(y_true, y_pred, portfolio_size: int = 10):
-    df = pd.concat(
-        [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-    )
-
-    def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-        # print(df.sort_values('PRED', ascending=False)[['PRED', 'TARGET', 'TARGET_1']].head(10))
-        return (
-            df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[:portfolio_size]
-        ).mean()
-
-    buf = df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-    sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-    buf += 1
-    cumulated_roi = buf.prod() - 1
-    cagr = buf.prod() ** (1 / (buf.shape[0] / 252)) - 1
-    return sharpe_ratio, cumulated_roi, cagr
```