lecrapaud 0.5.1__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lecrapaud might be problematic.

lecrapaud/search_space.py CHANGED
@@ -835,14 +835,33 @@ dl_recurrent_models = [
     },
 ]

+all_models = ml_models + dl_recurrent_models

-def get_models_idx(*model_names):
-    models = ml_models + dl_recurrent_models

+def get_models_idx(*model_names):
     matching_idx = [
-        i for i, model in enumerate(models) if model["model_name"] in model_names
+        i for i, model in enumerate(all_models) if model["model_name"] in model_names
     ]
     return matching_idx


-all_models = ml_models + dl_recurrent_models
+def normalize_models_idx(models_idx: list[int | str]) -> list[int]:
+    """
+    Convert a list of model identifiers (int or str) to a list of model indices (int).
+    If an element is a string, it is resolved using `get_models_idx`.
+
+    Returns:
+        List of model indices (ints).
+    """
+    normalized = []
+    for model_idx in models_idx:
+        if isinstance(model_idx, int):
+            normalized.append(model_idx)
+        elif isinstance(model_idx, str):
+            resolved = get_models_idx(model_idx)
+            if not resolved:
+                raise ValueError(f"No model index found for name: {model_idx}")
+            normalized.append(resolved[0])
+        else:
+            raise TypeError(f"Unsupported type: {type(model_idx)}")
+    return normalized
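
For reference, a minimal usage sketch of the two helpers introduced above. The mixed list and the "xgboost" name are illustrative assumptions, not names taken from the package's search space.

from lecrapaud.search_space import get_models_idx, normalize_models_idx

# Plain integers pass through unchanged; strings are resolved against `all_models`
# via get_models_idx, keeping the first matching index (ValueError if no match).
idx = normalize_models_idx([0, "xgboost", 2])  # "xgboost" is a hypothetical model_name
print(idx)  # e.g. [0, 7, 2], depending on the order of entries in all_models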
lecrapaud/utils.py CHANGED
@@ -21,7 +21,7 @@ def setup_logger():
     global _LOGGER_ALREADY_CONFIGURED
     if _LOGGER_ALREADY_CONFIGURED:  # ← bail out if done before

-        return logging.getLogger("stock" if PYTHON_ENV != "Worker" else "")
+        return logging.getLogger("lecrapaud" if PYTHON_ENV != "Worker" else "")

     print(
         f"Setting up logger with PYTHON_ENV {PYTHON_ENV} and LOGGING_LEVEL {LOGGING_LEVEL}"
@@ -34,7 +34,7 @@ def setup_logger():
     logging.basicConfig(format=fmt, datefmt=datefmt)  # root format
     formatter = logging.Formatter(fmt, datefmt=datefmt)

-    logger = logging.getLogger("" if PYTHON_ENV == "Worker" else "stock")
+    logger = logging.getLogger("" if PYTHON_ENV == "Worker" else "lecrapaud")

     log_level = getattr(logging, LOGGING_LEVEL.upper(), logging.INFO)
     logger.setLevel(log_level)
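
The two hunks above rename the package logger from "stock" to "lecrapaud". A small sketch of the downstream effect, assuming PYTHON_ENV is not "Worker"; handler and format configuration come from setup_logger itself.

import logging
from lecrapaud.utils import setup_logger

setup_logger()                           # configures the package logger (lecrapaud/utils.py)
logger = logging.getLogger("lecrapaud")  # was logging.getLogger("stock") before 0.6.2
logger.info("lecrapaud logger is configured")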
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.5.1
+Version: 0.6.2
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet
@@ -157,6 +157,7 @@ Below are the main arguments you can pass to `create_experiment` (or the `Experi
 | `columns_te_target` | list | Columns for target encoding target | `['target']` |
 | `data` | DataFrame | Your main dataset (required for new experiment) | `your_dataframe` |
 | `date_column` | str | Name of the date column | `'date'` |
+| `experiment_name` | str | Name for the training session | `'my_session'` |
 | `group_column` | str | Name of the group column | `'stock_id'` |
 | `max_timesteps` | int | Max timesteps for time series models | `30` |
 | `models_idx` | list | Indices of models to use for model selection | `[0, 1, 2]` |
@@ -165,7 +166,6 @@ Below are the main arguments you can pass to `create_experiment` (or the `Experi
 | `perform_hyperopt` | bool | Whether to perform hyperparameter optimization | `True`/`False` |
 | `plot` | bool | Whether to plot results | `True`/`False` |
 | `preserve_model` | bool | Whether to preserve the best model | `True`/`False` |
-| `session_name` | str | Name for the training session | `'my_session'` |
 | `target_clf` | list | List of classification target column indices/names | `[1, 2, 3]` |
 | `target_mclf` | list | Multi-class classification targets (not yet implemented) | `[11]` |
 | `target_numbers` | list | List of regression target column indices/names | `[1, 2, 3]` |
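
The README change above renames the `session_name` argument to `experiment_name`. A hedged sketch of a call site after upgrading; the import path and the argument values are assumptions for illustration, mirroring the example column of the table.

import pandas as pd
from lecrapaud import create_experiment  # assumed public import; the helper may live under lecrapaud.api

your_dataframe = pd.DataFrame()  # placeholder for your main dataset

experiment = create_experiment(
    data=your_dataframe,
    experiment_name="my_session",  # formerly passed as session_name in <= 0.5.x
    date_column="date",
    group_column="stock_id",
    models_idx=[0, 1, 2],
)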
@@ -0,0 +1,43 @@
+lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
+lecrapaud/api.py,sha256=JFwOCawI9fYXod-Jt2w1Y_UWUoGA-bPqGaN_dtP0-gs,10289
+lecrapaud/config.py,sha256=82JaFv8HWsrwuzOo28kOXEdLaJ8KIzr0P3cXx8CkeMA,936
+lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
+lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
+lecrapaud/db/alembic/env.py,sha256=rseEi8oR_eKXYYW3UwOKiCMuDEwT4lxsT7llySOUpgk,2305
+lecrapaud/db/alembic/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
+lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py,sha256=MNPyqWaQSHNV8zljD1G9f-LzrVz-nOKlgOhHEE0U8Oo,13060
+lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py,sha256=g6aLRV6jAKXkPUEcs9FAeGfsYpe9rMTxfqbNib3U0-U,809
+lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=dl6tfvcqErgJ6NKvjve0euu7l0BWyEAKSS-ychsEAl8,1139
+lecrapaud/db/alembic.ini,sha256=zgvur-5jnLsT66_98FaTOTNgjwObGZCE0HqMwRAeJrs,3587
+lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
+lecrapaud/db/models/base.py,sha256=CYtof_UjFwX3C7XUifequh_UtLHJ25bU7LCwT501uGE,7508
+lecrapaud/db/models/experiment.py,sha256=WNV5gz78JljL0xlAjy121K3cgDqwC0r6zL3bpH6b0dY,3642
+lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
+lecrapaud/db/models/feature_selection.py,sha256=nXy_Lg3uDxid71vYll_qzdo8ajYsJEXskI7vLQ3uyW0,3315
+lecrapaud/db/models/feature_selection_rank.py,sha256=PvEpdv-JJt2wZMtX5TO0wyZ3IONlPkeDaC49i0VA-qU,2074
+lecrapaud/db/models/model.py,sha256=F0hyMjd4FFHCv6_arIWBEmBCGOfG3b6_uzU8ExtFE90,952
+lecrapaud/db/models/model_selection.py,sha256=fkZoUv7fdlBygWsfQyYPoayLomyp-gowiA3fbFPqdqw,1827
+lecrapaud/db/models/model_training.py,sha256=egggSfkW8C2nTadytc5DdjU7d2VEMT6LRRZxO1ZD5To,1600
+lecrapaud/db/models/score.py,sha256=_yaa6yBxugcOZMvLxqqIaMN7QGvzAOzOGCYQO0_gBjw,1601
+lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
+lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
+lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
+lecrapaud/experiment.py,sha256=NwwGDZqUyvvRu5EDK3_Oh0_kF29bNIPDawVFFpzFvZM,2350
+lecrapaud/feature_engineering.py,sha256=jrU9OCKX1UtEoEJQoEwrvVf6NT3nQfGCX4CBy8ceZu4,31168
+lecrapaud/feature_selection.py,sha256=v2-wAmm_U5_aI0NUB5xYhI1KoB60-1-CBAyJgFbyaRk,42713
+lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
+lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
+lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
+lecrapaud/jobs/scheduler.py,sha256=SiYWPxokpKnR8V6btLOO6gbK0PEjSRoeG0kCbQvYPf4,990
+lecrapaud/jobs/tasks.py,sha256=OjI4RZHQQBH64dc0rlIK23wDhcOgE-cPhNZnzOmkgaE,1649
+lecrapaud/model_selection.py,sha256=wX7ON5qIfR-wV6KBDaAvbMHEfa2kqRbZIzwj09KKgVg,62008
+lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
+lecrapaud/speed_tests/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
+lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
+lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
+lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
+lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
+lecrapaud-0.6.2.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
+lecrapaud-0.6.2.dist-info/METADATA,sha256=IX_u-tTi8o_SkBezBF6OJlINiHMBUR3HaWxqVYfs-o0,11623
+lecrapaud-0.6.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+lecrapaud-0.6.2.dist-info/RECORD,,
File without changes
@@ -1,71 +0,0 @@
-import pandas as pd
-import numpy as np
-from sklearn.decomposition import PCA
-from sentence_transformers import SentenceTransformer
-
-# Sample DataFrame with categorical features
-data = pd.DataFrame(
-    {
-        "SECTOR": ["Tech", "Finance", "Health", "Education", "Retail"],
-        "SUBINDUSTRY": [
-            "Software",
-            "Banking",
-            "Pharmaceuticals",
-            "Online Education",
-            "E-commerce",
-        ],
-        "LOCATION": ["USA", "UK", "Germany", "India", "Brazil"],
-    }
-)
-
-# Step 1: Load a pre-trained Word2Vec-like model from Hugging Face (Sentence Transformer)
-# This model generates dense vector representations (embeddings) of text
-model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
-
-# Step 2: Use the model to generate embeddings for each categorical feature
-# We'll generate embeddings for each category in SECTOR, SUBINDUSTRY, and LOCATION
-
-
-def get_embeddings(text_column):
-    """Function to generate embeddings for a given text column."""
-    return np.array([model.encode(text) for text in text_column])
-
-
-# Generate embeddings for the categorical features
-sector_embeddings = get_embeddings(data["SECTOR"])
-subindustry_embeddings = get_embeddings(data["SUBINDUSTRY"])
-location_embeddings = get_embeddings(data["LOCATION"])
-
-
-# Step 3: Reduce dimensionality using PCA to k dimensions
-def reduce_dimensionality(embeddings, k):
-    """Function to reduce dimensionality of embeddings using PCA."""
-    pca = PCA(n_components=k)
-    return pca.fit_transform(embeddings)
-
-
-# Set k (number of dimensions after PCA)
-k = 3  # Reduce to 3 dimensions
-
-# Apply PCA to reduce dimensionality of the embeddings
-reduced_sector_embeddings = reduce_dimensionality(sector_embeddings, k)
-reduced_subindustry_embeddings = reduce_dimensionality(subindustry_embeddings, k)
-reduced_location_embeddings = reduce_dimensionality(location_embeddings, k)
-
-# Step 4: Combine the reduced embeddings back into the DataFrame
-# Create new DataFrames for the reduced embeddings
-sector_df = pd.DataFrame(
-    reduced_sector_embeddings, columns=[f"SECTOR_PC{i+1}" for i in range(k)]
-)
-subindustry_df = pd.DataFrame(
-    reduced_subindustry_embeddings, columns=[f"SUBINDUSTRY_PC{i+1}" for i in range(k)]
-)
-location_df = pd.DataFrame(
-    reduced_location_embeddings, columns=[f"LOCATION_PC{i+1}" for i in range(k)]
-)
-
-# Concatenate the reduced embeddings with the original data (if needed)
-encoded_data = pd.concat([sector_df, subindustry_df, location_df], axis=1)
-
-# Display the resulting DataFrame with reduced embeddings
-print(encoded_data)
@@ -1,309 +0,0 @@
-import pandas as pd
-import numpy as np
-
-
-def rsi(ohlc: pd.DataFrame, period: int = 14) -> pd.Series:
-    """Implements the RSI indicator
-
-    Args:
-    - ohlc (pd.DataFrame):
-    - period (int):
-
-    Return:
-    an pd.Series with the RSI indicator values
-    """
-    close = ohlc["CLOSE"]
-    delta = close.diff()
-
-    gain = (delta.where(delta > 0, 0)).ewm(alpha=1 / period).mean()
-    loss = (-delta.where(delta < 0, 0)).ewm(alpha=1 / period).mean()
-
-    rs = gain / loss
-    rsi = 100 - (100 / (1 + rs))
-    return pd.Series(rsi, index=ohlc.index)
-
-
-def macd(
-    ohlc: pd.DataFrame,
-    short_period: int = 12,
-    long_period: int = 26,
-    signal_period: int = 9,
-):
-    close = ohlc["CLOSE"]
-    short_ema = close.ewm(span=short_period, adjust=False).mean()
-    long_ema = close.ewm(span=long_period, adjust=False).mean()
-
-    macd_line = short_ema - long_ema
-    signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()
-    return macd_line, signal_line
-
-
-def bollinger_bands(ohlc: pd.DataFrame, period: int = 20, num_std: int = 2):
-    close = ohlc["CLOSE"]
-    sma = close.rolling(window=period).mean()
-    std = close.rolling(window=period).std()
-
-    upper_band = sma + (num_std * std)
-    lower_band = sma - (num_std * std)
-
-    return upper_band, sma, lower_band
-
-
-def adx(ohlc: pd.DataFrame, period: int = 14):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    plus_dm = high.diff().where((high.diff() > low.diff()) & (high.diff() > 0), 0)
-    minus_dm = low.diff().where((low.diff() > high.diff()) & (low.diff() > 0), 0)
-
-    tr = pd.concat(
-        [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
-    ).max(axis=1)
-
-    atr = tr.rolling(window=period).mean()
-    plus_di = 100 * (plus_dm.rolling(window=period).mean() / atr)
-    minus_di = 100 * (minus_dm.rolling(window=period).mean() / atr)
-
-    dx = 100 * abs(plus_di - minus_di) / (plus_di + minus_di)
-    adx = dx.rolling(window=period).mean()
-
-    return adx
-
-
-def sma(ohlc: pd.DataFrame, period: int):
-    return ohlc["CLOSE"].rolling(window=period).mean()
-
-
-def ema(ohlc: pd.DataFrame, period: int):
-    return ohlc["CLOSE"].ewm(span=period, adjust=False).mean()
-
-
-def atr(ohlc: pd.DataFrame, period: int = 14):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    tr = pd.concat(
-        [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
-    ).max(axis=1)
-    atr = tr.rolling(window=period).mean()
-
-    return atr
-
-
-def stochastic(ohlc: pd.DataFrame, period: int = 14, k_slowing_period: int = 3):
-    low_min = ohlc["LOW"].rolling(window=period).min()
-    high_max = ohlc["HIGH"].rolling(window=period).max()
-
-    k_percent = 100 * (ohlc["CLOSE"] - low_min) / (high_max - low_min)
-    d_percent = k_percent.rolling(window=k_slowing_period).mean()  # Smoothed K
-
-    return k_percent, d_percent
-
-
-def mfi(ohlc: pd.DataFrame, period: int = 14):
-    typical_price = (ohlc["HIGH"] + ohlc["LOW"] + ohlc["CLOSE"]) / 3
-    money_flow = typical_price * ohlc["VOLUME"]
-
-    positive_flow = money_flow.where(typical_price > typical_price.shift(), 0)
-    negative_flow = money_flow.where(typical_price < typical_price.shift(), 0)
-
-    positive_mf = positive_flow.rolling(window=period).sum()
-    negative_mf = negative_flow.rolling(window=period).sum()
-
-    mfi = 100 - (100 / (1 + (positive_mf / negative_mf)))
-
-    return mfi
-
-
-def fibonacci_retracement(high: float, low: float):
-    diff = high - low
-    levels = {
-        "23.6%": high - diff * 0.236,
-        "38.2%": high - diff * 0.382,
-        "50.0%": high - diff * 0.5,
-        "61.8%": high - diff * 0.618,
-        "100%": low,
-    }
-    return levels
-
-
-def ichimoku_cloud(ohlc: pd.DataFrame):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-
-    tenkan_sen = (high.rolling(window=9).max() + low.rolling(window=9).min()) / 2
-    kijun_sen = (high.rolling(window=26).max() + low.rolling(window=26).min()) / 2
-    senkou_span_a = ((tenkan_sen + kijun_sen) / 2).shift(26)
-    senkou_span_b = (
-        (high.rolling(window=52).max() + low.rolling(window=52).min()) / 2
-    ).shift(26)
-    chikou_span = ohlc["CLOSE"].shift(26)
-
-    return tenkan_sen, kijun_sen, senkou_span_a, senkou_span_b, chikou_span
-
-
-def parabolic_sar(ohlc: pd.DataFrame, af_step: float = 0.02, af_max: float = 0.2):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    # Initialize the SAR series with the closing prices as a starting point
-    sar = close.copy()
-
-    # Define initial trend and extreme point
-    trend_up = True
-    ep = high.iloc[0] if trend_up else low.iloc[0]  # Extremum Price
-    af = af_step  # Acceleration Factor
-
-    # Iterate over the data points starting from the second row
-    for i in range(1, len(ohlc)):
-        prev_sar = sar.iloc[i - 1]  # Previous SAR value
-
-        if trend_up:
-            # Update SAR for an uptrend
-            sar.iloc[i] = prev_sar + af * (ep - prev_sar)
-            if low.iloc[i] < sar.iloc[i]:
-                # Switch to downtrend if current low breaks the SAR
-                trend_up = False
-                sar.iloc[i] = ep
-                ep = low.iloc[i]
-                af = af_step
-        else:
-            # Update SAR for a downtrend
-            sar.iloc[i] = prev_sar + af * (ep - prev_sar)
-            if high.iloc[i] > sar.iloc[i]:
-                # Switch to uptrend if current high breaks the SAR
-                trend_up = True
-                sar.iloc[i] = ep
-                ep = high.iloc[i]
-                af = af_step
-
-        # Update the extremum price (EP) and acceleration factor (AF) based on the trend
-        if trend_up:
-            if high.iloc[i] > ep:
-                ep = high.iloc[i]
-                af = min(af + af_step, af_max)
-        else:
-            if low.iloc[i] < ep:
-                ep = low.iloc[i]
-                af = min(af + af_step, af_max)
-
-    return sar
-
-
-def chaikin_money_flow(ohlc: pd.DataFrame, period: int = 21):
-    money_flow_multiplier = (
-        (ohlc["CLOSE"] - ohlc["LOW"]) - (ohlc["HIGH"] - ohlc["CLOSE"])
-    ) / (ohlc["HIGH"] - ohlc["LOW"])
-    money_flow_volume = money_flow_multiplier * ohlc["VOLUME"]
-
-    cmf = (
-        money_flow_volume.rolling(window=period).sum()
-        / ohlc["VOLUME"].rolling(window=period).sum()
-    )
-
-    return cmf
-
-
-def pivot_points(ohlc: pd.DataFrame):
-    high = ohlc["HIGH"]
-    low = ohlc["LOW"]
-    close = ohlc["CLOSE"]
-
-    pivot = (high + low + close) / 3
-    r1 = 2 * pivot - low
-    s1 = 2 * pivot - high
-    r2 = pivot + (high - low)
-    s2 = pivot - (high - low)
-
-    return pivot, r1, s1, r2, s2
-
-
-def volatility(
-    ohlc: pd.DataFrame,
-    period: int = 14,
-):
-    """
-    Calculates rolling volatility for each stock based on the rolling standard deviation of returns.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data, including returns (RET) and stock identifier.
-    - period: int, the rolling window period for volatility calculation (default is 14 days).
-
-    Returns:
-    - pd.Series representing the calculated volatility for each row in the DataFrame.
-    """
-
-    # Calculate returns based on CLOSE prices
-    ret = ohlc["CLOSE"].pct_change()
-
-    # Calculate rolling standard deviation of returns
-    rolling_std = ret.rolling(window=period, min_periods=1).std()
-
-    # Multiply by the square root of the period to scale volatility
-    volatility = rolling_std * np.sqrt(period)
-
-    return volatility
-
-
-def cumulative_return(ohlc: pd.DataFrame, period: int = 14):
-    """
-    Calculates cumulative returns over the specified period using the 'CLOSE' price.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data, including 'CLOSE' column.
-    - period: int, the number of days over which to calculate the cumulative return.
-
-    Returns:
-    - pd.Series representing the cumulative returns for each row in the DataFrame.
-    """
-
-    # Calculate cumulative return based on CLOSE prices
-    cumul_ret = ohlc["CLOSE"].pct_change(period - 1)
-
-    return cumul_ret
-
-
-def close_diff(ohlc: pd.DataFrame):
-    """
-    Calculates the difference between consecutive close prices.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing stock data with a 'CLOSE' column.
-
-    Returns:
-    - pd.Series representing the difference in closing prices.
-    """
-    return ohlc["CLOSE"].diff()
-
-
-def obv(ohlc: pd.DataFrame):
-    """
-    Calculates On-Balance Volume (OBV) based on closing price differences and volume.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing 'CLOSE', 'VOLUME' columns.
-
-    Returns:
-    - pd.Series representing the OBV values.
-    """
-    close_diff = ohlc["CLOSE"].diff()
-    obv = (np.sign(close_diff) * ohlc["VOLUME"]).fillna(0).cumsum()
-    return obv
-
-
-def pressure(ohlc: pd.DataFrame):
-    """
-    Calculates both upward and downward pressure based on price movements.
-
-    Parameters:
-    - ohlc: pd.DataFrame containing 'OPEN', 'HIGH', 'LOW', and 'CLOSE' columns.
-
-    Returns:
-    - pd.DataFrame with 'UPWARD_PRESSURE' and 'DOWNWARD_PRESSURE' columns.
-    """
-    upward = (ohlc["LOW"] - ohlc["OPEN"]) / ohlc["OPEN"]
-    downward = (ohlc["HIGH"] - ohlc["CLOSE"]) / ohlc["OPEN"]
-    return upward, downward
@@ -1,139 +0,0 @@
-# Experiments on sharpe ratio to calculate as loss or metric
-class SharpeRatioTFND(tf.keras.metrics.Metric):
-
-    def __init__(self, name="sharpe_ratio_tf_nd", **kwargs):
-        super().__init__(name=name, **kwargs)
-        self.sharpe_ratio = 0
-        self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATE", "TARGET_1"])
-
-    # @tf.numpy_function(Tout=tf.float32)
-    def update_state(self, data, y_pred, sample_weight=None):
-        portfolio_size = 10
-
-        y_true = pd.Series(data[:, 0].numpy(), index=data[:, 1].numpy(), name="TARGET")
-        y_pred = pd.Series(
-            y_pred.numpy().flatten(), index=data[:, 1].numpy(), name="PRED"
-        )
-
-        df = pd.concat(
-            [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-        )
-        self.df = pd.concat([self.df, df], axis=0)
-
-        def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-            return (
-                df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[
-                    :portfolio_size
-                ]
-            ).mean()
-
-        buf = self.df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-        if buf.shape[0] == 1:
-            self.sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
-        else:
-            self.sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-
-    def result(self):
-        return self.sharpe_ratio
-
-    def reset_states(self):
-        self.sharpe_ratio = 0
-        self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATES", "TARGET_1"])
-
-
-@tf.numpy_function(Tout=tf.float32)
-def sharpe_ratio_tf_nd(data, y_pred):
-
-    portfolio_size = 10
-
-    y_true = pd.Series(data[:, 0], index=data[:, 1], name="TARGET")
-    y_pred = pd.Series(y_pred.flatten(), index=data[:, 1], name="PRED")
-
-    df = pd.concat(
-        [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-    )
-
-    print(df)
-
-    def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-        print(
-            df.sort_values("PRED", ascending=False)[
-                ["PRED", "TARGET", "TARGET_1"]
-            ].head(10)
-        )
-        return (
-            df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[:portfolio_size]
-        ).mean()
-
-    buf = df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-    if buf.shape[0] == 1:
-        sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
-    else:
-        sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-    print(buf, sharpe_ratio)
-    return sharpe_ratio
-
-
-def sharpe_ratio_tf(data, y_pred):
-
-    portfolio_size = 10
-    # unscale
-    y_true = data[:, 0]
-    indexes = data[:, 1]
-
-    dates = stock_data[["DATE", "TARGET_1"]].iloc[indexes]
-    dates = tf.convert_to_tensor(dates)
-    dates = tf.dtypes.cast(dates, tf.float32)
-
-    y_true, y_pred = unscale_tf(y_true, y_pred)
-    y_true = tf.dtypes.cast(y_true, tf.float32)
-    y_pred = tf.dtypes.cast(y_pred, tf.float32)
-    y_true = tf.reshape(y_true, y_pred.shape)
-
-    # concat and sort by pred
-    print(y_pred, y_true, dates)
-    tensor = tf.concat([y_pred, y_true, dates], axis=1)
-    tensor_ordered = tf.gather(
-        tensor, tf.argsort(tensor[:, 0], direction="DESCENDING"), axis=0
-    )
-
-    # groupby and reduce with mean of 10 first elements per date groups.
-    def init_func(_):
-        return (0.0, 0.0)
-
-    def reduce_func(state, value):
-        print(state, value)
-        if state[1] < portfolio_size:
-            return (state[0] + value[3], state[1] + 1)
-        else:
-            return state
-
-    def finalize_func(s, n):
-        return s / n
-
-    reducer = tf.data.experimental.Reducer(init_func, reduce_func, finalize_func)
-
-    def key_f(row):
-        print(row)
-        return tf.dtypes.cast(row[2], tf.int64)
-
-    ds_transformation_func = tf.data.experimental.group_by_reducer(
-        key_func=key_f, reducer=reducer
-    )
-    print(tensor_ordered, tensor_ordered.shape)
-    slices = tf.slice(tensor_ordered, [0, 0], [-1, -1])
-    print(slices)
-    ds = tf.data.Dataset.from_tensor_slices(slices)
-    buf = ds.apply(ds_transformation_func)
-    # ds = ds.batch(10)
-
-    # print(ds.as_numpy_iterator())
-    # iterator = iter(ds)
-    # buf = iterator
-    print(buf)
-    # sharpe calcul
-    sharpe_ratio = (K.mean(buf) * 252) / (K.std(buf) * K.sqrt(252))
-    print(sharpe_ratio)
-    return sharpe_ratio
@@ -1,37 +0,0 @@
-# def _get_weekly_return(y_true, y_pred):
-#     df = pd.concat([y_true, y_pred, stock_data[['YEARWEEK', 'STOCK', 'TARGET_1']]], join='inner', axis=1)
-#     df['PRED'] += 1
-#     df['TARGET'] += 1
-#     return df[['YEARWEEK', 'STOCK', 'PRED', 'TARGET']].groupby(['YEARWEEK', 'STOCK']).prod().reset_index()
-
-# def _calc_spread_return_per_week(df, portfolio_size):
-#     return (df.sort_values('PRED', ascending=False)['TARGET_1'][:portfolio_size] - 1).mean()
-
-# def sharpe_ratio_weekly(y_true, y_pred, portfolio_size:int=10):
-#     df = _get_weekly_return(y_true, y_pred)
-#     buf = df.groupby('YEARWEEK').apply(_calc_spread_return_per_week, portfolio_size)
-#     sharpe_ratio = (buf.mean() * 52) / (buf.std() * np.sqrt(52))
-#     buf += 1
-#     cumulated_roi = buf.prod() - 1
-#     cagr = buf.prod() ** (1 / (buf.shape[0]/52) ) - 1
-#     return sharpe_ratio, cumulated_roi, cagr


-def sharpe_ratio_daily(y_true, y_pred, portfolio_size: int = 10):
-    df = pd.concat(
-        [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
-    )
-
-    def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
-        # print(df.sort_values('PRED', ascending=False)[['PRED', 'TARGET', 'TARGET_1']].head(10))
-        return (
-            df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[:portfolio_size]
-        ).mean()
-
-    buf = df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
-
-    sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
-    buf += 1
-    cumulated_roi = buf.prod() - 1
-    cagr = buf.prod() ** (1 / (buf.shape[0] / 252)) - 1
-    return sharpe_ratio, cumulated_roi, cagr