lecrapaud-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lecrapaud might be problematic.

Files changed (63)
  1. lecrapaud/__init__.py +1 -0
  2. lecrapaud/api.py +271 -0
  3. lecrapaud/config.py +25 -0
  4. lecrapaud/db/__init__.py +1 -0
  5. lecrapaud/db/alembic/README +1 -0
  6. lecrapaud/db/alembic/env.py +78 -0
  7. lecrapaud/db/alembic/script.py.mako +26 -0
  8. lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +295 -0
  9. lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +30 -0
  10. lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +52 -0
  11. lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +34 -0
  12. lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +39 -0
  13. lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +36 -0
  14. lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +30 -0
  15. lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +132 -0
  16. lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +36 -0
  17. lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +62 -0
  18. lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +107 -0
  19. lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +38 -0
  20. lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +44 -0
  21. lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +30 -0
  22. lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +32 -0
  23. lecrapaud/db/alembic/versions/2025_06_17_1652-c45f5e49fa2c_make_fields_nullable.py +89 -0
  24. lecrapaud/db/models/__init__.py +11 -0
  25. lecrapaud/db/models/base.py +181 -0
  26. lecrapaud/db/models/dataset.py +129 -0
  27. lecrapaud/db/models/feature.py +45 -0
  28. lecrapaud/db/models/feature_selection.py +125 -0
  29. lecrapaud/db/models/feature_selection_rank.py +79 -0
  30. lecrapaud/db/models/model.py +40 -0
  31. lecrapaud/db/models/model_selection.py +63 -0
  32. lecrapaud/db/models/model_training.py +62 -0
  33. lecrapaud/db/models/score.py +65 -0
  34. lecrapaud/db/models/target.py +67 -0
  35. lecrapaud/db/session.py +45 -0
  36. lecrapaud/directory_management.py +28 -0
  37. lecrapaud/experiment.py +64 -0
  38. lecrapaud/feature_engineering.py +846 -0
  39. lecrapaud/feature_selection.py +1167 -0
  40. lecrapaud/integrations/openai_integration.py +225 -0
  41. lecrapaud/jobs/__init__.py +13 -0
  42. lecrapaud/jobs/config.py +17 -0
  43. lecrapaud/jobs/scheduler.py +36 -0
  44. lecrapaud/jobs/tasks.py +57 -0
  45. lecrapaud/model_selection.py +1671 -0
  46. lecrapaud/predictions.py +292 -0
  47. lecrapaud/preprocessing.py +984 -0
  48. lecrapaud/search_space.py +848 -0
  49. lecrapaud/services/__init__.py +0 -0
  50. lecrapaud/services/embedding_categorical.py +71 -0
  51. lecrapaud/services/indicators.py +309 -0
  52. lecrapaud/speed_tests/experiments.py +139 -0
  53. lecrapaud/speed_tests/test-gpu-bilstm.ipynb +261 -0
  54. lecrapaud/speed_tests/test-gpu-resnet.ipynb +166 -0
  55. lecrapaud/speed_tests/test-gpu-transformers.ipynb +254 -0
  56. lecrapaud/speed_tests/tests.ipynb +145 -0
  57. lecrapaud/speed_tests/trash.py +37 -0
  58. lecrapaud/training.py +239 -0
  59. lecrapaud/utils.py +246 -0
  60. lecrapaud-0.1.0.dist-info/LICENSE +201 -0
  61. lecrapaud-0.1.0.dist-info/METADATA +105 -0
  62. lecrapaud-0.1.0.dist-info/RECORD +63 -0
  63. lecrapaud-0.1.0.dist-info/WHEEL +4 -0
lecrapaud/services/embedding_categorical.py
@@ -0,0 +1,71 @@
+ import pandas as pd
+ import numpy as np
+ from sklearn.decomposition import PCA
+ from sentence_transformers import SentenceTransformer
+
+ # Sample DataFrame with categorical features
+ data = pd.DataFrame(
+     {
+         "SECTOR": ["Tech", "Finance", "Health", "Education", "Retail"],
+         "SUBINDUSTRY": [
+             "Software",
+             "Banking",
+             "Pharmaceuticals",
+             "Online Education",
+             "E-commerce",
+         ],
+         "LOCATION": ["USA", "UK", "Germany", "India", "Brazil"],
+     }
+ )
+
+ # Step 1: Load a pre-trained Word2Vec-like model from Hugging Face (Sentence Transformer)
+ # This model generates dense vector representations (embeddings) of text
+ model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
+
+ # Step 2: Use the model to generate embeddings for each categorical feature
+ # We'll generate embeddings for each category in SECTOR, SUBINDUSTRY, and LOCATION
+
+
+ def get_embeddings(text_column):
+     """Function to generate embeddings for a given text column."""
+     return np.array([model.encode(text) for text in text_column])
+
+
+ # Generate embeddings for the categorical features
+ sector_embeddings = get_embeddings(data["SECTOR"])
+ subindustry_embeddings = get_embeddings(data["SUBINDUSTRY"])
+ location_embeddings = get_embeddings(data["LOCATION"])
+
+
+ # Step 3: Reduce dimensionality using PCA to k dimensions
+ def reduce_dimensionality(embeddings, k):
+     """Function to reduce dimensionality of embeddings using PCA."""
+     pca = PCA(n_components=k)
+     return pca.fit_transform(embeddings)
+
+
+ # Set k (number of dimensions after PCA)
+ k = 3  # Reduce to 3 dimensions
+
+ # Apply PCA to reduce dimensionality of the embeddings
+ reduced_sector_embeddings = reduce_dimensionality(sector_embeddings, k)
+ reduced_subindustry_embeddings = reduce_dimensionality(subindustry_embeddings, k)
+ reduced_location_embeddings = reduce_dimensionality(location_embeddings, k)
+
+ # Step 4: Combine the reduced embeddings back into the DataFrame
+ # Create new DataFrames for the reduced embeddings
+ sector_df = pd.DataFrame(
+     reduced_sector_embeddings, columns=[f"SECTOR_PC{i+1}" for i in range(k)]
+ )
+ subindustry_df = pd.DataFrame(
+     reduced_subindustry_embeddings, columns=[f"SUBINDUSTRY_PC{i+1}" for i in range(k)]
+ )
+ location_df = pd.DataFrame(
+     reduced_location_embeddings, columns=[f"LOCATION_PC{i+1}" for i in range(k)]
+ )
+
+ # Concatenate the reduced embeddings with the original data (if needed)
+ encoded_data = pd.concat([sector_df, subindustry_df, location_df], axis=1)
+
+ # Display the resulting DataFrame with reduced embeddings
+ print(encoded_data)
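
The snippet above builds encoded_data from the reduced embeddings only. If the principal-component columns are wanted next to the original categorical columns, they can be joined back onto data. A minimal sketch, assuming the data and encoded_data frames from the snippet (both use the default RangeIndex); the variable name data_with_embeddings is chosen here for illustration and is not part of the package:

import pandas as pd  # same import as in the snippet above

# Column-wise concat: rows line up because both frames share the default RangeIndex
data_with_embeddings = pd.concat([data, encoded_data], axis=1)
print(data_with_embeddings.head())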
lecrapaud/services/indicators.py
@@ -0,0 +1,309 @@
+ import pandas as pd
+ import numpy as np
+
+
+ def rsi(ohlc: pd.DataFrame, period: int = 14) -> pd.Series:
+     """Implements the RSI indicator
+
+     Args:
+     - ohlc (pd.DataFrame): OHLC price data with a 'CLOSE' column
+     - period (int): lookback period (default 14)
+
+     Return:
+     a pd.Series with the RSI indicator values
+     """
+     close = ohlc["CLOSE"]
+     delta = close.diff()
+
+     gain = (delta.where(delta > 0, 0)).ewm(alpha=1 / period).mean()
+     loss = (-delta.where(delta < 0, 0)).ewm(alpha=1 / period).mean()
+
+     rs = gain / loss
+     rsi = 100 - (100 / (1 + rs))
+     return pd.Series(rsi, index=ohlc.index)
+
+
+ def macd(
+     ohlc: pd.DataFrame,
+     short_period: int = 12,
+     long_period: int = 26,
+     signal_period: int = 9,
+ ):
+     close = ohlc["CLOSE"]
+     short_ema = close.ewm(span=short_period, adjust=False).mean()
+     long_ema = close.ewm(span=long_period, adjust=False).mean()
+
+     macd_line = short_ema - long_ema
+     signal_line = macd_line.ewm(span=signal_period, adjust=False).mean()
+     return macd_line, signal_line
+
+
+ def bollinger_bands(ohlc: pd.DataFrame, period: int = 20, num_std: int = 2):
+     close = ohlc["CLOSE"]
+     sma = close.rolling(window=period).mean()
+     std = close.rolling(window=period).std()
+
+     upper_band = sma + (num_std * std)
+     lower_band = sma - (num_std * std)
+
+     return upper_band, sma, lower_band
+
+
+ def adx(ohlc: pd.DataFrame, period: int = 14):
+     high = ohlc["HIGH"]
+     low = ohlc["LOW"]
+     close = ohlc["CLOSE"]
+
+     plus_dm = high.diff().where((high.diff() > low.diff()) & (high.diff() > 0), 0)
+     minus_dm = low.diff().where((low.diff() > high.diff()) & (low.diff() > 0), 0)
+
+     tr = pd.concat(
+         [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
+     ).max(axis=1)
+
+     atr = tr.rolling(window=period).mean()
+     plus_di = 100 * (plus_dm.rolling(window=period).mean() / atr)
+     minus_di = 100 * (minus_dm.rolling(window=period).mean() / atr)
+
+     dx = 100 * abs(plus_di - minus_di) / (plus_di + minus_di)
+     adx = dx.rolling(window=period).mean()
+
+     return adx
+
+
+ def sma(ohlc: pd.DataFrame, period: int):
+     return ohlc["CLOSE"].rolling(window=period).mean()
+
+
+ def ema(ohlc: pd.DataFrame, period: int):
+     return ohlc["CLOSE"].ewm(span=period, adjust=False).mean()
+
+
+ def atr(ohlc: pd.DataFrame, period: int = 14):
+     high = ohlc["HIGH"]
+     low = ohlc["LOW"]
+     close = ohlc["CLOSE"]
+
+     tr = pd.concat(
+         [high - low, abs(high - close.shift()), abs(low - close.shift())], axis=1
+     ).max(axis=1)
+     atr = tr.rolling(window=period).mean()
+
+     return atr
+
+
+ def stochastic(ohlc: pd.DataFrame, period: int = 14, k_slowing_period: int = 3):
+     low_min = ohlc["LOW"].rolling(window=period).min()
+     high_max = ohlc["HIGH"].rolling(window=period).max()
+
+     k_percent = 100 * (ohlc["CLOSE"] - low_min) / (high_max - low_min)
+     d_percent = k_percent.rolling(window=k_slowing_period).mean()  # Smoothed %K
+
+     return k_percent, d_percent
+
+
+ def mfi(ohlc: pd.DataFrame, period: int = 14):
+     typical_price = (ohlc["HIGH"] + ohlc["LOW"] + ohlc["CLOSE"]) / 3
+     money_flow = typical_price * ohlc["VOLUME"]
+
+     positive_flow = money_flow.where(typical_price > typical_price.shift(), 0)
+     negative_flow = money_flow.where(typical_price < typical_price.shift(), 0)
+
+     positive_mf = positive_flow.rolling(window=period).sum()
+     negative_mf = negative_flow.rolling(window=period).sum()
+
+     mfi = 100 - (100 / (1 + (positive_mf / negative_mf)))
+
+     return mfi
+
+
+ def fibonacci_retracement(high: float, low: float):
+     diff = high - low
+     levels = {
+         "23.6%": high - diff * 0.236,
+         "38.2%": high - diff * 0.382,
+         "50.0%": high - diff * 0.5,
+         "61.8%": high - diff * 0.618,
+         "100%": low,
+     }
+     return levels
+
+
+ def ichimoku_cloud(ohlc: pd.DataFrame):
+     high = ohlc["HIGH"]
+     low = ohlc["LOW"]
+
+     tenkan_sen = (high.rolling(window=9).max() + low.rolling(window=9).min()) / 2
+     kijun_sen = (high.rolling(window=26).max() + low.rolling(window=26).min()) / 2
+     senkou_span_a = ((tenkan_sen + kijun_sen) / 2).shift(26)
+     senkou_span_b = (
+         (high.rolling(window=52).max() + low.rolling(window=52).min()) / 2
+     ).shift(26)
+     chikou_span = ohlc["CLOSE"].shift(26)
+
+     return tenkan_sen, kijun_sen, senkou_span_a, senkou_span_b, chikou_span
+
+
+ def parabolic_sar(ohlc: pd.DataFrame, af_step: float = 0.02, af_max: float = 0.2):
+     high = ohlc["HIGH"]
+     low = ohlc["LOW"]
+     close = ohlc["CLOSE"]
+
+     # Initialize the SAR series with the closing prices as a starting point
+     sar = close.copy()
+
+     # Define initial trend and extreme point
+     trend_up = True
+     ep = high.iloc[0] if trend_up else low.iloc[0]  # Extremum Price
+     af = af_step  # Acceleration Factor
+
+     # Iterate over the data points starting from the second row
+     for i in range(1, len(ohlc)):
+         prev_sar = sar.iloc[i - 1]  # Previous SAR value
+
+         if trend_up:
+             # Update SAR for an uptrend
+             sar.iloc[i] = prev_sar + af * (ep - prev_sar)
+             if low.iloc[i] < sar.iloc[i]:
+                 # Switch to downtrend if current low breaks the SAR
+                 trend_up = False
+                 sar.iloc[i] = ep
+                 ep = low.iloc[i]
+                 af = af_step
+         else:
+             # Update SAR for a downtrend
+             sar.iloc[i] = prev_sar + af * (ep - prev_sar)
+             if high.iloc[i] > sar.iloc[i]:
+                 # Switch to uptrend if current high breaks the SAR
+                 trend_up = True
+                 sar.iloc[i] = ep
+                 ep = high.iloc[i]
+                 af = af_step
+
+         # Update the extremum price (EP) and acceleration factor (AF) based on the trend
+         if trend_up:
+             if high.iloc[i] > ep:
+                 ep = high.iloc[i]
+                 af = min(af + af_step, af_max)
+         else:
+             if low.iloc[i] < ep:
+                 ep = low.iloc[i]
+                 af = min(af + af_step, af_max)
+
+     return sar
+
+
+ def chaikin_money_flow(ohlc: pd.DataFrame, period: int = 21):
+     money_flow_multiplier = (
+         (ohlc["CLOSE"] - ohlc["LOW"]) - (ohlc["HIGH"] - ohlc["CLOSE"])
+     ) / (ohlc["HIGH"] - ohlc["LOW"])
+     money_flow_volume = money_flow_multiplier * ohlc["VOLUME"]
+
+     cmf = (
+         money_flow_volume.rolling(window=period).sum()
+         / ohlc["VOLUME"].rolling(window=period).sum()
+     )
+
+     return cmf
+
+
+ def pivot_points(ohlc: pd.DataFrame):
+     high = ohlc["HIGH"]
+     low = ohlc["LOW"]
+     close = ohlc["CLOSE"]
+
+     pivot = (high + low + close) / 3
+     r1 = 2 * pivot - low
+     s1 = 2 * pivot - high
+     r2 = pivot + (high - low)
+     s2 = pivot - (high - low)
+
+     return pivot, r1, s1, r2, s2
+
+
+ def volatility(
+     ohlc: pd.DataFrame,
+     period: int = 14,
+ ):
+     """
+     Calculates rolling volatility based on the rolling standard deviation of returns.
+
+     Parameters:
+     - ohlc: pd.DataFrame containing stock data, including a 'CLOSE' column.
+     - period: int, the rolling window period for volatility calculation (default is 14 days).
+
+     Returns:
+     - pd.Series representing the calculated volatility for each row in the DataFrame.
+     """
+
+     # Calculate returns based on CLOSE prices
+     ret = ohlc["CLOSE"].pct_change()
+
+     # Calculate rolling standard deviation of returns
+     rolling_std = ret.rolling(window=period, min_periods=1).std()
+
+     # Multiply by the square root of the period to scale volatility
+     volatility = rolling_std * np.sqrt(period)
+
+     return volatility
+
+
+ def cumulative_return(ohlc: pd.DataFrame, period: int = 14):
+     """
+     Calculates cumulative returns over the specified period using the 'CLOSE' price.
+
+     Parameters:
+     - ohlc: pd.DataFrame containing stock data, including 'CLOSE' column.
+     - period: int, the number of days over which to calculate the cumulative return.
+
+     Returns:
+     - pd.Series representing the cumulative returns for each row in the DataFrame.
+     """
+
+     # Calculate cumulative return based on CLOSE prices
+     cumul_ret = ohlc["CLOSE"].pct_change(period - 1)
+
+     return cumul_ret
+
+
+ def close_diff(ohlc: pd.DataFrame):
+     """
+     Calculates the difference between consecutive close prices.
+
+     Parameters:
+     - ohlc: pd.DataFrame containing stock data with a 'CLOSE' column.
+
+     Returns:
+     - pd.Series representing the difference in closing prices.
+     """
+     return ohlc["CLOSE"].diff()
+
+
+ def obv(ohlc: pd.DataFrame):
+     """
+     Calculates On-Balance Volume (OBV) based on closing price differences and volume.
+
+     Parameters:
+     - ohlc: pd.DataFrame containing 'CLOSE', 'VOLUME' columns.
+
+     Returns:
+     - pd.Series representing the OBV values.
+     """
+     close_diff = ohlc["CLOSE"].diff()
+     obv = (np.sign(close_diff) * ohlc["VOLUME"]).fillna(0).cumsum()
+     return obv
+
+
+ def pressure(ohlc: pd.DataFrame):
+     """
+     Calculates both upward and downward pressure based on price movements.
+
+     Parameters:
+     - ohlc: pd.DataFrame containing 'OPEN', 'HIGH', 'LOW', and 'CLOSE' columns.
+
+     Returns:
+     - A tuple of two pd.Series: upward pressure and downward pressure.
+     """
+     upward = (ohlc["LOW"] - ohlc["OPEN"]) / ohlc["OPEN"]
+     downward = (ohlc["HIGH"] - ohlc["CLOSE"]) / ohlc["OPEN"]
+     return upward, downward
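
The indicator helpers above all expect an OHLC(V) DataFrame with upper-case column names ('OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME') and return pandas Series or tuples of Series. A minimal usage sketch, assuming the functions are importable from lecrapaud.services.indicators as the file list suggests; the sample values below are synthetic and only the column names matter:

import numpy as np
import pandas as pd

from lecrapaud.services.indicators import rsi, macd, bollinger_bands, stochastic

# Illustrative OHLCV frame with made-up prices
rng = np.random.default_rng(0)
close = 100 + rng.normal(0, 1, 60).cumsum()
ohlc = pd.DataFrame(
    {
        "OPEN": close + rng.normal(0, 0.5, 60),
        "HIGH": close + 1,
        "LOW": close - 1,
        "CLOSE": close,
        "VOLUME": rng.integers(1_000, 10_000, 60),
    }
)

print(rsi(ohlc).tail())                    # 14-period RSI
macd_line, signal_line = macd(ohlc)        # MACD line and its signal line
upper, mid, lower = bollinger_bands(ohlc)  # bands around a 20-period SMA
k_percent, d_percent = stochastic(ohlc)    # %K and its smoothed %D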
lecrapaud/speed_tests/experiments.py
@@ -0,0 +1,139 @@
+ import numpy as np
+ import pandas as pd
+ import tensorflow as tf
+ from tensorflow.keras import backend as K
+
+ # Experiments on the Sharpe ratio, computed as a loss or metric.
+ # NOTE: `stock_data` and `unscale_tf` are expected to be provided by the surrounding experiment context.
+ class SharpeRatioTFND(tf.keras.metrics.Metric):
+
+     def __init__(self, name="sharpe_ratio_tf_nd", **kwargs):
+         super().__init__(name=name, **kwargs)
+         self.sharpe_ratio = 0
+         self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATE", "TARGET_1"])
+
+     # @tf.numpy_function(Tout=tf.float32)
+     def update_state(self, data, y_pred, sample_weight=None):
+         portfolio_size = 10
+
+         y_true = pd.Series(data[:, 0].numpy(), index=data[:, 1].numpy(), name="TARGET")
+         y_pred = pd.Series(
+             y_pred.numpy().flatten(), index=data[:, 1].numpy(), name="PRED"
+         )
+
+         df = pd.concat(
+             [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
+         )
+         self.df = pd.concat([self.df, df], axis=0)
+
+         def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
+             return (
+                 df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[
+                     :portfolio_size
+                 ]
+             ).mean()
+
+         buf = self.df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
+
+         if buf.shape[0] == 1:
+             self.sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
+         else:
+             self.sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
+
+     def result(self):
+         return self.sharpe_ratio
+
+     def reset_states(self):
+         self.sharpe_ratio = 0
+         self.df = pd.DataFrame(columns=["TARGET", "PRED", "DATE", "TARGET_1"])
+
+
+ @tf.numpy_function(Tout=tf.float32)
+ def sharpe_ratio_tf_nd(data, y_pred):
+
+     portfolio_size = 10
+
+     y_true = pd.Series(data[:, 0], index=data[:, 1], name="TARGET")
+     y_pred = pd.Series(y_pred.flatten(), index=data[:, 1], name="PRED")
+
+     df = pd.concat(
+         [y_true, y_pred, stock_data[["DATE", "TARGET_1"]]], join="inner", axis=1
+     )
+
+     print(df)
+
+     def _calc_spread_return_per_day(df: pd.DataFrame, portfolio_size: int):
+         print(
+             df.sort_values("PRED", ascending=False)[
+                 ["PRED", "TARGET", "TARGET_1"]
+             ].head(10)
+         )
+         return (
+             df.sort_values("PRED", ascending=False)["TARGET_1"].iloc[:portfolio_size]
+         ).mean()
+
+     buf = df.groupby("DATE").apply(_calc_spread_return_per_day, portfolio_size)
+
+     if buf.shape[0] == 1:
+         sharpe_ratio = buf.values[0] * (252 / np.sqrt(252))
+     else:
+         sharpe_ratio = (buf.mean() * 252) / (buf.std() * np.sqrt(252))
+     print(buf, sharpe_ratio)
+     return sharpe_ratio
+
+
+ def sharpe_ratio_tf(data, y_pred):
+
+     portfolio_size = 10
+     # unscale
+     y_true = data[:, 0]
+     indexes = data[:, 1]
+
+     dates = stock_data[["DATE", "TARGET_1"]].iloc[indexes]
+     dates = tf.convert_to_tensor(dates)
+     dates = tf.dtypes.cast(dates, tf.float32)
+
+     y_true, y_pred = unscale_tf(y_true, y_pred)
+     y_true = tf.dtypes.cast(y_true, tf.float32)
+     y_pred = tf.dtypes.cast(y_pred, tf.float32)
+     y_true = tf.reshape(y_true, y_pred.shape)
+
+     # concat and sort by pred
+     print(y_pred, y_true, dates)
+     tensor = tf.concat([y_pred, y_true, dates], axis=1)
+     tensor_ordered = tf.gather(
+         tensor, tf.argsort(tensor[:, 0], direction="DESCENDING"), axis=0
+     )
+
+     # groupby and reduce with mean of the 10 first elements per date group
+     def init_func(_):
+         return (0.0, 0.0)
+
+     def reduce_func(state, value):
+         print(state, value)
+         if state[1] < portfolio_size:
+             return (state[0] + value[3], state[1] + 1)
+         else:
+             return state
+
+     def finalize_func(s, n):
+         return s / n
+
+     reducer = tf.data.experimental.Reducer(init_func, reduce_func, finalize_func)
+
+     def key_f(row):
+         print(row)
+         return tf.dtypes.cast(row[2], tf.int64)
+
+     ds_transformation_func = tf.data.experimental.group_by_reducer(
+         key_func=key_f, reducer=reducer
+     )
+     print(tensor_ordered, tensor_ordered.shape)
+     slices = tf.slice(tensor_ordered, [0, 0], [-1, -1])
+     print(slices)
+     ds = tf.data.Dataset.from_tensor_slices(slices)
+     buf = ds.apply(ds_transformation_func)
+     # ds = ds.batch(10)
+
+     # print(ds.as_numpy_iterator())
+     # iterator = iter(ds)
+     # buf = iterator
+     print(buf)
+     # sharpe calculation
+     sharpe_ratio = (K.mean(buf) * 252) / (K.std(buf) * K.sqrt(252))
+     print(sharpe_ratio)
+     return sharpe_ratio
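
All three experiments above compute the same statistic in different ways: the mean next-period return ('TARGET_1') of the top-10 predictions per date, annualized as mean(daily) * 252 / (std(daily) * sqrt(252)). A minimal pandas-only sketch of that calculation; the helper name spread_return_sharpe is hypothetical, and the column names ('DATE', 'PRED', 'TARGET_1') mirror those used in the experiments:

import numpy as np
import pandas as pd


def spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 10) -> float:
    """Annualized Sharpe ratio of the daily top-`portfolio_size` spread return.

    `df` is assumed to hold one row per (date, asset) with columns
    'DATE', 'PRED' (model score) and 'TARGET_1' (next-period return).
    """
    # Mean TARGET_1 of the portfolio_size highest-PRED rows for each date
    daily = (
        df.sort_values("PRED", ascending=False)
        .groupby("DATE")["TARGET_1"]
        .apply(lambda s: s.iloc[:portfolio_size].mean())
    )
    if len(daily) == 1:
        # Single day: mirrors the special case in the experiments above
        return float(daily.iloc[0]) * (252 / np.sqrt(252))
    return float((daily.mean() * 252) / (daily.std() * np.sqrt(252)))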