lecrapaud-0.4.0-py3-none-any.whl
- lecrapaud/__init__.py +0 -0
- lecrapaud/config.py +16 -0
- lecrapaud/db/__init__.py +0 -0
- lecrapaud/db/alembic/README +1 -0
- lecrapaud/db/alembic/env.py +78 -0
- lecrapaud/db/alembic/script.py.mako +26 -0
- lecrapaud/db/alembic/versions/2025_04_06_1738-7390745388e4_initial_setup.py +295 -0
- lecrapaud/db/alembic/versions/2025_04_06_1755-40cd8d3e798e_unique_constraint_for_data.py +30 -0
- lecrapaud/db/alembic/versions/2025_05_23_1724-2360941fa0bd_longer_string.py +52 -0
- lecrapaud/db/alembic/versions/2025_05_27_1159-b96396dcfaff_add_env_to_trading_tables.py +34 -0
- lecrapaud/db/alembic/versions/2025_05_27_1337-40cbfc215f7c_fix_nb_character_on_portfolio.py +39 -0
- lecrapaud/db/alembic/versions/2025_05_27_1526-3de994115317_to_datetime.py +36 -0
- lecrapaud/db/alembic/versions/2025_05_27_2003-25c227c684f8_add_fees_to_transactions.py +30 -0
- lecrapaud/db/alembic/versions/2025_05_27_2047-6b6f2d38e9bc_double_instead_of_float.py +132 -0
- lecrapaud/db/alembic/versions/2025_05_31_1111-c175e4a36d68_generalise_stock_to_group.py +36 -0
- lecrapaud/db/alembic/versions/2025_05_31_1256-5681095bfc27_create_investment_run_and_portfolio_.py +62 -0
- lecrapaud/db/alembic/versions/2025_05_31_1806-339927587383_add_investment_run_id.py +107 -0
- lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py +38 -0
- lecrapaud/db/alembic/versions/2025_05_31_1849-3b8550297e8e_change_date_to_datetime.py +44 -0
- lecrapaud/db/alembic/versions/2025_05_31_1852-e6b8c95d8243_add_date_to_portfolio_history.py +30 -0
- lecrapaud/db/alembic/versions/2025_06_10_1136-db8cdd83563a_addnewsandoptiontodata.py +32 -0
- lecrapaud/db/crud.py +179 -0
- lecrapaud/db/models/__init__.py +11 -0
- lecrapaud/db/models/base.py +6 -0
- lecrapaud/db/models/dataset.py +124 -0
- lecrapaud/db/models/feature.py +46 -0
- lecrapaud/db/models/feature_selection.py +126 -0
- lecrapaud/db/models/feature_selection_rank.py +80 -0
- lecrapaud/db/models/model.py +41 -0
- lecrapaud/db/models/model_selection.py +56 -0
- lecrapaud/db/models/model_training.py +54 -0
- lecrapaud/db/models/score.py +62 -0
- lecrapaud/db/models/target.py +59 -0
- lecrapaud/db/services.py +0 -0
- lecrapaud/db/setup.py +58 -0
- lecrapaud/directory_management.py +28 -0
- lecrapaud/feature_engineering.py +1119 -0
- lecrapaud/feature_selection.py +1229 -0
- lecrapaud/jobs/__init__.py +13 -0
- lecrapaud/jobs/config.py +17 -0
- lecrapaud/jobs/scheduler.py +36 -0
- lecrapaud/jobs/tasks.py +57 -0
- lecrapaud/model_selection.py +1571 -0
- lecrapaud/predictions.py +292 -0
- lecrapaud/search_space.py +844 -0
- lecrapaud/services/__init__.py +0 -0
- lecrapaud/services/embedding_categorical.py +71 -0
- lecrapaud/services/indicators.py +309 -0
- lecrapaud/speed_tests/experiments.py +139 -0
- lecrapaud/speed_tests/test-gpu-bilstm.ipynb +261 -0
- lecrapaud/speed_tests/test-gpu-resnet.ipynb +166 -0
- lecrapaud/speed_tests/test-gpu-transformers.ipynb +254 -0
- lecrapaud/speed_tests/tests.ipynb +145 -0
- lecrapaud/speed_tests/trash.py +37 -0
- lecrapaud/training.py +151 -0
- lecrapaud/utils.py +246 -0
- lecrapaud-0.4.0.dist-info/LICENSE +201 -0
- lecrapaud-0.4.0.dist-info/METADATA +103 -0
- lecrapaud-0.4.0.dist-info/RECORD +60 -0
- lecrapaud-0.4.0.dist-info/WHEEL +4 -0
lecrapaud/search_space.py
@@ -0,0 +1,844 @@
+from typing import Optional
+
+# ML models
+from sklearn.linear_model import (
+    SGDRegressor,
+    LinearRegression,
+    SGDClassifier,
+    LogisticRegression,
+)
+from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
+from sklearn.neural_network import MLPRegressor, MLPClassifier
+from sklearn.svm import LinearSVR, LinearSVC
+from sklearn.naive_bayes import GaussianNB
+
+# Ensemble models
+from lightgbm import LGBMRegressor, LGBMClassifier
+from xgboost import XGBRegressor, XGBClassifier
+from sklearn.ensemble import (
+    RandomForestRegressor,
+    AdaBoostRegressor,
+    RandomForestClassifier,
+    AdaBoostClassifier,
+    BaggingClassifier,
+)
+import lightgbm as lgb
+import xgboost as xgb
+
+# DL models
+from keras import Model, Input
+from keras.layers import (
+    Dense,
+    LSTM,
+    Bidirectional,
+    GRU,
+    LayerNormalization,
+    RepeatVector,
+    MultiHeadAttention,
+    Add,
+    GlobalAveragePooling1D,
+    Dropout,
+    Activation,
+    TimeDistributed,
+)
+from tcn import TCN
+from keras.initializers import Identity
+from keras.regularizers import L2
+from keras.activations import sigmoid
+
+# Search spaces
+from ray import tune
+
+# we cannot use the tune.sample_from function to build a conditional search space, because HyperOpt and Bayesian optimization need a fixed search space
+
+ml_models = [
+    {
+        "model_name": "linear",
+        "recurrent": False,
+        "need_scaling": True,
+        "classification": {
+            "create_model": LogisticRegression,
+            "search_params": {
+                "penalty": tune.choice(
+                    ["l2"]
+                ),  # None is not compatible with liblinear, and l1/elasticnet not compatible with most solvers
+                "C": tune.loguniform(1e-4, 1e2),
+                "l1_ratio": tune.quniform(0.2, 0.8, 0.1),
+                "solver": tune.choice(
+                    [
+                        "sag",
+                        "saga",
+                        "liblinear",
+                        "lbfgs",
+                        "newton-cg",
+                        "newton-cholesky",
+                    ]
+                ),
+                "max_iter": tune.randint(100, 1000),
+                "n_jobs": -1,
+                "random_state": 42,
+            },
+        },
+        "regression": {
+            "create_model": LinearRegression,
+            "search_params": {
+                "n_jobs": -1,
+                "fit_intercept": tune.choice([True, False]),
+            },
+        },
+    },
+    {
+        "model_name": "sgd",
+        "recurrent": False,
+        "need_scaling": True,
+        "classification": {
+            "create_model": SGDClassifier,
+            "search_params": {
+                "loss": tune.choice(
+                    [
+                        "hinge",
+                        "log_loss",
+                        "modified_huber",
+                        "squared_hinge",
+                    ]
+                ),
+                "penalty": tune.choice(["l1", "l2", "elasticnet"]),
+                "alpha": tune.loguniform(1e-6, 1e-2),
+                "l1_ratio": tune.quniform(0.2, 0.8, 0.1),
+                "max_iter": tune.randint(1000, 5000),
+                "shuffle": tune.choice([True, False]),
+                "random_state": 42,
+            },
+        },
+        "regression": {
+            "create_model": SGDRegressor,
+            "search_params": {
+                "penalty": tune.choice(["l1", "l2", "elasticnet"]),
+                "alpha": tune.loguniform(1e-6, 1e-2),
+                "l1_ratio": tune.quniform(0.2, 0.8, 0.1),
+                "max_iter": tune.randint(1000, 5000),
+                "random_state": 42,
+            },
+        },
+    },
+    {
+        "model_name": "naive_bayes",
+        "recurrent": False,
+        "need_scaling": False,
+        "classification": {
+            "create_model": GaussianNB,  # Naive Bayes classifier for classification
+            "search_params": {
+                "var_smoothing": tune.loguniform(
+                    1e-9, 1e-6
+                )  # Smoothing parameter to deal with zero probabilities
+            },
+        },
+        "regression": None,
+    },
+    {
+        "model_name": "bagging_naive_bayes",
+        "recurrent": False,
+        "need_scaling": False,
+        "classification": {
+            "create_model": BaggingClassifier,
+            "search_params": {
+                "estimator": GaussianNB(),  # Base model for bagging
+                "n_estimators": tune.randint(10, 100),  # Number of base estimators
+                "max_samples": tune.uniform(
+                    0.5, 1.0
+                ),  # Proportion of samples to draw for each base estimator
+                "max_features": tune.uniform(
+                    0.5, 1.0
+                ),  # Proportion of features to draw for each base estimator
+                "bootstrap": tune.choice(
+                    [True, False]
+                ),  # Whether samples are drawn with replacement
+                "bootstrap_features": tune.choice(
+                    [True, False]
+                ),  # Whether features are drawn with replacement
+                "random_state": 42,  # Fixed random state for reproducibility
+            },
+        },
+        "regression": None,
+    },
+    {
+        "model_name": "svm",
+        "recurrent": False,
+        "need_scaling": True,
+        "classification": {
+            "create_model": LinearSVC,
+            "search_params": {
+                # "penalty": tune.choice(["l1", "l2"]),  # issue with l1 + hinge
+                "C": tune.loguniform(1e-4, 1e2),  # Regularization strength
+                "max_iter": tune.randint(100, 2000),  # Maximum number of iterations
+                "tol": tune.loguniform(1e-5, 1e-2),  # Tolerance for stopping criteria
+                "fit_intercept": tune.choice(
+                    [True, False]
+                ),  # Whether to calculate intercept
+                "loss": tune.choice(["hinge", "squared_hinge"]),  # Loss function
+                "dual": "auto",  # Dual only when hinge loss is not used and samples < features
+                "random_state": 42,  # Fixed random state for reproducibility
+            },
+        },
+        "regression": {
+            "create_model": LinearSVR,
+            "search_params": {
+                "C": tune.loguniform(1e-4, 1e2),  # Regularization strength
+                "max_iter": tune.randint(100, 2000),  # Maximum number of iterations
+                "tol": tune.loguniform(1e-5, 1e-2),  # Tolerance for stopping criteria
+                "epsilon": tune.loguniform(
+                    1e-4, 1e-1
+                ),  # Epsilon in the epsilon-insensitive loss function
+                "fit_intercept": tune.choice(
+                    [True, False]
+                ),  # Whether to calculate intercept
+                "loss": tune.choice(
+                    ["epsilon_insensitive", "squared_epsilon_insensitive"]
+                ),  # Loss function
+                "dual": "auto",  # Dual is not applicable for certain configurations in SVR
+                "random_state": 42,  # Fixed random state for reproducibility
+            },
+        },
+    },
+    {
+        "model_name": "tree",
+        "recurrent": False,
+        "need_scaling": False,
+        "classification": {
+            "create_model": DecisionTreeClassifier,
+            "search_params": {
+                "criterion": tune.choice(["gini", "entropy", "log_loss"]),
+                "max_depth": tune.randint(8, 64),
+                "min_samples_split": tune.randint(2, 10),
+                "min_samples_leaf": tune.randint(1, 4),
+                "max_features": tune.uniform(
+                    0.5, 1.0
+                ),  # Proportion of features to draw for each base estimator
+                "random_state": 42,
+            },
+        },
+        "regression": {
+            "create_model": DecisionTreeRegressor,
+            "search_params": {
+                "max_depth": tune.randint(8, 64),
+                "min_samples_split": tune.randint(2, 10),
+                "min_samples_leaf": tune.randint(1, 4),
+                "max_features": tune.uniform(
+                    0.5, 1.0
+                ),  # Proportion of features to draw for each base estimator
+                "random_state": 42,
+            },
+        },
+    },
+    {
+        "model_name": "forest",
+        "recurrent": False,
+        "need_scaling": False,
+        "classification": {
+            "create_model": RandomForestClassifier,
+            "search_params": {
+                "n_estimators": tune.randint(50, 1000),  # Number of trees in the forest
+                "max_depth": tune.randint(8, 64),  # Maximum depth of the trees
+                "min_samples_split": tune.randint(
+                    2, 20
+                ),  # Minimum samples required to split a node
+                "min_samples_leaf": tune.randint(
+                    1, 10
+                ),  # Minimum samples required at a leaf node
+                "max_features": tune.choice(
+                    ["sqrt", "log2", None]
+                ),  # Number of features to consider at each split
+                "bootstrap": tune.choice(
+                    [True, False]
+                ),  # Whether to use bootstrap sampling
+                "criterion": tune.choice(
+                    ["gini", "entropy", "log_loss"]
+                ),  # The function to measure the quality of a split
+                # "oob_score": tune.choice(
+                #     [True, False]
+                # ),  # Whether to use out-of-bag samples to estimate generalization accuracy: not working if bootstrap = False
+                "n_jobs": -1,  # Use all processors
+                "random_state": 42,  # Fixed random state for reproducibility
+            },
+        },
+        "regression": {
+            "create_model": RandomForestRegressor,
+            "search_params": {
+                "n_estimators": tune.randint(50, 1000),  # Number of trees in the forest
+                "max_depth": tune.randint(5, 30),  # Maximum depth of the trees
+                "min_samples_split": tune.randint(
+                    2, 20
+                ),  # Minimum samples required to split a node
+                "min_samples_leaf": tune.randint(
+                    1, 10
+                ),  # Minimum samples required at a leaf node
+                "max_features": tune.choice(
+                    ["sqrt", "log2", None]
+                ),  # Number of features to consider at each split
+                "bootstrap": tune.choice(
+                    [True, False]
+                ),  # Whether to use bootstrap sampling
+                "criterion": tune.choice(
+                    ["squared_error", "absolute_error", "friedman_mse"]
+                ),  # Loss function to use
+                # "oob_score": tune.choice(
+                #     [True, False]
+                # ),  # Whether to use out-of-bag samples to estimate generalization accuracy: not working if bootstrap = False
+                "n_jobs": -1,  # Use all processors
+                "random_state": 42,  # Fixed random state for reproducibility
+            },
+        },
+    },
+    {
+        "model_name": "adaboost",
+        "recurrent": False,
+        "need_scaling": False,
+        "classification": {
+            "create_model": AdaBoostClassifier,
+            "search_params": {
+                "n_estimators": tune.randint(50, 1000),  # Number of boosting stages
+                "learning_rate": tune.loguniform(
+                    1e-4, 1
+                ),  # Learning rate shrinks the contribution of each classifier
+                "random_state": 42,  # Fixed random state for reproducibility
+                "estimator": tune.choice(
+                    [
+                        DecisionTreeClassifier(max_depth=2**i, random_state=42)
+                        for i in range(1, 6)
+                    ]
+                ),  # Base estimators are decision trees with varying depths
+            },
+        },
+        "regression": {
+            "create_model": AdaBoostRegressor,
+            "search_params": {
+                "n_estimators": tune.randint(50, 1000),  # Number of boosting stages
+                "learning_rate": tune.loguniform(1e-4, 1),  # Learning rate
+                "loss": tune.choice(
+                    ["linear", "square", "exponential"]
+                ),  # Loss function to use when updating weights
+                "random_state": 42,  # Fixed random state for reproducibility
+                "estimator": tune.choice(
+                    [
+                        DecisionTreeRegressor(max_depth=2**i, random_state=42)
+                        for i in range(1, 6)
+                    ]
+                ),  # Base estimators are decision trees with varying depths
+            },
+        },
+    },
+    {
+        "model_name": "xgb",
+        "recurrent": False,
+        "need_scaling": False,
+        "classification": {
+            "create_model": "xgb",
+            "search_params": {
+                "num_boost_round": tune.randint(
+                    50, 1000
+                ),  # Number of boosting rounds (trees)
+                "early_stopping_rounds": tune.randint(5, 50),
+                "model_params": {
+                    "max_depth": tune.randint(3, 10),  # Maximum depth of trees
+                    "eta": tune.loguniform(
+                        1e-4, 0.5
+                    ),  # Learning rate, note 'eta' is used instead of 'learning_rate'
+                    "subsample": tune.quniform(
+                        0.6, 1, 0.05
+                    ),  # Subsample ratio of training instances
+                    "colsample_bytree": tune.quniform(
+                        0.6, 1, 0.05
+                    ),  # Subsample ratio of columns for each tree
+                    "gamma": tune.uniform(
+                        0, 10
+                    ),  # Minimum loss reduction for further partitioning
+                    "min_child_weight": tune.loguniform(
+                        1, 10
+                    ),  # Minimum sum of instance weights in a child
+                    "alpha": tune.loguniform(
+                        1e-5, 1
+                    ),  # L1 regularization term on weights
+                    "lambda": tune.loguniform(
+                        1e-5, 1
+                    ),  # L2 regularization term on weights
+                    "random_state": 42,  # Fixed random state
+                    "n_jobs": -1,  # Number of parallel threads for computation
+                },
+            },
+        },
+        "regression": {
+            "create_model": "xgb",
+            "search_params": {
+                "num_boost_round": tune.randint(
+                    50, 1000
+                ),  # Number of boosting rounds (trees)
+                "early_stopping_rounds": tune.randint(5, 50),
+                "model_params": {
+                    "max_depth": tune.randint(3, 10),  # Maximum depth of trees
+                    "eta": tune.loguniform(1e-4, 0.5),  # Learning rate (eta)
+                    "subsample": tune.quniform(
+                        0.6, 1, 0.05
+                    ),  # Subsample ratio of training instances
+                    "colsample_bytree": tune.quniform(
+                        0.6, 1, 0.05
+                    ),  # Subsample ratio of columns for each tree
+                    "gamma": tune.uniform(
+                        0, 10
+                    ),  # Minimum loss reduction for further partitioning
+                    "min_child_weight": tune.loguniform(
+                        1, 10
+                    ),  # Minimum sum of instance weights in a child
+                    "alpha": tune.loguniform(
+                        1e-5, 1
+                    ),  # L1 regularization term on weights
+                    "lambda": tune.loguniform(
+                        1e-5, 1
+                    ),  # L2 regularization term on weights
+                    "random_state": 42,  # Fixed random state
+                    "n_jobs": -1,  # Number of parallel threads for computation
+                },
+            },
+        },
+    },
+    {
+        "model_name": "lgb",
+        "recurrent": False,
+        "need_scaling": False,
+        "classification": {
+            "create_model": "lgb",
+            "search_params": {
+                "num_boost_round": tune.randint(
+                    50, 1000
+                ),  # Number of boosting rounds (trees)
+                "early_stopping_rounds": tune.randint(5, 50),
+                "model_params": {
+                    "max_depth": tune.randint(3, 10),  # Maximum depth of trees
+                    "learning_rate": tune.loguniform(1e-4, 0.5),  # Learning rate
+                    "num_leaves": tune.randint(
+                        20, 150
+                    ),  # Maximum number of leaves in one tree
+                    "subsample": tune.quniform(
+                        0.6, 1, 0.05
+                    ),  # Fraction of training data for each boosting round
+                    "colsample_bytree": tune.quniform(
+                        0.6, 1, 0.05
+                    ),  # Fraction of features to use per tree
+                    "min_data_in_leaf": tune.randint(
+                        20, 100
+                    ),  # Minimum number of data points in a leaf
+                    "lambda_l1": tune.loguniform(1e-5, 1),  # L1 regularization
+                    "lambda_l2": tune.loguniform(1e-5, 1),  # L2 regularization
+                    "random_state": 42,  # Fixed random state for reproducibility
+                    "n_jobs": -1,  # Use all cores for parallel computation
+                },
+            },
+        },
+        "regression": {
+            "create_model": "lgb",
+            "search_params": {
+                "num_boost_round": tune.randint(
+                    50, 1000
+                ),  # Number of boosting rounds (trees)
+                "early_stopping_rounds": tune.randint(5, 50),
+                "model_params": {
+                    "max_depth": tune.randint(3, 10),  # Maximum depth of trees
+                    "learning_rate": tune.loguniform(1e-4, 0.5),  # Learning rate
+                    "num_leaves": tune.randint(
+                        20, 150
+                    ),  # Maximum number of leaves in one tree
+                    "subsample": tune.quniform(
+                        0.6, 1, 0.05
+                    ),  # Fraction of training data for each boosting round
+                    "colsample_bytree": tune.quniform(
+                        0.6, 1, 0.05
+                    ),  # Fraction of features to use per tree
+                    "min_data_in_leaf": tune.randint(
+                        20, 100
+                    ),  # Minimum number of data points in a leaf
+                    "lambda_l1": tune.loguniform(1e-5, 1),  # L1 regularization
+                    "lambda_l2": tune.loguniform(1e-5, 1),  # L2 regularization
+                    "random_state": 42,  # Fixed random state for reproducibility
+                    "n_jobs": -1,  # Use all cores for parallel computation
+                },
+            },
+        },
+    },
+]
+
+
+def get_model_constructor(model_name: str):
+
+    def constructor(
+        params: dict,
+        input_shape: tuple[int, int],
+        target_type: str,
+        num_class: Optional[int] = None,
+    ):
+        """
+        Builds the recurrent model based on the initialized parameters and model name.
+        :return: A Keras Model object.
+        """
+        inputs = Input(shape=input_shape)
+
+        # Model selection logic
+        if model_name == "LSTM-1":
+            x = LSTM(**params["model_params"])(inputs)
+
+        elif model_name == "LSTM-2":
+            x = LSTM(**params["model_params"], return_sequences=True)(inputs)
+            x = LSTM(**params["model_params"])(x)
+
+        elif model_name == "LSTM-2-Deep":
+            x = LSTM(**params["model_params"], return_sequences=True)(inputs)
+            x = LSTM(**params["model_params"])(x)
+            x = Dense(50)(x)
+
+        elif model_name == "BiLSTM-1":
+            x = Bidirectional(LSTM(**params["model_params"]))(inputs)
+
+        elif model_name == "BiLSTM-2":  # TODO: create search params ?
+            x = Bidirectional(LSTM(**params["model_params"], return_sequences=True))(
+                inputs
+            )
+            x = Bidirectional(LSTM(**params["model_params"]))(x)
+
+        elif model_name == "GRU-1":
+            x = GRU(**params["model_params"])(inputs)
+
+        elif model_name == "GRU-2":  # TODO: create search params ?
+            x = GRU(**params["model_params"], return_sequences=True)(inputs)
+            x = GRU(**params["model_params"])(x)
+
+        elif model_name == "GRU-2-Deep":  # TODO: create search params ?
+            x = GRU(**params["model_params"], return_sequences=True)(inputs)
+            x = GRU(**params["model_params"])(x)
+            x = Dense(50)(x)
+
+        elif model_name == "BiGRU-1":
+            x = Bidirectional(GRU(**params["model_params"]))(inputs)
+
+        elif model_name == "BiGRU-2":  # TODO: create search params ?
+            x = Bidirectional(GRU(**params["model_params"], return_sequences=True))(
+                inputs
+            )
+            x = Bidirectional(GRU(**params["model_params"]))(x)
+
+        elif model_name == "TCN-1":
+            x = TCN(**params["model_params"])(inputs)
+
+        elif model_name == "TCN-2":  # TODO: create search params ?
+            x = TCN(**params["model_params"], return_sequences=True)(inputs)
+            x = TCN(**params["model_params"])(x)
+
+        elif model_name == "TCN-2-Deep":  # TODO: create search params ?
+            x = TCN(**params["model_params"], return_sequences=True)(inputs)
+            x = TCN(**params["model_params"])(x)
+            x = Dense(50)(x)
+
+        elif model_name == "BiTCN-1":  # TODO: create search params ?
+            x = Bidirectional(TCN(**params["model_params"], return_sequences=False))(
+                inputs
+            )
+
+        elif model_name == "BiTCN-2":  # TODO: create search params ?
+            x = Bidirectional(TCN(**params["model_params"], return_sequences=True))(
+                inputs
+            )
+            x = Bidirectional(TCN(**params["model_params"], return_sequences=False))(x)
+
+        elif model_name == "Seq2Seq":
+            # encoder
+            encoder_last_h1, encoder_last_h2, encoder_last_c = LSTM(
+                **params["model_params"], return_state=True
+            )(inputs)
+            encoder_last_h1 = LayerNormalization(epsilon=1e-6)(encoder_last_h1)
+            encoder_last_c = LayerNormalization(epsilon=1e-6)(encoder_last_c)
+
+            # decoder
+            decoder_timesteps = max(int(input_shape[0] / 5), 2)
+            decoder = RepeatVector(decoder_timesteps)(encoder_last_h1)
+            x = LSTM(**params["model_params"], return_state=False)(
+                decoder, initial_state=[encoder_last_h1, encoder_last_c]
+            )
+
+        elif model_name == "Transformer":
+
+            def transformer_encoder(
+                inputs, num_layers, head_size, num_heads, ff_dim, dropout=0
+            ):
+                for _ in range(num_layers):
+                    # Attention and Normalization
+                    x = LayerNormalization(epsilon=1e-6)(inputs)
+                    x = MultiHeadAttention(
+                        key_dim=head_size, num_heads=num_heads, dropout=dropout
+                    )(x, x)
+                    x = Add()([x, inputs])
+
+                    # Feed Forward Part
+                    y = LayerNormalization(epsilon=1e-6)(x)
+                    y = Dense(ff_dim, activation="relu")(y)
+                    y = Dropout(dropout)(y)
+                    y = Dense(inputs.shape[-1])(y)
+                    inputs = Add()([y, x])
+
+                return inputs
+
+            x = transformer_encoder(inputs, **params["model_params"])
+            x = GlobalAveragePooling1D()(x)
+            x = LayerNormalization(epsilon=1e-6)(x)
+
+        else:
+            raise ValueError(f"Invalid model name: {model_name}")
+
+        # Define output layer based on target type
+        if num_class is not None and num_class > 2:
+            outputs = Dense(
+                num_class,
+                kernel_initializer=Identity(),
+                kernel_regularizer=L2(l2=params["l2"]),
+                activation="softmax",
+            )(x)
+        else:
+            outputs = Dense(
+                1,
+                kernel_initializer=Identity(),
+                kernel_regularizer=L2(l2=params["l2"]),
+                activation=(sigmoid if target_type == "classification" else "linear"),
+            )(x)
+
+        # Build the model
+        model = Model(inputs=inputs, outputs=outputs, name=model_name)
+
+        # Set the name of the model based on its parameters
+        units = (
+            params["model_params"].get("nb_filters")
+            or params["model_params"].get("units")
+            or params["model_params"].get("head_size")
+        )
+        nb_params = model.count_params()
+        timesteps = input_shape[0]
+        model.model_name = model.name
+
+        return model
+
+    return constructor
+
+
+dl_recurrent_models = [
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "LSTM-1",
+        "create_model": get_model_constructor("LSTM-1"),
+        "search_params": {
+            "model_params": {
+                "units": tune.choice([32, 64, 128]),
+                "activation": tune.choice(["tanh", "relu"]),
+                "recurrent_activation": tune.choice(["sigmoid", "relu"]),
+                "kernel_initializer": Identity(),
+                "recurrent_initializer": Identity(),
+                "dropout": tune.quniform(0.0, 0.5, 0.1),
+                "recurrent_dropout": tune.quniform(0.0, 0.5, 0.1),
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "LSTM-2",
+        "create_model": get_model_constructor("LSTM-2"),
+        "search_params": {
+            "model_params": {
+                "units": tune.choice([32, 64, 128]),
+                "activation": tune.choice(["tanh", "relu"]),
+                "recurrent_activation": tune.choice(["sigmoid", "relu"]),
+                "kernel_initializer": Identity(),
+                "recurrent_initializer": Identity(),
+                "dropout": tune.quniform(0.0, 0.5, 0.1),
+                "recurrent_dropout": tune.quniform(0.0, 0.5, 0.1),
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "LSTM-2-Deep",
+        "create_model": get_model_constructor("LSTM-2-Deep"),
+        "search_params": {
+            "model_params": {
+                "units": tune.choice([32, 64, 128]),
+                "activation": tune.choice(["tanh", "relu"]),
+                "recurrent_activation": tune.choice(["sigmoid", "relu"]),
+                "kernel_initializer": Identity(),
+                "recurrent_initializer": Identity(),
+                "dropout": tune.quniform(0.0, 0.5, 0.1),
+                "recurrent_dropout": tune.quniform(0.0, 0.5, 0.1),
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "BiLSTM-1",
+        "create_model": get_model_constructor("BiLSTM-1"),
+        "search_params": {
+            "model_params": {
+                "units": tune.choice([32, 64, 128]),
+                "activation": tune.choice(["tanh", "relu"]),
+                "recurrent_activation": tune.choice(["sigmoid", "relu"]),
+                "kernel_initializer": Identity(),
+                "recurrent_initializer": Identity(),
+                "dropout": tune.quniform(0.0, 0.5, 0.1),
+                "recurrent_dropout": tune.quniform(0.0, 0.5, 0.1),
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "GRU-1",
+        "create_model": get_model_constructor("GRU-1"),
+        "search_params": {
+            "model_params": {
+                "units": tune.choice([32, 64, 128]),
+                "activation": tune.choice(["tanh", "relu"]),
+                "recurrent_activation": tune.choice(["sigmoid", "relu"]),
+                "kernel_initializer": Identity(),
+                "recurrent_initializer": Identity(),
+                "dropout": tune.quniform(0.0, 0.5, 0.1),
+                "recurrent_dropout": tune.quniform(0.0, 0.5, 0.1),
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "BiGRU-1",
+        "create_model": get_model_constructor("BiGRU-1"),
+        "search_params": {
+            "model_params": {
+                "units": tune.choice([32, 64, 128]),
+                "activation": tune.choice(["tanh", "relu"]),
+                "recurrent_activation": tune.choice(["sigmoid", "relu"]),
+                "kernel_initializer": Identity(),
+                "recurrent_initializer": Identity(),
+                "dropout": tune.quniform(0.0, 0.5, 0.1),
+                "recurrent_dropout": tune.quniform(0.0, 0.5, 0.1),
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "TCN-1",
+        "create_model": get_model_constructor("TCN-1"),
+        "search_params": {
+            "model_params": {
+                "nb_filters": tune.choice([32, 64, 128]),
+                "kernel_size": tune.choice([2, 3, 5]),
+                "dropout_rate": tune.quniform(0.0, 0.5, 0.1),
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "Seq2Seq",
+        "create_model": get_model_constructor("Seq2Seq"),
+        "search_params": {
+            "model_params": {
+                "units": tune.choice([32, 64, 128]),
+                "kernel_initializer": Identity(),
+                "recurrent_initializer": Identity(),
+                "dropout": tune.quniform(0.0, 0.5, 0.1),
+                "recurrent_dropout": tune.quniform(0.0, 0.5, 0.1),
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+    {
+        "recurrent": True,
+        "need_scaling": True,
+        "model_name": "Transformer",
+        "create_model": get_model_constructor("Transformer"),
+        "search_params": {
+            "model_params": {
+                "head_size": tune.choice(
+                    [32, 64, 128, 256, 512]
+                ),  # Example of different head sizes to explore
+                "num_heads": tune.choice(
+                    [8, 16, 32]
+                ),  # Exploring different number of heads
+                "ff_dim": tune.choice(
+                    [128, 256, 512, 1024, 2048]
+                ),  # Feed-forward dimension options
+                "num_layers": tune.choice([6, 12, 24]),  # Number of transformer layers
+                "dropout": tune.quniform(
+                    0.1, 0.5, 0.1
+                ),  # Dropout rate between 0.1 and 0.5
+            },
+            "learning_rate": tune.loguniform(1e-4, 1e-2),
+            "batch_size": tune.choice([32, 64, 128]),
+            "epochs": tune.choice([50, 100, 200]),
+            "timesteps": tune.choice([5, 10, 20, 50, 120]),
+            "clipnorm": tune.quniform(0.5, 2.0, 0.5),
+            "l2": tune.loguniform(1e-6, 1e-1),
+        },
+    },
+]
+
+
+def get_models_idx(*model_names):
+    models = ml_models + dl_recurrent_models
+
+    matching_idx = [
+        i for i, model in enumerate(models) if model["model_name"] in model_names
+    ]
+    return matching_idx
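
The comment near the top of search_space.py explains the design constraint behind these dictionaries: every hyperparameter is declared unconditionally because Ray Tune's HyperOpt and Bayesian-optimization searchers require a fixed search space, while `tune.sample_from` (which can express conditional parameters) only works with basic random or grid search. A minimal sketch of the contrast; `conditional_space` and `fixed_space` are illustrative names, not part of lecrapaud:

```python
from ray import tune

# Conditional space via tune.sample_from: l1_ratio is only produced when the
# sampled penalty is "elasticnet". Plain random search accepts this, but
# searchers such as HyperOptSearch or BayesOptSearch cannot optimize over it.
conditional_space = {
    "penalty": tune.choice(["l2", "elasticnet"]),
    "l1_ratio": tune.sample_from(
        lambda spec: 0.5 if spec.config.penalty == "elasticnet" else None
    ),
}

# Fixed space, as used throughout search_space.py: every key is always
# sampled, and scikit-learn warns about and ignores l1_ratio whenever the
# sampled penalty is not "elasticnet".
fixed_space = {
    "penalty": tune.choice(["l2", "elasticnet"]),
    "l1_ratio": tune.quniform(0.2, 0.8, 0.1),
}
```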
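For orientation, here is one way the registry might be consumed: `get_models_idx` resolves names to indices in the combined `ml_models + dl_recurrent_models` list, and each deep-learning entry's `create_model` closure builds a Keras model from a concrete parameter dict. A sketch, assuming lecrapaud 0.4.0 and its keras/tcn/ray dependencies are installed; the parameter values are arbitrary examples standing in for sampled Tune domains:

```python
from lecrapaud.search_space import dl_recurrent_models, get_models_idx, ml_models

models = ml_models + dl_recurrent_models

# Indices come back in registry order: "linear" (an ml_models entry) precedes
# "LSTM-1" (a dl_recurrent_models entry).
linear_idx, lstm_idx = get_models_idx("linear", "LSTM-1")

# Build the LSTM-1 model. The dict mirrors the "search_params" structure with
# concrete values; "l2" is required because the output layer is regularized.
lstm_entry = models[lstm_idx]
params = {"model_params": {"units": 64, "activation": "tanh"}, "l2": 1e-4}
model = lstm_entry["create_model"](
    params,
    input_shape=(10, 8),  # 10 timesteps, 8 features
    target_type="regression",
)
model.summary()
```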