lecrapaud 0.19.3__tar.gz → 0.20.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/PKG-INFO +1 -1
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/api.py +11 -49
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/config.py +1 -0
- lecrapaud-0.20.1/lecrapaud/db/alembic/versions/2025_10_25_0635-07e303521594_add_unique_constraint_to_score.py +39 -0
- lecrapaud-0.20.1/lecrapaud/db/alembic/versions/2025_10_26_1727-033e0f7eca4f_merge_score_and_model_trainings_into_.py +264 -0
- lecrapaud-0.20.1/lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py +42 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/__init__.py +2 -4
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/base.py +103 -65
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/experiment.py +79 -99
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/feature_selection.py +0 -3
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/feature_selection_rank.py +0 -18
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/model_selection.py +2 -2
- lecrapaud-0.19.3/lecrapaud/db/models/score.py → lecrapaud-0.20.1/lecrapaud/db/models/model_selection_score.py +29 -12
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/session.py +1 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/experiment.py +11 -13
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/feature_engineering.py +34 -49
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/feature_selection.py +90 -22
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/model_selection.py +434 -192
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/search_space.py +2 -1
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/utils.py +22 -2
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/pyproject.toml +1 -1
- lecrapaud-0.19.3/lecrapaud/db/models/model_training.py +0 -64
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/LICENSE +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/README.md +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/__init__.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/__init__.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/README +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/env.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/script.py.mako +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_score.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/alembic.ini +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/feature.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/model.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/target.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/db/models/utils.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/directories.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/integrations/openai_integration.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/jobs/__init__.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/jobs/config.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/jobs/scheduler.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/jobs/tasks.py +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
- {lecrapaud-0.19.3 → lecrapaud-0.20.1}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
|
@@ -165,6 +165,12 @@ class ExperimentEngine:
|
|
|
165
165
|
|
|
166
166
|
def __init__(self, id: int = None, data: pd.DataFrame = None, **kwargs):
|
|
167
167
|
"""Initialize the experiment engine with either new or existing experiment."""
|
|
168
|
+
# Set all kwargs as instance attributes
|
|
169
|
+
if "models_idx" in kwargs:
|
|
170
|
+
kwargs["models_idx"] = normalize_models_idx(kwargs["models_idx"])
|
|
171
|
+
for key, value in kwargs.items():
|
|
172
|
+
setattr(self, key, value)
|
|
173
|
+
|
|
168
174
|
if id:
|
|
169
175
|
self.experiment = Experiment.get(id)
|
|
170
176
|
kwargs.update(self.experiment.context)
|
|
@@ -180,12 +186,6 @@ class ExperimentEngine:
|
|
|
180
186
|
)
|
|
181
187
|
self.experiment = create_experiment(data=data, **kwargs)
|
|
182
188
|
|
|
183
|
-
# Set all kwargs as instance attributes
|
|
184
|
-
for key, value in kwargs.items():
|
|
185
|
-
if key == "models_idx":
|
|
186
|
-
value = normalize_models_idx(value)
|
|
187
|
-
setattr(self, key, value)
|
|
188
|
-
|
|
189
189
|
def train(self, data, best_params=None):
|
|
190
190
|
logger.info("Running training...")
|
|
191
191
|
|
|
@@ -309,12 +309,8 @@ class ExperimentEngine:
|
|
|
309
309
|
def feature_engineering(self, data, for_training=True):
|
|
310
310
|
app = FeatureEngineeringEngine(
|
|
311
311
|
data=data,
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
columns_date=getattr(self, "columns_date", []),
|
|
315
|
-
columns_te_groupby=getattr(self, "columns_te_groupby", []),
|
|
316
|
-
columns_te_target=getattr(self, "columns_te_target", []),
|
|
317
|
-
for_training=getattr(self, "for_training", True),
|
|
312
|
+
experiment=self.experiment,
|
|
313
|
+
for_training=for_training,
|
|
318
314
|
)
|
|
319
315
|
data = app.run()
|
|
320
316
|
return data
|
|
@@ -322,21 +318,7 @@ class ExperimentEngine:
|
|
|
322
318
|
def preprocess_feature(self, data, for_training=True):
|
|
323
319
|
app = PreprocessFeature(
|
|
324
320
|
data=data,
|
|
325
|
-
experiment=
|
|
326
|
-
time_series=getattr(self, "time_series", False),
|
|
327
|
-
date_column=getattr(self, "date_column", None),
|
|
328
|
-
group_column=getattr(self, "group_column", None),
|
|
329
|
-
val_size=getattr(self, "val_size", 0.2),
|
|
330
|
-
test_size=getattr(self, "test_size", 0.2),
|
|
331
|
-
columns_pca=getattr(self, "columns_pca", []),
|
|
332
|
-
pca_temporal=getattr(self, "pca_temporal", []),
|
|
333
|
-
pca_cross_sectional=getattr(self, "pca_cross_sectional", []),
|
|
334
|
-
columns_onehot=getattr(self, "columns_onehot", []),
|
|
335
|
-
columns_binary=getattr(self, "columns_binary", []),
|
|
336
|
-
columns_ordinal=getattr(self, "columns_ordinal", []),
|
|
337
|
-
columns_frequency=getattr(self, "columns_frequency", []),
|
|
338
|
-
target_numbers=getattr(self, "target_numbers", []),
|
|
339
|
-
target_clf=getattr(self, "target_clf", []),
|
|
321
|
+
experiment=self.experiment,
|
|
340
322
|
)
|
|
341
323
|
if for_training:
|
|
342
324
|
train, val, test = app.run()
|
|
@@ -351,7 +333,6 @@ class ExperimentEngine:
|
|
|
351
333
|
train=train,
|
|
352
334
|
target_number=target_number,
|
|
353
335
|
experiment=self.experiment,
|
|
354
|
-
target_clf=self.target_clf,
|
|
355
336
|
)
|
|
356
337
|
app.run()
|
|
357
338
|
self.experiment = Experiment.get(self.experiment.id)
|
|
@@ -368,14 +349,7 @@ class ExperimentEngine:
|
|
|
368
349
|
train=train,
|
|
369
350
|
val=val,
|
|
370
351
|
test=test,
|
|
371
|
-
experiment=
|
|
372
|
-
target_numbers=getattr(self, "target_numbers", []),
|
|
373
|
-
target_clf=getattr(self, "target_clf", []),
|
|
374
|
-
models_idx=getattr(self, "models_idx", []),
|
|
375
|
-
time_series=getattr(self, "time_series", False),
|
|
376
|
-
max_timesteps=getattr(self, "max_timesteps", 120),
|
|
377
|
-
date_column=getattr(self, "date_column", None),
|
|
378
|
-
group_column=getattr(self, "group_column", None),
|
|
352
|
+
experiment=self.experiment,
|
|
379
353
|
)
|
|
380
354
|
if for_training:
|
|
381
355
|
data, reshaped_data = app.run()
|
|
@@ -390,25 +364,13 @@ class ExperimentEngine:
|
|
|
390
364
|
data=data,
|
|
391
365
|
reshaped_data=reshaped_data,
|
|
392
366
|
target_number=target_number,
|
|
393
|
-
experiment=
|
|
394
|
-
target_clf=getattr(self, "target_clf", []),
|
|
395
|
-
models_idx=getattr(self, "models_idx", []),
|
|
396
|
-
time_series=getattr(self, "time_series", False),
|
|
397
|
-
date_column=getattr(self, "date_column", None),
|
|
398
|
-
group_column=getattr(self, "group_column", None),
|
|
399
|
-
target_clf_thresholds=getattr(self, "target_clf_thresholds", {}),
|
|
367
|
+
experiment=self.experiment,
|
|
400
368
|
)
|
|
401
369
|
if best_params and target_number not in best_params.keys():
|
|
402
370
|
raise ValueError(
|
|
403
371
|
f"Target {target_number} not found in best_params passed as argument"
|
|
404
372
|
)
|
|
405
373
|
app.run(
|
|
406
|
-
self.experiment_name,
|
|
407
|
-
perform_hyperopt=self.perform_hyperopt,
|
|
408
|
-
number_of_trials=self.number_of_trials,
|
|
409
|
-
perform_crossval=self.perform_crossval,
|
|
410
|
-
plot=self.plot,
|
|
411
|
-
preserve_model=self.preserve_model,
|
|
412
374
|
best_params=best_params[target_number] if best_params else None,
|
|
413
375
|
)
|
|
414
376
|
|
|
@@ -34,3 +34,4 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
34
34
|
LECRAPAUD_LOGFILE = os.getenv("LECRAPAUD_LOGFILE")
|
|
35
35
|
LECRAPAUD_LOCAL = os.getenv("LECRAPAUD_LOCAL", False)
|
|
36
36
|
LECRAPAUD_TABLE_PREFIX = os.getenv("LECRAPAUD_TABLE_PREFIX", "lecrapaud")
|
|
37
|
+
LECRAPAUD_OPTIMIZATION_BACKEND = os.getenv("LECRAPAUD_OPTIMIZATION_BACKEND", "ray").lower()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""add unique constraint to score
|
|
2
|
+
|
|
3
|
+
Revision ID: 07e303521594
|
|
4
|
+
Revises: 8b11c1ba982e
|
|
5
|
+
Create Date: 2025-10-25 06:35:57.950929
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Sequence, Union
|
|
10
|
+
|
|
11
|
+
from alembic import op
|
|
12
|
+
import sqlalchemy as sa
|
|
13
|
+
from lecrapaud.config import LECRAPAUD_TABLE_PREFIX
|
|
14
|
+
|
|
15
|
+
# revision identifiers, used by Alembic.
|
|
16
|
+
revision: str = "07e303521594"
|
|
17
|
+
down_revision: Union[str, None] = "8b11c1ba982e"
|
|
18
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
19
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def upgrade() -> None:
    """Allow at most one score row per model training.

    Adds the ``unique_score_per_model_training`` unique constraint on the
    ``model_training_id`` column of the prefixed ``scores`` table.
    """
    scores_table = f"{LECRAPAUD_TABLE_PREFIX}_scores"
    constrained_columns = ["model_training_id"]
    op.create_unique_constraint(
        "unique_score_per_model_training",
        scores_table,
        constrained_columns,
    )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def downgrade() -> None:
    """Drop the ``unique_score_per_model_training`` constraint again.

    Reverts ``upgrade`` by removing the unique constraint from the prefixed
    ``scores`` table.
    """
    scores_table = f"{LECRAPAUD_TABLE_PREFIX}_scores"
    op.drop_constraint(
        "unique_score_per_model_training",
        scores_table,
        type_="unique",
    )
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""merge score and model_trainings into model_selection_scores
|
|
2
|
+
|
|
3
|
+
Revision ID: 033e0f7eca4f
|
|
4
|
+
Revises: 07e303521594
|
|
5
|
+
Create Date: 2025-10-26 17:27:30.400473
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Sequence, Union
|
|
10
|
+
|
|
11
|
+
from alembic import op
|
|
12
|
+
import sqlalchemy as sa
|
|
13
|
+
from lecrapaud.config import LECRAPAUD_TABLE_PREFIX
|
|
14
|
+
|
|
15
|
+
# revision identifiers, used by Alembic.
|
|
16
|
+
revision: str = "033e0f7eca4f"
|
|
17
|
+
down_revision: Union[str, None] = "07e303521594"
|
|
18
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
19
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def upgrade() -> None:
    """Merge ``model_trainings`` and ``scores`` into ``model_selection_scores``.

    The migration runs three steps, each guarded on the tables that actually
    exist so it can be re-run after a partial failure:

    1. Create the merged table and its ``id`` index when the table is missing.
    2. Copy every training row, joined with its (optional) score row, into the
       merged table when both legacy tables are still present.
    3. Drop whichever legacy tables are still present.

    NOTE(review): the SQL uses backtick quoting, so this presumably targets
    MySQL/MariaDB, where DDL is not rolled back on failure -- confirm.
    """
    from sqlalchemy import inspect

    merged_table = f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores"
    trainings_table = f"{LECRAPAUD_TABLE_PREFIX}_model_trainings"
    scores_table = f"{LECRAPAUD_TABLE_PREFIX}_scores"

    # Snapshot of the schema before this migration touches anything.
    existing_tables = set(inspect(op.get_bind()).get_table_names())

    if merged_table not in existing_tables:
        op.create_table(
            merged_table,
            sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False),
            sa.Column(
                "created_at",
                sa.TIMESTAMP(timezone=True),
                server_default=sa.text("now()"),
                nullable=False,
            ),
            sa.Column(
                "updated_at",
                sa.TIMESTAMP(timezone=True),
                server_default=sa.text("now()"),
                nullable=False,
            ),
            sa.Column("best_params", sa.JSON(), nullable=True),
            sa.Column("model_path", sa.String(length=255), nullable=True),
            sa.Column("training_time", sa.Integer(), nullable=True),
            sa.Column("model_id", sa.BigInteger(), nullable=False),
            sa.Column("model_selection_id", sa.BigInteger(), nullable=False),
            sa.Column("eval_data_std", sa.Float(), nullable=True),
            sa.Column("rmse", sa.Float(), nullable=True),
            sa.Column("rmse_std_ratio", sa.Float(), nullable=True),
            sa.Column("mae", sa.Float(), nullable=True),
            sa.Column("mape", sa.Float(), nullable=True),
            sa.Column("mam", sa.Float(), nullable=True),
            sa.Column("mad", sa.Float(), nullable=True),
            sa.Column("mae_mam_ratio", sa.Float(), nullable=True),
            sa.Column("mae_mad_ratio", sa.Float(), nullable=True),
            sa.Column("r2", sa.Float(), nullable=True),
            sa.Column("logloss", sa.Float(), nullable=True),
            sa.Column("accuracy", sa.Float(), nullable=True),
            sa.Column("precision", sa.Float(), nullable=True),
            sa.Column("recall", sa.Float(), nullable=True),
            sa.Column("f1", sa.Float(), nullable=True),
            sa.Column("roc_auc", sa.Float(), nullable=True),
            sa.Column("avg_precision", sa.Float(), nullable=True),
            sa.Column("thresholds", sa.JSON(), nullable=True),
            sa.Column("precision_at_threshold", sa.Float(), nullable=True),
            sa.Column("recall_at_threshold", sa.Float(), nullable=True),
            sa.Column("f1_at_threshold", sa.Float(), nullable=True),
            sa.ForeignKeyConstraint(
                ["model_id"],
                [f"{LECRAPAUD_TABLE_PREFIX}_models.id"],
            ),
            sa.ForeignKeyConstraint(
                ["model_selection_id"],
                [f"{LECRAPAUD_TABLE_PREFIX}_model_selections.id"],
                ondelete="CASCADE",
            ),
            sa.PrimaryKeyConstraint("id"),
            sa.UniqueConstraint(
                "model_id",
                "model_selection_id",
                name="uq_model_selection_score_composite",
            ),
        )
        op.create_index(
            op.f("ix_model_selection_scores_id"),
            merged_table,
            ["id"],
            unique=False,
        )

    # The copy reads from both legacy tables, so it is only possible while
    # both still exist. LEFT JOIN keeps trainings that never got a score row.
    if trainings_table in existing_tables and scores_table in existing_tables:
        op.execute(
            f"""
            INSERT INTO {merged_table} (
                created_at, updated_at, best_params, model_path, training_time,
                model_id, model_selection_id,
                eval_data_std, rmse, rmse_std_ratio, mae, mape, mam, mad,
                mae_mam_ratio, mae_mad_ratio, r2, logloss, accuracy, `precision`,
                recall, f1, roc_auc, avg_precision, thresholds,
                precision_at_threshold, recall_at_threshold, f1_at_threshold
            )
            SELECT
                mt.created_at,
                mt.updated_at,
                mt.best_params,
                mt.model_path,
                COALESCE(mt.training_time, s.training_time) as training_time,
                mt.model_id,
                mt.model_selection_id,
                s.eval_data_std, s.rmse, s.rmse_std_ratio, s.mae, s.mape,
                s.mam, s.mad, s.mae_mam_ratio, s.mae_mad_ratio, s.r2,
                s.logloss, s.accuracy, s.`precision`, s.recall, s.f1,
                s.roc_auc, s.avg_precision, s.thresholds,
                s.precision_at_threshold, s.recall_at_threshold, s.f1_at_threshold
            FROM {trainings_table} mt
            LEFT JOIN {scores_table} s ON s.model_training_id = mt.id
            """
        )

    # ``scores`` holds a foreign key to ``model_trainings``, so it goes first.
    for legacy_table in (scores_table, trainings_table):
        if legacy_table in existing_tables:
            op.drop_table(legacy_table)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def downgrade() -> None:
    """Split ``model_selection_scores`` back into ``model_trainings`` and ``scores``.

    Recreates both legacy tables, copies every merged row into each of them
    (the merged row's ``id`` becomes the training ``id`` and the score's
    ``model_training_id``), then drops the merged table and its index.

    The merged table has no ``type`` column, so every restored score row is
    written with ``type = 'testset'``.
    """
    merged_table = f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores"
    trainings_table = f"{LECRAPAUD_TABLE_PREFIX}_model_trainings"
    scores_table = f"{LECRAPAUD_TABLE_PREFIX}_scores"

    # Recreate the legacy trainings table.
    op.create_table(
        trainings_table,
        sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False),
        sa.Column(
            "created_at",
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("best_params", sa.JSON(), nullable=True),
        sa.Column("model_path", sa.String(length=255), nullable=True),
        sa.Column("training_time", sa.Integer(), nullable=True),
        sa.Column("model_id", sa.BigInteger(), nullable=False),
        sa.Column("model_selection_id", sa.BigInteger(), nullable=False),
        sa.ForeignKeyConstraint(
            ["model_id"],
            [f"{LECRAPAUD_TABLE_PREFIX}_models.id"],
        ),
        sa.ForeignKeyConstraint(
            ["model_selection_id"],
            [f"{LECRAPAUD_TABLE_PREFIX}_model_selections.id"],
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "model_id", "model_selection_id", name="uq_model_training_composite"
        ),
    )
    op.create_index(
        op.f("ix_model_trainings_id"),
        trainings_table,
        ["id"],
        unique=False,
    )

    # Recreate the legacy scores table (one score row per training).
    op.create_table(
        scores_table,
        sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False),
        sa.Column(
            "created_at",
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("type", sa.String(length=50), nullable=False),
        sa.Column("training_time", sa.Integer(), nullable=True),
        sa.Column("eval_data_std", sa.Float(), nullable=True),
        sa.Column("rmse", sa.Float(), nullable=True),
        sa.Column("rmse_std_ratio", sa.Float(), nullable=True),
        sa.Column("mae", sa.Float(), nullable=True),
        sa.Column("mape", sa.Float(), nullable=True),
        sa.Column("mam", sa.Float(), nullable=True),
        sa.Column("mad", sa.Float(), nullable=True),
        sa.Column("mae_mam_ratio", sa.Float(), nullable=True),
        sa.Column("mae_mad_ratio", sa.Float(), nullable=True),
        sa.Column("r2", sa.Float(), nullable=True),
        sa.Column("logloss", sa.Float(), nullable=True),
        sa.Column("accuracy", sa.Float(), nullable=True),
        sa.Column("precision", sa.Float(), nullable=True),
        sa.Column("recall", sa.Float(), nullable=True),
        sa.Column("f1", sa.Float(), nullable=True),
        sa.Column("roc_auc", sa.Float(), nullable=True),
        sa.Column("avg_precision", sa.Float(), nullable=True),
        sa.Column("thresholds", sa.JSON(), nullable=True),
        sa.Column("precision_at_threshold", sa.Float(), nullable=True),
        sa.Column("recall_at_threshold", sa.Float(), nullable=True),
        sa.Column("f1_at_threshold", sa.Float(), nullable=True),
        sa.Column("model_training_id", sa.BigInteger(), nullable=False),
        sa.ForeignKeyConstraint(
            ["model_training_id"],
            [f"{trainings_table}.id"],
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint(
            "model_training_id", name="unique_score_per_model_training"
        ),
    )
    op.create_index(op.f("ix_scores_id"), scores_table, ["id"], unique=False)

    # Trainings first: the score rows inserted below reference these ids.
    op.execute(
        f"""
        INSERT INTO {trainings_table} (
            id, created_at, updated_at, best_params, model_path,
            training_time, model_id, model_selection_id
        )
        SELECT
            id, created_at, updated_at, best_params, model_path,
            training_time, model_id, model_selection_id
        FROM {merged_table}
        """
    )

    # `precision` is backtick-quoted in the SELECT list as well as in the
    # column list: it is a reserved word in MySQL and fails to parse unquoted.
    op.execute(
        f"""
        INSERT INTO {scores_table} (
            created_at, updated_at, type, training_time, eval_data_std,
            rmse, rmse_std_ratio, mae, mape, mam, mad, mae_mam_ratio,
            mae_mad_ratio, r2, logloss, accuracy, `precision`, recall,
            f1, roc_auc, avg_precision, thresholds, precision_at_threshold,
            recall_at_threshold, f1_at_threshold, model_training_id
        )
        SELECT
            created_at, updated_at, 'testset', training_time, eval_data_std,
            rmse, rmse_std_ratio, mae, mape, mam, mad, mae_mam_ratio,
            mae_mad_ratio, r2, logloss, accuracy, `precision`, recall,
            f1, roc_auc, avg_precision, thresholds, precision_at_threshold,
            recall_at_threshold, f1_at_threshold, id
        FROM {merged_table}
        """
    )

    op.drop_index(
        op.f("ix_model_selection_scores_id"),
        table_name=merged_table,
    )
    op.drop_table(merged_table)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""add number_of_targets and remove other fields from experiments
|
|
2
|
+
|
|
3
|
+
Revision ID: 0a8fb7826e9b
|
|
4
|
+
Revises: 033e0f7eca4f
|
|
5
|
+
Create Date: 2025-10-28 20:06:54.792631
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
from typing import Sequence, Union
|
|
9
|
+
|
|
10
|
+
from alembic import op
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from sqlalchemy.dialects import mysql
|
|
13
|
+
|
|
14
|
+
# revision identifiers, used by Alembic.
|
|
15
|
+
revision: str = '0a8fb7826e9b'
|
|
16
|
+
down_revision: Union[str, None] = '033e0f7eca4f'
|
|
17
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
18
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def upgrade() -> None:
    """Add ``number_of_targets`` to experiments and drop obsolete columns.

    Also renames the ``id`` index of the model-selection-scores table from
    ``ix_model_selection_scores_id`` to the table-name-based
    ``ix_<prefix>_model_selection_scores_id``.

    Table and index names are built from ``LECRAPAUD_TABLE_PREFIX`` rather
    than a hard-coded ``lecrapaud`` prefix, matching the earlier migrations;
    with the default prefix the generated names are unchanged.
    """
    # Imported locally because this migration module does not import the
    # prefix at file level.
    from lecrapaud.config import LECRAPAUD_TABLE_PREFIX

    experiments_table = f"{LECRAPAUD_TABLE_PREFIX}_experiments"
    scores_table = f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores"

    op.add_column(
        experiments_table,
        sa.Column("number_of_targets", sa.Integer(), nullable=True),
    )
    for obsolete_column in ("corr_threshold", "max_features", "percentile", "type"):
        op.drop_column(experiments_table, obsolete_column)

    # Rename the index by dropping the old name and creating the new one.
    op.drop_index(op.f("ix_model_selection_scores_id"), table_name=scores_table)
    op.create_index(
        op.f(f"ix_{LECRAPAUD_TABLE_PREFIX}_model_selection_scores_id"),
        scores_table,
        ["id"],
        unique=False,
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def downgrade() -> None:
    """Restore the dropped experiment columns and remove ``number_of_targets``.

    Also renames the ``id`` index of the model-selection-scores table back to
    ``ix_model_selection_scores_id``.

    Table and index names are built from ``LECRAPAUD_TABLE_PREFIX`` rather
    than a hard-coded ``lecrapaud`` prefix, matching the earlier migrations;
    with the default prefix the generated names are unchanged.
    """
    # Imported locally because this migration module does not import the
    # prefix at file level.
    from lecrapaud.config import LECRAPAUD_TABLE_PREFIX

    experiments_table = f"{LECRAPAUD_TABLE_PREFIX}_experiments"
    scores_table = f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores"

    # Rename the index back by dropping the new name and recreating the old.
    op.drop_index(
        op.f(f"ix_{LECRAPAUD_TABLE_PREFIX}_model_selection_scores_id"),
        table_name=scores_table,
    )
    op.create_index(
        op.f("ix_model_selection_scores_id"),
        scores_table,
        ["id"],
        unique=False,
    )

    # Re-add the columns removed by upgrade(); their former values are gone.
    restored_columns = (
        sa.Column("type", mysql.VARCHAR(length=50), nullable=False),
        sa.Column("percentile", mysql.FLOAT(), nullable=False),
        sa.Column(
            "max_features", mysql.INTEGER(), autoincrement=False, nullable=False
        ),
        sa.Column("corr_threshold", mysql.FLOAT(), nullable=False),
    )
    for column in restored_columns:
        op.add_column(experiments_table, column)

    op.drop_column(experiments_table, "number_of_targets")
|
|
@@ -4,9 +4,8 @@ from lecrapaud.db.models.feature_selection_rank import FeatureSelectionRank
|
|
|
4
4
|
from lecrapaud.db.models.feature_selection import FeatureSelection
|
|
5
5
|
from lecrapaud.db.models.feature import Feature
|
|
6
6
|
from lecrapaud.db.models.model_selection import ModelSelection
|
|
7
|
-
from lecrapaud.db.models.model_training import ModelTraining
|
|
8
7
|
from lecrapaud.db.models.model import Model
|
|
9
|
-
from lecrapaud.db.models.
|
|
8
|
+
from lecrapaud.db.models.model_selection_score import ModelSelectionScore
|
|
10
9
|
from lecrapaud.db.models.target import Target
|
|
11
10
|
|
|
12
11
|
__all__ = [
|
|
@@ -16,8 +15,7 @@ __all__ = [
|
|
|
16
15
|
'FeatureSelection',
|
|
17
16
|
'Feature',
|
|
18
17
|
'ModelSelection',
|
|
19
|
-
'ModelTraining',
|
|
20
18
|
'Model',
|
|
21
|
-
'
|
|
19
|
+
'ModelSelectionScore',
|
|
22
20
|
'Target',
|
|
23
21
|
]
|