lecrapaud 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of lecrapaud might be problematic.

lecrapaud/api.py CHANGED
@@ -165,6 +165,12 @@ class ExperimentEngine:
 
     def __init__(self, id: int = None, data: pd.DataFrame = None, **kwargs):
         """Initialize the experiment engine with either new or existing experiment."""
+        # Set all kwargs as instance attributes
+        if "models_idx" in kwargs:
+            kwargs["models_idx"] = normalize_models_idx(kwargs["models_idx"])
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
         if id:
             self.experiment = Experiment.get(id)
             kwargs.update(self.experiment.context)
@@ -180,12 +186,6 @@ class ExperimentEngine:
             )
         self.experiment = create_experiment(data=data, **kwargs)
 
-        # Set all kwargs as instance attributes
-        for key, value in kwargs.items():
-            if key == "models_idx":
-                value = normalize_models_idx(value)
-            setattr(self, key, value)
-
     def train(self, data, best_params=None):
         logger.info("Running training...")
 
@@ -309,12 +309,8 @@ class ExperimentEngine:
     def feature_engineering(self, data, for_training=True):
         app = FeatureEngineeringEngine(
             data=data,
-            columns_drop=getattr(self, "columns_drop", []),
-            columns_boolean=getattr(self, "columns_boolean", []),
-            columns_date=getattr(self, "columns_date", []),
-            columns_te_groupby=getattr(self, "columns_te_groupby", []),
-            columns_te_target=getattr(self, "columns_te_target", []),
-            for_training=getattr(self, "for_training", True),
+            experiment=self.experiment,
+            for_training=for_training,
         )
         data = app.run()
         return data
@@ -322,21 +318,7 @@ class ExperimentEngine:
     def preprocess_feature(self, data, for_training=True):
         app = PreprocessFeature(
             data=data,
-            experiment=getattr(self, "experiment", None),
-            time_series=getattr(self, "time_series", False),
-            date_column=getattr(self, "date_column", None),
-            group_column=getattr(self, "group_column", None),
-            val_size=getattr(self, "val_size", 0.2),
-            test_size=getattr(self, "test_size", 0.2),
-            columns_pca=getattr(self, "columns_pca", []),
-            pca_temporal=getattr(self, "pca_temporal", []),
-            pca_cross_sectional=getattr(self, "pca_cross_sectional", []),
-            columns_onehot=getattr(self, "columns_onehot", []),
-            columns_binary=getattr(self, "columns_binary", []),
-            columns_ordinal=getattr(self, "columns_ordinal", []),
-            columns_frequency=getattr(self, "columns_frequency", []),
-            target_numbers=getattr(self, "target_numbers", []),
-            target_clf=getattr(self, "target_clf", []),
+            experiment=self.experiment,
         )
         if for_training:
             train, val, test = app.run()
@@ -351,7 +333,6 @@ class ExperimentEngine:
                 train=train,
                 target_number=target_number,
                 experiment=self.experiment,
-                target_clf=self.target_clf,
             )
             app.run()
             self.experiment = Experiment.get(self.experiment.id)
@@ -368,14 +349,7 @@ class ExperimentEngine:
             train=train,
             val=val,
             test=test,
-            experiment=getattr(self, "experiment", None),
-            target_numbers=getattr(self, "target_numbers", []),
-            target_clf=getattr(self, "target_clf", []),
-            models_idx=getattr(self, "models_idx", []),
-            time_series=getattr(self, "time_series", False),
-            max_timesteps=getattr(self, "max_timesteps", 120),
-            date_column=getattr(self, "date_column", None),
-            group_column=getattr(self, "group_column", None),
+            experiment=self.experiment,
         )
         if for_training:
             data, reshaped_data = app.run()
@@ -390,25 +364,13 @@ class ExperimentEngine:
                 data=data,
                 reshaped_data=reshaped_data,
                 target_number=target_number,
-                experiment=getattr(self, "experiment", None),
-                target_clf=getattr(self, "target_clf", []),
-                models_idx=getattr(self, "models_idx", []),
-                time_series=getattr(self, "time_series", False),
-                date_column=getattr(self, "date_column", None),
-                group_column=getattr(self, "group_column", None),
-                target_clf_thresholds=getattr(self, "target_clf_thresholds", {}),
+                experiment=self.experiment,
             )
             if best_params and target_number not in best_params.keys():
                 raise ValueError(
                     f"Target {target_number} not found in best_params passed as argument"
                 )
             app.run(
-                self.experiment_name,
-                perform_hyperopt=self.perform_hyperopt,
-                number_of_trials=self.number_of_trials,
-                perform_crossval=self.perform_crossval,
-                plot=self.plot,
-                preserve_model=self.preserve_model,
                 best_params=best_params[target_number] if best_params else None,
             )
 
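The constructor change above moves the kwargs-to-attribute assignment ahead of experiment lookup/creation and normalizes models_idx up front. A self-contained toy sketch of that ordering, with a stand-in normalize_models_idx (the real helper's behavior is not shown in this diff):

```python
# Toy illustration of the new __init__ ordering: kwargs become instance
# attributes (with models_idx normalized) before any experiment is looked up
# or created. normalize_models_idx here is a stand-in, not lecrapaud's code.
def normalize_models_idx(value):
    return sorted({int(i) for i in value})

class EngineSketch:
    def __init__(self, id=None, data=None, **kwargs):
        if "models_idx" in kwargs:
            kwargs["models_idx"] = normalize_models_idx(kwargs["models_idx"])
        for key, value in kwargs.items():
            setattr(self, key, value)
        # ...Experiment.get(id) / create_experiment(...) would follow here,
        # with self.models_idx etc. already available.

engine = EngineSketch(models_idx=["2", 0, 2], time_series=False)
print(engine.models_idx, engine.time_series)  # [0, 2] False
```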
lecrapaud/config.py CHANGED
@@ -34,3 +34,4 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 LECRAPAUD_LOGFILE = os.getenv("LECRAPAUD_LOGFILE")
 LECRAPAUD_LOCAL = os.getenv("LECRAPAUD_LOCAL", False)
 LECRAPAUD_TABLE_PREFIX = os.getenv("LECRAPAUD_TABLE_PREFIX", "lecrapaud")
+LECRAPAUD_OPTIMIZATION_BACKEND = os.getenv("LECRAPAUD_OPTIMIZATION_BACKEND", "ray").lower()
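The new setting is read once at import time, defaults to "ray", and is lower-cased. A minimal sketch of overriding it through the environment (assuming lecrapaud is installed; values other than "ray" are not documented in this diff):

```python
import os

# Must be set before lecrapaud.config is imported; the value is lower-cased.
os.environ["LECRAPAUD_OPTIMIZATION_BACKEND"] = "RAY"

from lecrapaud.config import LECRAPAUD_OPTIMIZATION_BACKEND

print(LECRAPAUD_OPTIMIZATION_BACKEND)  # -> "ray"
```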
@@ -0,0 +1,39 @@
+"""add unique constraint to score
+
+Revision ID: 07e303521594
+Revises: 8b11c1ba982e
+Create Date: 2025-10-25 06:35:57.950929
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from lecrapaud.config import LECRAPAUD_TABLE_PREFIX
+
+# revision identifiers, used by Alembic.
+revision: str = "07e303521594"
+down_revision: Union[str, None] = "8b11c1ba982e"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_unique_constraint(
+        "unique_score_per_model_training",
+        f"{LECRAPAUD_TABLE_PREFIX}_scores",
+        ["model_training_id"],
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_constraint(
+        "unique_score_per_model_training",
+        f"{LECRAPAUD_TABLE_PREFIX}_scores",
+        type_="unique",
+    )
+    # ### end Alembic commands ###
@@ -0,0 +1,264 @@
+"""merge score and model_trainings into model_selection_scores
+
+Revision ID: 033e0f7eca4f
+Revises: 07e303521594
+Create Date: 2025-10-26 17:27:30.400473
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from lecrapaud.config import LECRAPAUD_TABLE_PREFIX
+
+# revision identifiers, used by Alembic.
+revision: str = "033e0f7eca4f"
+down_revision: Union[str, None] = "07e303521594"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    # Check if table exists using inspector
+    from sqlalchemy import inspect
+    inspector = inspect(op.get_bind())
+    existing_tables = inspector.get_table_names()
+
+    if f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores" not in existing_tables:
+        op.create_table(
+            f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores",
+            sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False),
+            sa.Column(
+                "created_at",
+                sa.TIMESTAMP(timezone=True),
+                server_default=sa.text("now()"),
+                nullable=False,
+            ),
+            sa.Column(
+                "updated_at",
+                sa.TIMESTAMP(timezone=True),
+                server_default=sa.text("now()"),
+                nullable=False,
+            ),
+            sa.Column("best_params", sa.JSON(), nullable=True),
+            sa.Column("model_path", sa.String(length=255), nullable=True),
+            sa.Column("training_time", sa.Integer(), nullable=True),
+            sa.Column("model_id", sa.BigInteger(), nullable=False),
+            sa.Column("model_selection_id", sa.BigInteger(), nullable=False),
+            sa.Column("eval_data_std", sa.Float(), nullable=True),
+            sa.Column("rmse", sa.Float(), nullable=True),
+            sa.Column("rmse_std_ratio", sa.Float(), nullable=True),
+            sa.Column("mae", sa.Float(), nullable=True),
+            sa.Column("mape", sa.Float(), nullable=True),
+            sa.Column("mam", sa.Float(), nullable=True),
+            sa.Column("mad", sa.Float(), nullable=True),
+            sa.Column("mae_mam_ratio", sa.Float(), nullable=True),
+            sa.Column("mae_mad_ratio", sa.Float(), nullable=True),
+            sa.Column("r2", sa.Float(), nullable=True),
+            sa.Column("logloss", sa.Float(), nullable=True),
+            sa.Column("accuracy", sa.Float(), nullable=True),
+            sa.Column("precision", sa.Float(), nullable=True),
+            sa.Column("recall", sa.Float(), nullable=True),
+            sa.Column("f1", sa.Float(), nullable=True),
+            sa.Column("roc_auc", sa.Float(), nullable=True),
+            sa.Column("avg_precision", sa.Float(), nullable=True),
+            sa.Column("thresholds", sa.JSON(), nullable=True),
+            sa.Column("precision_at_threshold", sa.Float(), nullable=True),
+            sa.Column("recall_at_threshold", sa.Float(), nullable=True),
+            sa.Column("f1_at_threshold", sa.Float(), nullable=True),
+            sa.ForeignKeyConstraint(
+                ["model_id"],
+                [f"{LECRAPAUD_TABLE_PREFIX}_models.id"],
+            ),
+            sa.ForeignKeyConstraint(
+                ["model_selection_id"],
+                [f"{LECRAPAUD_TABLE_PREFIX}_model_selections.id"],
+                ondelete="CASCADE",
+            ),
+            sa.PrimaryKeyConstraint("id"),
+            sa.UniqueConstraint(
+                "model_id",
+                "model_selection_id",
+                name="uq_model_selection_score_composite",
+            ),
+        )
+        op.create_index(
+            op.f("ix_model_selection_scores_id"),
+            f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores",
+            ["id"],
+            unique=False,
+        )
+
+    # Migrate existing data from model_trainings and scores to model_selection_scores
+    op.execute(
+        f"""
+        INSERT INTO {LECRAPAUD_TABLE_PREFIX}_model_selection_scores (
+            created_at, updated_at, best_params, model_path, training_time,
+            model_id, model_selection_id,
+            eval_data_std, rmse, rmse_std_ratio, mae, mape, mam, mad,
+            mae_mam_ratio, mae_mad_ratio, r2, logloss, accuracy, `precision`,
+            recall, f1, roc_auc, avg_precision, thresholds,
+            precision_at_threshold, recall_at_threshold, f1_at_threshold
+        )
+        SELECT
+            mt.created_at,
+            mt.updated_at,
+            mt.best_params,
+            mt.model_path,
+            COALESCE(mt.training_time, s.training_time) as training_time,
+            mt.model_id,
+            mt.model_selection_id,
+            s.eval_data_std, s.rmse, s.rmse_std_ratio, s.mae, s.mape,
+            s.mam, s.mad, s.mae_mam_ratio, s.mae_mad_ratio, s.r2,
+            s.logloss, s.accuracy, s.`precision`, s.recall, s.f1,
+            s.roc_auc, s.avg_precision, s.thresholds,
+            s.precision_at_threshold, s.recall_at_threshold, s.f1_at_threshold
+        FROM {LECRAPAUD_TABLE_PREFIX}_model_trainings mt
+        LEFT JOIN {LECRAPAUD_TABLE_PREFIX}_scores s ON s.model_training_id = mt.id
+        """
+    )
+
+    # Drop the old tables
+    op.drop_table(f"{LECRAPAUD_TABLE_PREFIX}_scores")
+    op.drop_table(f"{LECRAPAUD_TABLE_PREFIX}_model_trainings")
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    # Recreate the old tables
+    op.create_table(
+        f"{LECRAPAUD_TABLE_PREFIX}_model_trainings",
+        sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("best_params", sa.JSON(), nullable=True),
+        sa.Column("model_path", sa.String(length=255), nullable=True),
+        sa.Column("training_time", sa.Integer(), nullable=True),
+        sa.Column("model_id", sa.BigInteger(), nullable=False),
+        sa.Column("model_selection_id", sa.BigInteger(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["model_id"],
+            [f"{LECRAPAUD_TABLE_PREFIX}_models.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["model_selection_id"],
+            [f"{LECRAPAUD_TABLE_PREFIX}_model_selections.id"],
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint(
+            "model_id", "model_selection_id", name="uq_model_training_composite"
+        ),
+    )
+    op.create_index(
+        op.f("ix_model_trainings_id"),
+        f"{LECRAPAUD_TABLE_PREFIX}_model_trainings",
+        ["id"],
+        unique=False,
+    )
+
+    op.create_table(
+        f"{LECRAPAUD_TABLE_PREFIX}_scores",
+        sa.Column("id", sa.BigInteger(), autoincrement=True, nullable=False),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column(
+            "updated_at",
+            sa.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("type", sa.String(length=50), nullable=False),
+        sa.Column("training_time", sa.Integer(), nullable=True),
+        sa.Column("eval_data_std", sa.Float(), nullable=True),
+        sa.Column("rmse", sa.Float(), nullable=True),
+        sa.Column("rmse_std_ratio", sa.Float(), nullable=True),
+        sa.Column("mae", sa.Float(), nullable=True),
+        sa.Column("mape", sa.Float(), nullable=True),
+        sa.Column("mam", sa.Float(), nullable=True),
+        sa.Column("mad", sa.Float(), nullable=True),
+        sa.Column("mae_mam_ratio", sa.Float(), nullable=True),
+        sa.Column("mae_mad_ratio", sa.Float(), nullable=True),
+        sa.Column("r2", sa.Float(), nullable=True),
+        sa.Column("logloss", sa.Float(), nullable=True),
+        sa.Column("accuracy", sa.Float(), nullable=True),
+        sa.Column("precision", sa.Float(), nullable=True),
+        sa.Column("recall", sa.Float(), nullable=True),
+        sa.Column("f1", sa.Float(), nullable=True),
+        sa.Column("roc_auc", sa.Float(), nullable=True),
+        sa.Column("avg_precision", sa.Float(), nullable=True),
+        sa.Column("thresholds", sa.JSON(), nullable=True),
+        sa.Column("precision_at_threshold", sa.Float(), nullable=True),
+        sa.Column("recall_at_threshold", sa.Float(), nullable=True),
+        sa.Column("f1_at_threshold", sa.Float(), nullable=True),
+        sa.Column("model_training_id", sa.BigInteger(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["model_training_id"],
+            [f"{LECRAPAUD_TABLE_PREFIX}_model_trainings.id"],
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint(
+            "model_training_id", name="unique_score_per_model_training"
+        ),
+    )
+    op.create_index(
+        op.f("ix_scores_id"), f"{LECRAPAUD_TABLE_PREFIX}_scores", ["id"], unique=False
+    )
+
+    # Migrate data back (note: we'll lose the type column data, defaulting to 'testset')
+    op.execute(
+        f"""
+        INSERT INTO {LECRAPAUD_TABLE_PREFIX}_model_trainings (
+            id, created_at, updated_at, best_params, model_path,
+            training_time, model_id, model_selection_id
+        )
+        SELECT
+            id, created_at, updated_at, best_params, model_path,
+            training_time, model_id, model_selection_id
+        FROM {LECRAPAUD_TABLE_PREFIX}_model_selection_scores
+        """
+    )
+
+    op.execute(
+        f"""
+        INSERT INTO {LECRAPAUD_TABLE_PREFIX}_scores (
+            created_at, updated_at, type, training_time, eval_data_std,
+            rmse, rmse_std_ratio, mae, mape, mam, mad, mae_mam_ratio,
+            mae_mad_ratio, r2, logloss, accuracy, `precision`, recall,
+            f1, roc_auc, avg_precision, thresholds, precision_at_threshold,
+            recall_at_threshold, f1_at_threshold, model_training_id
+        )
+        SELECT
+            created_at, updated_at, 'testset', training_time, eval_data_std,
+            rmse, rmse_std_ratio, mae, mape, mam, mad, mae_mam_ratio,
+            mae_mad_ratio, r2, logloss, accuracy, precision, recall,
+            f1, roc_auc, avg_precision, thresholds, precision_at_threshold,
+            recall_at_threshold, f1_at_threshold, id
+        FROM {LECRAPAUD_TABLE_PREFIX}_model_selection_scores
+        """
+    )
+
+    op.drop_index(
+        op.f("ix_model_selection_scores_id"),
+        table_name=f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores",
+    )
+    op.drop_table(f"{LECRAPAUD_TABLE_PREFIX}_model_selection_scores")
+    # ### end Alembic commands ###
@@ -0,0 +1,42 @@
+"""add number_of_targets and remove other fields from experiments
+
+Revision ID: 0a8fb7826e9b
+Revises: 033e0f7eca4f
+Create Date: 2025-10-28 20:06:54.792631
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+
+# revision identifiers, used by Alembic.
+revision: str = '0a8fb7826e9b'
+down_revision: Union[str, None] = '033e0f7eca4f'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lecrapaud_experiments', sa.Column('number_of_targets', sa.Integer(), nullable=True))
+    op.drop_column('lecrapaud_experiments', 'corr_threshold')
+    op.drop_column('lecrapaud_experiments', 'max_features')
+    op.drop_column('lecrapaud_experiments', 'percentile')
+    op.drop_column('lecrapaud_experiments', 'type')
+    op.drop_index(op.f('ix_model_selection_scores_id'), table_name='lecrapaud_model_selection_scores')
+    op.create_index(op.f('ix_lecrapaud_model_selection_scores_id'), 'lecrapaud_model_selection_scores', ['id'], unique=False)
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f('ix_lecrapaud_model_selection_scores_id'), table_name='lecrapaud_model_selection_scores')
+    op.create_index(op.f('ix_model_selection_scores_id'), 'lecrapaud_model_selection_scores', ['id'], unique=False)
+    op.add_column('lecrapaud_experiments', sa.Column('type', mysql.VARCHAR(length=50), nullable=False))
+    op.add_column('lecrapaud_experiments', sa.Column('percentile', mysql.FLOAT(), nullable=False))
+    op.add_column('lecrapaud_experiments', sa.Column('max_features', mysql.INTEGER(), autoincrement=False, nullable=False))
+    op.add_column('lecrapaud_experiments', sa.Column('corr_threshold', mysql.FLOAT(), nullable=False))
+    op.drop_column('lecrapaud_experiments', 'number_of_targets')
+    # ### end Alembic commands ###
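The three new revisions form a linear chain (8b11c1ba982e → 07e303521594 → 033e0f7eca4f → 0a8fb7826e9b). A minimal sketch for applying them with Alembic's Python API; the "alembic.ini" path is an assumption and should point at the migration environment shipped with lecrapaud:

```python
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")  # assumed location of the Alembic config
command.upgrade(cfg, "head")  # applies 07e303521594, 033e0f7eca4f, 0a8fb7826e9b
```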
@@ -4,9 +4,8 @@ from lecrapaud.db.models.feature_selection_rank import FeatureSelectionRank
 from lecrapaud.db.models.feature_selection import FeatureSelection
 from lecrapaud.db.models.feature import Feature
 from lecrapaud.db.models.model_selection import ModelSelection
-from lecrapaud.db.models.model_training import ModelTraining
 from lecrapaud.db.models.model import Model
-from lecrapaud.db.models.score import Score
+from lecrapaud.db.models.model_selection_score import ModelSelectionScore
 from lecrapaud.db.models.target import Target
 
 __all__ = [
@@ -16,8 +15,7 @@ __all__ = [
     'FeatureSelection',
     'Feature',
     'ModelSelection',
-    'ModelTraining',
     'Model',
-    'Score',
+    'ModelSelectionScore',
     'Target',
 ]
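For downstream code, the removed ModelTraining and Score models are replaced by the merged ModelSelectionScore, which is now exported from the models package:

```python
# Before 0.20.1:
#   from lecrapaud.db.models import ModelTraining, Score
# From 0.20.1 on, the merged model replaces both:
from lecrapaud.db.models import ModelSelectionScore
```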