lecrapaud 0.17.0__tar.gz → 0.18.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of lecrapaud might be problematic.

Files changed (46)
  1. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/PKG-INFO +1 -1
  2. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/api.py +62 -85
  3. lecrapaud-0.18.0/lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py +30 -0
  4. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/experiment.py +52 -39
  5. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/model_selection.py +1 -0
  6. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/feature_engineering.py +2 -2
  7. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/model_selection.py +12 -0
  8. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/pyproject.toml +1 -1
  9. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/LICENSE +0 -0
  10. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/README.md +0 -0
  11. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/__init__.py +0 -0
  12. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/config.py +0 -0
  13. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/__init__.py +0 -0
  14. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/README +0 -0
  15. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/env.py +0 -0
  16. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/script.py.mako +0 -0
  17. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  18. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  19. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  20. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  21. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic.ini +0 -0
  22. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/__init__.py +0 -0
  23. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/base.py +0 -0
  24. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/feature.py +0 -0
  25. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/feature_selection.py +0 -0
  26. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  27. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/model.py +0 -0
  28. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/model_training.py +0 -0
  29. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/score.py +0 -0
  30. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/target.py +0 -0
  31. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/utils.py +0 -0
  32. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/session.py +0 -0
  33. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/directories.py +0 -0
  34. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/experiment.py +0 -0
  35. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/feature_selection.py +0 -0
  36. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/integrations/openai_integration.py +0 -0
  37. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/jobs/__init__.py +0 -0
  38. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/jobs/config.py +0 -0
  39. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/jobs/scheduler.py +0 -0
  40. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/jobs/tasks.py +0 -0
  41. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
  42. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
  43. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
  44. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
  45. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/search_space.py +0 -0
  46. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/utils.py +0 -0
--- lecrapaud-0.17.0/PKG-INFO
+++ lecrapaud-0.18.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.17.0
+Version: 0.18.0
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet
--- lecrapaud-0.17.0/lecrapaud/api.py
+++ lecrapaud-0.18.0/lecrapaud/api.py
@@ -109,58 +109,35 @@ class LeCrapaud:
         Returns:
             dict: Dictionary containing experiment names as keys and their scores as values
         """
-        from lecrapaud.db import SessionLocal
-        from sqlalchemy.orm import joinedload
+        # Get all experiments with the given name pattern
+        experiments = self.list_experiments(name=name)
 
-        db = SessionLocal()
-        try:
-            # Get all experiments with the given name pattern
-            experiments = (
-                db.query(Experiment)
-                .options(
-                    joinedload(Experiment.model_selections).joinedload(
-                        ModelSelection.scores
-                    )
-                )
-                .filter(Experiment.name.ilike(f"%{name}%"))
-                .all()
-            )
+        if not experiments:
+            return {"error": f"No experiments found with name containing '{name}'"}
 
-            if not experiments:
-                return {"error": f"No experiments found with name containing '{name}'"}
-
-            comparison = {}
-
-            for exp in experiments:
-                scores = {
-                    "rmse": exp.avg_rmse,
-                    "logloss": exp.avg_logloss,
-                    "accuracy": None,
-                    "f1": None,
-                    "roc_auc": None,
-                }
-
-                # Get classification metrics from the first model selection with scores
-                for model_sel in exp.model_selections:
-                    if model_sel.scores:
-                        for score in model_sel.scores:
-                            if score.type == "validation":  # Use validation scores
-                                if score.accuracy is not None:
-                                    scores["accuracy"] = score.accuracy
-                                if score.f1 is not None:
-                                    scores["f1"] = score.f1
-                                if score.roc_auc is not None:
-                                    scores["roc_auc"] = score.roc_auc
-                        break
-
-                comparison[exp.name] = scores
-
-            return comparison
+        comparison = {}
 
-        except Exception as e:
-            return {"error": f"Error comparing experiment scores: {str(e)}"}
-        finally:
-            db.close()
+        for exp in experiments:
+            for model_sel in exp.experiment.model_selections:
+
+                if model_sel.best_score:
+
+                    scores = {
+                        "rmse": model_sel.best_score["rmse"],
+                        "logloss": model_sel.best_score["logloss"],
+                        "accuracy": model_sel.best_score["accuracy"],
+                        "f1": model_sel.best_score["f1"],
+                        "roc_auc": model_sel.best_score["roc_auc"],
+                    }
+                    target_name = model_sel.target.name
+
+                    comparison[exp.experiment.name][target_name] = scores
+                else:
+                    logger.warning(
+                        f"No best score found for experiment {exp.experiment.name} and target {model_sel.target.name}"
+                    )
+
+        return comparison
 
     def list_experiments(
         self, name: str = None, limit: int = 1000
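Note on the rewritten loop above: the release assigns into comparison[exp.experiment.name][target_name] without first creating the inner per-experiment dict, so the first assignment for an experiment can raise KeyError. Below is a minimal standalone sketch of the same best_score-based comparison, with a setdefault() guard added; the guard and the function name are our additions, not part of the package.

    # Hypothetical sketch of the 0.18.0 comparison logic, not the packaged code.
    # setdefault() ensures the per-experiment dict exists before assignment.
    def compare_best_scores(experiments) -> dict:
        comparison: dict = {}
        for exp in experiments:
            for model_sel in exp.experiment.model_selections:
                if not model_sel.best_score:
                    continue  # the released code logs a warning here instead
                metrics = {
                    key: model_sel.best_score.get(key)
                    for key in ("rmse", "logloss", "accuracy", "f1", "roc_auc")
                }
                comparison.setdefault(exp.experiment.name, {})[
                    model_sel.target.name
                ] = metrics
        return comparison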
@@ -331,12 +308,12 @@ class ExperimentEngine:
     def feature_engineering(self, data, for_training=True):
         app = FeatureEngineeringEngine(
             data=data,
-            columns_drop=self.columns_drop,
-            columns_boolean=self.columns_boolean,
-            columns_date=self.columns_date,
-            columns_te_groupby=self.columns_te_groupby,
-            columns_te_target=self.columns_te_target,
-            for_training=for_training,
+            columns_drop=getattr(self, "columns_drop", []),
+            columns_boolean=getattr(self, "columns_boolean", []),
+            columns_date=getattr(self, "columns_date", []),
+            columns_te_groupby=getattr(self, "columns_te_groupby", []),
+            columns_te_target=getattr(self, "columns_te_target", []),
+            for_training=getattr(self, "for_training", True),
         )
         data = app.run()
         return data
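This hunk, and the three that follow, replace direct attribute access with getattr(...) and a default, so an ExperimentEngine missing an optional setting no longer raises AttributeError. A minimal illustration of the difference, using a hypothetical class rather than the real engine:

    # Toy class, only to illustrate the getattr-with-default pattern.
    class Engine:
        pass

    engine = Engine()
    try:
        engine.columns_drop  # AttributeError: the attribute was never set
    except AttributeError:
        pass
    assert getattr(engine, "columns_drop", []) == []  # falls back to the default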
@@ -344,21 +321,21 @@ class ExperimentEngine:
     def preprocess_feature(self, data, for_training=True):
         app = PreprocessFeature(
             data=data,
-            experiment=self.experiment,
-            time_series=self.time_series,
-            date_column=self.date_column,
-            group_column=self.group_column,
-            val_size=self.val_size,
-            test_size=self.test_size,
-            columns_pca=self.columns_pca,
-            pca_temporal=self.pca_temporal,
-            pca_cross_sectional=self.pca_cross_sectional,
-            columns_onehot=self.columns_onehot,
-            columns_binary=self.columns_binary,
-            columns_frequency=self.columns_frequency,
-            columns_ordinal=self.columns_ordinal,
-            target_numbers=self.target_numbers,
-            target_clf=self.target_clf,
+            experiment=getattr(self, "experiment", None),
+            time_series=getattr(self, "time_series", False),
+            date_column=getattr(self, "date_column", None),
+            group_column=getattr(self, "group_column", None),
+            val_size=getattr(self, "val_size", 0.2),
+            test_size=getattr(self, "test_size", 0.2),
+            columns_pca=getattr(self, "columns_pca", []),
+            pca_temporal=getattr(self, "pca_temporal", []),
+            pca_cross_sectional=getattr(self, "pca_cross_sectional", []),
+            columns_onehot=getattr(self, "columns_onehot", []),
+            columns_binary=getattr(self, "columns_binary", []),
+            columns_ordinal=getattr(self, "columns_ordinal", []),
+            columns_frequency=getattr(self, "columns_frequency", []),
+            target_numbers=getattr(self, "target_numbers", []),
+            target_clf=getattr(self, "target_clf", []),
         )
         if for_training:
             train, val, test = app.run()
@@ -390,14 +367,14 @@ class ExperimentEngine:
             train=train,
             val=val,
             test=test,
-            experiment=self.experiment,
-            target_numbers=self.target_numbers,
-            target_clf=self.target_clf,
-            models_idx=self.models_idx,
-            time_series=self.time_series,
-            max_timesteps=self.max_timesteps,
-            date_column=self.date_column,
-            group_column=self.group_column,
+            experiment=getattr(self, "experiment", None),
+            target_numbers=getattr(self, "target_numbers", []),
+            target_clf=getattr(self, "target_clf", []),
+            models_idx=getattr(self, "models_idx", []),
+            time_series=getattr(self, "time_series", False),
+            max_timesteps=getattr(self, "max_timesteps", 120),
+            date_column=getattr(self, "date_column", None),
+            group_column=getattr(self, "group_column", None),
         )
         if for_training:
             data, reshaped_data = app.run()
@@ -412,13 +389,13 @@ class ExperimentEngine:
             data=data,
             reshaped_data=reshaped_data,
             target_number=target_number,
-            experiment=self.experiment,
-            target_clf=self.target_clf,
-            models_idx=self.models_idx,
-            time_series=self.time_series,
-            date_column=self.date_column,
-            group_column=self.group_column,
-            target_clf_thresholds=self.target_clf_thresholds,
+            experiment=getattr(self, "experiment", None),
+            target_clf=getattr(self, "target_clf", []),
+            models_idx=getattr(self, "models_idx", []),
+            time_series=getattr(self, "time_series", False),
+            date_column=getattr(self, "date_column", None),
+            group_column=getattr(self, "group_column", None),
+            target_clf_thresholds=getattr(self, "target_clf_thresholds", {}),
         )
         if best_params and target_number not in best_params.keys():
             raise ValueError(
--- /dev/null
+++ lecrapaud-0.18.0/lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py
@@ -0,0 +1,30 @@
+"""add best_score to model selection
+
+Revision ID: 7ed9963e732f
+Revises: 72aa496ca65b
+Create Date: 2025-08-25 14:34:58.866912
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '7ed9963e732f'
+down_revision: Union[str, None] = '72aa496ca65b'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lecrapaud_model_selections', sa.Column('best_score', sa.JSON(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('lecrapaud_model_selections', 'best_score')
+    # ### end Alembic commands ###
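For the new column to exist, this revision must be applied to the database. A minimal sketch using Alembic's Python API, assuming the packaged alembic.ini is reachable at the path shown; the CLI equivalent is alembic upgrade head run from the directory containing that config.

    # Sketch only: the config path is an assumption about your deployment.
    from alembic import command
    from alembic.config import Config

    cfg = Config("lecrapaud/db/alembic.ini")
    command.upgrade(cfg, "7ed9963e732f")  # or "head" for all pending revisions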
--- lecrapaud-0.17.0/lecrapaud/db/models/experiment.py
+++ lecrapaud-0.18.0/lecrapaud/db/models/experiment.py
@@ -25,6 +25,7 @@ from lecrapaud.db.models.score import Score
 
 from lecrapaud.db.models.base import Base, with_db
 from lecrapaud.db.models.utils import create_association_table
+from lecrapaud.utils import logger
 
 # jointures
 lecrapaud_experiment_target_association = create_association_table(
@@ -241,7 +242,8 @@ class Experiment(Base):
         # This ensures we're comparing apples to apples by normalizing the scores
         experiments = db.query(cls).filter(cls.name.ilike(f"%{name}%")).all()
         if not experiments:
-            raise ValueError(f"No experiments found with the given name: {name}")
+            logger.error(f"No experiments found with the given name: {name}")
+            return None
 
         # Get all scores
         rmse_scores = [e.avg_rmse for e in experiments if e.avg_rmse is not None]
@@ -250,9 +252,10 @@ class Experiment(Base):
         ]
 
         if not rmse_scores or not logloss_scores:
-            raise ValueError(
+            logger.error(
                 "No experiments found with both RMSE and LogLoss scores. Maybe try with only one metric."
             )
+            return None
 
         # Normalize scores (subtract min and divide by range)
         min_rmse = min(rmse_scores)
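Both hunks above change the failure mode from raise ValueError(...) to logger.error(...) plus return None, so callers that previously caught the exception must now check for None. A hypothetical helper mirroring the new contract:

    import logging

    logger = logging.getLogger("lecrapaud")

    # Hypothetical helper, not the packaged method, showing the new contract.
    def find_experiments(names: list[str], name: str) -> list[str] | None:
        matches = [n for n in names if name in n]
        if not matches:
            logger.error(f"No experiments found with the given name: {name}")
            return None  # 0.17.0 raised ValueError at this point
        return matches

    result = find_experiments(["exp_a", "exp_b"], "exp_c")
    if result is None:
        ...  # callers now branch on None instead of catching ValueError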
@@ -306,80 +309,90 @@ class Experiment(Base):
     def best_score(self, target_number: int) -> dict:
         """
         Returns the scores for the best model of the specified target.
-
+
         Args:
             target_number (int): The target number to get scores for
-
+
         Returns:
             dict: A dictionary containing the experiment name, target number, and the best model's scores
         """
         # Find the target
         target_name = f"TARGET_{target_number}"
         target = next((t for t in self.targets if t.name == target_name), None)
-
+
         if not target:
             return {
-                'experiment_name': self.name,
-                'target_number': target_number,
-                'error': f'Target {target_name} not found in this experiment',
-                'scores': {}
+                "experiment_name": self.name,
+                "target_number": target_number,
+                "error": f"Target {target_name} not found in this experiment",
+                "scores": {},
             }
-
+
         # Find the best model selection for this target
         best_model_selection = next(
-            (ms for ms in self.model_selections if ms.target_id == target.id),
-            None
+            (ms for ms in self.model_selections if ms.target_id == target.id), None
         )
-
+
         if not best_model_selection or not best_model_selection.model_trainings:
             return {
-                'experiment_name': self.name,
-                'target_number': target_number,
-                'error': 'No model found for this target',
-                'scores': {}
+                "experiment_name": self.name,
+                "target_number": target_number,
+                "error": "No model found for this target",
+                "scores": {},
             }
-
+
         # Get the best model training (assuming the first one is the best)
         best_training = best_model_selection.model_trainings[0]
-
+
         # Get the validation score for this training
-        validation_scores = [s for s in best_training.score if s.type == 'validation']
-
+        validation_scores = [s for s in best_training.score if s.type == "validation"]
+
         if not validation_scores:
             return {
-                'experiment_name': self.name,
-                'target_number': target_number,
-                'error': 'No validation scores found for the best model',
-                'scores': {}
+                "experiment_name": self.name,
+                "target_number": target_number,
+                "error": "No validation scores found for the best model",
+                "scores": {},
             }
-
+
         # Get all available metrics from the first validation score
         score = validation_scores[0]
         available_metrics = [
-            'rmse', 'mae', 'r2', 'logloss', 'accuracy',
-            'precision', 'recall', 'f1', 'roc_auc'
+            "rmse",
+            "mae",
+            "r2",
+            "logloss",
+            "accuracy",
+            "precision",
+            "recall",
+            "f1",
+            "roc_auc",
         ]
-
+
         scores = {}
         for metric in available_metrics:
             value = getattr(score, metric, None)
             if value is not None:
                 scores[metric] = value
-
+
         # Get the model info
         model_info = {
-            'model_type': best_training.model.model_type if best_training.model else 'unknown',
-            'model_name': best_training.model.name if best_training.model else 'unknown',
-            'training_time_seconds': best_training.training_time
+            "model_type": (
+                best_training.model.model_type if best_training.model else "unknown"
+            ),
+            "model_name": (
+                best_training.model.name if best_training.model else "unknown"
+            ),
+            "training_time_seconds": best_training.training_time,
         }
-
+
         return {
-            'experiment_name': self.name,
-            'target_number': target_number,
-            'model': model_info,
-            'scores': scores
+            "experiment_name": self.name,
+            "target_number": target_number,
+            "model": model_info,
+            "scores": scores,
        }
-
+
     def get_features(self, target_number: int):
         targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
         if targets:
--- lecrapaud-0.17.0/lecrapaud/db/models/model_selection.py
+++ lecrapaud-0.18.0/lecrapaud/db/models/model_selection.py
@@ -36,6 +36,7 @@ class ModelSelection(Base):
     )
     best_model_params = Column(JSON)
     best_thresholds = Column(JSON)
+    best_score = Column(JSON)
     best_model_path = Column(String(255))
     best_model_id = Column(
         BigInteger,
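Because best_score is a plain JSON column, it round-trips as a Python dict (or None while unset). A sketch of reading it back, using the ModelSelection.get accessor that appears in the model_selection.py hunk further down; the record id and stored keys are assumptions.

    from lecrapaud.db.models.model_selection import ModelSelection

    ms = ModelSelection.get(1)  # id is illustrative
    if ms.best_score:  # nullable: None until a selection run saves its scores
        print(ms.best_score.get("rmse"), ms.best_score.get("logloss"))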
--- lecrapaud-0.17.0/lecrapaud/feature_engineering.py
+++ lecrapaud-0.18.0/lecrapaud/feature_engineering.py
@@ -319,8 +319,8 @@ class PreprocessFeature:
         val_size: float = 0.2,
         test_size: float = 0.2,
         columns_pca: list[str] = [],
-        pca_temporal: dict[str, list[str]] = {},
-        pca_cross_sectional: dict[str, list[str]] = {},
+        pca_temporal: list[dict[str, list[str]]] = [],
+        pca_cross_sectional: list[dict[str, list[str]]] = [],
         columns_onehot: list[str] = [],
         columns_binary: list[str] = [],
         columns_ordinal: list[str] = [],
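The signature change above turns pca_temporal and pca_cross_sectional from one mapping into a list of mappings. Inferred from the type hints alone (group and column names below are made up), the expected shapes are:

    # 0.17.0: a single mapping of PCA group name to columns
    pca_temporal_old: dict[str, list[str]] = {"PCA_TEMP": ["COL_A", "COL_B"]}

    # 0.18.0: a list of such mappings
    pca_temporal_new: list[dict[str, list[str]]] = [
        {"PCA_TEMP": ["COL_A", "COL_B"]},
        {"PCA_TEMP_2": ["COL_C", "COL_D"]},
    ]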
--- lecrapaud-0.17.0/lecrapaud/model_selection.py
+++ lecrapaud-0.18.0/lecrapaud/model_selection.py
@@ -1093,6 +1093,7 @@ class ModelSelectionEngine:
             best_model_params = json.load(f)[best_model_name]
 
         # Save model_selection results to db
+
         model_selection = ModelSelection.get(model_selection.id)
         model_selection.best_model_id = Model.find_by(
             name=best_score_overall["MODEL_NAME"], type=self.target_type
@@ -1100,6 +1101,17 @@
         model_selection.best_model_params = best_model_params
         model_selection.best_thresholds = best_thresholds
         model_selection.best_model_path = best_model_path
+
+        drop_cols = [
+            "DATE",
+            "MODEL_NAME",
+            "MODEL_PATH",
+        ]
+        best_score_overall = {
+            k: v for k, v in best_score_overall.items() if k not in drop_cols
+        }
+        score_data = {k.lower(): v for k, v in best_score_overall.items()}
+        model_selection.best_score = score_data
         model_selection.save()
 
         logger.info(f"Best model overall is : {best_score_overall}")
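A worked example of the payload construction added above, with hypothetical values: the bookkeeping keys are dropped, the remaining metric keys are lowercased, and the result is stored in best_score.

    # Input values are hypothetical; the transformation is the one in the diff.
    best_score_overall = {
        "DATE": "2025-08-25",
        "MODEL_NAME": "xgboost",
        "MODEL_PATH": "models/xgb.pkl",
        "RMSE": 0.42,
        "LOGLOSS": 0.31,
    }
    drop_cols = ["DATE", "MODEL_NAME", "MODEL_PATH"]
    best_score_overall = {k: v for k, v in best_score_overall.items() if k not in drop_cols}
    score_data = {k.lower(): v for k, v in best_score_overall.items()}
    assert score_data == {"rmse": 0.42, "logloss": 0.31}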
--- lecrapaud-0.17.0/pyproject.toml
+++ lecrapaud-0.18.0/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lecrapaud"
-version = "0.17.0"
+version = "0.18.0"
 description = "Framework for machine and deep learning, with regression, classification and time series analysis"
 authors = [
     {name = "Pierre H. Gallet"}