lecrapaud 0.16.7__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

lecrapaud/api.py CHANGED
@@ -102,63 +102,43 @@ class LeCrapaud:
102
102
 
103
103
  def compare_experiment_scores(self, name: str):
104
104
  """Compare scores of experiments with matching names.
105
-
105
+
106
106
  Args:
107
107
  name (str): Name or partial name of experiments to compare
108
-
108
+
109
109
  Returns:
110
110
  dict: Dictionary containing experiment names as keys and their scores as values
111
111
  """
112
- from lecrapaud.db import SessionLocal
113
- from sqlalchemy.orm import joinedload
114
-
115
- db = SessionLocal()
116
- try:
117
- # Get all experiments with the given name pattern
118
- experiments = (
119
- db.query(Experiment)
120
- .options(joinedload(Experiment.model_selections)
121
- .joinedload(ModelSelection.scores))
122
- .filter(Experiment.name.ilike(f"%{name}%"))
123
- .all()
124
- )
125
-
126
- if not experiments:
127
- return {"error": f"No experiments found with name containing '{name}'"}
128
-
129
- comparison = {}
130
-
131
- for exp in experiments:
132
- scores = {
133
- "rmse": exp.avg_rmse,
134
- "logloss": exp.avg_logloss,
135
- "accuracy": None,
136
- "f1": None,
137
- "roc_auc": None
138
- }
139
-
140
- # Get classification metrics from the first model selection with scores
141
- for model_sel in exp.model_selections:
142
- if model_sel.scores:
143
- for score in model_sel.scores:
144
- if score.type == 'validation': # Use validation scores
145
- if score.accuracy is not None:
146
- scores["accuracy"] = score.accuracy
147
- if score.f1 is not None:
148
- scores["f1"] = score.f1
149
- if score.roc_auc is not None:
150
- scores["roc_auc"] = score.roc_auc
151
- break
152
-
153
- comparison[exp.name] = scores
154
-
155
- return comparison
156
-
157
- except Exception as e:
158
- return {"error": f"Error comparing experiment scores: {str(e)}"}
159
- finally:
160
- db.close()
161
-
112
+ # Get all experiments with the given name pattern
113
+ experiments = self.list_experiments(name=name)
114
+
115
+ if not experiments:
116
+ return {"error": f"No experiments found with name containing '{name}'"}
117
+
118
+ comparison = {}
119
+
120
+ for exp in experiments:
121
+ for model_sel in exp.experiment.model_selections:
122
+
123
+ if model_sel.best_score:
124
+
125
+ scores = {
126
+ "rmse": model_sel.best_score["rmse"],
127
+ "logloss": model_sel.best_score["logloss"],
128
+ "accuracy": model_sel.best_score["accuracy"],
129
+ "f1": model_sel.best_score["f1"],
130
+ "roc_auc": model_sel.best_score["roc_auc"],
131
+ }
132
+ target_name = model_sel.target.name
133
+
134
+ comparison[exp.experiment.name][target_name] = scores
135
+ else:
136
+ logger.warning(
137
+ f"No best score found for experiment {exp.experiment.name} and target {model_sel.target.name}"
138
+ )
139
+
140
+ return comparison
141
+
162
142
  def list_experiments(
163
143
  self, name: str = None, limit: int = 1000
164
144
  ) -> list["ExperimentEngine"]:
@@ -328,12 +308,12 @@ class ExperimentEngine:
328
308
  def feature_engineering(self, data, for_training=True):
329
309
  app = FeatureEngineeringEngine(
330
310
  data=data,
331
- columns_drop=self.columns_drop,
332
- columns_boolean=self.columns_boolean,
333
- columns_date=self.columns_date,
334
- columns_te_groupby=self.columns_te_groupby,
335
- columns_te_target=self.columns_te_target,
336
- for_training=for_training,
311
+ columns_drop=getattr(self, "columns_drop", []),
312
+ columns_boolean=getattr(self, "columns_boolean", []),
313
+ columns_date=getattr(self, "columns_date", []),
314
+ columns_te_groupby=getattr(self, "columns_te_groupby", []),
315
+ columns_te_target=getattr(self, "columns_te_target", []),
316
+ for_training=getattr(self, "for_training", True),
337
317
  )
338
318
  data = app.run()
339
319
  return data
@@ -341,19 +321,21 @@ class ExperimentEngine:
341
321
  def preprocess_feature(self, data, for_training=True):
342
322
  app = PreprocessFeature(
343
323
  data=data,
344
- experiment=self.experiment,
345
- time_series=self.time_series,
346
- date_column=self.date_column,
347
- group_column=self.group_column,
348
- val_size=self.val_size,
349
- test_size=self.test_size,
350
- columns_pca=self.columns_pca,
351
- columns_onehot=self.columns_onehot,
352
- columns_binary=self.columns_binary,
353
- columns_frequency=self.columns_frequency,
354
- columns_ordinal=self.columns_ordinal,
355
- target_numbers=self.target_numbers,
356
- target_clf=self.target_clf,
324
+ experiment=getattr(self, "experiment", None),
325
+ time_series=getattr(self, "time_series", False),
326
+ date_column=getattr(self, "date_column", None),
327
+ group_column=getattr(self, "group_column", None),
328
+ val_size=getattr(self, "val_size", 0.2),
329
+ test_size=getattr(self, "test_size", 0.2),
330
+ columns_pca=getattr(self, "columns_pca", []),
331
+ pca_temporal=getattr(self, "pca_temporal", []),
332
+ pca_cross_sectional=getattr(self, "pca_cross_sectional", []),
333
+ columns_onehot=getattr(self, "columns_onehot", []),
334
+ columns_binary=getattr(self, "columns_binary", []),
335
+ columns_ordinal=getattr(self, "columns_ordinal", []),
336
+ columns_frequency=getattr(self, "columns_frequency", []),
337
+ target_numbers=getattr(self, "target_numbers", []),
338
+ target_clf=getattr(self, "target_clf", []),
357
339
  )
358
340
  if for_training:
359
341
  train, val, test = app.run()
@@ -385,14 +367,14 @@ class ExperimentEngine:
385
367
  train=train,
386
368
  val=val,
387
369
  test=test,
388
- experiment=self.experiment,
389
- target_numbers=self.target_numbers,
390
- target_clf=self.target_clf,
391
- models_idx=self.models_idx,
392
- time_series=self.time_series,
393
- max_timesteps=self.max_timesteps,
394
- date_column=self.date_column,
395
- group_column=self.group_column,
370
+ experiment=getattr(self, "experiment", None),
371
+ target_numbers=getattr(self, "target_numbers", []),
372
+ target_clf=getattr(self, "target_clf", []),
373
+ models_idx=getattr(self, "models_idx", []),
374
+ time_series=getattr(self, "time_series", False),
375
+ max_timesteps=getattr(self, "max_timesteps", 120),
376
+ date_column=getattr(self, "date_column", None),
377
+ group_column=getattr(self, "group_column", None),
396
378
  )
397
379
  if for_training:
398
380
  data, reshaped_data = app.run()
@@ -407,13 +389,13 @@ class ExperimentEngine:
407
389
  data=data,
408
390
  reshaped_data=reshaped_data,
409
391
  target_number=target_number,
410
- experiment=self.experiment,
411
- target_clf=self.target_clf,
412
- models_idx=self.models_idx,
413
- time_series=self.time_series,
414
- date_column=self.date_column,
415
- group_column=self.group_column,
416
- target_clf_thresholds=self.target_clf_thresholds,
392
+ experiment=getattr(self, "experiment", None),
393
+ target_clf=getattr(self, "target_clf", []),
394
+ models_idx=getattr(self, "models_idx", []),
395
+ time_series=getattr(self, "time_series", False),
396
+ date_column=getattr(self, "date_column", None),
397
+ group_column=getattr(self, "group_column", None),
398
+ target_clf_thresholds=getattr(self, "target_clf_thresholds", {}),
417
399
  )
418
400
  if best_params and target_number not in best_params.keys():
419
401
  raise ValueError(
@@ -0,0 +1,30 @@
1
+ """add best_score to model selection
2
+
3
+ Revision ID: 7ed9963e732f
4
+ Revises: 72aa496ca65b
5
+ Create Date: 2025-08-25 14:34:58.866912
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '7ed9963e732f'
16
+ down_revision: Union[str, None] = '72aa496ca65b'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.add_column('lecrapaud_model_selections', sa.Column('best_score', sa.JSON(), nullable=True))
24
+ # ### end Alembic commands ###
25
+
26
+
27
+ def downgrade() -> None:
28
+ # ### commands auto generated by Alembic - please adjust! ###
29
+ op.drop_column('lecrapaud_model_selections', 'best_score')
30
+ # ### end Alembic commands ###
@@ -25,6 +25,7 @@ from lecrapaud.db.models.score import Score
25
25
 
26
26
  from lecrapaud.db.models.base import Base, with_db
27
27
  from lecrapaud.db.models.utils import create_association_table
28
+ from lecrapaud.utils import logger
28
29
 
29
30
  # jointures
30
31
  lecrapaud_experiment_target_association = create_association_table(
@@ -241,7 +242,8 @@ class Experiment(Base):
241
242
  # This ensures we're comparing apples to apples by normalizing the scores
242
243
  experiments = db.query(cls).filter(cls.name.ilike(f"%{name}%")).all()
243
244
  if not experiments:
244
- raise ValueError(f"No experiments found with the given name: {name}")
245
+ logger.error(f"No experiments found with the given name: {name}")
246
+ return None
245
247
 
246
248
  # Get all scores
247
249
  rmse_scores = [e.avg_rmse for e in experiments if e.avg_rmse is not None]
@@ -250,9 +252,10 @@ class Experiment(Base):
250
252
  ]
251
253
 
252
254
  if not rmse_scores or not logloss_scores:
253
- raise ValueError(
255
+ logger.error(
254
256
  "No experiments found with both RMSE and LogLoss scores. Maybe try with only one metric."
255
257
  )
258
+ return None
256
259
 
257
260
  # Normalize scores (subtract min and divide by range)
258
261
  min_rmse = min(rmse_scores)
@@ -306,80 +309,90 @@ class Experiment(Base):
306
309
  def best_score(self, target_number: int) -> dict:
307
310
  """
308
311
  Returns the scores for the best model of the specified target.
309
-
312
+
310
313
  Args:
311
314
  target_number (int): The target number to get scores for
312
-
315
+
313
316
  Returns:
314
317
  dict: A dictionary containing the experiment name, target number, and the best model's scores
315
318
  """
316
319
  # Find the target
317
320
  target_name = f"TARGET_{target_number}"
318
321
  target = next((t for t in self.targets if t.name == target_name), None)
319
-
322
+
320
323
  if not target:
321
324
  return {
322
- 'experiment_name': self.name,
323
- 'target_number': target_number,
324
- 'error': f'Target {target_name} not found in this experiment',
325
- 'scores': {}
325
+ "experiment_name": self.name,
326
+ "target_number": target_number,
327
+ "error": f"Target {target_name} not found in this experiment",
328
+ "scores": {},
326
329
  }
327
-
330
+
328
331
  # Find the best model selection for this target
329
332
  best_model_selection = next(
330
- (ms for ms in self.model_selections if ms.target_id == target.id),
331
- None
333
+ (ms for ms in self.model_selections if ms.target_id == target.id), None
332
334
  )
333
-
335
+
334
336
  if not best_model_selection or not best_model_selection.model_trainings:
335
337
  return {
336
- 'experiment_name': self.name,
337
- 'target_number': target_number,
338
- 'error': 'No model found for this target',
339
- 'scores': {}
338
+ "experiment_name": self.name,
339
+ "target_number": target_number,
340
+ "error": "No model found for this target",
341
+ "scores": {},
340
342
  }
341
-
343
+
342
344
  # Get the best model training (assuming the first one is the best)
343
345
  best_training = best_model_selection.model_trainings[0]
344
-
346
+
345
347
  # Get the validation score for this training
346
- validation_scores = [s for s in best_training.score if s.type == 'validation']
347
-
348
+ validation_scores = [s for s in best_training.score if s.type == "validation"]
349
+
348
350
  if not validation_scores:
349
351
  return {
350
- 'experiment_name': self.name,
351
- 'target_number': target_number,
352
- 'error': 'No validation scores found for the best model',
353
- 'scores': {}
352
+ "experiment_name": self.name,
353
+ "target_number": target_number,
354
+ "error": "No validation scores found for the best model",
355
+ "scores": {},
354
356
  }
355
-
357
+
356
358
  # Get all available metrics from the first validation score
357
359
  score = validation_scores[0]
358
360
  available_metrics = [
359
- 'rmse', 'mae', 'r2', 'logloss', 'accuracy',
360
- 'precision', 'recall', 'f1', 'roc_auc'
361
+ "rmse",
362
+ "mae",
363
+ "r2",
364
+ "logloss",
365
+ "accuracy",
366
+ "precision",
367
+ "recall",
368
+ "f1",
369
+ "roc_auc",
361
370
  ]
362
-
371
+
363
372
  scores = {}
364
373
  for metric in available_metrics:
365
374
  value = getattr(score, metric, None)
366
375
  if value is not None:
367
376
  scores[metric] = value
368
-
377
+
369
378
  # Get the model info
370
379
  model_info = {
371
- 'model_type': best_training.model.model_type if best_training.model else 'unknown',
372
- 'model_name': best_training.model.name if best_training.model else 'unknown',
373
- 'training_time_seconds': best_training.training_time
380
+ "model_type": (
381
+ best_training.model.model_type if best_training.model else "unknown"
382
+ ),
383
+ "model_name": (
384
+ best_training.model.name if best_training.model else "unknown"
385
+ ),
386
+ "training_time_seconds": best_training.training_time,
374
387
  }
375
-
388
+
376
389
  return {
377
- 'experiment_name': self.name,
378
- 'target_number': target_number,
379
- 'model': model_info,
380
- 'scores': scores
390
+ "experiment_name": self.name,
391
+ "target_number": target_number,
392
+ "model": model_info,
393
+ "scores": scores,
381
394
  }
382
-
395
+
383
396
  def get_features(self, target_number: int):
384
397
  targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
385
398
  if targets:
@@ -36,6 +36,7 @@ class ModelSelection(Base):
36
36
  )
37
37
  best_model_params = Column(JSON)
38
38
  best_thresholds = Column(JSON)
39
+ best_score = Column(JSON)
39
40
  best_model_path = Column(String(255))
40
41
  best_model_id = Column(
41
42
  BigInteger,
@@ -52,6 +52,9 @@ import os
52
52
 
53
53
  from sklearn.compose import ColumnTransformer
54
54
  from sklearn.decomposition import PCA
55
+ from sklearn.impute import SimpleImputer
56
+ from sklearn.preprocessing import StandardScaler
57
+ from sklearn.pipeline import Pipeline
55
58
  from category_encoders import BinaryEncoder, CountEncoder
56
59
  from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
57
60
  from sklearn.model_selection import train_test_split
@@ -316,6 +319,8 @@ class PreprocessFeature:
316
319
  val_size: float = 0.2,
317
320
  test_size: float = 0.2,
318
321
  columns_pca: list[str] = [],
322
+ pca_temporal: list[dict[str, list[str]]] = [],
323
+ pca_cross_sectional: list[dict[str, list[str]]] = [],
319
324
  columns_onehot: list[str] = [],
320
325
  columns_binary: list[str] = [],
321
326
  columns_ordinal: list[str] = [],
@@ -329,6 +334,8 @@ class PreprocessFeature:
329
334
 
330
335
  self.experiment = experiment
331
336
  self.columns_pca = [col.upper() for col in columns_pca]
337
+ self.pca_temporal = pca_temporal
338
+ self.pca_cross_sectional = pca_cross_sectional
332
339
  self.columns_onehot = [col.upper() for col in columns_onehot]
333
340
  self.columns_binary = [col.upper() for col in columns_binary]
334
341
  self.columns_ordinal = [col.upper() for col in columns_ordinal]
@@ -364,6 +371,20 @@ class PreprocessFeature:
364
371
 
365
372
  joblib.dump(pcas, f"{self.preprocessing_dir}/pcas.pkl")
366
373
 
374
+ train, pcas_cross_sectional = self.add_pca_feature_cross_sectional(train)
375
+ val, _ = self.add_pca_feature_cross_sectional(val, pcas=pcas_cross_sectional)
376
+ test, _ = self.add_pca_feature_cross_sectional(test, pcas=pcas_cross_sectional)
377
+
378
+ joblib.dump(
379
+ pcas_cross_sectional, f"{self.preprocessing_dir}/pcas_cross_sectional.pkl"
380
+ )
381
+
382
+ train, pcas_temporal = self.add_pca_feature_temporal(train)
383
+ val, _ = self.add_pca_feature_temporal(val, pcas=pcas_temporal)
384
+ test, _ = self.add_pca_feature_temporal(test, pcas=pcas_temporal)
385
+
386
+ joblib.dump(pcas_temporal, f"{self.preprocessing_dir}/pcas_temporal.pkl")
387
+
367
388
  # Save all features before encoding
368
389
  joblib.dump(
369
390
  list(train.columns),
@@ -402,6 +423,18 @@ class PreprocessFeature:
402
423
  pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
403
424
  data, _ = self.add_pca_features(data, pcas=pcas)
404
425
 
426
+ if os.path.exists(f"{self.preprocessing_dir}/pcas_cross_sectional.pkl"):
427
+ pcas_cross_sectional = joblib.load(
428
+ f"{self.preprocessing_dir}/pcas_cross_sectional.pkl"
429
+ )
430
+ data, _ = self.add_pca_feature_cross_sectional(
431
+ data, pcas=pcas_cross_sectional
432
+ )
433
+
434
+ if os.path.exists(f"{self.preprocessing_dir}/pcas_temporal.pkl"):
435
+ pcas_temporal = joblib.load(f"{self.preprocessing_dir}/pcas_temporal.pkl")
436
+ data, _ = self.add_pca_feature_temporal(data, pcas=pcas_temporal)
437
+
405
438
  # Encoding
406
439
  transformer = joblib.load(f"{self.preprocessing_dir}/column_transformer.pkl")
407
440
  data, _ = self.encode_categorical_features(
@@ -577,6 +610,120 @@ class PreprocessFeature:
577
610
 
578
611
  return df, pcas_dict
579
612
 
613
+ def add_pca_feature_cross_sectional(
614
+ self,
615
+ df: pd.DataFrame,
616
+ *,
617
+ n_components: int = 5,
618
+ pcas: dict[str, Pipeline] | None = None, # si fourni: transform only
619
+ impute_strategy: str = "median",
620
+ standardize: bool = True,
621
+ ) -> tuple[pd.DataFrame, dict[str, Pipeline]]:
622
+ """
623
+ Construit un pivot (index=index_col, columns=columns_col, values=value_col),
624
+ fit (ou réutilise) un Pipeline Imputer(+Scaler)+PCA, puis merge les scores
625
+ (par index_col) dans df. Renvoie (df_avec_features, pipe).
626
+ """
627
+
628
+ pcas_dict = {}
629
+
630
+ for pca_cross_sectional in self.pca_cross_sectional:
631
+ name, index_col, columns_col, value_col = (
632
+ pca_cross_sectional[k] for k in ("name", "index", "columns", "value")
633
+ )
634
+ prefix = f"CS_PC_{name}"
635
+
636
+ pivot = df.pivot_table(
637
+ index=index_col, columns=columns_col, values=value_col
638
+ ).sort_index()
639
+
640
+ # Pipeline à réutiliser entre train et test
641
+ if pcas is None:
642
+ steps = [("imputer", SimpleImputer(strategy=impute_strategy))]
643
+ if standardize:
644
+ steps.append(
645
+ ("scaler", StandardScaler(with_mean=True, with_std=True))
646
+ )
647
+ pca = PCA(n_components=n_components, random_state=0)
648
+ steps.append(("pca", pca))
649
+ pipe = Pipeline(steps)
650
+ pipe.fit(pivot) # <- fit sur TRAIN uniquement
651
+ else:
652
+ pipe = pcas[name] # <- TEST : on réutilise le pipe existant
653
+
654
+ scores = pipe.transform(pivot) # shape: (n_index, n_components)
655
+ cols = [f"{prefix}_{i}" for i in range(n_components)]
656
+ scores_df = pd.DataFrame(scores, index=pivot.index, columns=cols)
657
+
658
+ df = df.merge(scores_df.reset_index(), on=index_col, how="left")
659
+ pcas_dict.update({name: pipe})
660
+
661
+ return df, pcas_dict
662
+
663
+ # ----------------- 2) PCA TEMPORELLE (liste de colonnes lags) ----------------
664
+ def add_pca_feature_temporal(
665
+ self,
666
+ df: pd.DataFrame,
667
+ *,
668
+ n_components: int = 5,
669
+ pcas: dict[str, Pipeline] | None = None, # si fourni: transform only
670
+ impute_strategy: (
671
+ str | None
672
+ ) = None, # None = on exige toutes les colonnes présentes
673
+ standardize: bool = True,
674
+ ) -> tuple[pd.DataFrame, dict[str, Pipeline]]:
675
+ """
676
+ Applique une PCA sur une matrice (rows = lignes df, cols = lags).
677
+ Fit le Pipeline sur TRAIN si pcas=None; sinon, utilise pcas et fait transform.
678
+ Ajoute les colonnes f"{prefix}_{i}" dans df. Renvoie (df, pipe).
679
+ """
680
+ pcas_dict = {}
681
+
682
+ for pca_temporal in self.pca_temporal:
683
+ name, cols = (pca_temporal[k] for k in ("name", "columns"))
684
+ prefix = f"TMP_PC_{name}"
685
+
686
+ # Masque des lignes utilisables
687
+ if impute_strategy is None:
688
+ mask = (
689
+ df[cols].notna().all(axis=1)
690
+ ) # on n'impute pas → lignes complètes
691
+ X_fit = df.loc[mask, cols]
692
+ else:
693
+ mask = df[cols].notna().any(axis=1) # on imputera → au moins une valeur
694
+ X_fit = df.loc[mask, cols]
695
+
696
+ # Pipeline
697
+ if pcas is None:
698
+ steps = []
699
+ if impute_strategy is not None:
700
+ steps.append(("imputer", SimpleImputer(strategy=impute_strategy)))
701
+ if standardize:
702
+ steps.append(
703
+ ("scaler", StandardScaler(with_mean=True, with_std=True))
704
+ )
705
+ pca = PCA(n_components=n_components, random_state=0)
706
+ steps.append(("pca", pca))
707
+ pipe = Pipeline(steps)
708
+ if not X_fit.empty:
709
+ pipe.fit(X_fit) # <- fit sur TRAIN uniquement
710
+ else:
711
+ pipe = pcas[name] # <- TEST
712
+
713
+ # Transform uniquement sur lignes valides (mask)
714
+ if not df.loc[mask, cols].empty:
715
+ Z = pipe.transform(df.loc[mask, cols])
716
+ for i in range(n_components):
717
+ df.loc[mask, f"{prefix}_{i}"] = Z[:, i]
718
+ else:
719
+ # crée les colonnes vides si aucune ligne valide (cohérence de schéma)
720
+ for i in range(n_components):
721
+ df[f"{prefix}_{i}"] = pd.NA
722
+
723
+ pcas_dict.update({name: pipe})
724
+
725
+ return df, pcas_dict
726
+
580
727
  # encoding categorical features
581
728
  def encode_categorical_features(
582
729
  self,
@@ -1093,6 +1093,7 @@ class ModelSelectionEngine:
1093
1093
  best_model_params = json.load(f)[best_model_name]
1094
1094
 
1095
1095
  # Save model_selection results to db
1096
+
1096
1097
  model_selection = ModelSelection.get(model_selection.id)
1097
1098
  model_selection.best_model_id = Model.find_by(
1098
1099
  name=best_score_overall["MODEL_NAME"], type=self.target_type
@@ -1100,6 +1101,17 @@ class ModelSelectionEngine:
1100
1101
  model_selection.best_model_params = best_model_params
1101
1102
  model_selection.best_thresholds = best_thresholds
1102
1103
  model_selection.best_model_path = best_model_path
1104
+
1105
+ drop_cols = [
1106
+ "DATE",
1107
+ "MODEL_NAME",
1108
+ "MODEL_PATH",
1109
+ ]
1110
+ best_score_overall = {
1111
+ k: v for k, v in best_score_overall.items() if k not in drop_cols
1112
+ }
1113
+ score_data = {k.lower(): v for k, v in best_score_overall.items()}
1114
+ model_selection.best_score = score_data
1103
1115
  model_selection.save()
1104
1116
 
1105
1117
  logger.info(f"Best model overall is : {best_score_overall}")
@@ -1781,7 +1793,17 @@ def find_best_threshold(
1781
1793
  logger.warning(
1782
1794
  f"[Class {cls}] No threshold with precision ≥ {target_value}"
1783
1795
  )
1784
- best_idx = int(np.argmax(precision)) # fallback
1796
+ # fallback: meilleure precision parmi ceux avec recall>0
1797
+ cand = np.where(recall > 0)[0]
1798
+ if cand.size:
1799
+ best_idx = cand[int(np.argmax(precision[cand]))]
1800
+ logger.warning(
1801
+ f"[Class {cls}] Fallback to best precision with recall>0: "
1802
+ f"idx={best_idx}, precision={precision[best_idx]:.4f}, recall={recall[best_idx]:.4f}"
1803
+ )
1804
+ else:
1805
+ logger.error(f"[Class {cls}] No threshold achieves recall>0.")
1806
+ best_idx = None
1785
1807
 
1786
1808
  elif metric == "f1":
1787
1809
  valid_indices = [i for i, val in enumerate(f1) if val >= target_value]
@@ -1795,6 +1817,15 @@ def find_best_threshold(
1795
1817
  else:
1796
1818
  best_idx = int(np.argmax(values)) # no constraint, get best value
1797
1819
 
1820
+ if best_idx is None:
1821
+ results[cls_str] = {
1822
+ "threshold": None,
1823
+ "precision": None,
1824
+ "recall": None,
1825
+ "f1": None,
1826
+ }
1827
+ continue
1828
+
1798
1829
  results[cls_str] = {
1799
1830
  "threshold": float(thresholds[best_idx]),
1800
1831
  "precision": float(precision[best_idx]),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.16.7
3
+ Version: 0.18.0
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -1,5 +1,5 @@
1
1
  lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
2
- lecrapaud/api.py,sha256=XsdK1jywLOOGcMMtx09KtjLpEzzLpFtXfVjJrQSfcH0,22639
2
+ lecrapaud/api.py,sha256=CJJeFvO-5jPRsVpLIgKJ34JpOXqZSs4RowLnPBwxrDs,22463
3
3
  lecrapaud/config.py,sha256=itiqC31HB8i2Xo-kn2viCQrg_9tnA07-TJuZ-xdnx44,1126
4
4
  lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
5
5
  lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
@@ -9,15 +9,16 @@ lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py,sha256=hyPW0Mt_B4
9
9
  lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py,sha256=6Pf36HAXEVrVlnrohAe2O7gVaXpDiv3LLIP_EEgTyA0,917
10
10
  lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=KjwjYvFaNqYmBLTYel8As37fyaBtNVWTqN_3M7y_2eI,1357
11
11
  lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=MiqooJuZ1etExl2he3MniaEv8G0LrmqY-0m22m9xKmc,943
12
+ lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py,sha256=dzPelNA8N1f8rxUAF9KeoRx3FPvcTKshgcKyq_woe8c,858
12
13
  lecrapaud/db/alembic.ini,sha256=Zw2rdwsKV6c7J1SPtoFIPDX08_oTP3MuUKnNxBDiY8I,3796
13
14
  lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
14
15
  lecrapaud/db/models/base.py,sha256=J9ew-0z_-tnWAwhVvOmVDys2R6jPF_oSca_ny6wpXQE,7606
15
- lecrapaud/db/models/experiment.py,sha256=ai4M8SXJOrwoZ803FAzlOMAIN0ktnTkowQP5TadFX6c,14594
16
+ lecrapaud/db/models/experiment.py,sha256=rgNpCNXMei5VhJDNKxelpwqv7iTxoPJ2kkffGaua2sA,14710
16
17
  lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
17
18
  lecrapaud/db/models/feature_selection.py,sha256=mk42xuw1Sm_7Pznfg7TNc5_S4hscdw79QgIe3Bt9ZRI,3245
18
19
  lecrapaud/db/models/feature_selection_rank.py,sha256=Ydsb_rAT58FoSH13wkGjGPByzsjPx3DITXgJ2jgZmow,2198
19
20
  lecrapaud/db/models/model.py,sha256=F0hyMjd4FFHCv6_arIWBEmBCGOfG3b6_uzU8ExtFE90,952
20
- lecrapaud/db/models/model_selection.py,sha256=zNCumJvhGLGmjA14_agLQ-ZFgXc_uoXhtmBnxUk5iM8,1971
21
+ lecrapaud/db/models/model_selection.py,sha256=tJuICcporf3TxQHbJbHxnKgkaVc02z2kJJoCYS2nDcw,2001
21
22
  lecrapaud/db/models/model_training.py,sha256=jAIYPdwBln2jf593soLQ730uYrTfNK8zdG8TesOqmh0,1698
22
23
  lecrapaud/db/models/score.py,sha256=fSfXLt6Dm-8Fy9ku0urMT5Fa6zNqn4YqVnEO4o3zKVI,1669
23
24
  lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
@@ -25,7 +26,7 @@ lecrapaud/db/models/utils.py,sha256=-a-nWWmpJ2XzidIxo2COVUTrGZIPYCfBzjhcszJj_bM,
25
26
  lecrapaud/db/session.py,sha256=E93WXcFFILFAIeH61ft2Egs7D-6caqs0oi4zCkO5Lq4,2822
26
27
  lecrapaud/directories.py,sha256=0LrANuDgbuneSLker60c6q2hmGnQ3mKHIztTGzTx6Gw,826
27
28
  lecrapaud/experiment.py,sha256=1xLWjOrqAxJh9CdXOx9ppQuRFRRj0GH-xYZqg-ty9hI,2463
28
- lecrapaud/feature_engineering.py,sha256=J7lWp-lQmuMiirT9QeuK5bxl2YutilZ1JGHR12i0V64,32790
29
+ lecrapaud/feature_engineering.py,sha256=ey46MqXBC-c-BS6nRA7zo8uafxmDABy5ThIyTfmXoSo,38982
29
30
  lecrapaud/feature_selection.py,sha256=6ry-oVPQHbipm1XSE5YsH7AY0lQFt4CFbWiHiRs1nxg,43593
30
31
  lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
31
32
  lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
@@ -36,10 +37,10 @@ lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQ
36
37
  lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
37
38
  lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
38
39
  lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
39
- lecrapaud/model_selection.py,sha256=Q7afY0UzFzs2fFEPNXIBxjpabmruxiTmDh5OssPayLk,71139
40
+ lecrapaud/model_selection.py,sha256=QOwOsn1WEBzR-2ZpHvhzv9Qz47delkBdNziHy-auY3o,72302
40
41
  lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
41
42
  lecrapaud/utils.py,sha256=JdBB1NvbNIx4y0Una-kSZdo1_ZEocc5hwyYFIZKHmGg,8305
42
- lecrapaud-0.16.7.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
43
- lecrapaud-0.16.7.dist-info/METADATA,sha256=5NUEvWiw9TIKhDPOlh7WIYXvcsnKErDPMUdayfBfC24,11081
44
- lecrapaud-0.16.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
45
- lecrapaud-0.16.7.dist-info/RECORD,,
43
+ lecrapaud-0.18.0.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
44
+ lecrapaud-0.18.0.dist-info/METADATA,sha256=YtnShzFVl8EQOlwPI44gX2T67szZttyD62V1jZ9rFh0,11081
45
+ lecrapaud-0.18.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
46
+ lecrapaud-0.18.0.dist-info/RECORD,,