lecrapaud 0.8.3__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lecrapaud/api.py CHANGED
@@ -108,17 +108,13 @@ class ExperimentEngine:
108
108
  for target_number in self.target_numbers:
109
109
 
110
110
  # loading model
111
- training_target_dir = f"{self.experiment.path}/TARGET_{target_number}"
111
+ target_dir = f"{self.experiment.path}/TARGET_{target_number}"
112
112
  all_features = self.experiment.get_all_features(
113
113
  date_column=self.date_column, group_column=self.group_column
114
114
  )
115
- if self.experiment.name == "data_28_X_X":
116
- features = joblib.load(
117
- f"{self.experiment.path}/preprocessing/features_{target_number}.pkl"
118
- ) # we keep this for backward compatibility
119
- else:
120
- features = self.experiment.get_features(target_number)
121
- model = ModelEngine(path=training_target_dir)
115
+ features = self.experiment.get_features(target_number)
116
+
117
+ model = ModelEngine(path=target_dir)
122
118
 
123
119
  # getting data
124
120
  if model.recurrent:
@@ -1,4 +1,5 @@
1
1
  from itertools import chain
2
+ import joblib
2
3
 
3
4
  from sqlalchemy import (
4
5
  Column,
@@ -106,6 +107,11 @@ class Experiment(Base):
106
107
  fs for fs in feature_selections if fs.target_id == target_id
107
108
  ][0]
108
109
  features = [f.name for f in feature_selection.features]
110
+
111
+ # fallback to path if no features found
112
+ if len(features) == 0:
113
+ features = joblib.load(f"{self.path}/TARGET_{target_number}/features.pkl")
114
+
109
115
  return features
110
116
 
111
117
  def get_all_features(self, date_column: str = None, group_column: str = None):
@@ -115,6 +121,11 @@ class Experiment(Base):
115
121
  for fs in self.feature_selections
116
122
  if fs.target_id in target_idx
117
123
  )
124
+
125
+ # fallback to path if no features found
126
+ if len(_all_features) == 0:
127
+ _all_features = joblib.load(f"{self.path}/preprocessing/all_features.pkl")
128
+
118
129
  all_features = []
119
130
  if date_column:
120
131
  all_features.append(date_column)
lecrapaud/experiment.py CHANGED
@@ -2,6 +2,7 @@ import os
2
2
  from pathlib import Path
3
3
 
4
4
  import pandas as pd
5
+ import joblib
5
6
 
6
7
  # Set up coverage file path
7
8
  os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
@@ -13,7 +14,7 @@ from lecrapaud.db.session import get_db
13
14
 
14
15
 
15
16
  def create_experiment(
16
- data: pd.DataFrame,
17
+ data: pd.DataFrame | str,
17
18
  corr_threshold,
18
19
  percentile,
19
20
  max_features,
@@ -22,6 +23,10 @@ def create_experiment(
22
23
  experiment_name,
23
24
  **kwargs,
24
25
  ):
26
+ if isinstance(data, str):
27
+ path = f"{data}/data/full.pkl" if "data" not in data else f"{data}/full.pkl"
28
+ data = joblib.load(path)
29
+
25
30
  dates = {}
26
31
  if date_column:
27
32
  dates["start_date"] = pd.to_datetime(data[date_column].iat[0])
@@ -54,7 +54,7 @@ from lecrapaud.search_space import all_models
54
54
  warnings.filterwarnings("ignore", category=FutureWarning)
55
55
 
56
56
 
57
- def load_train_data(experiment_dir, target_number, target_type="regression"):
57
+ def load_train_data(experiment_dir):
58
58
  data_dir = f"{experiment_dir}/data"
59
59
 
60
60
  logger.info("Loading data...")
@@ -90,11 +90,9 @@ class FeatureSelectionEngine:
90
90
  self.experiment_dir = self.experiment.path
91
91
  self.experiment_id = self.experiment.id
92
92
  self.data_dir = f"{self.experiment_dir}/data"
93
- self.preprocessing_dir = f"{self.experiment_dir}/preprocessing"
94
- self.fs_dir_target = (
95
- f"{self.experiment_dir}/{f"TARGET_{self.target_number}"}/feature_selection"
96
- )
97
- os.makedirs(self.fs_dir_target, exist_ok=True)
93
+ self.target_dir = f"{self.experiment_dir}/TARGET_{self.target_number}"
94
+ self.feature_selection_dir = f"{self.target_dir}/feature_selection"
95
+ os.makedirs(self.feature_selection_dir, exist_ok=True)
98
96
 
99
97
  # Main feature selection function
100
98
  def run(
@@ -110,7 +108,6 @@ class FeatureSelectionEngine:
110
108
  """
111
109
  target_number = self.target_number
112
110
  target_type = self.target_type
113
- fs_dir_target = self.fs_dir_target
114
111
 
115
112
  # Create the feature selection in db
116
113
  target = Target.find_by(name=f"TARGET_{target_number}")
@@ -133,7 +130,7 @@ class FeatureSelectionEngine:
133
130
  self.y = self.train[f"TARGET_{target_number}"]
134
131
 
135
132
  logger.info(f"Starting feature selection for TARGET_{target_number}...")
136
- clean_directory(self.fs_dir_target)
133
+ clean_directory(self.feature_selection_dir)
137
134
 
138
135
  # Let's start by removing very low variance features and extremely correlated features
139
136
  # This is needed to reduce the number of features, but also for methods such as ANOVA or chi2 that require independent, non-constant, not all-zero features
@@ -158,9 +155,7 @@ class FeatureSelectionEngine:
158
155
  self.X_categorical, self.X_numerical = get_features_by_types(self.X)
159
156
 
160
157
  if target_type == "classification" and self.X_categorical.shape[1] > 0:
161
- feat_scores = self.select_categorical_features(
162
- percentile=percentile, save_dir=fs_dir_target
163
- )
158
+ feat_scores = self.select_categorical_features(percentile=percentile)
164
159
  with get_db() as db:
165
160
  for row in feat_scores.itertuples(index=False):
166
161
  feature = Feature.find_by(name=row.features, db=db)
@@ -181,7 +176,7 @@ class FeatureSelectionEngine:
181
176
  ].values.tolist()
182
177
 
183
178
  results = []
184
- params = {"percentile": percentile, "save_dir": fs_dir_target}
179
+ params = {"percentile": percentile}
185
180
  if single_process:
186
181
  results = [
187
182
  self.select_feature_by_linear_correlation(**params),
@@ -288,7 +283,7 @@ class FeatureSelectionEngine:
288
283
  )
289
284
  logger.debug(features_selected_by_every_methods)
290
285
  pd.Series(features_selected_list).to_csv(
291
- f"{fs_dir_target}/features_before_corr.csv",
286
+ f"{self.feature_selection_dir}/features_before_corr.csv",
292
287
  index=True,
293
288
  header=True,
294
289
  index_label="ID",
@@ -298,7 +293,7 @@ class FeatureSelectionEngine:
298
293
  self.X = self.X[features_selected_list]
299
294
  features, features_correlated = self.remove_correlated_features(corr_threshold)
300
295
  pd.Series(features).to_csv(
301
- f"{fs_dir_target}/features_before_max.csv",
296
+ f"{self.feature_selection_dir}/features_before_max.csv",
302
297
  index=True,
303
298
  header=True,
304
299
  index_label="ID",
@@ -327,9 +322,7 @@ class FeatureSelectionEngine:
327
322
  )
328
323
 
329
324
  # save to path
330
- best_features_path = Path(
331
- f"{self.preprocessing_dir}/features_{target_number}.pkl"
332
- ).resolve()
325
+ best_features_path = Path(f"{self.target_dir}/features.pkl").resolve()
333
326
  joblib.dump(features, best_features_path)
334
327
 
335
328
  # save in db
@@ -423,7 +416,7 @@ class FeatureSelectionEngine:
423
416
  # Filter methods
424
417
  # ----------------
425
418
 
426
- def select_categorical_features(self, percentile, save_dir: Optional[str] = None):
419
+ def select_categorical_features(self, percentile):
427
420
  X, y = self.X_categorical, self.y
428
421
 
429
422
  start = time.time()
@@ -446,15 +439,16 @@ class FeatureSelectionEngine:
446
439
  )
447
440
 
448
441
  feat_scores.to_csv(
449
- f"{save_dir}/Chi2.csv", index=True, header=True, index_label="ID"
442
+ f"{self.feature_selection_dir}/Chi2.csv",
443
+ index=True,
444
+ header=True,
445
+ index_label="ID",
450
446
  )
451
447
 
452
448
  return feat_scores
453
449
 
454
450
  # Linear correlation (Pearson's R for regression and ANOVA for classification)
455
- def select_feature_by_linear_correlation(
456
- self, percentile: int = 20, save_dir: Optional[str] = None
457
- ):
451
+ def select_feature_by_linear_correlation(self, percentile: int = 20):
458
452
  X, y, target_type = self.X_numerical, self.y, self.target_type
459
453
 
460
454
  start = time.time()
@@ -480,7 +474,7 @@ class FeatureSelectionEngine:
480
474
  )
481
475
 
482
476
  feat_scores.to_csv(
483
- f"{save_dir}/{test_type}.csv",
477
+ f"{self.feature_selection_dir}/{test_type}.csv",
484
478
  index=True,
485
479
  header=True,
486
480
  index_label="ID",
@@ -489,9 +483,7 @@ class FeatureSelectionEngine:
489
483
  return feat_scores
490
484
 
491
485
  # Non-Linear correlation (Spearman's R for regression and Kendall's Tau for classification)
492
- def select_feature_by_nonlinear_correlation(
493
- self, percentile: int = 20, save_dir: Optional[str] = None
494
- ):
486
+ def select_feature_by_nonlinear_correlation(self, percentile: int = 20):
495
487
  X, y, target_type = self.X_numerical, self.y, self.target_type
496
488
 
497
489
  start = time.time()
@@ -537,7 +529,7 @@ class FeatureSelectionEngine:
537
529
  )
538
530
 
539
531
  feat_scores.to_csv(
540
- f"{save_dir}/{test_type}.csv",
532
+ f"{self.feature_selection_dir}/{test_type}.csv",
541
533
  index=True,
542
534
  header=True,
543
535
  index_label="ID",
@@ -546,9 +538,7 @@ class FeatureSelectionEngine:
546
538
  return feat_scores
547
539
 
548
540
  # Mutual Information
549
- def select_feature_by_mi(
550
- self, percentile: int = 20, save_dir: Optional[str] = None
551
- ):
541
+ def select_feature_by_mi(self, percentile: int = 20):
552
542
  X, y, target_type = self.X_numerical, self.y, self.target_type
553
543
 
554
544
  start = time.time()
@@ -575,7 +565,10 @@ class FeatureSelectionEngine:
575
565
  )
576
566
 
577
567
  feat_scores.to_csv(
578
- f"{save_dir}/MI.csv", index=True, header=True, index_label="ID"
568
+ f"{self.feature_selection_dir}/MI.csv",
569
+ index=True,
570
+ header=True,
571
+ index_label="ID",
579
572
  )
580
573
 
581
574
  return feat_scores
@@ -584,9 +577,7 @@ class FeatureSelectionEngine:
584
577
  # ----------------
585
578
 
586
579
  # feature importance
587
- def select_feature_by_feat_imp(
588
- self, percentile: int = 20, save_dir: Optional[str] = None
589
- ):
580
+ def select_feature_by_feat_imp(self, percentile: int = 20):
590
581
  X, y, target_type = self.X_numerical, self.y, self.target_type
591
582
 
592
583
  start = time.time()
@@ -628,7 +619,10 @@ class FeatureSelectionEngine:
628
619
  )
629
620
 
630
621
  feat_scores.to_csv(
631
- f"{save_dir}/FI.csv", index=True, header=True, index_label="ID"
622
+ f"{self.feature_selection_dir}/FI.csv",
623
+ index=True,
624
+ header=True,
625
+ index_label="ID",
632
626
  )
633
627
 
634
628
  return feat_scores
@@ -637,9 +631,7 @@ class FeatureSelectionEngine:
637
631
  # ----------------
638
632
 
639
633
  # recursive feature elimination
640
- def select_feature_by_rfe(
641
- self, percentile: int = 20, save_dir: Optional[str] = None
642
- ):
634
+ def select_feature_by_rfe(self, percentile: int = 20):
643
635
  X, y, target_type = self.X_numerical, self.y, self.target_type
644
636
 
645
637
  start = time.time()
@@ -681,15 +673,16 @@ class FeatureSelectionEngine:
681
673
  )
682
674
 
683
675
  feat_scores.to_csv(
684
- f"{save_dir}/RFE.csv", index=True, header=True, index_label="ID"
676
+ f"{self.feature_selection_dir}/RFE.csv",
677
+ index=True,
678
+ header=True,
679
+ index_label="ID",
685
680
  )
686
681
 
687
682
  return feat_scores
688
683
 
689
684
  # SequentialFeatureSelector (loss based, possibility to do forwards or backwards selection or removal)
690
- def select_feature_by_sfs(
691
- self, percentile: int = 20, save_dir: Optional[str] = None
692
- ):
685
+ def select_feature_by_sfs(self, percentile: int = 20):
693
686
  X, y, target_type = self.X_numerical, self.y, self.target_type
694
687
 
695
688
  start = time.time()
@@ -782,7 +775,10 @@ class FeatureSelectionEngine:
782
775
  )
783
776
 
784
777
  feat_scores.to_csv(
785
- f"{save_dir}/SFS.csv", index=True, header=True, index_label="ID"
778
+ f"{self.feature_selection_dir}/SFS.csv",
779
+ index=True,
780
+ header=True,
781
+ index_label="ID",
786
782
  )
787
783
 
788
784
  return feat_scores
@@ -862,11 +858,13 @@ class PreprocessModel:
862
858
  train_scaled = None
863
859
  val_scaled = None
864
860
  test_scaled = None
861
+ scaler_x = None
865
862
 
866
863
  # save data
867
864
  joblib.dump(train_scaled, f"{self.data_dir}/train_scaled.pkl")
868
865
  joblib.dump(val_scaled, f"{self.data_dir}/val_scaled.pkl")
869
866
  joblib.dump(test_scaled, f"{self.data_dir}/test_scaled.pkl")
867
+ joblib.dump(scaler_x, f"{self.preprocessing_dir}/scaler_x.pkl")
870
868
 
871
869
  data = {
872
870
  "train": self.train,
@@ -875,7 +873,6 @@ class PreprocessModel:
875
873
  "train_scaled": train_scaled,
876
874
  "val_scaled": val_scaled,
877
875
  "test_scaled": test_scaled,
878
- "scalers_y": scalers_y,
879
876
  }
880
877
 
881
878
  # reshape data for time series
@@ -902,7 +899,7 @@ class PreprocessModel:
902
899
  self.train = self.train[columns_to_keep]
903
900
 
904
901
  scaler_x = joblib.load(f"{self.preprocessing_dir}/scaler_x.pkl")
905
- scaled_data = scaler_x.transform(self.train) # TODO: utiliser scale_data
902
+ scaled_data = scaler_x.transform(self.train)
906
903
  scaled_data = pd.DataFrame(
907
904
  scaled_data, columns=self.train.columns, index=self.train.index
908
905
  )
@@ -951,7 +948,6 @@ class PreprocessModel:
951
948
  X_scaled = pd.DataFrame(
952
949
  scaler_x.fit_transform(X), columns=list(X.columns), index=X.index
953
950
  )
954
- joblib.dump(scaler_x, f"{self.preprocessing_dir}/scaler_x.pkl")
955
951
 
956
952
  # Determine which targets need to be scaled
957
953
  targets_numbers_to_scale = [
@@ -980,9 +976,8 @@ class PreprocessModel:
980
976
  columns=y.columns,
981
977
  index=y.index,
982
978
  )
983
- joblib.dump(
984
- scaler_y, f"{self.preprocessing_dir}/scaler_y_{target_number}.pkl"
985
- )
979
+ target_dir = f"{self.experiment_dir}/TARGET_{target_number}"
980
+ joblib.dump(scaler_y, f"{target_dir}/scaler_y.pkl")
986
981
 
987
982
  scalers_y[f"scaler_y_{target_number}"] = scaler_y
988
983
  scaled_targets[target_number] = scaled_y
@@ -145,11 +145,7 @@ class ModelEngine:
145
145
  self.log_dir = log_dir
146
146
 
147
147
  if self.path and self.need_scaling and self.target_type == "regression":
148
- preprocessing_dir = Path(f"{self.path}/../preprocessing")
149
- target_number = self.path.split("/")[-1].split("_")[-1]
150
- self.scaler_y = joblib.load(
151
- preprocessing_dir / f"scaler_y_{target_number}.pkl"
152
- )
148
+ self.scaler_y = joblib.load(f"{self.path}/scaler_y.pkl")
153
149
  else:
154
150
  self.scaler_y = None
155
151
 
@@ -571,10 +567,10 @@ class ModelEngine:
571
567
  if not self.path:
572
568
  raise ValueError("Path is not set, cannot load model")
573
569
 
574
- training_target_dir = Path(self.path)
570
+ target_dir = Path(self.path)
575
571
 
576
572
  # Load threshold
577
- scores_tracking = pd.read_csv(f"{training_target_dir}/scores_tracking.csv")
573
+ scores_tracking = pd.read_csv(f"{target_dir}/scores_tracking.csv")
578
574
  self.threshold = (
579
575
  scores_tracking["THRESHOLD"].values[0]
580
576
  if "THRESHOLD" in scores_tracking.columns
@@ -582,8 +578,8 @@ class ModelEngine:
582
578
  )
583
579
 
584
580
  # Search for files that contain '.best' or '.keras' in the name
585
- best_files = list(training_target_dir.glob("*.best*")) + list(
586
- training_target_dir.glob("*.keras*")
581
+ best_files = list(target_dir.glob("*.best*")) + list(
582
+ target_dir.glob("*.keras*")
587
583
  )
588
584
  # If any files are found, try loading the first one (or process as needed)
589
585
  if best_files:
@@ -610,7 +606,7 @@ class ModelEngine:
610
606
  )
611
607
  else:
612
608
  raise FileNotFoundError(
613
- f"No files with '.best' or '.keras' found in the specified folder: {training_target_dir}"
609
+ f"No files with '.best' or '.keras' found in the specified folder: {target_dir}"
614
610
  )
615
611
 
616
612
  self.model_name = self._model.model_name
@@ -737,7 +733,7 @@ class ModelSelectionEngine:
737
733
  self.experiment_id = self.experiment.id
738
734
  self.data_dir = f"{self.experiment_dir}/data"
739
735
  self.preprocessing_dir = f"{self.experiment_dir}/preprocessing"
740
- self.training_target_dir = f"{self.experiment_dir}/TARGET_{self.target_number}"
736
+ self.target_dir = f"{self.experiment_dir}/TARGET_{self.target_number}"
741
737
  self.metric = "RMSE" if self.target_type == "regression" else "LOGLOSS"
742
738
  self.features = self.experiment.get_features(self.target_number)
743
739
  self.all_features = self.experiment.get_all_features(
@@ -826,7 +822,7 @@ class ModelSelectionEngine:
826
822
  if recurrent is False and config[self.target_type] is None:
827
823
  continue # for naive bayes models that cannot be used in regression
828
824
 
829
- self.results_dir = f"{self.training_target_dir}/{model_name}"
825
+ self.results_dir = f"{self.target_dir}/{model_name}"
830
826
  if not os.path.exists(f"{self.results_dir}"):
831
827
  os.makedirs(f"{self.results_dir}")
832
828
  elif preserve_model and contains_best(self.results_dir):
@@ -885,7 +881,7 @@ class ModelSelectionEngine:
885
881
  x_test = test[self.features]
886
882
  y_test = test[f"TARGET_{self.target_number}"].rename("TARGET")
887
883
 
888
- log_dir = get_log_dir(self.training_target_dir, model_name)
884
+ log_dir = get_log_dir(self.target_dir, model_name)
889
885
  # instantiate model
890
886
  model = ModelEngine(
891
887
  model_name=model_name,
@@ -902,7 +898,7 @@ class ModelSelectionEngine:
902
898
  best_params = self.hyperoptimize(x_train, y_train, x_val, y_val, model)
903
899
 
904
900
  # save best params
905
- best_params_file = f"{self.training_target_dir}/best_params.json"
901
+ best_params_file = f"{self.target_dir}/best_params.json"
906
902
  try:
907
903
  with open(best_params_file, "r") as f:
908
904
  json_dict = json.load(f)
@@ -914,7 +910,7 @@ class ModelSelectionEngine:
914
910
  json.dump(json_dict, f, indent=4)
915
911
  else:
916
912
  try:
917
- with open(f"{self.training_target_dir}/best_params.json") as f:
913
+ with open(f"{self.target_dir}/best_params.json") as f:
918
914
  json_dict = json.load(f)
919
915
  best_params = json_dict[model_name]
920
916
  except Exception:
@@ -1028,7 +1024,7 @@ class ModelSelectionEngine:
1028
1024
  best_score["MODEL_PATH"] = model_path
1029
1025
 
1030
1026
  # Track scores
1031
- scores_tracking_path = f"{self.training_target_dir}/scores_tracking.csv"
1027
+ scores_tracking_path = f"{self.target_dir}/scores_tracking.csv"
1032
1028
  best_score_df = pd.DataFrame([best_score])
1033
1029
 
1034
1030
  if os.path.exists(scores_tracking_path):
@@ -1072,26 +1068,26 @@ class ModelSelectionEngine:
1072
1068
  logger.info(f"Model training finished in {training_time:.2f} seconds")
1073
1069
 
1074
1070
  # find best model type
1075
- scores_tracking_path = f"{self.training_target_dir}/scores_tracking.csv"
1071
+ scores_tracking_path = f"{self.target_dir}/scores_tracking.csv"
1076
1072
  scores_tracking = pd.read_csv(scores_tracking_path)
1077
1073
  best_score_overall = scores_tracking.iloc[0, :]
1078
1074
  best_model_name = best_score_overall["MODEL_NAME"]
1079
1075
 
1080
1076
  # Remove any .best or .keras files
1081
- for file_path in glob.glob(
1082
- os.path.join(self.training_target_dir, "*.best")
1083
- ) + glob.glob(os.path.join(self.training_target_dir, "*.keras")):
1077
+ for file_path in glob.glob(os.path.join(self.target_dir, "*.best")) + glob.glob(
1078
+ os.path.join(self.target_dir, "*.keras")
1079
+ ):
1084
1080
  os.remove(file_path)
1085
1081
  # Copy the best model in root training folder for this target
1086
1082
  best_model_path = Path(
1087
- f"{self.training_target_dir}/{os.path.basename(best_score_overall['MODEL_PATH'])}"
1083
+ f"{self.target_dir}/{os.path.basename(best_score_overall['MODEL_PATH'])}"
1088
1084
  ).resolve()
1089
1085
  copy_any(
1090
1086
  best_score_overall["MODEL_PATH"],
1091
1087
  best_model_path,
1092
1088
  )
1093
1089
 
1094
- with open(f"{self.training_target_dir}/best_params.json", "r") as f:
1090
+ with open(f"{self.target_dir}/best_params.json", "r") as f:
1095
1091
  best_model_params = json.load(f)[best_model_name]
1096
1092
 
1097
1093
  # save model_selection results to db
@@ -1111,8 +1107,8 @@ class ModelSelectionEngine:
1111
1107
  def hyperoptimize(self, x_train, y_train, x_val, y_val, model: ModelEngine):
1112
1108
  self.type_name = "hyperopts"
1113
1109
 
1114
- def collect_error_logs(training_target_dir: int, storage_path: str):
1115
- output_error_file = f"{training_target_dir}/errors.log"
1110
+ def collect_error_logs(target_dir: int, storage_path: str):
1111
+ output_error_file = f"{target_dir}/errors.log"
1116
1112
 
1117
1113
  with open(output_error_file, "a") as outfile:
1118
1114
  # Walk through the ray_results directory
@@ -1176,9 +1172,7 @@ class ModelSelectionEngine:
1176
1172
  f"Markdown table with all trials :\n{results.get_dataframe().to_markdown()}"
1177
1173
  )
1178
1174
  # Collect errors in single file
1179
- collect_error_logs(
1180
- training_target_dir=self.training_target_dir, storage_path=storage_path
1181
- )
1175
+ collect_error_logs(target_dir=self.target_dir, storage_path=storage_path)
1182
1176
 
1183
1177
  except Exception as e:
1184
1178
  raise Exception(e)
@@ -1321,21 +1315,19 @@ def evaluate(
1321
1315
 
1322
1316
 
1323
1317
  # utils
1324
- def get_log_dir(training_target_dir: str, model_name="test_model"):
1318
+ def get_log_dir(target_dir: str, model_name="test_model"):
1325
1319
  """Generates a structured log directory path for TensorBoard."""
1326
1320
  timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")
1327
- log_dir = (
1328
- Path(training_target_dir + "/tensorboard") / model_name / f"run_{timestamp}"
1329
- )
1321
+ log_dir = Path(target_dir + "/tensorboard") / model_name / f"run_{timestamp}"
1330
1322
  log_dir.mkdir(parents=True, exist_ok=True) # Create directories if they don't exist
1331
1323
  return str(log_dir)
1332
1324
 
1333
1325
 
1334
- def print_scores(training_target_dir: str):
1326
+ def print_scores(target_dir: str):
1335
1327
  """
1336
1328
  Monitor scores
1337
1329
  """
1338
- scores_tracking = pd.read_csv(f"{training_target_dir}/scores_tracking.csv")
1330
+ scores_tracking = pd.read_csv(f"{target_dir}/scores_tracking.csv")
1339
1331
  return scores_tracking
1340
1332
 
1341
1333
 
@@ -1631,22 +1623,22 @@ def plot_threshold(prediction, threshold, precision, recall):
1631
1623
 
1632
1624
 
1633
1625
  # OLD - to sort out
1634
- def get_pred_distribution(training_target_dir: str, model_name="linear"):
1626
+ def get_pred_distribution(target_dir: str, model_name="linear"):
1635
1627
  """
1636
1628
  Look at prediction distributions
1637
1629
  """
1638
1630
  prediction = pd.read_csv(
1639
- f"{training_target_dir}/{model_name}/prediction.csv",
1631
+ f"{target_dir}/{model_name}/prediction.csv",
1640
1632
  index_col="ID",
1641
1633
  )
1642
1634
  prediction.describe()
1643
1635
 
1644
1636
 
1645
- def plot_feature_importance(training_target_dir: str, model_name="linear"):
1637
+ def plot_feature_importance(target_dir: str, model_name="linear"):
1646
1638
  """
1647
1639
  Monitor feature importance ranking to filter out irrelevant features
1648
1640
  """
1649
- model = joblib.load(f"{training_target_dir}/{model_name}/{model_name}.best")
1641
+ model = joblib.load(f"{target_dir}/{model_name}/{model_name}.best")
1650
1642
  if hasattr(model, "feature_importances_"):
1651
1643
  feature_importances_ = model.feature_importances_.flatten()
1652
1644
  elif hasattr(model, "feature_importance"):
@@ -1664,11 +1656,11 @@ def plot_feature_importance(training_target_dir: str, model_name="linear"):
1664
1656
  )
1665
1657
 
1666
1658
 
1667
- def print_model_estimators(training_target_dir: str, model_name="linear"):
1659
+ def print_model_estimators(target_dir: str, model_name="linear"):
1668
1660
  """
1669
1661
  Look at a specific trained model
1670
1662
  """
1671
- model = joblib.load(f"{training_target_dir}/{model_name}/{model_name}.best")
1663
+ model = joblib.load(f"{target_dir}/{model_name}/{model_name}.best")
1672
1664
  for i in range(0, 100):
1673
1665
  logger.info(model.estimators_[i].get_depth())
1674
1666
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.8.3
3
+ Version: 0.9.0
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -1,5 +1,5 @@
1
1
  lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
2
- lecrapaud/api.py,sha256=S4YCj6Ulpomnp9fs0WKwodHt4Yb6HSubqxIrQr9_klE,10799
2
+ lecrapaud/api.py,sha256=MJgu7gaZ2Ip0lF_TP1t8vkADRooaVRsBDALJvM6vSsg,10516
3
3
  lecrapaud/config.py,sha256=WJglRV6-lUfYUy5LZjwv3aO_X6ossHY9BUT7_NCSY1I,942
4
4
  lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
5
5
  lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
@@ -11,7 +11,7 @@ lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=dl6tfvcqEr
11
11
  lecrapaud/db/alembic.ini,sha256=zgvur-5jnLsT66_98FaTOTNgjwObGZCE0HqMwRAeJrs,3587
12
12
  lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
13
13
  lecrapaud/db/models/base.py,sha256=CYtof_UjFwX3C7XUifequh_UtLHJ25bU7LCwT501uGE,7508
14
- lecrapaud/db/models/experiment.py,sha256=WNV5gz78JljL0xlAjy121K3cgDqwC0r6zL3bpH6b0dY,3642
14
+ lecrapaud/db/models/experiment.py,sha256=KwFbPTDAEXU0I9_33xMg6Ujx8QR_ffcjKA2YBOUuys4,3997
15
15
  lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
16
16
  lecrapaud/db/models/feature_selection.py,sha256=nXy_Lg3uDxid71vYll_qzdo8ajYsJEXskI7vLQ3uyW0,3315
17
17
  lecrapaud/db/models/feature_selection_rank.py,sha256=PvEpdv-JJt2wZMtX5TO0wyZ3IONlPkeDaC49i0VA-qU,2074
@@ -22,22 +22,22 @@ lecrapaud/db/models/score.py,sha256=_yaa6yBxugcOZMvLxqqIaMN7QGvzAOzOGCYQO0_gBjw,
22
22
  lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
23
23
  lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
24
24
  lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
25
- lecrapaud/experiment.py,sha256=NwwGDZqUyvvRu5EDK3_Oh0_kF29bNIPDawVFFpzFvZM,2350
25
+ lecrapaud/experiment.py,sha256=_kuRARuw1pXe13K3MHz22KOJSiRmvhPb7Q2Mkli32t8,2519
26
26
  lecrapaud/feature_engineering.py,sha256=U3YOftZBB3PEqGbu2aFY_3B3Ks9Hiu04UxixOkBz0UU,31168
27
- lecrapaud/feature_selection.py,sha256=CUekyBet6AehobFJV_RWmKCOjMHKWadcXJUDb6FbRZM,43671
27
+ lecrapaud/feature_selection.py,sha256=_Je2817Ah1v-6Rls4EiYC-fn3EbpBj6Uaq81KWBpQG4,43430
28
28
  lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
29
29
  lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
30
30
  lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
31
31
  lecrapaud/jobs/scheduler.py,sha256=SiYWPxokpKnR8V6btLOO6gbK0PEjSRoeG0kCbQvYPf4,990
32
32
  lecrapaud/jobs/tasks.py,sha256=jfhOCsgZlZGTnsLB_K7-Y3NgJqpzpUCFu7EfDQuIeSY,1655
33
- lecrapaud/model_selection.py,sha256=CZCIxPyFuKcr-jU8Lg-kiPA4idQUv3Tw9788HvHVq7M,62012
33
+ lecrapaud/model_selection.py,sha256=GCA21LGs2G6RqQF188BiJZFP-DNpEhzpTvJlewHFAi4,61504
34
34
  lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
35
35
  lecrapaud/speed_tests/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
36
36
  lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
37
37
  lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
38
38
  lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
39
39
  lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
40
- lecrapaud-0.8.3.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
41
- lecrapaud-0.8.3.dist-info/METADATA,sha256=w6Nq7AP4_mybzw_dTveQm6PbAddeWMtYswdNe2qoVxc,11623
42
- lecrapaud-0.8.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
43
- lecrapaud-0.8.3.dist-info/RECORD,,
40
+ lecrapaud-0.9.0.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
41
+ lecrapaud-0.9.0.dist-info/METADATA,sha256=GZItqrsRZu_QUCY0hFsdN03454boqWlYIaZh4fIEaHY,11623
42
+ lecrapaud-0.9.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
43
+ lecrapaud-0.9.0.dist-info/RECORD,,