lecrapaud 0.9.2__tar.gz → 0.9.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (44)
  1. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/PKG-INFO +1 -1
  2. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/api.py +1 -1
  3. lecrapaud-0.9.4/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +30 -0
  4. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/model_selection.py +1 -0
  5. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/experiment.py +3 -1
  6. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/feature_engineering.py +10 -3
  7. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/model_selection.py +17 -14
  8. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/pyproject.toml +1 -1
  9. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/LICENSE +0 -0
  10. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/README.md +0 -0
  11. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/__init__.py +0 -0
  12. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/config.py +0 -0
  13. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/__init__.py +0 -0
  14. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/README +0 -0
  15. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/env.py +0 -0
  16. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/script.py.mako +0 -0
  17. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  18. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  19. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  20. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic.ini +0 -0
  21. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/__init__.py +0 -0
  22. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/base.py +0 -0
  23. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/experiment.py +0 -0
  24. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/feature.py +0 -0
  25. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/feature_selection.py +0 -0
  26. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  27. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/model.py +0 -0
  28. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/model_training.py +0 -0
  29. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/score.py +0 -0
  30. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/target.py +0 -0
  31. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/session.py +0 -0
  32. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/directories.py +0 -0
  33. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/feature_selection.py +0 -0
  34. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/integrations/openai_integration.py +0 -0
  35. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/jobs/__init__.py +0 -0
  36. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/jobs/config.py +0 -0
  37. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/jobs/scheduler.py +0 -0
  38. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/jobs/tasks.py +0 -0
  39. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/search_space.py +0 -0
  40. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
  41. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
  42. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
  43. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/speed_tests/tests.ipynb +0 -0
  44. {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.9.2
3
+ Version: 0.9.4
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -149,13 +149,13 @@ class ExperimentEngine:
149
149
  y_pred.name = "PRED"
150
150
 
151
151
  # evaluate if TARGET is in columns
152
+ new_data.columns = new_data.columns.str.upper()
152
153
  if f"TARGET_{target_number}" in new_data.columns:
153
154
  y_true = new_data[f"TARGET_{target_number}"]
154
155
  prediction = pd.concat([y_true, y_pred], axis=1)
155
156
  prediction.rename(
156
157
  columns={f"TARGET_{target_number}": "TARGET"}, inplace=True
157
158
  )
158
- print(prediction)
159
159
  score = evaluate(
160
160
  prediction,
161
161
  target_type=model.target_type,
@@ -0,0 +1,30 @@
1
+ """
2
+
3
+ Revision ID: 72aa496ca65b
4
+ Revises: 86457e2f333f
5
+ Create Date: 2025-06-25 17:59:28.544283
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '72aa496ca65b'
16
+ down_revision: Union[str, None] = '86457e2f333f'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.add_column('lecrapaud_model_selections', sa.Column('best_thresholds', sa.JSON(), nullable=True))
24
+ # ### end Alembic commands ###
25
+
26
+
27
+ def downgrade() -> None:
28
+ # ### commands auto generated by Alembic - please adjust! ###
29
+ op.drop_column('lecrapaud_model_selections', 'best_thresholds')
30
+ # ### end Alembic commands ###
@@ -34,6 +34,7 @@ class ModelSelection(Base):
34
34
  nullable=False,
35
35
  )
36
36
  best_model_params = Column(JSON)
37
+ best_thresholds = Column(JSON)
37
38
  best_model_path = Column(String(255))
38
39
  best_model_id = Column(
39
40
  BigInteger, ForeignKey("lecrapaud_models.id", ondelete="CASCADE")
@@ -39,7 +39,9 @@ def create_experiment(
39
39
 
40
40
  with get_db() as db:
41
41
  all_targets = Target.get_all(db=db)
42
- targets = [target for target in all_targets if target.name in data.columns]
42
+ targets = [
43
+ target for target in all_targets if target.name in data.columns.str.upper()
44
+ ]
43
45
  experiment_name = f"{experiment_name}_{groups["number_of_groups"] if group_column else 'ng'}_{corr_threshold}_{percentile}_{max_features}_{dates['start_date'].date() if date_column else 'nd'}_{dates['end_date'].date() if date_column else 'nd'}"
44
46
 
45
47
  experiment_dir = f"{tmp_dir}/{experiment_name}"
@@ -493,6 +493,13 @@ class PreprocessFeature:
493
493
  for name, data in zip(["train", "val", "test"], [train, val, test]):
494
494
  logger.info(f"{data.shape} {name} data")
495
495
 
496
+ Experiment.upsert(
497
+ match_fields=["id"],
498
+ id=self.experiment_id,
499
+ train_size=len(train),
500
+ val_size=len(val),
501
+ test_size=len(test),
502
+ )
496
503
  return (
497
504
  train.reset_index(drop=True),
498
505
  val.reset_index(drop=True),
@@ -813,7 +820,7 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
813
820
 
814
821
  # Create a copy of the DataFrame to avoid modifying the original
815
822
  df_check = df.copy()
816
-
823
+
817
824
  # Convert numpy arrays to tuples for hashing
818
825
  for col in df_check.columns:
819
826
  if df_check[col].apply(lambda x: isinstance(x, np.ndarray)).any():
@@ -830,10 +837,10 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
830
837
  duplicated_cols = []
831
838
  cols = df_check.columns
832
839
  for i, col1 in enumerate(cols):
833
- for col2 in cols[i+1:]:
840
+ for col2 in cols[i + 1 :]:
834
841
  if df_check[col1].equals(df_check[col2]):
835
842
  duplicated_cols.append(f"{col1} = {col2}")
836
-
843
+
837
844
  results["Duplicated columns"] = (
838
845
  ", ".join(duplicated_cols) if duplicated_cols else "None"
839
846
  )
@@ -569,14 +569,6 @@ class ModelEngine:
569
569
 
570
570
  target_dir = Path(self.path)
571
571
 
572
- # Load threshold
573
- scores_tracking = pd.read_csv(f"{target_dir}/scores_tracking.csv")
574
- self.threshold = (
575
- scores_tracking["THRESHOLD"].values[0]
576
- if "THRESHOLD" in scores_tracking.columns
577
- else None
578
- )
579
-
580
572
  # Search for files that contain '.best' or '.keras' in the name
581
573
  best_files = list(target_dir.glob("*.best*")) + list(
582
574
  target_dir.glob("*.keras*")
@@ -589,17 +581,11 @@ class ModelEngine:
589
581
  try:
590
582
  # Attempt to load the file as a scikit-learn, XGBoost, or LightGBM model (Pickle format)
591
583
  self._model = joblib.load(file_path)
592
- logger.info(
593
- f"Loaded model {self._model.model_name} and threshold {self.threshold}"
594
- )
595
584
  except (pickle.UnpicklingError, EOFError):
596
585
  # If it's not a pickle file, try loading it as a Keras model
597
586
  try:
598
587
  # Attempt to load the file as a Keras model
599
588
  self._model = keras.models.load_model(file_path)
600
- logger.info(
601
- f"Loaded model {self._model.model_name} and threshold {self.threshold}"
602
- )
603
589
  except Exception as e:
604
590
  raise FileNotFoundError(
605
591
  f"Model could not be loaded from path: {file_path}: {e}"
@@ -612,6 +598,17 @@ class ModelEngine:
612
598
  self.model_name = self._model.model_name
613
599
  self.target_type = self._model.target_type
614
600
 
601
+ # Load threshold
602
+ self.threshold = (
603
+ joblib.load(f"{target_dir}/thresholds.pkl")
604
+ if self.target_type == "classification"
605
+ else None
606
+ )
607
+
608
+ logger.info(
609
+ f"Loaded model {self._model.model_name} and threshold {self.threshold}"
610
+ )
611
+
615
612
 
616
613
  def trainable(
617
614
  params,
@@ -1072,6 +1069,11 @@ class ModelSelectionEngine:
1072
1069
  scores_tracking = pd.read_csv(scores_tracking_path)
1073
1070
  best_score_overall = scores_tracking.iloc[0, :]
1074
1071
  best_model_name = best_score_overall["MODEL_NAME"]
1072
+ if self.target_type == "classification":
1073
+ best_thresholds = best_score_overall["THRESHOLDS"]
1074
+ joblib.dump(best_thresholds, f"{self.target_dir}/thresholds.pkl")
1075
+ else:
1076
+ best_thresholds = None
1075
1077
 
1076
1078
  # Remove any .best or .keras files
1077
1079
  for file_path in glob.glob(os.path.join(self.target_dir, "*.best")) + glob.glob(
@@ -1096,6 +1098,7 @@ class ModelSelectionEngine:
1096
1098
  name=best_score_overall["MODEL_NAME"], type=self.target_type
1097
1099
  ).id
1098
1100
  model_selection.best_model_params = best_model_params
1101
+ model_selection.best_thresholds = best_thresholds
1099
1102
  model_selection.best_model_path = best_model_path
1100
1103
  model_selection.save()
1101
1104
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lecrapaud"
3
- version = "0.9.2"
3
+ version = "0.9.4"
4
4
  description = "Framework for machine and deep learning, with regression, classification and time series analysis"
5
5
  authors = [
6
6
  {name = "Pierre H. Gallet"}
File without changes
File without changes
File without changes
File without changes