lecrapaud 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

lecrapaud/api.py CHANGED
@@ -149,13 +149,13 @@ class ExperimentEngine:
149
149
  y_pred.name = "PRED"
150
150
 
151
151
  # evaluate if TARGET is in columns
152
+ new_data.columns = new_data.columns.str.upper()
152
153
  if f"TARGET_{target_number}" in new_data.columns:
153
154
  y_true = new_data[f"TARGET_{target_number}"]
154
155
  prediction = pd.concat([y_true, y_pred], axis=1)
155
156
  prediction.rename(
156
157
  columns={f"TARGET_{target_number}": "TARGET"}, inplace=True
157
158
  )
158
- print(prediction)
159
159
  score = evaluate(
160
160
  prediction,
161
161
  target_type=model.target_type,
@@ -0,0 +1,30 @@
1
+ """
2
+
3
+ Revision ID: 72aa496ca65b
4
+ Revises: 86457e2f333f
5
+ Create Date: 2025-06-25 17:59:28.544283
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '72aa496ca65b'
16
+ down_revision: Union[str, None] = '86457e2f333f'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def upgrade() -> None:
22
+ # ### commands auto generated by Alembic - please adjust! ###
23
+ op.add_column('lecrapaud_model_selections', sa.Column('best_thresholds', sa.JSON(), nullable=True))
24
+ # ### end Alembic commands ###
25
+
26
+
27
+ def downgrade() -> None:
28
+ # ### commands auto generated by Alembic - please adjust! ###
29
+ op.drop_column('lecrapaud_model_selections', 'best_thresholds')
30
+ # ### end Alembic commands ###
@@ -34,6 +34,7 @@ class ModelSelection(Base):
34
34
  nullable=False,
35
35
  )
36
36
  best_model_params = Column(JSON)
37
+ best_thresholds = Column(JSON)
37
38
  best_model_path = Column(String(255))
38
39
  best_model_id = Column(
39
40
  BigInteger, ForeignKey("lecrapaud_models.id", ondelete="CASCADE")
@@ -493,6 +493,13 @@ class PreprocessFeature:
493
493
  for name, data in zip(["train", "val", "test"], [train, val, test]):
494
494
  logger.info(f"{data.shape} {name} data")
495
495
 
496
+ Experiment.upsert(
497
+ match_fields=["id"],
498
+ id=self.experiment_id,
499
+ train_size=len(train),
500
+ val_size=len(val),
501
+ test_size=len(test),
502
+ )
496
503
  return (
497
504
  train.reset_index(drop=True),
498
505
  val.reset_index(drop=True),
@@ -813,7 +820,7 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
813
820
 
814
821
  # Create a copy of the DataFrame to avoid modifying the original
815
822
  df_check = df.copy()
816
-
823
+
817
824
  # Convert numpy arrays to tuples for hashing
818
825
  for col in df_check.columns:
819
826
  if df_check[col].apply(lambda x: isinstance(x, np.ndarray)).any():
@@ -830,10 +837,10 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
830
837
  duplicated_cols = []
831
838
  cols = df_check.columns
832
839
  for i, col1 in enumerate(cols):
833
- for col2 in cols[i+1:]:
840
+ for col2 in cols[i + 1 :]:
834
841
  if df_check[col1].equals(df_check[col2]):
835
842
  duplicated_cols.append(f"{col1} = {col2}")
836
-
843
+
837
844
  results["Duplicated columns"] = (
838
845
  ", ".join(duplicated_cols) if duplicated_cols else "None"
839
846
  )
@@ -899,10 +899,14 @@ class PreprocessModel:
899
899
  self.train = self.train[columns_to_keep]
900
900
 
901
901
  scaler_x = joblib.load(f"{self.preprocessing_dir}/scaler_x.pkl")
902
- scaled_data = scaler_x.transform(self.train)
903
- scaled_data = pd.DataFrame(
904
- scaled_data, columns=self.train.columns, index=self.train.index
905
- )
902
+
903
+ if scaler_x:
904
+ scaled_data = scaler_x.transform(self.train)
905
+ scaled_data = pd.DataFrame(
906
+ scaled_data, columns=self.train.columns, index=self.train.index
907
+ )
908
+ else:
909
+ scaled_data = self.train
906
910
 
907
911
  reshaped_data = None
908
912
  if (
@@ -569,14 +569,6 @@ class ModelEngine:
569
569
 
570
570
  target_dir = Path(self.path)
571
571
 
572
- # Load threshold
573
- scores_tracking = pd.read_csv(f"{target_dir}/scores_tracking.csv")
574
- self.threshold = (
575
- scores_tracking["THRESHOLD"].values[0]
576
- if "THRESHOLD" in scores_tracking.columns
577
- else None
578
- )
579
-
580
572
  # Search for files that contain '.best' or '.keras' in the name
581
573
  best_files = list(target_dir.glob("*.best*")) + list(
582
574
  target_dir.glob("*.keras*")
@@ -589,17 +581,11 @@ class ModelEngine:
589
581
  try:
590
582
  # Attempt to load the file as a scikit-learn, XGBoost, or LightGBM model (Pickle format)
591
583
  self._model = joblib.load(file_path)
592
- logger.info(
593
- f"Loaded model {self._model.model_name} and threshold {self.threshold}"
594
- )
595
584
  except (pickle.UnpicklingError, EOFError):
596
585
  # If it's not a pickle file, try loading it as a Keras model
597
586
  try:
598
587
  # Attempt to load the file as a Keras model
599
588
  self._model = keras.models.load_model(file_path)
600
- logger.info(
601
- f"Loaded model {self._model.model_name} and threshold {self.threshold}"
602
- )
603
589
  except Exception as e:
604
590
  raise FileNotFoundError(
605
591
  f"Model could not be loaded from path: {file_path}: {e}"
@@ -612,6 +598,17 @@ class ModelEngine:
612
598
  self.model_name = self._model.model_name
613
599
  self.target_type = self._model.target_type
614
600
 
601
+ # Load threshold
602
+ self.threshold = (
603
+ joblib.load(f"{target_dir}/thresholds.pkl")
604
+ if self.target_type == "classification"
605
+ else None
606
+ )
607
+
608
+ logger.info(
609
+ f"Loaded model {self._model.model_name} and threshold {self.threshold}"
610
+ )
611
+
615
612
 
616
613
  def trainable(
617
614
  params,
@@ -1072,6 +1069,11 @@ class ModelSelectionEngine:
1072
1069
  scores_tracking = pd.read_csv(scores_tracking_path)
1073
1070
  best_score_overall = scores_tracking.iloc[0, :]
1074
1071
  best_model_name = best_score_overall["MODEL_NAME"]
1072
+ if self.target_type == "classification":
1073
+ best_thresholds = best_score_overall["THRESHOLDS"]
1074
+ joblib.dump(best_thresholds, f"{self.target_dir}/thresholds.pkl")
1075
+ else:
1076
+ best_thresholds = None
1075
1077
 
1076
1078
  # Remove any .best or .keras files
1077
1079
  for file_path in glob.glob(os.path.join(self.target_dir, "*.best")) + glob.glob(
@@ -1096,6 +1098,7 @@ class ModelSelectionEngine:
1096
1098
  name=best_score_overall["MODEL_NAME"], type=self.target_type
1097
1099
  ).id
1098
1100
  model_selection.best_model_params = best_model_params
1101
+ model_selection.best_thresholds = best_thresholds
1099
1102
  model_selection.best_model_path = best_model_path
1100
1103
  model_selection.save()
1101
1104
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.9.1
3
+ Version: 0.9.3
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -1,5 +1,5 @@
1
1
  lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
2
- lecrapaud/api.py,sha256=MJgu7gaZ2Ip0lF_TP1t8vkADRooaVRsBDALJvM6vSsg,10516
2
+ lecrapaud/api.py,sha256=K1sDm7XBcHmxplWTltQMsVRJ5AOYJ_AUiOS0rtg6uH8,10542
3
3
  lecrapaud/config.py,sha256=WJglRV6-lUfYUy5LZjwv3aO_X6ossHY9BUT7_NCSY1I,942
4
4
  lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
5
5
  lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
@@ -8,6 +8,7 @@ lecrapaud/db/alembic/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl
8
8
  lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py,sha256=MNPyqWaQSHNV8zljD1G9f-LzrVz-nOKlgOhHEE0U8Oo,13060
9
9
  lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py,sha256=g6aLRV6jAKXkPUEcs9FAeGfsYpe9rMTxfqbNib3U0-U,809
10
10
  lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=dl6tfvcqErgJ6NKvjve0euu7l0BWyEAKSS-ychsEAl8,1139
11
+ lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=sBgPLvvqI_HmPqQ0Kime1ZL1AHSeuYJHlmFJOnXWeuU,835
11
12
  lecrapaud/db/alembic.ini,sha256=zgvur-5jnLsT66_98FaTOTNgjwObGZCE0HqMwRAeJrs,3587
12
13
  lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
13
14
  lecrapaud/db/models/base.py,sha256=CYtof_UjFwX3C7XUifequh_UtLHJ25bU7LCwT501uGE,7508
@@ -16,28 +17,28 @@ lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnY
16
17
  lecrapaud/db/models/feature_selection.py,sha256=nXy_Lg3uDxid71vYll_qzdo8ajYsJEXskI7vLQ3uyW0,3315
17
18
  lecrapaud/db/models/feature_selection_rank.py,sha256=PvEpdv-JJt2wZMtX5TO0wyZ3IONlPkeDaC49i0VA-qU,2074
18
19
  lecrapaud/db/models/model.py,sha256=F0hyMjd4FFHCv6_arIWBEmBCGOfG3b6_uzU8ExtFE90,952
19
- lecrapaud/db/models/model_selection.py,sha256=fkZoUv7fdlBygWsfQyYPoayLomyp-gowiA3fbFPqdqw,1827
20
+ lecrapaud/db/models/model_selection.py,sha256=fQ252IK31HTJiY6XtXGo5-VFQGMxKOhS1PcIGKVIHwo,1862
20
21
  lecrapaud/db/models/model_training.py,sha256=egggSfkW8C2nTadytc5DdjU7d2VEMT6LRRZxO1ZD5To,1600
21
22
  lecrapaud/db/models/score.py,sha256=_yaa6yBxugcOZMvLxqqIaMN7QGvzAOzOGCYQO0_gBjw,1601
22
23
  lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
23
24
  lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
24
25
  lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
25
26
  lecrapaud/experiment.py,sha256=_kuRARuw1pXe13K3MHz22KOJSiRmvhPb7Q2Mkli32t8,2519
26
- lecrapaud/feature_engineering.py,sha256=MnBp0oVYuzL61yn3gCSxFpYRGMe3A0Cb8GAX66alODA,32006
27
- lecrapaud/feature_selection.py,sha256=_Je2817Ah1v-6Rls4EiYC-fn3EbpBj6Uaq81KWBpQG4,43430
27
+ lecrapaud/feature_engineering.py,sha256=2Er29SxHRIdzwxcEjk-2UI-MxQNVBPdTzlTemZ8bqYg,32193
28
+ lecrapaud/feature_selection.py,sha256=9I0nQrou9f3tfIj_LRHCdj_eZYNNG0W4SOIXuHpIYRQ,43519
28
29
  lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
29
30
  lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
30
31
  lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
31
32
  lecrapaud/jobs/scheduler.py,sha256=SiYWPxokpKnR8V6btLOO6gbK0PEjSRoeG0kCbQvYPf4,990
32
33
  lecrapaud/jobs/tasks.py,sha256=jfhOCsgZlZGTnsLB_K7-Y3NgJqpzpUCFu7EfDQuIeSY,1655
33
- lecrapaud/model_selection.py,sha256=GCA21LGs2G6RqQF188BiJZFP-DNpEhzpTvJlewHFAi4,61504
34
+ lecrapaud/model_selection.py,sha256=FRlW0G4qAPk4jyX_5kiCRhTIBrQuSVtmIv440NBu60c,61555
34
35
  lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
35
36
  lecrapaud/speed_tests/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
36
37
  lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
37
38
  lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
38
39
  lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
39
40
  lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
40
- lecrapaud-0.9.1.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
41
- lecrapaud-0.9.1.dist-info/METADATA,sha256=Rjz-7FASML-yASCCeKHf4eagzuP970B2f75ttC9E7aY,11623
42
- lecrapaud-0.9.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
43
- lecrapaud-0.9.1.dist-info/RECORD,,
41
+ lecrapaud-0.9.3.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
42
+ lecrapaud-0.9.3.dist-info/METADATA,sha256=OUTi5k_W57bbhWfO0gp9nUy15JXf8SMrGQx9VFiTmH0,11623
43
+ lecrapaud-0.9.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
+ lecrapaud-0.9.3.dist-info/RECORD,,