PyPI - lecrapaud - Versions diffs - 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl - Mend

lecrapaud 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic. Click here for more details.

Files changed (10) hide show

lecrapaud/api.py +1 -1
lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +30 -0
lecrapaud/db/models/model_selection.py +1 -0
lecrapaud/feature_engineering.py +10 -3
lecrapaud/feature_selection.py +8 -4
lecrapaud/model_selection.py +17 -14
{lecrapaud-0.9.1.dist-info → lecrapaud-0.9.3.dist-info}/METADATA +1 -1
{lecrapaud-0.9.1.dist-info → lecrapaud-0.9.3.dist-info}/RECORD +10 -9
{lecrapaud-0.9.1.dist-info → lecrapaud-0.9.3.dist-info}/LICENSE +0 -0
{lecrapaud-0.9.1.dist-info → lecrapaud-0.9.3.dist-info}/WHEEL +0 -0

lecrapaud/api.py CHANGED Viewed

@@ -149,13 +149,13 @@ class ExperimentEngine:
                 y_pred.name = "PRED"
             # evaluate if TARGET is in columns
+            new_data.columns = new_data.columns.str.upper()
             if f"TARGET_{target_number}" in new_data.columns:
                 y_true = new_data[f"TARGET_{target_number}"]
                 prediction = pd.concat([y_true, y_pred], axis=1)
                 prediction.rename(
                     columns={f"TARGET_{target_number}": "TARGET"}, inplace=True
                 )
-                print(prediction)
                 score = evaluate(
                     prediction,
                     target_type=model.target_type,

lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""
+Revision ID: 72aa496ca65b
+Revises: 86457e2f333f
+Create Date: 2025-06-25 17:59:28.544283
+"""
+from typing import Sequence, Union
+from alembic import op
+import sqlalchemy as sa
+# revision identifiers, used by Alembic.
+revision: str = '72aa496ca65b'
+down_revision: Union[str, None] = '86457e2f333f'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lecrapaud_model_selections', sa.Column('best_thresholds', sa.JSON(), nullable=True))
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('lecrapaud_model_selections', 'best_thresholds')
+    # ### end Alembic commands ###

lecrapaud/db/models/model_selection.py CHANGED Viewed

@@ -34,6 +34,7 @@ class ModelSelection(Base):
         nullable=False,
     )
     best_model_params = Column(JSON)
+    best_thresholds = Column(JSON)
     best_model_path = Column(String(255))
     best_model_id = Column(
         BigInteger, ForeignKey("lecrapaud_models.id", ondelete="CASCADE")

lecrapaud/feature_engineering.py CHANGED Viewed

@@ -493,6 +493,13 @@ class PreprocessFeature:
         for name, data in zip(["train", "val", "test"], [train, val, test]):
             logger.info(f"{data.shape} {name} data")
+        Experiment.upsert(
+            match_fields=["id"],
+            id=self.experiment_id,
+            train_size=len(train),
+            val_size=len(val),
+            test_size=len(test),
+        )
         return (
             train.reset_index(drop=True),
             val.reset_index(drop=True),
@@ -813,7 +820,7 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
         # Create a copy of the DataFrame to avoid modifying the original
         df_check = df.copy()
         # Convert numpy arrays to tuples for hashing
         for col in df_check.columns:
             if df_check[col].apply(lambda x: isinstance(x, np.ndarray)).any():
@@ -830,10 +837,10 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
             duplicated_cols = []
             cols = df_check.columns
             for i, col1 in enumerate(cols):
-                for col2 in cols[i+1:]:
+                for col2 in cols[i + 1 :]:
                     if df_check[col1].equals(df_check[col2]):
                         duplicated_cols.append(f"{col1} = {col2}")
             results["Duplicated columns"] = (
                 ", ".join(duplicated_cols) if duplicated_cols else "None"
             )

lecrapaud/feature_selection.py CHANGED Viewed

@@ -899,10 +899,14 @@ class PreprocessModel:
         self.train = self.train[columns_to_keep]
         scaler_x = joblib.load(f"{self.preprocessing_dir}/scaler_x.pkl")
-        scaled_data = scaler_x.transform(self.train)
-        scaled_data = pd.DataFrame(
-            scaled_data, columns=self.train.columns, index=self.train.index
-        )
+        if scaler_x:
+            scaled_data = scaler_x.transform(self.train)
+            scaled_data = pd.DataFrame(
+                scaled_data, columns=self.train.columns, index=self.train.index
+            )
+        else:
+            scaled_data = self.train
         reshaped_data = None
         if (

lecrapaud/model_selection.py CHANGED Viewed

@@ -569,14 +569,6 @@ class ModelEngine:
         target_dir = Path(self.path)
-        # Load threshold
-        scores_tracking = pd.read_csv(f"{target_dir}/scores_tracking.csv")
-        self.threshold = (
-            scores_tracking["THRESHOLD"].values[0]
-            if "THRESHOLD" in scores_tracking.columns
-            else None
-        )
         # Search for files that contain '.best' or '.keras' in the name
         best_files = list(target_dir.glob("*.best*")) + list(
             target_dir.glob("*.keras*")
@@ -589,17 +581,11 @@ class ModelEngine:
             try:
                 # Attempt to load the file as a scikit-learn, XGBoost, or LightGBM model (Pickle format)
                 self._model = joblib.load(file_path)
-                logger.info(
-                    f"Loaded model {self._model.model_name} and threshold {self.threshold}"
-                )
             except (pickle.UnpicklingError, EOFError):
                 # If it's not a pickle file, try loading it as a Keras model
                 try:
                     # Attempt to load the file as a Keras model
                     self._model = keras.models.load_model(file_path)
-                    logger.info(
-                        f"Loaded model {self._model.model_name} and threshold {self.threshold}"
-                    )
                 except Exception as e:
                     raise FileNotFoundError(
                         f"Model could not be loaded from path: {file_path}: {e}"
@@ -612,6 +598,17 @@ class ModelEngine:
         self.model_name = self._model.model_name
         self.target_type = self._model.target_type
+        # Load threshold
+        self.threshold = (
+            joblib.load(f"{target_dir}/thresholds.pkl")
+            if self.target_type == "classification"
+            else None
+        )
+        logger.info(
+            f"Loaded model {self._model.model_name} and threshold {self.threshold}"
+        )
 def trainable(
     params,
@@ -1072,6 +1069,11 @@ class ModelSelectionEngine:
         scores_tracking = pd.read_csv(scores_tracking_path)
         best_score_overall = scores_tracking.iloc[0, :]
         best_model_name = best_score_overall["MODEL_NAME"]
+        if self.target_type == "classification":
+            best_thresholds = best_score_overall["THRESHOLDS"]
+            joblib.dump(best_thresholds, f"{self.target_dir}/thresholds.pkl")
+        else:
+            best_thresholds = None
         # Remove any .best or .keras files
         for file_path in glob.glob(os.path.join(self.target_dir, "*.best")) + glob.glob(
@@ -1096,6 +1098,7 @@ class ModelSelectionEngine:
             name=best_score_overall["MODEL_NAME"], type=self.target_type
         ).id
         model_selection.best_model_params = best_model_params
+        model_selection.best_thresholds = best_thresholds
         model_selection.best_model_path = best_model_path
         model_selection.save()

{lecrapaud-0.9.1.dist-info → lecrapaud-0.9.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.9.1
+Version: 0.9.3
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet

{lecrapaud-0.9.1.dist-info → lecrapaud-0.9.3.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
-lecrapaud/api.py,sha256=MJgu7gaZ2Ip0lF_TP1t8vkADRooaVRsBDALJvM6vSsg,10516
+lecrapaud/api.py,sha256=K1sDm7XBcHmxplWTltQMsVRJ5AOYJ_AUiOS0rtg6uH8,10542
 lecrapaud/config.py,sha256=WJglRV6-lUfYUy5LZjwv3aO_X6ossHY9BUT7_NCSY1I,942
 lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
 lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
@@ -8,6 +8,7 @@ lecrapaud/db/alembic/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl
 lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py,sha256=MNPyqWaQSHNV8zljD1G9f-LzrVz-nOKlgOhHEE0U8Oo,13060
 lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py,sha256=g6aLRV6jAKXkPUEcs9FAeGfsYpe9rMTxfqbNib3U0-U,809
 lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=dl6tfvcqErgJ6NKvjve0euu7l0BWyEAKSS-ychsEAl8,1139
+lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=sBgPLvvqI_HmPqQ0Kime1ZL1AHSeuYJHlmFJOnXWeuU,835
 lecrapaud/db/alembic.ini,sha256=zgvur-5jnLsT66_98FaTOTNgjwObGZCE0HqMwRAeJrs,3587
 lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
 lecrapaud/db/models/base.py,sha256=CYtof_UjFwX3C7XUifequh_UtLHJ25bU7LCwT501uGE,7508
@@ -16,28 +17,28 @@ lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnY
 lecrapaud/db/models/feature_selection.py,sha256=nXy_Lg3uDxid71vYll_qzdo8ajYsJEXskI7vLQ3uyW0,3315
 lecrapaud/db/models/feature_selection_rank.py,sha256=PvEpdv-JJt2wZMtX5TO0wyZ3IONlPkeDaC49i0VA-qU,2074
 lecrapaud/db/models/model.py,sha256=F0hyMjd4FFHCv6_arIWBEmBCGOfG3b6_uzU8ExtFE90,952
-lecrapaud/db/models/model_selection.py,sha256=fkZoUv7fdlBygWsfQyYPoayLomyp-gowiA3fbFPqdqw,1827
+lecrapaud/db/models/model_selection.py,sha256=fQ252IK31HTJiY6XtXGo5-VFQGMxKOhS1PcIGKVIHwo,1862
 lecrapaud/db/models/model_training.py,sha256=egggSfkW8C2nTadytc5DdjU7d2VEMT6LRRZxO1ZD5To,1600
 lecrapaud/db/models/score.py,sha256=_yaa6yBxugcOZMvLxqqIaMN7QGvzAOzOGCYQO0_gBjw,1601
 lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
 lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
 lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
 lecrapaud/experiment.py,sha256=_kuRARuw1pXe13K3MHz22KOJSiRmvhPb7Q2Mkli32t8,2519
-lecrapaud/feature_engineering.py,sha256=MnBp0oVYuzL61yn3gCSxFpYRGMe3A0Cb8GAX66alODA,32006
-lecrapaud/feature_selection.py,sha256=_Je2817Ah1v-6Rls4EiYC-fn3EbpBj6Uaq81KWBpQG4,43430
+lecrapaud/feature_engineering.py,sha256=2Er29SxHRIdzwxcEjk-2UI-MxQNVBPdTzlTemZ8bqYg,32193
+lecrapaud/feature_selection.py,sha256=9I0nQrou9f3tfIj_LRHCdj_eZYNNG0W4SOIXuHpIYRQ,43519
 lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
 lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
 lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
 lecrapaud/jobs/scheduler.py,sha256=SiYWPxokpKnR8V6btLOO6gbK0PEjSRoeG0kCbQvYPf4,990
 lecrapaud/jobs/tasks.py,sha256=jfhOCsgZlZGTnsLB_K7-Y3NgJqpzpUCFu7EfDQuIeSY,1655
-lecrapaud/model_selection.py,sha256=GCA21LGs2G6RqQF188BiJZFP-DNpEhzpTvJlewHFAi4,61504
+lecrapaud/model_selection.py,sha256=FRlW0G4qAPk4jyX_5kiCRhTIBrQuSVtmIv440NBu60c,61555
 lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
 lecrapaud/speed_tests/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
 lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
 lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
 lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
 lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
-lecrapaud-0.9.1.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
-lecrapaud-0.9.1.dist-info/METADATA,sha256=Rjz-7FASML-yASCCeKHf4eagzuP970B2f75ttC9E7aY,11623
-lecrapaud-0.9.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-lecrapaud-0.9.1.dist-info/RECORD,,
+lecrapaud-0.9.3.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
+lecrapaud-0.9.3.dist-info/METADATA,sha256=OUTi5k_W57bbhWfO0gp9nUy15JXf8SMrGQx9VFiTmH0,11623
+lecrapaud-0.9.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+lecrapaud-0.9.3.dist-info/RECORD,,

{lecrapaud-0.9.1.dist-info → lecrapaud-0.9.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{lecrapaud-0.9.1.dist-info → lecrapaud-0.9.3.dist-info}/WHEEL RENAMED Viewed

File without changes

lecrapaud 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl

Potentially problematic release.

lecrapaud 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl