PyPI - lecrapaud - Versions diffs - 0.9.2__tar.gz → 0.9.4__tar.gz - Mend

lecrapaud 0.9.2 (tar.gz) → 0.9.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic. Click here for more details.

Files changed (44)

{lecrapaud-0.9.2 → lecrapaud-0.9.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.9.2
+Version: 0.9.4
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet

{lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/api.py RENAMED Viewed

@@ -149,13 +149,13 @@ class ExperimentEngine:
                 y_pred.name = "PRED"
             # evaluate if TARGET is in columns
+            new_data.columns = new_data.columns.str.upper()
             if f"TARGET_{target_number}" in new_data.columns:
                 y_true = new_data[f"TARGET_{target_number}"]
                 prediction = pd.concat([y_true, y_pred], axis=1)
                 prediction.rename(
                     columns={f"TARGET_{target_number}": "TARGET"}, inplace=True
                 )
-                print(prediction)
                 score = evaluate(
                     prediction,
                     target_type=model.target_type,

lecrapaud-0.9.4/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""
+Revision ID: 72aa496ca65b
+Revises: 86457e2f333f
+Create Date: 2025-06-25 17:59:28.544283
+"""
+from typing import Sequence, Union
+from alembic import op
+import sqlalchemy as sa
+# revision identifiers, used by Alembic.
+revision: str = '72aa496ca65b'
+down_revision: Union[str, None] = '86457e2f333f'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lecrapaud_model_selections', sa.Column('best_thresholds', sa.JSON(), nullable=True))
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('lecrapaud_model_selections', 'best_thresholds')
+    # ### end Alembic commands ###

{lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/model_selection.py RENAMED Viewed

@@ -34,6 +34,7 @@ class ModelSelection(Base):
         nullable=False,
     )
     best_model_params = Column(JSON)
+    best_thresholds = Column(JSON)
     best_model_path = Column(String(255))
     best_model_id = Column(
         BigInteger, ForeignKey("lecrapaud_models.id", ondelete="CASCADE")

{lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/experiment.py RENAMED Viewed

@@ -39,7 +39,9 @@ def create_experiment(
     with get_db() as db:
         all_targets = Target.get_all(db=db)
-        targets = [target for target in all_targets if target.name in data.columns]
+        targets = [
+            target for target in all_targets if target.name in data.columns.str.upper()
+        ]
         experiment_name = f"{experiment_name}_{groups["number_of_groups"] if group_column else 'ng'}_{corr_threshold}_{percentile}_{max_features}_{dates['start_date'].date() if date_column else 'nd'}_{dates['end_date'].date() if date_column else 'nd'}"
         experiment_dir = f"{tmp_dir}/{experiment_name}"

{lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/feature_engineering.py RENAMED Viewed

@@ -493,6 +493,13 @@ class PreprocessFeature:
         for name, data in zip(["train", "val", "test"], [train, val, test]):
             logger.info(f"{data.shape} {name} data")
+        Experiment.upsert(
+            match_fields=["id"],
+            id=self.experiment_id,
+            train_size=len(train),
+            val_size=len(val),
+            test_size=len(test),
+        )
         return (
             train.reset_index(drop=True),
             val.reset_index(drop=True),
@@ -813,7 +820,7 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
         # Create a copy of the DataFrame to avoid modifying the original
         df_check = df.copy()
         # Convert numpy arrays to tuples for hashing
         for col in df_check.columns:
             if df_check[col].apply(lambda x: isinstance(x, np.ndarray)).any():
@@ -830,10 +837,10 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
             duplicated_cols = []
             cols = df_check.columns
             for i, col1 in enumerate(cols):
-                for col2 in cols[i+1:]:
+                for col2 in cols[i + 1 :]:
                     if df_check[col1].equals(df_check[col2]):
                         duplicated_cols.append(f"{col1} = {col2}")
             results["Duplicated columns"] = (
                 ", ".join(duplicated_cols) if duplicated_cols else "None"
             )

{lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/model_selection.py RENAMED Viewed

@@ -569,14 +569,6 @@ class ModelEngine:
         target_dir = Path(self.path)
-        # Load threshold
-        scores_tracking = pd.read_csv(f"{target_dir}/scores_tracking.csv")
-        self.threshold = (
-            scores_tracking["THRESHOLD"].values[0]
-            if "THRESHOLD" in scores_tracking.columns
-            else None
-        )
         # Search for files that contain '.best' or '.keras' in the name
         best_files = list(target_dir.glob("*.best*")) + list(
             target_dir.glob("*.keras*")
@@ -589,17 +581,11 @@ class ModelEngine:
             try:
                 # Attempt to load the file as a scikit-learn, XGBoost, or LightGBM model (Pickle format)
                 self._model = joblib.load(file_path)
-                logger.info(
-                    f"Loaded model {self._model.model_name} and threshold {self.threshold}"
-                )
             except (pickle.UnpicklingError, EOFError):
                 # If it's not a pickle file, try loading it as a Keras model
                 try:
                     # Attempt to load the file as a Keras model
                     self._model = keras.models.load_model(file_path)
-                    logger.info(
-                        f"Loaded model {self._model.model_name} and threshold {self.threshold}"
-                    )
                 except Exception as e:
                     raise FileNotFoundError(
                         f"Model could not be loaded from path: {file_path}: {e}"
@@ -612,6 +598,17 @@ class ModelEngine:
         self.model_name = self._model.model_name
         self.target_type = self._model.target_type
+        # Load threshold
+        self.threshold = (
+            joblib.load(f"{target_dir}/thresholds.pkl")
+            if self.target_type == "classification"
+            else None
+        )
+        logger.info(
+            f"Loaded model {self._model.model_name} and threshold {self.threshold}"
+        )
 def trainable(
     params,
@@ -1072,6 +1069,11 @@ class ModelSelectionEngine:
         scores_tracking = pd.read_csv(scores_tracking_path)
         best_score_overall = scores_tracking.iloc[0, :]
         best_model_name = best_score_overall["MODEL_NAME"]
+        if self.target_type == "classification":
+            best_thresholds = best_score_overall["THRESHOLDS"]
+            joblib.dump(best_thresholds, f"{self.target_dir}/thresholds.pkl")
+        else:
+            best_thresholds = None
         # Remove any .best or .keras files
         for file_path in glob.glob(os.path.join(self.target_dir, "*.best")) + glob.glob(
@@ -1096,6 +1098,7 @@ class ModelSelectionEngine:
             name=best_score_overall["MODEL_NAME"], type=self.target_type
         ).id
         model_selection.best_model_params = best_model_params
+        model_selection.best_thresholds = best_thresholds
         model_selection.best_model_path = best_model_path
         model_selection.save()

{lecrapaud-0.9.2 → lecrapaud-0.9.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "lecrapaud"
-version = "0.9.2"
+version = "0.9.4"
 description = "Framework for machine and deep learning, with regression, classification and time series analysis"
 authors = [
     {name = "Pierre H. Gallet"}