lecrapaud 0.9.3__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)
  1. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/PKG-INFO +1 -1
  2. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/api.py +8 -7
  3. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/experiment.py +3 -1
  4. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/feature_selection.py +43 -38
  5. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/model_selection.py +3 -0
  6. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/pyproject.toml +1 -1
  7. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/LICENSE +0 -0
  8. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/README.md +0 -0
  9. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/__init__.py +0 -0
  10. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/config.py +0 -0
  11. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/__init__.py +0 -0
  12. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/README +0 -0
  13. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/env.py +0 -0
  14. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/script.py.mako +0 -0
  15. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  16. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  17. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  18. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  19. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic.ini +0 -0
  20. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/__init__.py +0 -0
  21. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/base.py +0 -0
  22. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/experiment.py +0 -0
  23. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/feature.py +0 -0
  24. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/feature_selection.py +0 -0
  25. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  26. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/model.py +0 -0
  27. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/model_selection.py +0 -0
  28. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/model_training.py +0 -0
  29. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/score.py +0 -0
  30. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/target.py +0 -0
  31. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/session.py +0 -0
  32. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/directories.py +0 -0
  33. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/feature_engineering.py +0 -0
  34. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/integrations/openai_integration.py +0 -0
  35. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/jobs/__init__.py +0 -0
  36. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/jobs/config.py +0 -0
  37. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/jobs/scheduler.py +0 -0
  38. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/jobs/tasks.py +0 -0
  39. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/search_space.py +0 -0
  40. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
  41. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
  42. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
  43. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/speed_tests/tests.ipynb +0 -0
  44. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/utils.py +0 -0

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.9.3
+Version: 0.10.0
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/api.py

@@ -167,14 +167,10 @@ class ExperimentEngine:
             else:
                 scores_reg.append(score)

-            # renaming pred column and concatenating with initial data
+            # renaming and concatenating with initial data
             if isinstance(y_pred, pd.DataFrame):
-                y_pred.rename(
-                    columns={"PRED": f"TARGET_{target_number}_PRED"}, inplace=True
-                )
-                new_data = pd.concat(
-                    [new_data, y_pred[f"TARGET_{target_number}_PRED"]], axis=1
-                )
+                y_pred = y_pred.add_prefix(f"TARGET_{target_number}_")
+                new_data = pd.concat([new_data, y_pred], axis=1)

             else:
                 y_pred.name = f"TARGET_{target_number}_PRED"
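
The change above swaps a single-column rename for DataFrame.add_prefix, so every prediction column (not just PRED) is kept and prefixed with the target name before being concatenated back onto the data. A minimal standalone sketch of the difference, using made-up column names:

import pandas as pd

# Hypothetical multi-column prediction frame; column names are illustrative only.
y_pred = pd.DataFrame({"PRED": [1, 0], "PROBA_0": [0.3, 0.8], "PROBA_1": [0.7, 0.2]})

# 0.9.3 behaviour: only the renamed "PRED" column was concatenated back onto the data.
old_kept = y_pred.rename(columns={"PRED": "TARGET_1_PRED"})[["TARGET_1_PRED"]]

# 0.10.0 behaviour: every column is prefixed and all of them are concatenated.
new_kept = y_pred.add_prefix("TARGET_1_")
print(list(new_kept.columns))  # ['TARGET_1_PRED', 'TARGET_1_PROBA_0', 'TARGET_1_PROBA_1']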

@@ -294,3 +290,8 @@ class ExperimentEngine:

     def get_feature_summary(self):
         return pd.read_csv(f"{self.experiment.path}/feature_summary.csv")
+
+    def get_threshold(self, target_number: int):
+        return joblib.load(
+            f"{self.experiment.path}/TARGET_{target_number}/thresholds.pkl"
+        )

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/experiment.py

@@ -39,7 +39,9 @@ def create_experiment(

     with get_db() as db:
         all_targets = Target.get_all(db=db)
-        targets = [target for target in all_targets if target.name in data.columns]
+        targets = [
+            target for target in all_targets if target.name in data.columns.str.upper()
+        ]
         experiment_name = f"{experiment_name}_{groups["number_of_groups"] if group_column else 'ng'}_{corr_threshold}_{percentile}_{max_features}_{dates['start_date'].date() if date_column else 'nd'}_{dates['end_date'].date() if date_column else 'nd'}"

         experiment_dir = f"{tmp_dir}/{experiment_name}"
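
The create_experiment change compares target names against the upper-cased column index, which makes target detection tolerant of lower- or mixed-case column names in the input frame. A small sketch with made-up names:

import pandas as pd

# Illustrative input whose columns are lower-case while target names are stored upper-case.
data = pd.DataFrame(columns=["target_1", "feature_a"])
target_names = ["TARGET_1", "TARGET_2"]

# 0.9.3 behaviour: exact match against the raw columns finds nothing here.
old = [t for t in target_names if t in data.columns]              # []

# 0.10.0 behaviour: the membership test runs against upper-cased column names.
new = [t for t in target_names if t in data.columns.str.upper()]  # ['TARGET_1']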

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/feature_selection.py

@@ -47,7 +47,6 @@ from lecrapaud.db import (
     FeatureSelection,
     FeatureSelectionRank,
 )
-from lecrapaud.db.session import get_db
 from lecrapaud.search_space import all_models

 # Annoying Warnings

@@ -120,6 +119,7 @@ class FeatureSelectionEngine:
             target_id=target.id,
             experiment_id=self.experiment_id,
         )
+        feature_map = {f.name: f.id for f in Feature.get_all(limit=20000)}

         if feature_selection.best_features_path and os.path.exists(
             feature_selection.best_features_path

@@ -156,21 +156,30 @@ class FeatureSelectionEngine:

         if target_type == "classification" and self.X_categorical.shape[1] > 0:
             feat_scores = self.select_categorical_features(percentile=percentile)
-            with get_db() as db:
-                for row in feat_scores.itertuples(index=False):
-                    feature = Feature.find_by(name=row.features, db=db)
-                    FeatureSelectionRank.upsert(
-                        ["feature_selection_id", "feature_id", "method"],
-                        db=db,
-                        score=row.score,
-                        pvalue=row.pvalue,
-                        support=row.support,
-                        rank=row.rank,
-                        method=row.method,
-                        training_time=row.training_time,
-                        feature_selection_id=feature_selection.id,
-                        feature_id=feature.id,
-                    )
+            rows = []
+            for row in feat_scores.itertuples(index=False):
+                feature_id = feature_map.get(row.features)
+
+                rows.append(
+                    {
+                        "feature_selection_id": feature_selection.id,
+                        "feature_id": feature_id,
+                        "method": row.method,
+                        "score": row.score,
+                        "pvalue": row.pvalue,
+                        "support": row.support,
+                        "rank": row.rank,
+                        "training_time": row.training_time,
+                    }
+                )
+
+            if len(rows) == 0:
+                logger.warning(
+                    f"No categorical features selected for TARGET_{target_number}"
+                )
+
+            FeatureSelectionRank.bulk_upsert(rows=rows)
+
             categorical_features_selected = feat_scores[feat_scores["support"]][
                 "features"
             ].values.tolist()

@@ -238,30 +247,26 @@ class FeatureSelectionEngine:

         logger.info("Inserting feature selection results to db...")
         rows = []
-        with get_db() as db:
-            feature_map = {f.name: f.id for f in Feature.get_all(db=db, limit=20000)}
-            for row in feat_scores.itertuples(index=False):
-                feature_id = feature_map.get(row.features)
-                if not feature_id:
-                    continue  # or raise if feature must exist
-
-                rows.append(
-                    {
-                        "feature_selection_id": feature_selection.id,
-                        "feature_id": feature_id,
-                        "method": row.method,
-                        "score": row.score,
-                        "pvalue": None if pd.isna(row.pvalue) else row.pvalue,
-                        "support": row.support,
-                        "rank": row.rank,
-                        "training_time": row.training_time,
-                    }
-                )
+        for row in feat_scores.itertuples(index=False):
+            feature_id = feature_map.get(row.features)
+
+            rows.append(
+                {
+                    "feature_selection_id": feature_selection.id,
+                    "feature_id": feature_id,
+                    "method": row.method,
+                    "score": row.score,
+                    "pvalue": None if pd.isna(row.pvalue) else row.pvalue,
+                    "support": row.support,
+                    "rank": row.rank,
+                    "training_time": row.training_time,
+                }
+            )

-            if len(rows) == 0:
-                raise ValueError(f"No features selected for TARGET_{target_number}")
+        if len(rows) == 0:
+            logger.warning(f"No numerical features selected for TARGET_{target_number}")

-            FeatureSelectionRank.bulk_upsert(rows=rows, db=db)
+        FeatureSelectionRank.bulk_upsert(rows=rows)

         # Merge the results
         logger.info("Merging feature selection methods...")

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/model_selection.py

@@ -1537,6 +1537,9 @@ def apply_thresholds(
            pd.Series(np.where(exceeded, col, -np.inf), index=pred_proba.index)
        )

+    # For each row:
+    # 1. If any threshold is exceeded, take the class with highest probability among exceeded
+    # 2. If no threshold is exceeded, take the class with highest probability overall
     if class_predictions:
         preds_df = pd.concat(class_predictions, axis=1)
         probs_df = pd.concat(class_probabilities, axis=1)
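
The new comment in apply_thresholds spells out the per-row decision rule. A self-contained sketch of that rule with made-up probabilities and thresholds (not the library's actual implementation):

import numpy as np
import pandas as pd

# Made-up class probabilities and per-class thresholds.
pred_proba = pd.DataFrame({0: [0.50, 0.45], 1: [0.30, 0.20], 2: [0.20, 0.35]})
thresholds = {0: 0.60, 1: 0.25, 2: 0.90}

# Mask out probabilities that do not clear their class threshold.
masked = pred_proba.copy()
for cls, thr in thresholds.items():
    masked[cls] = np.where(pred_proba[cls] >= thr, pred_proba[cls], -np.inf)

# Row-wise rule: argmax over the exceeded classes when any threshold was met,
# otherwise fall back to the overall argmax.
any_exceeded = np.isfinite(masked).any(axis=1)
preds = np.where(any_exceeded, masked.idxmax(axis=1), pred_proba.idxmax(axis=1))
print(preds)  # row 0 -> class 1 (only class 1 clears its threshold), row 1 -> class 0 (none do)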

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "lecrapaud"
-version = "0.9.3"
+version = "0.10.0"
 description = "Framework for machine and deep learning, with regression, classification and time series analysis"
 authors = [
     {name = "Pierre H. Gallet"}