PyPI - lecrapaud - Versions diffs - 0.14.2__tar.gz → 0.14.3__tar.gz - Mend

lecrapaud 0.14.2.tar.gz → 0.14.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic. Click here for more details.

Files changed (44)

{lecrapaud-0.14.2 → lecrapaud-0.14.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.14.2
+Version: 0.14.3
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet

{lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/api.py RENAMED Viewed

@@ -119,7 +119,9 @@ class ExperimentEngine:
             os.makedirs(data_dir, exist_ok=True)
         else:
             if data is None:
-                raise ValueError("Either id or data must be provided")
+                raise ValueError(
+                    "Either id or data must be provided. Data can be a path to a folder containing trained models"
+                )
             self.experiment = create_experiment(data=data, **kwargs)
         # Set all kwargs as instance attributes

{lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/experiment.py RENAMED Viewed

@@ -35,7 +35,7 @@ def create_experiment(
     groups = {}
     if group_column:
         groups["number_of_groups"] = data[group_column].nunique()
-        groups["list_of_groups"] = data[group_column].unique().tolist()
+        groups["list_of_groups"] = data[group_column].unique().tolist().sort()
     with get_db() as db:
         all_targets = Target.get_all(db=db)

{lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/feature_engineering.py RENAMED Viewed

@@ -48,6 +48,7 @@ import pandas as pd
 import numpy as np
 from itertools import product
 import joblib
+import os
 from sklearn.compose import ColumnTransformer
 from sklearn.decomposition import PCA
@@ -382,9 +383,11 @@ class PreprocessFeature:
         return train, val, test
     def inference(self):
+        data = self.data
         # PCA
-        pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
-        data, _ = self.add_pca_features(self.data, pcas=pcas)
+        if os.path.exists(f"{self.preprocessing_dir}/pcas.pkl"):
+            pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
+            data, _ = self.add_pca_features(data, pcas=pcas)
         # Encoding
         transformer = joblib.load(f"{self.preprocessing_dir}/column_transformer.pkl")

{lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/feature_selection.py RENAMED Viewed

@@ -313,6 +313,13 @@ class FeatureSelectionEngine:
             f"Final pre-selection: {len(features)} features below {corr_threshold}% out of {len(features_selected_list)} features, and rejected {len(features_correlated)} features, {100*len(features)/len(features_selected_list):.2f}% features selected"
         )
+        pd.Series(features).to_csv(
+            f"{self.feature_selection_dir}/features.csv",
+            index=True,
+            header=True,
+            index_label="ID",
+        )
         # analysis 2
         features_selected_by_every_methods_uncorrelated = list(
             set(features) & set(features_selected_by_every_methods)

{lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/model_selection.py RENAMED Viewed

@@ -11,6 +11,7 @@ import joblib
 import glob
 from pathlib import Path
 import pickle
+from pydantic import BaseModel
 os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
@@ -1585,9 +1586,20 @@ def plot_confusion_matrix(y_true, y_pred):
     plt.show()
+class Threshold(BaseModel):
+    threshold: float
+    precision: float
+    recall: float
+    f1: float
+class Thresholds(BaseModel):
+    thresholds: dict[str, Threshold]
 def find_best_threshold(
     prediction: pd.DataFrame, metric: str = "recall", target_value: float | None = None
-) -> dict:
+) -> Thresholds:
     """
     General function to find best threshold optimizing recall, precision, or f1.
@@ -1599,7 +1611,7 @@ def find_best_threshold(
     - target_value (float | None): minimum acceptable value for the chosen metric.
     Returns:
-    - dict: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
+    - Thresholds: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
     """
     assert metric in {"recall", "precision", "f1"}, "Invalid metric"
     y_true = prediction["TARGET"]
@@ -1687,14 +1699,14 @@ def find_best_threshold(
 def apply_thresholds(
-    pred_proba: pd.DataFrame, threshold: dict | int | float, classes
+    pred_proba: pd.DataFrame, threshold: Thresholds | float, classes
 ) -> pd.DataFrame:
     """
     Apply thresholds to predicted probabilities.
     Parameters:
     - pred_proba (pd.DataFrame): Probabilities per class.
-    - threshold (float | dict): Global threshold (float) or per-class dict from `find_best_threshold`.
+    - threshold (Thresholds | float): Global threshold (float) or per-class dict from `find_best_threshold`.
     - classes (iterable): List or array of class labels (used for binary classification).
     Returns:
@@ -1817,8 +1829,6 @@ def plot_threshold(prediction, threshold, precision, recall):
 # OLD - to sort out
 def print_model_estimators(target_dir: str, model_name="linear"):
     """
     Look at a specific trained model

{lecrapaud-0.14.2 → lecrapaud-0.14.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "lecrapaud"
-version = "0.14.2"
+version = "0.14.3"
 description = "Framework for machine and deep learning, with regression, classification and time series analysis"
 authors = [
     {name = "Pierre H. Gallet"}