PyPI - lecrapaud - Versions diffs - 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl - Mend

lecrapaud 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic. Click here for more details.

Files changed (9)

lecrapaud/api.py +3 -1
lecrapaud/experiment.py +1 -1
lecrapaud/feature_engineering.py +5 -2
lecrapaud/feature_selection.py +7 -0
lecrapaud/model_selection.py +16 -6
{lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/METADATA +1 -1
{lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/RECORD +9 -9
{lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/LICENSE +0 -0
{lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/WHEEL +0 -0

lecrapaud/api.py CHANGED Viewed

@@ -119,7 +119,9 @@ class ExperimentEngine:
             os.makedirs(data_dir, exist_ok=True)
         else:
             if data is None:
-                raise ValueError("Either id or data must be provided")
+                raise ValueError(
+                    "Either id or data must be provided. Data can be a path to a folder containing trained models"
+                )
             self.experiment = create_experiment(data=data, **kwargs)
         # Set all kwargs as instance attributes

lecrapaud/experiment.py CHANGED Viewed

@@ -35,7 +35,7 @@ def create_experiment(
     groups = {}
     if group_column:
         groups["number_of_groups"] = data[group_column].nunique()
-        groups["list_of_groups"] = data[group_column].unique().tolist()
+        groups["list_of_groups"] = data[group_column].unique().tolist().sort()
     with get_db() as db:
         all_targets = Target.get_all(db=db)

lecrapaud/feature_engineering.py CHANGED Viewed

@@ -48,6 +48,7 @@ import pandas as pd
 import numpy as np
 from itertools import product
 import joblib
+import os
 from sklearn.compose import ColumnTransformer
 from sklearn.decomposition import PCA
@@ -382,9 +383,11 @@ class PreprocessFeature:
         return train, val, test
     def inference(self):
+        data = self.data
         # PCA
-        pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
-        data, _ = self.add_pca_features(self.data, pcas=pcas)
+        if os.path.exists(f"{self.preprocessing_dir}/pcas.pkl"):
+            pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
+            data, _ = self.add_pca_features(data, pcas=pcas)
         # Encoding
         transformer = joblib.load(f"{self.preprocessing_dir}/column_transformer.pkl")

lecrapaud/feature_selection.py CHANGED Viewed

@@ -313,6 +313,13 @@ class FeatureSelectionEngine:
             f"Final pre-selection: {len(features)} features below {corr_threshold}% out of {len(features_selected_list)} features, and rejected {len(features_correlated)} features, {100*len(features)/len(features_selected_list):.2f}% features selected"
         )
+        pd.Series(features).to_csv(
+            f"{self.feature_selection_dir}/features.csv",
+            index=True,
+            header=True,
+            index_label="ID",
+        )
         # analysis 2
         features_selected_by_every_methods_uncorrelated = list(
             set(features) & set(features_selected_by_every_methods)

lecrapaud/model_selection.py CHANGED Viewed

@@ -11,6 +11,7 @@ import joblib
 import glob
 from pathlib import Path
 import pickle
+from pydantic import BaseModel
 os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
@@ -1585,9 +1586,20 @@ def plot_confusion_matrix(y_true, y_pred):
     plt.show()
+class Threshold(BaseModel):
+    threshold: float
+    precision: float
+    recall: float
+    f1: float
+class Thresholds(BaseModel):
+    thresholds: dict[str, Threshold]
 def find_best_threshold(
     prediction: pd.DataFrame, metric: str = "recall", target_value: float | None = None
-) -> dict:
+) -> Thresholds:
     """
     General function to find best threshold optimizing recall, precision, or f1.
@@ -1599,7 +1611,7 @@ def find_best_threshold(
     - target_value (float | None): minimum acceptable value for the chosen metric.
     Returns:
-    - dict: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
+    - Thresholds: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
     """
     assert metric in {"recall", "precision", "f1"}, "Invalid metric"
     y_true = prediction["TARGET"]
@@ -1687,14 +1699,14 @@ def find_best_threshold(
 def apply_thresholds(
-    pred_proba: pd.DataFrame, threshold: dict | int | float, classes
+    pred_proba: pd.DataFrame, threshold: Thresholds | float, classes
 ) -> pd.DataFrame:
     """
     Apply thresholds to predicted probabilities.
     Parameters:
     - pred_proba (pd.DataFrame): Probabilities per class.
-    - threshold (float | dict): Global threshold (float) or per-class dict from `find_best_threshold`.
+    - threshold (Thresholds | float): Global threshold (float) or per-class dict from `find_best_threshold`.
     - classes (iterable): List or array of class labels (used for binary classification).
     Returns:
@@ -1817,8 +1829,6 @@ def plot_threshold(prediction, threshold, precision, recall):
 # OLD - to sort out
 def print_model_estimators(target_dir: str, model_name="linear"):
     """
     Look at a specific trained model

{lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.14.2
+Version: 0.14.3
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet

{lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
-lecrapaud/api.py,sha256=HDJCYSYJ8md9X6ns17sQZB1yo3_O43G5HE7CZ3ICfyc,19655
+lecrapaud/api.py,sha256=j3aCdg4J6DCgmBoLiOWd_b-ji2wZVeyoFfoXd07rBXQ,19751
 lecrapaud/config.py,sha256=N8kQS1bNEXp6loIw7_X2_OjrbY4_a7UZhZcT1XgsYAs,1121
 lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
 lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
@@ -23,9 +23,9 @@ lecrapaud/db/models/score.py,sha256=fSfXLt6Dm-8Fy9ku0urMT5Fa6zNqn4YqVnEO4o3zKVI,
 lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
 lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
 lecrapaud/directories.py,sha256=0LrANuDgbuneSLker60c6q2hmGnQ3mKHIztTGzTx6Gw,826
-lecrapaud/experiment.py,sha256=xdqUJkfs3MfX2x5EAAcr8_EgfGLkb4TDk9k8TxS85Zc,2507
-lecrapaud/feature_engineering.py,sha256=MaMy4mkZ9wN7B8sLrSQcwjlMfUEU7ek8kTCYMz_Nc0o,32318
-lecrapaud/feature_selection.py,sha256=u3TWq3G5Xh3geQevGDOZEt_rl_m6-K_CR7SttFtpwKw,43409
+lecrapaud/experiment.py,sha256=u1whBtRFEhjM2OtKwHuUAp6T_7JXZhf_90luUiV4nD8,2514
+lecrapaud/feature_engineering.py,sha256=2V69qHmbsw9PZiZf64PpMqCL5mDMOukLeAaNsGftq9o,32421
+lecrapaud/feature_selection.py,sha256=6ry-oVPQHbipm1XSE5YsH7AY0lQFt4CFbWiHiRs1nxg,43593
 lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
 lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
 lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
@@ -35,10 +35,10 @@ lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQ
 lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
 lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
 lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
-lecrapaud/model_selection.py,sha256=Sv-VJaYvYjZUV2lq0vIKUxcp9PVrdm4IE18RG3RSvsw,67577
+lecrapaud/model_selection.py,sha256=5Q7KJ-oKDcW5ILlFzk4MjHyywB8HlNM6txCFNofku18,67796
 lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
 lecrapaud/utils.py,sha256=JdBB1NvbNIx4y0Una-kSZdo1_ZEocc5hwyYFIZKHmGg,8305
-lecrapaud-0.14.2.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
-lecrapaud-0.14.2.dist-info/METADATA,sha256=0wdQquOq8MZ4edbtEp4HdLdb89_N-XD4moPXZOjNizY,11047
-lecrapaud-0.14.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-lecrapaud-0.14.2.dist-info/RECORD,,
+lecrapaud-0.14.3.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
+lecrapaud-0.14.3.dist-info/METADATA,sha256=WoPUnqxj35FUK2nddUMy7zAx2DhC_DwPdK8m677Ybtc,11047
+lecrapaud-0.14.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+lecrapaud-0.14.3.dist-info/RECORD,,

{lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/WHEEL RENAMED Viewed

File without changes

lecrapaud 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl

Potentially problematic release.

lecrapaud 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl