PyPI - lecrapaud - Versions diffs - 0.11.2__py3-none-any.whl → 0.11.4__py3-none-any.whl - Mend

lecrapaud 0.11.2__py3-none-any.whl → 0.11.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic. Click here for more details.

Files changed (9) hide show

lecrapaud/api.py +15 -2
lecrapaud/db/alembic/env.py +1 -1
lecrapaud/db/alembic.ini +8 -5
lecrapaud/model_selection.py +116 -31
lecrapaud/utils.py +1 -1
{lecrapaud-0.11.2.dist-info → lecrapaud-0.11.4.dist-info}/METADATA +1 -1
{lecrapaud-0.11.2.dist-info → lecrapaud-0.11.4.dist-info}/RECORD +9 -9
{lecrapaud-0.11.2.dist-info → lecrapaud-0.11.4.dist-info}/LICENSE +0 -0
{lecrapaud-0.11.2.dist-info → lecrapaud-0.11.4.dist-info}/WHEEL +0 -0

lecrapaud/api.py CHANGED Viewed

@@ -27,11 +27,12 @@ Basic Usage:
 import joblib
 import pandas as pd
+import ast
+import os
 import logging
 import seaborn as sns
 import numpy as np
 import matplotlib.pyplot as plt
-from lecrapaud.utils import logger
 from lecrapaud.db.session import init_db
 from lecrapaud.feature_selection import FeatureSelectionEngine, PreprocessModel
 from lecrapaud.model_selection import (
@@ -46,6 +47,8 @@ from lecrapaud.feature_engineering import FeatureEngineeringEngine, PreprocessFe
 from lecrapaud.experiment import create_experiment
 from lecrapaud.db import Experiment
 from lecrapaud.search_space import normalize_models_idx
+from lecrapaud.utils import logger
+from lecrapaud.directories import tmp_dir
 class LeCrapaud:
@@ -108,6 +111,12 @@ class ExperimentEngine:
         if id:
             self.experiment = Experiment.get(id)
             kwargs.update(self.experiment.context)
+            experiment_dir = f"{tmp_dir}/{self.experiment.name}"
+            preprocessing_dir = f"{experiment_dir}/preprocessing"
+            data_dir = f"{experiment_dir}/data"
+            os.makedirs(experiment_dir, exist_ok=True)
+            os.makedirs(preprocessing_dir, exist_ok=True)
+            os.makedirs(data_dir, exist_ok=True)
         else:
             if data is None:
                 raise ValueError("Either id or data must be provided")
@@ -344,9 +353,13 @@ class ExperimentEngine:
         return pd.read_csv(f"{self.experiment.path}/feature_summary.csv")
     def get_threshold(self, target_number: int):
-        return joblib.load(
+        thresholds = joblib.load(
             f"{self.experiment.path}/TARGET_{target_number}/thresholds.pkl"
         )
+        if isinstance(thresholds, str):
+            thresholds = ast.literal_eval(thresholds)
+        return thresholds
     def load_model(self, target_number: int, model_name: str = None):

lecrapaud/db/alembic/env.py CHANGED Viewed

@@ -15,7 +15,7 @@ config.set_main_option("sqlalchemy.url", DATABASE_URL)
 # Interpret the config file for Python logging.
 # This line sets up loggers basically.
 if config.config_file_name is not None:
-    fileConfig(config.config_file_name)
+    fileConfig(config.config_file_name, disable_existing_loggers=False)
 # add your model's MetaData object here
 # for 'autogenerate' support

lecrapaud/db/alembic.ini CHANGED Viewed

@@ -84,11 +84,14 @@ sqlalchemy.url = %(DATABASE_URL)s
 [loggers]
 keys = root,sqlalchemy,alembic
+[loggers_root]
+disable_existing_loggers = False
 [handlers]
 keys = console
 [formatters]
-keys = generic
+keys = lecrapaud_format
 [logger_root]
 level = WARN
@@ -109,8 +112,8 @@ qualname = alembic
 class = StreamHandler
 args = (sys.stderr,)
 level = NOTSET
-formatter = generic
+formatter = lecrapaud_format
-[formatter_generic]
-format = %(levelname)-5.5s [%(name)s] %(message)s
-datefmt = %H:%M:%S
+[formatter_lecrapaud_format]
+format = %(asctime)s - %(name)s - %(levelname)s - %(funcName)s - %(message)s
+datefmt = %Y-%m-%d %H:%M:%S

lecrapaud/model_selection.py CHANGED Viewed

@@ -1328,46 +1328,131 @@ def load_model(target_dir: str):
 # plots
 def plot_evaluation_for_classification(prediction: dict):
     """
-    Args
-        prediction (pd.DataFrame): Should be a df with TARGET, PRED, 0, 1 columns for y_true value (TARGET), y_pred (PRED), and probabilities (for classification only : 0 and 1)
+    Plot evaluation metrics for classification tasks (both binary and multiclass).
+    Args:
+        prediction (pd.DataFrame): Should be a df with:
+            - TARGET: true labels
+            - PRED: predicted labels
+            - For binary: column '1' or 1 for positive class probabilities
+            - For multiclass: columns 2 onwards for class probabilities
     """
     y_true = prediction["TARGET"]
     y_pred = prediction["PRED"]
-    y_pred_proba = prediction[1] if 1 in prediction.columns else prediction["1"]
     # Plot confusion matrix
     plot_confusion_matrix(y_true, y_pred)
-    # Compute ROC curve and ROC area
-    fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba)
-    roc_auc = auc(fpr, tpr)
+    # Determine if binary or multiclass
+    unique_labels = np.unique(y_true)
+    unique_labels = np.sort(unique_labels)
+    n_classes = len(unique_labels)
+    if n_classes <= 2:
+        # Binary classification
+        y_pred_proba = prediction[1] if 1 in prediction.columns else prediction["1"]
+        # Compute and plot ROC curve
+        fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
+        roc_auc = auc(fpr, tpr)
+        plt.figure(figsize=(8, 8))
+        plt.plot(
+            fpr,
+            tpr,
+            color="darkorange",
+            lw=2,
+            label=f"ROC curve (area = {roc_auc:0.2f})",
+        )
+        plt.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
+        plt.xlim([0.0, 1.0])
+        plt.ylim([0.0, 1.05])
+        plt.xlabel("False Positive Rate")
+        plt.ylabel("True Positive Rate")
+        plt.title("ROC Curve")
+        plt.legend(loc="lower right")
+        plt.show()
+        # Compute and plot precision-recall curve
+        precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)
+        average_precision = average_precision_score(y_true, y_pred_proba)
+        plt.figure(figsize=(8, 8))
+        plt.step(recall, precision, color="b", alpha=0.2, where="post")
+        plt.fill_between(recall, precision, step="post", alpha=0.2, color="b")
+        plt.xlabel("Recall")
+        plt.ylabel("Precision")
+        plt.ylim([0.0, 1.05])
+        plt.xlim([0.0, 1.0])
+        plt.title(f"Precision-Recall Curve: AP={average_precision:0.2f}")
+        plt.show()
-    plt.figure(figsize=(8, 8))
-    plt.plot(
-        fpr, tpr, color="darkorange", lw=2, label="ROC curve (area = %0.2f)" % roc_auc
-    )
-    plt.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
-    plt.xlim([0.0, 1.0])
-    plt.ylim([0.0, 1.05])
-    plt.xlabel("False Positive Rate")
-    plt.ylabel("True Positive Rate")
-    plt.title("ROC Curve")
-    plt.legend(loc="lower right")
-    plt.show()
+    else:
+        # Multiclass classification
+        # Get class probabilities
+        pred_cols = [
+            col for col in prediction.columns if col not in ["ID", "TARGET", "PRED"]
+        ]
+        y_pred_proba = prediction[pred_cols].values
-    # Compute precision-recall curve
-    precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)
-    average_precision = average_precision_score(y_true, y_pred_proba)
-    plt.figure(figsize=(8, 8))
-    plt.step(recall, precision, color="b", alpha=0.2, where="post")
-    plt.fill_between(recall, precision, step="post", alpha=0.2, color="b")
-    plt.xlabel("Recall")
-    plt.ylabel("Precision")
-    plt.ylim([0.0, 1.05])
-    plt.xlim([0.0, 1.0])
-    plt.title("Precision-Recall Curve: AP={0:0.2f}".format(average_precision))
-    plt.show()
+        # Compute ROC curve and ROC area for each class
+        fpr = dict()
+        tpr = dict()
+        roc_auc = dict()
+        plt.figure(figsize=(10, 8))
+        colors = plt.cm.get_cmap("tab10")(np.linspace(0, 1, n_classes))
+        for i, (label, color) in enumerate(zip(unique_labels, colors)):
+            y_true_binary = (y_true == label).astype(int)
+            y_score = y_pred_proba[:, i]
+            fpr[i], tpr[i], _ = roc_curve(y_true_binary, y_score)
+            roc_auc[i] = auc(fpr[i], tpr[i])
+            plt.plot(
+                fpr[i],
+                tpr[i],
+                color=color,
+                lw=2,
+                label=f"Class {label} (area = {roc_auc[i]:0.2f})",
+            )
+        plt.plot([0, 1], [0, 1], "k--", lw=2)
+        plt.xlim([0.0, 1.0])
+        plt.ylim([0.0, 1.05])
+        plt.xlabel("False Positive Rate")
+        plt.ylabel("True Positive Rate")
+        plt.title("Multiclass ROC Curves (One-vs-Rest)")
+        plt.legend(loc="lower right")
+        plt.show()
+        # Compute PR curve for each class
+        plt.figure(figsize=(10, 8))
+        for i, (label, color) in enumerate(zip(unique_labels, colors)):
+            y_true_binary = (y_true == label).astype(int)
+            y_score = y_pred_proba[:, i]
+            precision, recall, _ = precision_recall_curve(y_true_binary, y_score)
+            average_precision = average_precision_score(y_true_binary, y_score)
+            plt.step(
+                recall,
+                precision,
+                color=color,
+                alpha=0.8,
+                where="post",
+                label=f"Class {label} (AP = {average_precision:0.2f})",
+            )
+        plt.xlabel("Recall")
+        plt.ylabel("Precision")
+        plt.ylim([0.0, 1.05])
+        plt.xlim([0.0, 1.0])
+        plt.title("Multiclass Precision-Recall Curves")
+        plt.legend(loc="lower left")
+        plt.show()
 def plot_confusion_matrix(y_true, y_pred):

lecrapaud/utils.py CHANGED Viewed

@@ -9,7 +9,7 @@ from ftfy import fix_text
 import unicodedata
 import re
 import string
+import sys
 from lecrapaud.directories import logger_dir
 from lecrapaud.config import LOGGING_LEVEL, PYTHON_ENV, LECRAPAUD_LOCAL

{lecrapaud-0.11.2.dist-info → lecrapaud-0.11.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.11.2
+Version: 0.11.4
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet

{lecrapaud-0.11.2.dist-info → lecrapaud-0.11.4.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,15 @@
 lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
-lecrapaud/api.py,sha256=M3TlXXQ_2HSlXoIgz-wAXOyz5D2fvWtEWj1Yac1VcEU,16647
+lecrapaud/api.py,sha256=nh1dRcqDpEnyOMjvayUNg_DR1D26gXCQ7hZpsYENqk0,17178
 lecrapaud/config.py,sha256=eYnrktVq457xMIMGcUSilJdNxCsaGP_gRAlzCSwd6Vo,1047
 lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
 lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
-lecrapaud/db/alembic/env.py,sha256=rseEi8oR_eKXYYW3UwOKiCMuDEwT4lxsT7llySOUpgk,2305
+lecrapaud/db/alembic/env.py,sha256=0VdxHNIxhPCgUHnx6EwlVZLUMLlbqZ_eV7i0Ho2XqeI,2337
 lecrapaud/db/alembic/script.py.mako,sha256=MEqL-2qATlST9TAOeYgscMn1uy6HUS9NFvDgl93dMj8,635
 lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py,sha256=MNPyqWaQSHNV8zljD1G9f-LzrVz-nOKlgOhHEE0U8Oo,13060
 lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py,sha256=g6aLRV6jAKXkPUEcs9FAeGfsYpe9rMTxfqbNib3U0-U,809
 lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py,sha256=dl6tfvcqErgJ6NKvjve0euu7l0BWyEAKSS-ychsEAl8,1139
 lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=sBgPLvvqI_HmPqQ0Kime1ZL1AHSeuYJHlmFJOnXWeuU,835
-lecrapaud/db/alembic.ini,sha256=zgvur-5jnLsT66_98FaTOTNgjwObGZCE0HqMwRAeJrs,3587
+lecrapaud/db/alembic.ini,sha256=TXrZB4pWVLn2EUg867yp6paA_19vGeirO95mTPA3nbs,3699
 lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
 lecrapaud/db/models/base.py,sha256=CYtof_UjFwX3C7XUifequh_UtLHJ25bU7LCwT501uGE,7508
 lecrapaud/db/models/experiment.py,sha256=IeS-TWPT-4l9xCMIdR2S2O-foXNt3Ru6WmtPMWToK7c,4035
@@ -35,10 +35,10 @@ lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQ
 lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
 lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
 lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
-lecrapaud/model_selection.py,sha256=PQGEWVWN-4ZeHCqrmXBpHgq1QZi_1nOOeu5gazXGDLQ,60487
+lecrapaud/model_selection.py,sha256=h4WPtGCUeuWIXDJ8L2-i1I7RwrZlnxAresGW5l8bGwE,63195
 lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
-lecrapaud/utils.py,sha256=MUgDoJ31GOF8WRLn_WLzDbHw7OTKxq_ldnZT6dpxdQo,8295
-lecrapaud-0.11.2.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
-lecrapaud-0.11.2.dist-info/METADATA,sha256=Ft9avufHygUCmyQ4Fbkp2k_W5B4HkIY_v-zl3EDX4LI,11017
-lecrapaud-0.11.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-lecrapaud-0.11.2.dist-info/RECORD,,
+lecrapaud/utils.py,sha256=JdBB1NvbNIx4y0Una-kSZdo1_ZEocc5hwyYFIZKHmGg,8305
+lecrapaud-0.11.4.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
+lecrapaud-0.11.4.dist-info/METADATA,sha256=3O_6bcQaCfragLXVyAsZWVpP9lvxEziUo9DqWzRE1r4,11017
+lecrapaud-0.11.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+lecrapaud-0.11.4.dist-info/RECORD,,

{lecrapaud-0.11.2.dist-info → lecrapaud-0.11.4.dist-info}/LICENSE RENAMED Viewed

File without changes

{lecrapaud-0.11.2.dist-info → lecrapaud-0.11.4.dist-info}/WHEEL RENAMED Viewed

File without changes

lecrapaud 0.11.2__py3-none-any.whl → 0.11.4__py3-none-any.whl

Potentially problematic release.

lecrapaud 0.11.2__py3-none-any.whl → 0.11.4__py3-none-any.whl