lecrapaud 0.17.0__tar.gz → 0.18.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of lecrapaud might be problematic.

Files changed (46)
  1. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/PKG-INFO +1 -1
  2. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/api.py +62 -85
  3. lecrapaud-0.18.0/lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py +30 -0
  4. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/experiment.py +52 -39
  5. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/model_selection.py +1 -0
  6. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/feature_engineering.py +2 -2
  7. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/model_selection.py +12 -0
  8. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/pyproject.toml +1 -1
  9. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/LICENSE +0 -0
  10. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/README.md +0 -0
  11. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/__init__.py +0 -0
  12. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/config.py +0 -0
  13. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/__init__.py +0 -0
  14. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/README +0 -0
  15. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/env.py +0 -0
  16. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/script.py.mako +0 -0
  17. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  18. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  19. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  20. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  21. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/alembic.ini +0 -0
  22. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/__init__.py +0 -0
  23. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/base.py +0 -0
  24. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/feature.py +0 -0
  25. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/feature_selection.py +0 -0
  26. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  27. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/model.py +0 -0
  28. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/model_training.py +0 -0
  29. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/score.py +0 -0
  30. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/target.py +0 -0
  31. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/models/utils.py +0 -0
  32. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/db/session.py +0 -0
  33. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/directories.py +0 -0
  34. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/experiment.py +0 -0
  35. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/feature_selection.py +0 -0
  36. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/integrations/openai_integration.py +0 -0
  37. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/jobs/__init__.py +0 -0
  38. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/jobs/config.py +0 -0
  39. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/jobs/scheduler.py +0 -0
  40. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/jobs/tasks.py +0 -0
  41. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
  42. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
  43. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
  44. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
  45. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/search_space.py +0 -0
  46. {lecrapaud-0.17.0 → lecrapaud-0.18.0}/lecrapaud/utils.py +0 -0
--- lecrapaud-0.17.0/PKG-INFO
+++ lecrapaud-0.18.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.17.0
+Version: 0.18.0
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet
--- lecrapaud-0.17.0/lecrapaud/api.py
+++ lecrapaud-0.18.0/lecrapaud/api.py
@@ -109,58 +109,35 @@ class LeCrapaud:
         Returns:
             dict: Dictionary containing experiment names as keys and their scores as values
         """
-        from lecrapaud.db import SessionLocal
-        from sqlalchemy.orm import joinedload
+        # Get all experiments with the given name pattern
+        experiments = self.list_experiments(name=name)
 
-        db = SessionLocal()
-        try:
-            # Get all experiments with the given name pattern
-            experiments = (
-                db.query(Experiment)
-                .options(
-                    joinedload(Experiment.model_selections).joinedload(
-                        ModelSelection.scores
-                    )
-                )
-                .filter(Experiment.name.ilike(f"%{name}%"))
-                .all()
-            )
+        if not experiments:
+            return {"error": f"No experiments found with name containing '{name}'"}
 
-            if not experiments:
-                return {"error": f"No experiments found with name containing '{name}'"}
-
-            comparison = {}
-
-            for exp in experiments:
-                scores = {
-                    "rmse": exp.avg_rmse,
-                    "logloss": exp.avg_logloss,
-                    "accuracy": None,
-                    "f1": None,
-                    "roc_auc": None,
-                }
-
-                # Get classification metrics from the first model selection with scores
-                for model_sel in exp.model_selections:
-                    if model_sel.scores:
-                        for score in model_sel.scores:
-                            if score.type == "validation":  # Use validation scores
-                                if score.accuracy is not None:
-                                    scores["accuracy"] = score.accuracy
-                                if score.f1 is not None:
-                                    scores["f1"] = score.f1
-                                if score.roc_auc is not None:
-                                    scores["roc_auc"] = score.roc_auc
-                        break
-
-                comparison[exp.name] = scores
-
-            return comparison
+        comparison = {}
 
-        except Exception as e:
-            return {"error": f"Error comparing experiment scores: {str(e)}"}
-        finally:
-            db.close()
+        for exp in experiments:
+            for model_sel in exp.experiment.model_selections:
+
+                if model_sel.best_score:
+
+                    scores = {
+                        "rmse": model_sel.best_score["rmse"],
+                        "logloss": model_sel.best_score["logloss"],
+                        "accuracy": model_sel.best_score["accuracy"],
+                        "f1": model_sel.best_score["f1"],
+                        "roc_auc": model_sel.best_score["roc_auc"],
+                    }
+                    target_name = model_sel.target.name
+
+                    comparison[exp.experiment.name][target_name] = scores
+                else:
+                    logger.warning(
+                        f"No best score found for experiment {exp.experiment.name} and target {model_sel.target.name}"
+                    )
+
+        return comparison
 
     def list_experiments(
         self, name: str = None, limit: int = 1000
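Note on the rewritten loop above: the release assigns into comparison[exp.experiment.name][target_name] without first creating the inner per-experiment dict, so the first assignment for an experiment can raise KeyError. Below is a minimal standalone sketch of the same best_score-based comparison, with a setdefault() guard added; the guard and the function name are our additions, not part of the package.

    # Hypothetical sketch of the 0.18.0 comparison logic, not the packaged code.
    # setdefault() ensures the per-experiment dict exists before assignment.
    def compare_best_scores(experiments) -> dict:
        comparison: dict = {}
        for exp in experiments:
            for model_sel in exp.experiment.model_selections:
                if not model_sel.best_score:
                    continue  # the released code logs a warning here instead
                metrics = {
                    key: model_sel.best_score.get(key)
                    for key in ("rmse", "logloss", "accuracy", "f1", "roc_auc")
                }
                comparison.setdefault(exp.experiment.name, {})[
                    model_sel.target.name
                ] = metrics
        return comparison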
@@ -331,12 +308,12 @@ class ExperimentEngine:
     def feature_engineering(self, data, for_training=True):
         app = FeatureEngineeringEngine(
             data=data,
-            columns_drop=self.columns_drop,
-            columns_boolean=self.columns_boolean,
-            columns_date=self.columns_date,
-            columns_te_groupby=self.columns_te_groupby,
-            columns_te_target=self.columns_te_target,
-            for_training=for_training,
+            columns_drop=getattr(self, "columns_drop", []),
+            columns_boolean=getattr(self, "columns_boolean", []),
+            columns_date=getattr(self, "columns_date", []),
+            columns_te_groupby=getattr(self, "columns_te_groupby", []),
+            columns_te_target=getattr(self, "columns_te_target", []),
+            for_training=getattr(self, "for_training", True),
         )
         data = app.run()
         return data
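This hunk, and the three that follow, replace direct attribute access with getattr(...) and a default, so an ExperimentEngine missing an optional setting no longer raises AttributeError. A minimal illustration of the difference, using a hypothetical class rather than the real engine:

    # Toy class, only to illustrate the getattr-with-default pattern.
    class Engine:
        pass

    engine = Engine()
    try:
        engine.columns_drop  # AttributeError: the attribute was never set
    except AttributeError:
        pass
    assert getattr(engine, "columns_drop", []) == []  # falls back to the default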
@@ -344,21 +321,21 @@ class ExperimentEngine:
     def preprocess_feature(self, data, for_training=True):
         app = PreprocessFeature(
             data=data,
-            experiment=self.experiment,
-            time_series=self.time_series,
-            date_column=self.date_column,
-            group_column=self.group_column,
-            val_size=self.val_size,
-            test_size=self.test_size,
-            columns_pca=self.columns_pca,
-            pca_temporal=self.pca_temporal,
-            pca_cross_sectional=self.pca_cross_sectional,
-            columns_onehot=self.columns_onehot,
-            columns_binary=self.columns_binary,
-            columns_frequency=self.columns_frequency,
-            columns_ordinal=self.columns_ordinal,
-            target_numbers=self.target_numbers,
-            target_clf=self.target_clf,
+            experiment=getattr(self, "experiment", None),
+            time_series=getattr(self, "time_series", False),
+            date_column=getattr(self, "date_column", None),
+            group_column=getattr(self, "group_column", None),
+            val_size=getattr(self, "val_size", 0.2),
+            test_size=getattr(self, "test_size", 0.2),
+            columns_pca=getattr(self, "columns_pca", []),
+            pca_temporal=getattr(self, "pca_temporal", []),
+            pca_cross_sectional=getattr(self, "pca_cross_sectional", []),
+            columns_onehot=getattr(self, "columns_onehot", []),
+            columns_binary=getattr(self, "columns_binary", []),
+            columns_ordinal=getattr(self, "columns_ordinal", []),
+            columns_frequency=getattr(self, "columns_frequency", []),
+            target_numbers=getattr(self, "target_numbers", []),
+            target_clf=getattr(self, "target_clf", []),
         )
         if for_training:
             train, val, test = app.run()
@@ -390,14 +367,14 @@ class ExperimentEngine:
             train=train,
             val=val,
             test=test,
-            experiment=self.experiment,
-            target_numbers=self.target_numbers,
-            target_clf=self.target_clf,
-            models_idx=self.models_idx,
-            time_series=self.time_series,
-            max_timesteps=self.max_timesteps,
-            date_column=self.date_column,
-            group_column=self.group_column,
+            experiment=getattr(self, "experiment", None),
+            target_numbers=getattr(self, "target_numbers", []),
+            target_clf=getattr(self, "target_clf", []),
+            models_idx=getattr(self, "models_idx", []),
+            time_series=getattr(self, "time_series", False),
+            max_timesteps=getattr(self, "max_timesteps", 120),
+            date_column=getattr(self, "date_column", None),
+            group_column=getattr(self, "group_column", None),
         )
         if for_training:
             data, reshaped_data = app.run()
@@ -412,13 +389,13 @@ class ExperimentEngine:
             data=data,
             reshaped_data=reshaped_data,
             target_number=target_number,
-            experiment=self.experiment,
-            target_clf=self.target_clf,
-            models_idx=self.models_idx,
-            time_series=self.time_series,
-            date_column=self.date_column,
-            group_column=self.group_column,
-            target_clf_thresholds=self.target_clf_thresholds,
+            experiment=getattr(self, "experiment", None),
+            target_clf=getattr(self, "target_clf", []),
+            models_idx=getattr(self, "models_idx", []),
+            time_series=getattr(self, "time_series", False),
+            date_column=getattr(self, "date_column", None),
+            group_column=getattr(self, "group_column", None),
+            target_clf_thresholds=getattr(self, "target_clf_thresholds", {}),
         )
         if best_params and target_number not in best_params.keys():
             raise ValueError(
--- /dev/null
+++ lecrapaud-0.18.0/lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py
@@ -0,0 +1,30 @@
+"""add best_score to model selection
+
+Revision ID: 7ed9963e732f
+Revises: 72aa496ca65b
+Create Date: 2025-08-25 14:34:58.866912
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '7ed9963e732f'
+down_revision: Union[str, None] = '72aa496ca65b'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lecrapaud_model_selections', sa.Column('best_score', sa.JSON(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('lecrapaud_model_selections', 'best_score')
+    # ### end Alembic commands ###
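For the new column to exist, this revision must be applied to the database. A minimal sketch using Alembic's Python API, assuming the packaged alembic.ini is reachable at the path shown; the CLI equivalent is alembic upgrade head run from the directory containing that config.

    # Sketch only: the config path is an assumption about your deployment.
    from alembic import command
    from alembic.config import Config

    cfg = Config("lecrapaud/db/alembic.ini")
    command.upgrade(cfg, "7ed9963e732f")  # or "head" for all pending revisions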
--- lecrapaud-0.17.0/lecrapaud/db/models/experiment.py
+++ lecrapaud-0.18.0/lecrapaud/db/models/experiment.py
@@ -25,6 +25,7 @@ from lecrapaud.db.models.score import Score
 
 from lecrapaud.db.models.base import Base, with_db
 from lecrapaud.db.models.utils import create_association_table
+from lecrapaud.utils import logger
 
 # jointures
 lecrapaud_experiment_target_association = create_association_table(
@@ -241,7 +242,8 @@ class Experiment(Base):
         # This ensures we're comparing apples to apples by normalizing the scores
         experiments = db.query(cls).filter(cls.name.ilike(f"%{name}%")).all()
         if not experiments:
-            raise ValueError(f"No experiments found with the given name: {name}")
+            logger.error(f"No experiments found with the given name: {name}")
+            return None
 
         # Get all scores
         rmse_scores = [e.avg_rmse for e in experiments if e.avg_rmse is not None]
@@ -250,9 +252,10 @@ class Experiment(Base):
         ]
 
         if not rmse_scores or not logloss_scores:
-            raise ValueError(
+            logger.error(
                 "No experiments found with both RMSE and LogLoss scores. Maybe try with only one metric."
             )
+            return None
 
         # Normalize scores (subtract min and divide by range)
         min_rmse = min(rmse_scores)
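Both hunks above change the failure mode from raise ValueError(...) to logger.error(...) plus return None, so callers that previously caught the exception must now check for None. A hypothetical helper mirroring the new contract:

    import logging

    logger = logging.getLogger("lecrapaud")

    # Hypothetical helper, not the packaged method, showing the new contract.
    def find_experiments(names: list[str], name: str) -> list[str] | None:
        matches = [n for n in names if name in n]
        if not matches:
            logger.error(f"No experiments found with the given name: {name}")
            return None  # 0.17.0 raised ValueError at this point
        return matches

    result = find_experiments(["exp_a", "exp_b"], "exp_c")
    if result is None:
        ...  # callers now branch on None instead of catching ValueError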
@@ -306,80 +309,90 @@ class Experiment(Base):
     def best_score(self, target_number: int) -> dict:
         """
         Returns the scores for the best model of the specified target.
-
+
         Args:
             target_number (int): The target number to get scores for
-
+
         Returns:
             dict: A dictionary containing the experiment name, target number, and the best model's scores
         """
         # Find the target
         target_name = f"TARGET_{target_number}"
         target = next((t for t in self.targets if t.name == target_name), None)
-
+
         if not target:
             return {
-                'experiment_name': self.name,
-                'target_number': target_number,
-                'error': f'Target {target_name} not found in this experiment',
-                'scores': {}
+                "experiment_name": self.name,
+                "target_number": target_number,
+                "error": f"Target {target_name} not found in this experiment",
+                "scores": {},
             }
-
+
         # Find the best model selection for this target
         best_model_selection = next(
-            (ms for ms in self.model_selections if ms.target_id == target.id),
-            None
+            (ms for ms in self.model_selections if ms.target_id == target.id), None
         )
-
+
         if not best_model_selection or not best_model_selection.model_trainings:
             return {
-                'experiment_name': self.name,
-                'target_number': target_number,
-                'error': 'No model found for this target',
-                'scores': {}
+                "experiment_name": self.name,
+                "target_number": target_number,
+                "error": "No model found for this target",
+                "scores": {},
             }
-
+
         # Get the best model training (assuming the first one is the best)
         best_training = best_model_selection.model_trainings[0]
-
+
         # Get the validation score for this training
-        validation_scores = [s for s in best_training.score if s.type == 'validation']
-
+        validation_scores = [s for s in best_training.score if s.type == "validation"]
+
         if not validation_scores:
             return {
-                'experiment_name': self.name,
-                'target_number': target_number,
-                'error': 'No validation scores found for the best model',
-                'scores': {}
+                "experiment_name": self.name,
+                "target_number": target_number,
+                "error": "No validation scores found for the best model",
+                "scores": {},
             }
-
+
         # Get all available metrics from the first validation score
         score = validation_scores[0]
         available_metrics = [
-            'rmse', 'mae', 'r2', 'logloss', 'accuracy',
-            'precision', 'recall', 'f1', 'roc_auc'
+            "rmse",
+            "mae",
+            "r2",
+            "logloss",
+            "accuracy",
+            "precision",
+            "recall",
+            "f1",
+            "roc_auc",
         ]
-
+
         scores = {}
         for metric in available_metrics:
             value = getattr(score, metric, None)
             if value is not None:
                 scores[metric] = value
-
+
         # Get the model info
         model_info = {
-            'model_type': best_training.model.model_type if best_training.model else 'unknown',
-            'model_name': best_training.model.name if best_training.model else 'unknown',
-            'training_time_seconds': best_training.training_time
+            "model_type": (
+                best_training.model.model_type if best_training.model else "unknown"
+            ),
+            "model_name": (
+                best_training.model.name if best_training.model else "unknown"
+            ),
+            "training_time_seconds": best_training.training_time,
         }
-
+
         return {
-            'experiment_name': self.name,
-            'target_number': target_number,
-            'model': model_info,
-            'scores': scores
+            "experiment_name": self.name,
+            "target_number": target_number,
+            "model": model_info,
+            "scores": scores,
        }
-
+
     def get_features(self, target_number: int):
         targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
         if targets:
--- lecrapaud-0.17.0/lecrapaud/db/models/model_selection.py
+++ lecrapaud-0.18.0/lecrapaud/db/models/model_selection.py
@@ -36,6 +36,7 @@ class ModelSelection(Base):
     )
     best_model_params = Column(JSON)
     best_thresholds = Column(JSON)
+    best_score = Column(JSON)
     best_model_path = Column(String(255))
     best_model_id = Column(
         BigInteger,
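Because best_score is a plain JSON column, it round-trips as a Python dict (or None while unset). A sketch of reading it back, using the ModelSelection.get accessor that appears in the model_selection.py hunk further down; the record id and stored keys are assumptions.

    from lecrapaud.db.models.model_selection import ModelSelection

    ms = ModelSelection.get(1)  # id is illustrative
    if ms.best_score:  # nullable: None until a selection run saves its scores
        print(ms.best_score.get("rmse"), ms.best_score.get("logloss"))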
--- lecrapaud-0.17.0/lecrapaud/feature_engineering.py
+++ lecrapaud-0.18.0/lecrapaud/feature_engineering.py
@@ -319,8 +319,8 @@ class PreprocessFeature:
         val_size: float = 0.2,
         test_size: float = 0.2,
         columns_pca: list[str] = [],
-        pca_temporal: dict[str, list[str]] = {},
-        pca_cross_sectional: dict[str, list[str]] = {},
+        pca_temporal: list[dict[str, list[str]]] = [],
+        pca_cross_sectional: list[dict[str, list[str]]] = [],
         columns_onehot: list[str] = [],
         columns_binary: list[str] = [],
         columns_ordinal: list[str] = [],
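The signature change above turns pca_temporal and pca_cross_sectional from one mapping into a list of mappings. Inferred from the type hints alone (group and column names below are made up), the expected shapes are:

    # 0.17.0: a single mapping of PCA group name to columns
    pca_temporal_old: dict[str, list[str]] = {"PCA_TEMP": ["COL_A", "COL_B"]}

    # 0.18.0: a list of such mappings
    pca_temporal_new: list[dict[str, list[str]]] = [
        {"PCA_TEMP": ["COL_A", "COL_B"]},
        {"PCA_TEMP_2": ["COL_C", "COL_D"]},
    ]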
--- lecrapaud-0.17.0/lecrapaud/model_selection.py
+++ lecrapaud-0.18.0/lecrapaud/model_selection.py
@@ -1093,6 +1093,7 @@ class ModelSelectionEngine:
             best_model_params = json.load(f)[best_model_name]
 
         # Save model_selection results to db
+
         model_selection = ModelSelection.get(model_selection.id)
         model_selection.best_model_id = Model.find_by(
             name=best_score_overall["MODEL_NAME"], type=self.target_type
@@ -1100,6 +1101,17 @@
         model_selection.best_model_params = best_model_params
         model_selection.best_thresholds = best_thresholds
         model_selection.best_model_path = best_model_path
+
+        drop_cols = [
+            "DATE",
+            "MODEL_NAME",
+            "MODEL_PATH",
+        ]
+        best_score_overall = {
+            k: v for k, v in best_score_overall.items() if k not in drop_cols
+        }
+        score_data = {k.lower(): v for k, v in best_score_overall.items()}
+        model_selection.best_score = score_data
         model_selection.save()
 
         logger.info(f"Best model overall is : {best_score_overall}")
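A worked example of the payload construction added above, with hypothetical values: the bookkeeping keys are dropped, the remaining metric keys are lowercased, and the result is stored in best_score.

    # Input values are hypothetical; the transformation is the one in the diff.
    best_score_overall = {
        "DATE": "2025-08-25",
        "MODEL_NAME": "xgboost",
        "MODEL_PATH": "models/xgb.pkl",
        "RMSE": 0.42,
        "LOGLOSS": 0.31,
    }
    drop_cols = ["DATE", "MODEL_NAME", "MODEL_PATH"]
    best_score_overall = {k: v for k, v in best_score_overall.items() if k not in drop_cols}
    score_data = {k.lower(): v for k, v in best_score_overall.items()}
    assert score_data == {"rmse": 0.42, "logloss": 0.31}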
--- lecrapaud-0.17.0/pyproject.toml
+++ lecrapaud-0.18.0/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lecrapaud"
-version = "0.17.0"
+version = "0.18.0"
 description = "Framework for machine and deep learning, with regression, classification and time series analysis"
 authors = [
     {name = "Pierre H. Gallet"}