lecrapaud 0.14.7__tar.gz → 0.15.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (46) hide show
  1. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/PKG-INFO +2 -1
  2. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/api.py +8 -0
  3. lecrapaud-0.15.0/lecrapaud/db/models/experiment.py +301 -0
  4. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/feature_engineering.py +1 -1
  5. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/pyproject.toml +2 -1
  6. lecrapaud-0.14.7/lecrapaud/db/models/experiment.py +0 -130
  7. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/LICENSE +0 -0
  8. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/README.md +0 -0
  9. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/__init__.py +0 -0
  10. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/config.py +0 -0
  11. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/__init__.py +0 -0
  12. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/alembic/README +0 -0
  13. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/alembic/env.py +0 -0
  14. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/alembic/script.py.mako +0 -0
  15. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  16. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  17. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  18. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  19. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/alembic.ini +0 -0
  20. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/__init__.py +0 -0
  21. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/base.py +0 -0
  22. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/feature.py +0 -0
  23. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/feature_selection.py +0 -0
  24. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  25. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/model.py +0 -0
  26. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/model_selection.py +0 -0
  27. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/model_training.py +0 -0
  28. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/score.py +0 -0
  29. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/target.py +0 -0
  30. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/models/utils.py +0 -0
  31. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/db/session.py +0 -0
  32. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/directories.py +0 -0
  33. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/experiment.py +0 -0
  34. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/feature_selection.py +0 -0
  35. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/integrations/openai_integration.py +0 -0
  36. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/jobs/__init__.py +0 -0
  37. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/jobs/config.py +0 -0
  38. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/jobs/scheduler.py +0 -0
  39. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/jobs/tasks.py +0 -0
  40. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
  41. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
  42. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
  43. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
  44. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/model_selection.py +0 -0
  45. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/search_space.py +0 -0
  46. {lecrapaud-0.14.7 → lecrapaud-0.15.0}/lecrapaud/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.14.7
3
+ Version: 0.15.0
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -19,6 +19,7 @@ Requires-Dist: mlxtend (>=0.23.4)
19
19
  Requires-Dist: numpy (>=2.1.3)
20
20
  Requires-Dist: openai (>=1.88.0)
21
21
  Requires-Dist: pandas (>=2.3.0)
22
+ Requires-Dist: pydantic (>=2.9.2)
22
23
  Requires-Dist: python-dotenv (>=1.1.0)
23
24
  Requires-Dist: scikit-learn (>=1.6.1)
24
25
  Requires-Dist: scipy (<1.14.0)
@@ -88,6 +88,14 @@ class LeCrapaud:
88
88
  """
89
89
  return ExperimentEngine(id=id, **kwargs)
90
90
 
91
+ def get_last_experiement_by_name(self, name: str, **kwargs) -> "ExperimentEngine":
92
+ """Retrieve the last experiment by name."""
93
+ return ExperimentEngine(id=Experiment.get_last_by_name(name).id, **kwargs)
94
+
95
+ def get_best_experiment_by_name(self, name: str,metric: str = "both", **kwargs) -> "ExperimentEngine":
96
+ """Retrieve the best experiment by score."""
97
+ return ExperimentEngine(id=Experiment.get_best_by_score(name=name, metric=metric).id, **kwargs)
98
+
91
99
  def list_experiments(self, limit=1000) -> list["ExperimentEngine"]:
92
100
  """List all experiments in the database."""
93
101
  return [ExperimentEngine(id=exp.id) for exp in Experiment.get_all(limit=limit)]
@@ -0,0 +1,301 @@
1
+ from itertools import chain
2
+ import joblib
3
+
4
+ from sqlalchemy import (
5
+ Column,
6
+ Integer,
7
+ String,
8
+ DateTime,
9
+ Float,
10
+ JSON,
11
+ Table,
12
+ ForeignKey,
13
+ BigInteger,
14
+ TIMESTAMP,
15
+ UniqueConstraint,
16
+ func,
17
+ )
18
+ from sqlalchemy.orm import relationship, aliased
19
+ from sqlalchemy.ext.hybrid import hybrid_property
20
+ from sqlalchemy import func
21
+ from statistics import fmean as mean
22
+ from lecrapaud.db.models.model_selection import ModelSelection
23
+ from lecrapaud.db.models.model_training import ModelTraining
24
+ from lecrapaud.db.models.score import Score
25
+
26
+ from lecrapaud.db.models.base import Base, with_db
27
+ from lecrapaud.db.models.utils import create_association_table
28
+
29
# Join table for the many-to-many relationship between experiments and targets.
lecrapaud_experiment_target_association = create_association_table(
    name="experiment_target_association",
    table1="experiments",
    column1="experiment",
    table2="targets",
    column2="target"
)
37
+
38
+
39
class Experiment(Base):
    """A single experiment run: dataset metadata, split sizes, and the
    feature/model selections produced for it.

    Unique on ``name`` (see ``__table_args__`` below).
    """

    # Surrogate primary key.
    id = Column(BigInteger, primary_key=True, index=True, autoincrement=True)
    # Row timestamps, maintained by the database server.
    created_at = Column(
        TIMESTAMP(timezone=True), server_default=func.now(), nullable=False
    )
    updated_at = Column(
        TIMESTAMP(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
    name = Column(String(50), nullable=False)
    path = Column(String(255))  # we do not have this at creation time
    type = Column(String(50), nullable=False)
    size = Column(Integer, nullable=False)
    # Split sizes; nullable because they are filled in after the split is made.
    train_size = Column(Integer)
    val_size = Column(Integer)

    # Relationships
    model_selections = relationship(
        "ModelSelection",
        back_populates="experiment",
        cascade="all, delete-orphan",
        lazy="selectin",
    )
65
+ @hybrid_property
66
+ def best_rmse(self):
67
+ """Best RMSE score across all model selections and trainings."""
68
+ # Get the minimum RMSE for each model selection
69
+ min_scores = [
70
+ min(
71
+ score.rmse
72
+ for mt in ms.model_trainings
73
+ for score in mt.score
74
+ if score.rmse is not None
75
+ )
76
+ for ms in self.model_selections
77
+ if any(
78
+ score.rmse is not None
79
+ for mt in ms.model_trainings
80
+ for score in mt.score
81
+ )
82
+ ]
83
+ return min(min_scores) if min_scores else None
84
+
85
+ @hybrid_property
86
+ def best_logloss(self):
87
+ """Best LogLoss score across all model selections and trainings."""
88
+ # Get the minimum LogLoss for each model selection
89
+ min_scores = [
90
+ min(
91
+ score.logloss
92
+ for mt in ms.model_trainings
93
+ for score in mt.score
94
+ if score.logloss is not None
95
+ )
96
+ for ms in self.model_selections
97
+ if any(
98
+ score.logloss is not None
99
+ for mt in ms.model_trainings
100
+ for score in mt.score
101
+ )
102
+ ]
103
+ return min(min_scores) if min_scores else None
104
+
105
+ @hybrid_property
106
+ def avg_rmse(self):
107
+ """Average RMSE score across all model selections and trainings."""
108
+ # Get the minimum RMSE for each model selection
109
+ min_scores = [
110
+ min(
111
+ score.rmse
112
+ for mt in ms.model_trainings
113
+ for score in mt.score
114
+ if score.rmse is not None
115
+ )
116
+ for ms in self.model_selections
117
+ if any(
118
+ score.rmse is not None
119
+ for mt in ms.model_trainings
120
+ for score in mt.score
121
+ )
122
+ ]
123
+ return mean(min_scores) if min_scores else None
124
+
125
+ @hybrid_property
126
+ def avg_logloss(self):
127
+ """Average LogLoss score across all model selections and trainings."""
128
+ # Get the minimum LogLoss for each model selection
129
+ min_scores = [
130
+ min(
131
+ score.logloss
132
+ for mt in ms.model_trainings
133
+ for score in mt.score
134
+ if score.logloss is not None
135
+ )
136
+ for ms in self.model_selections
137
+ if any(
138
+ score.logloss is not None
139
+ for mt in ms.model_trainings
140
+ for score in mt.score
141
+ )
142
+ ]
143
+ return mean(min_scores) if min_scores else None
144
+
145
    # Remaining split size and feature-selection hyperparameters.
    test_size = Column(Integer)
    corr_threshold = Column(Float, nullable=False)
    max_features = Column(Integer, nullable=False)
    percentile = Column(Float, nullable=False)
    # Optional grouping metadata for grouped datasets.
    number_of_groups = Column(Integer)
    list_of_groups = Column(JSON)
    # Date boundaries of the full dataset and of each split.
    start_date = Column(DateTime)
    end_date = Column(DateTime)
    train_start_date = Column(DateTime)
    train_end_date = Column(DateTime)
    val_start_date = Column(DateTime)
    val_end_date = Column(DateTime)
    test_start_date = Column(DateTime)
    test_end_date = Column(DateTime)
    # Free-form JSON context attached to the experiment.
    context = Column(JSON)

    feature_selections = relationship(
        "FeatureSelection",
        back_populates="experiment",
        cascade="all, delete-orphan",
        lazy="selectin",
    )

    # Many-to-many with Target via the association table defined above.
    targets = relationship(
        "Target",
        secondary=lecrapaud_experiment_target_association,
        back_populates="experiments",
        lazy="selectin",
    )

    # Experiment names must be unique.
    __table_args__ = (
        UniqueConstraint(
            "name",
            name="uq_experiments_composite",
        ),
    )
181
+
182
+ @classmethod
183
+ @with_db
184
+ def get_last_by_name(cls, name: str, db=None):
185
+ """
186
+ Find the most recently created experiment that contains the given name string.
187
+
188
+ Args:
189
+ session: SQLAlchemy session
190
+ name (str): String to search for in experiment names
191
+
192
+ Returns:
193
+ Experiment or None: The most recent matching experiment or None if not found
194
+ """
195
+ return (
196
+ db.query(cls)
197
+ .filter(cls.name.ilike(f'%{name}%'))
198
+ .order_by(cls.created_at.desc())
199
+ .first()
200
+ )
201
+
202
+ @classmethod
203
+ @with_db
204
+ def get_best_by_score(cls, name: str, metric='both', db=None):
205
+ """
206
+ Find the experiment with the best score based on average RMSE, LogLoss, or both.
207
+
208
+ Args:
209
+ metric (str): 'rmse', 'logloss', or 'both' to determine which score to optimize
210
+ db: SQLAlchemy session
211
+
212
+ Returns:
213
+ Experiment or None: The experiment with the best score or None if not found
214
+ """
215
+ if metric == 'both':
216
+ # Calculate a combined score: average of normalized RMSE and LogLoss
217
+ # This ensures we're comparing apples to apples by normalizing the scores
218
+ experiments = db.query(cls).filter(cls.name.ilike(f'%{name}%')).all()
219
+ if not experiments:
220
+ return None
221
+
222
+ # Get all scores
223
+ rmse_scores = [e.avg_rmse for e in experiments if e.avg_rmse is not None]
224
+ logloss_scores = [e.avg_logloss for e in experiments if e.avg_logloss is not None]
225
+
226
+ if not rmse_scores or not logloss_scores:
227
+ return None
228
+
229
+ # Normalize scores (subtract min and divide by range)
230
+ min_rmse = min(rmse_scores)
231
+ range_rmse = max(rmse_scores) - min_rmse
232
+ min_logloss = min(logloss_scores)
233
+ range_logloss = max(logloss_scores) - min_logloss
234
+
235
+ # Calculate combined score for each experiment
236
+ experiment_scores = []
237
+ for experiment in experiments:
238
+ if experiment.avg_rmse is None or experiment.avg_logloss is None:
239
+ continue
240
+
241
+ # Normalize both scores
242
+ norm_rmse = (experiment.avg_rmse - min_rmse) / range_rmse
243
+ norm_logloss = (experiment.avg_logloss - min_logloss) / range_logloss
244
+
245
+ # Calculate combined score (average of normalized scores)
246
+ combined_score = (norm_rmse + norm_logloss) / 2
247
+ experiment_scores.append((experiment, combined_score))
248
+
249
+ # Sort by combined score (ascending since lower is better)
250
+ experiment_scores.sort(key=lambda x: x[1])
251
+
252
+ return experiment_scores[0][0] if experiment_scores else None
253
+
254
+ # For single metric case (rmse or logloss)
255
+ score_property = cls.avg_rmse if metric == 'rmse' else cls.avg_logloss
256
+
257
+ return (
258
+ db.query(cls)
259
+ .filter(cls.name.ilike(f'%{name}%'), score_property.isnot(None)) # Only consider experiments with scores
260
+ .order_by(score_property)
261
+ .first()
262
+ )
263
+
264
+ def get_features(self, target_number: int):
265
+ targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
266
+ if targets:
267
+ target_id = targets[0].id
268
+ feature_selection = [
269
+ fs for fs in self.feature_selections if fs.target_id == target_id
270
+ ]
271
+ if feature_selection:
272
+ feature_selection = feature_selection[0]
273
+ features = [f.name for f in feature_selection.features]
274
+ return features
275
+
276
+ # fallback to path if no features found
277
+ features = joblib.load(f"{self.path}/TARGET_{target_number}/features.pkl")
278
+ return features
279
+
280
+ def get_all_features(self, date_column: str = None, group_column: str = None):
281
+ target_idx = [target.id for target in self.targets]
282
+ _all_features = chain.from_iterable(
283
+ [f.name for f in fs.features]
284
+ for fs in self.feature_selections
285
+ if fs.target_id in target_idx
286
+ )
287
+ _all_features = list(_all_features)
288
+
289
+ # fallback to path if no features found
290
+ if len(_all_features) == 0:
291
+ _all_features = joblib.load(f"{self.path}/preprocessing/all_features.pkl")
292
+
293
+ all_features = []
294
+ if date_column:
295
+ all_features.append(date_column)
296
+ if group_column:
297
+ all_features.append(group_column)
298
+ all_features += _all_features
299
+ all_features = list(dict.fromkeys(all_features))
300
+
301
+ return all_features
@@ -658,7 +658,7 @@ class PreprocessFeature:
658
658
  X_transformed = pd.DataFrame(transformed, columns=column_names, index=df.index)
659
659
 
660
660
  # Try to convert columns to best possible dtypes
661
- X_transformed = X_transformed.convert_ups()
661
+ X_transformed = X_transformed.convert_dtypes()
662
662
 
663
663
  # Insert features in db
664
664
  if save_in_db:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lecrapaud"
3
- version = "0.14.7"
3
+ version = "0.15.0"
4
4
  description = "Framework for machine and deep learning, with regression, classification and time series analysis"
5
5
  authors = [
6
6
  {name = "Pierre H. Gallet"}
@@ -20,6 +20,7 @@ dependencies = [
20
20
  "numpy>=2.1.3",
21
21
  "openai>=1.88.0",
22
22
  "pandas>=2.3.0",
23
+ "pydantic>=2.9.2",
23
24
  "python-dotenv>=1.1.0",
24
25
  "scikit-learn>=1.6.1",
25
26
  "scipy<1.14.0",
@@ -1,130 +0,0 @@
1
- from itertools import chain
2
- import joblib
3
-
4
- from sqlalchemy import (
5
- Column,
6
- Integer,
7
- String,
8
- DateTime,
9
- Float,
10
- JSON,
11
- Table,
12
- ForeignKey,
13
- BigInteger,
14
- TIMESTAMP,
15
- UniqueConstraint,
16
- func,
17
- )
18
- from sqlalchemy.orm import relationship
19
-
20
- from lecrapaud.db.models.base import Base
21
- from lecrapaud.db.models.utils import create_association_table
22
-
23
- # jointures
24
- lecrapaud_experiment_target_association = create_association_table(
25
- name="experiment_target_association",
26
- table1="experiments",
27
- column1="experiment",
28
- table2="targets",
29
- column2="target"
30
- )
31
-
32
-
33
- class Experiment(Base):
34
-
35
- id = Column(BigInteger, primary_key=True, index=True, autoincrement=True)
36
- created_at = Column(
37
- TIMESTAMP(timezone=True), server_default=func.now(), nullable=False
38
- )
39
- updated_at = Column(
40
- TIMESTAMP(timezone=True),
41
- server_default=func.now(),
42
- onupdate=func.now(),
43
- nullable=False,
44
- )
45
- name = Column(String(50), nullable=False)
46
- path = Column(String(255)) # we do not have this at creation time
47
- type = Column(String(50), nullable=False)
48
- size = Column(Integer, nullable=False)
49
- train_size = Column(Integer)
50
- val_size = Column(Integer)
51
- test_size = Column(Integer)
52
- corr_threshold = Column(Float, nullable=False)
53
- max_features = Column(Integer, nullable=False)
54
- percentile = Column(Float, nullable=False)
55
- number_of_groups = Column(Integer)
56
- list_of_groups = Column(JSON)
57
- start_date = Column(DateTime)
58
- end_date = Column(DateTime)
59
- train_start_date = Column(DateTime)
60
- train_end_date = Column(DateTime)
61
- val_start_date = Column(DateTime)
62
- val_end_date = Column(DateTime)
63
- test_start_date = Column(DateTime)
64
- test_end_date = Column(DateTime)
65
- context = Column(JSON)
66
-
67
- feature_selections = relationship(
68
- "FeatureSelection",
69
- back_populates="experiment",
70
- cascade="all, delete-orphan",
71
- lazy="selectin",
72
- )
73
- model_selections = relationship(
74
- "ModelSelection",
75
- back_populates="experiment",
76
- cascade="all, delete-orphan",
77
- lazy="selectin",
78
- )
79
- targets = relationship(
80
- "Target",
81
- secondary=lecrapaud_experiment_target_association,
82
- back_populates="experiments",
83
- lazy="selectin",
84
- )
85
-
86
- __table_args__ = (
87
- UniqueConstraint(
88
- "name",
89
- name="uq_experiments_composite",
90
- ),
91
- )
92
-
93
- def get_features(self, target_number: int):
94
- targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
95
- if targets:
96
- target_id = targets[0].id
97
- feature_selection = [
98
- fs for fs in self.feature_selections if fs.target_id == target_id
99
- ]
100
- if feature_selection:
101
- feature_selection = feature_selection[0]
102
- features = [f.name for f in feature_selection.features]
103
- return features
104
-
105
- # fallback to path if no features found
106
- features = joblib.load(f"{self.path}/TARGET_{target_number}/features.pkl")
107
- return features
108
-
109
- def get_all_features(self, date_column: str = None, group_column: str = None):
110
- target_idx = [target.id for target in self.targets]
111
- _all_features = chain.from_iterable(
112
- [f.name for f in fs.features]
113
- for fs in self.feature_selections
114
- if fs.target_id in target_idx
115
- )
116
- _all_features = list(_all_features)
117
-
118
- # fallback to path if no features found
119
- if len(_all_features) == 0:
120
- _all_features = joblib.load(f"{self.path}/preprocessing/all_features.pkl")
121
-
122
- all_features = []
123
- if date_column:
124
- all_features.append(date_column)
125
- if group_column:
126
- all_features.append(group_column)
127
- all_features += _all_features
128
- all_features = list(dict.fromkeys(all_features))
129
-
130
- return all_features
File without changes
File without changes