PyPI - lecrapaud - Versions diffs - 0.2.0__tar.gz → 0.3.0__tar.gz - Mend

lecrapaud 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic. Click here for more details.

Files changed (64) hide show

{lecrapaud-0.2.0 → lecrapaud-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.2.0
+Version: 0.3.0
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet
@@ -49,7 +49,7 @@ Description-Content-Type: text/markdown
 <div align="center">
-# 🐸
+<img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
 ## Welcome to LeCrapaud
@@ -72,34 +72,21 @@ LeCrapaud is a high-level Python library for end-to-end machine learning workflo
 ## ⚡ Quick Start
-1. Create environment
-```sh
-$ pip install virtualenv
-$ python -m venv .venv
-$ source .venv/bin/activate
-```
-2. Install dependencies
+### Install the package
 ```sh
-$ make install
+pip install lecrapaud
 ```
-3. Deactivate virtualenv (if needed)
-```sh
-$ deactivate
-```
-## 🛠️ How it works
+### How it works
 This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
 ### Typical workflow
 ```python
-from lecrapaud.api import LeCrapaud
+from lecrapaud import LeCrapaud
 # 1. Create the main app
 app = LeCrapaud()
@@ -159,6 +146,26 @@ $ git push -u origin master
 3. Use conventional commits
 https://www.conventionalcommits.org/en/v1.0.0/#summary
+4. Create environment
+```sh
+$ pip install virtualenv
+$ python -m venv .venv
+$ source .venv/bin/activate
+```
+5. Install dependencies
+```sh
+$ make install
+```
+6. Deactivate virtualenv (if needed)
+```sh
+$ deactivate
+```
 ---
-Pierre Gallet © 2024
+Pierre Gallet © 2025

{lecrapaud-0.2.0 → lecrapaud-0.3.0}/README.md RENAMED Viewed

@@ -1,6 +1,6 @@
 <div align="center">
-# 🐸
+<img src="https://s3.amazonaws.com/pix.iemoji.com/images/emoji/apple/ios-12/256/frog-face.png" width=120 alt="crapaud"/>
 ## Welcome to LeCrapaud
@@ -23,34 +23,21 @@ LeCrapaud is a high-level Python library for end-to-end machine learning workflo
 ## ⚡ Quick Start
-1. Create environment
-```sh
-$ pip install virtualenv
-$ python -m venv .venv
-$ source .venv/bin/activate
-```
-2. Install dependencies
+### Install the package
 ```sh
-$ make install
+pip install lecrapaud
 ```
-3. Deactivate virtualenv (if needed)
-```sh
-$ deactivate
-```
-## 🛠️ How it works
+### How it works
 This package provides a high-level API to manage experiments for feature engineering, model selection, and prediction on tabular data (e.g. stock data).
 ### Typical workflow
 ```python
-from lecrapaud.api import LeCrapaud
+from lecrapaud import LeCrapaud
 # 1. Create the main app
 app = LeCrapaud()
@@ -110,6 +97,26 @@ $ git push -u origin master
 3. Use conventional commits
 https://www.conventionalcommits.org/en/v1.0.0/#summary
+4. Create environment
+```sh
+$ pip install virtualenv
+$ python -m venv .venv
+$ source .venv/bin/activate
+```
+5. Install dependencies
+```sh
+$ make install
+```
+6. Deactivate virtualenv (if needed)
+```sh
+$ deactivate
+```
 ---
-Pierre Gallet © 2024
+Pierre Gallet © 2025

{lecrapaud-0.2.0 → lecrapaud-0.3.0}/lecrapaud/api.py RENAMED Viewed

@@ -32,6 +32,8 @@ experiment.model_selection(data) : return best_model
 import joblib
 import pandas as pd
+import logging
+from lecrapaud.utils import logger
 from lecrapaud.db.session import init_db
 from lecrapaud.feature_selection import FeatureSelectionEngine, PreprocessModel
 from lecrapaud.model_selection import ModelSelectionEngine, ModelEngine
@@ -103,7 +105,12 @@ class Experiment:
         std_data, reshaped_data = self.preprocess_model(train, val, test)
         self.model_selection(std_data, reshaped_data)
-    def predict(self, new_data):
+    def predict(self, new_data, verbose: int = 0):
+        if verbose == 0:
+            logger.setLevel(logging.WARNING)
+        logger.warning("Running prediction...")
         data = self.feature_engineering(
             data=new_data,
             for_training=False,
@@ -127,7 +134,6 @@ class Experiment:
             else:
                 features = self.dataset.get_features(target_number)
             model = ModelEngine(path=training_target_dir)
-            model.load()
             # getting data
             if model.recurrent:

lecrapaud-0.3.0/lecrapaud/db/alembic/versions/2025_05_31_1834-52b809a34371_make_nullablee.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""make_nullablee
+Revision ID: 52b809a34371
+Revises: 339927587383
+Create Date: 2025-05-31 18:34:58.962966
+"""
+from typing import Sequence, Union
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+# revision identifiers, used by Alembic.
+revision: str = "52b809a34371"
+down_revision: Union[str, None] = "339927587383"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.alter_column(
+        "investment_runs",
+        "initial_portfolio",
+        existing_type=mysql.JSON(),
+        nullable=True,
+    )
+    op.create_foreign_key(
+        None,
+        "portfolios",
+        "investment_runs",
+        ["investment_run_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_constraint(None, "portfolios", type_="foreignkey")
+    op.alter_column(
+        "investment_runs",
+        "initial_portfolio",
+        existing_type=mysql.JSON(),
+        nullable=False,
+    )
+    # ### end Alembic commands ###

{lecrapaud-0.2.0 → lecrapaud-0.3.0}/lecrapaud/db/session.py RENAMED Viewed

@@ -4,6 +4,9 @@ from contextlib import contextmanager
 from sqlalchemy import create_engine, text
 from sqlalchemy.orm import sessionmaker
 from urllib.parse import urlparse
+from alembic.config import Config
+from alembic import command
+import os
 from lecrapaud.config import DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME, DB_URI
@@ -39,6 +42,14 @@ def init_db(uri: str = None):
     # Step 4: Create session factory
     _SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=_engine)
+    # Step 5: Apply Alembic migrations programmatically
+    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
+    alembic_cfg_path = os.path.join(project_root, "alembic.ini")
+    alembic_cfg = Config(alembic_cfg_path)
+    alembic_cfg.set_main_option("sqlalchemy.url", uri or os.getenv("DATABASE_URL"))
+    command.upgrade(alembic_cfg, "head")
 # Dependency to get a session instance
 @contextmanager

{lecrapaud-0.2.0 → lecrapaud-0.3.0}/lecrapaud/experiment.py RENAMED Viewed

@@ -5,7 +5,7 @@ from pathlib import Path
 os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
 # Internal
-from lecrapaud.directory_management import tmp_dir
+from lecrapaud.directories import tmp_dir
 from lecrapaud.utils import logger
 from lecrapaud.config import PYTHON_ENV
 from lecrapaud.db import (

{lecrapaud-0.2.0 → lecrapaud-0.3.0}/lecrapaud/feature_engineering.py RENAMED Viewed

@@ -101,7 +101,7 @@ class FeatureEngineeringEngine:
     def run(self) -> pd.DataFrame:
         # drop columns
-        self.data = self.data.drop(columns=self.columns_drop)
+        self.data = self.data.drop(columns=self.columns_drop, errors="ignore")
         # convert object columns to numeric if possible
         self.data = convert_object_columns_that_are_numeric(self.data)
@@ -324,6 +324,8 @@ class PreprocessFeature:
         **kwargs,
     ):
         self.data = data
+        self.data.columns = self.data.columns.str.upper()
         self.dataset = dataset
         self.columns_pca = columns_pca
         self.columns_onehot = columns_onehot
@@ -350,7 +352,7 @@ class PreprocessFeature:
             self.train_val_test_split_time_series()
             if self.time_series
             else self.train_val_test_split(
-                stratify_col=f"target_{self.target_numbers[0]}"
+                stratify_col=f"TARGET_{self.target_numbers[0]}"
             )
         )  # TODO: only stratifying first target for now
@@ -359,8 +361,7 @@ class PreprocessFeature:
         val, _ = self.add_pca_features(test, pcas=pcas)
         test, _ = self.add_pca_features(val, pcas=pcas)
-        if PYTHON_ENV != "Test":
-            joblib.dump(pcas, f"{self.preprocessing_dir}/pcas.pkl")
+        joblib.dump(pcas, f"{self.preprocessing_dir}/pcas.pkl")
         # Encoding
         train, transformer = self.encode_categorical_features(train)
@@ -373,11 +374,10 @@ class PreprocessFeature:
             transformer=transformer,
         )
-        if PYTHON_ENV != "Test":
-            joblib.dump(self.data, f"{self.data_dir}/full.pkl")
-            joblib.dump(transformer, f"{self.preprocessing_dir}/column_transformer.pkl")
-            summary = summarize_dataframe(train)
-            summary.to_csv(f"{self.dataset_dir}/feature_summary.csv", index=False)
+        joblib.dump(self.data, f"{self.data_dir}/full.pkl")
+        joblib.dump(transformer, f"{self.preprocessing_dir}/column_transformer.pkl")
+        summary = summarize_dataframe(train)
+        summary.to_csv(f"{self.dataset_dir}/feature_summary.csv", index=False)
         return train, val, test
@@ -579,8 +579,8 @@ class PreprocessFeature:
         columns_ordinal: list[str] = self.columns_ordinal
         columns_frequency: list[str] = self.columns_frequency
-        X = df.loc[:, ~df.columns.str.contains("^target_")]
-        y = df.loc[:, df.columns.str.contains("^target_")]
+        X = df.loc[:, ~df.columns.str.contains("^TARGET_")]
+        y = df.loc[:, df.columns.str.contains("^TARGET_")]
         save_in_db = False
         all_columns = (
@@ -643,7 +643,6 @@ class PreprocessFeature:
         # Try to convert columns to best possible dtypes
         X_transformed = X_transformed.convert_dtypes()
-        X_transformed.columns = X_transformed.columns.str.upper()
         # Insert features in db
         if save_in_db:

{lecrapaud-0.2.0 → lecrapaud-0.3.0}/lecrapaud/feature_selection.py RENAMED Viewed

@@ -37,7 +37,7 @@ from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from scipy.stats import spearmanr, kendalltau
 # Internal
-from lecrapaud.directory_management import tmp_dir, clean_directory
+from lecrapaud.directories import tmp_dir, clean_directory
 from lecrapaud.utils import logger
 from lecrapaud.config import PYTHON_ENV
 from lecrapaud.db import (
@@ -50,10 +50,6 @@ from lecrapaud.db import (
 from lecrapaud.db.session import get_db
 from lecrapaud.search_space import all_models
-# Variables for targets handling
-TARGETS_MCLF = [11]
-GROUPING_COLUMN = "STOCK"
 # Annoying Warnings
 warnings.filterwarnings("ignore", category=FutureWarning)
@@ -103,7 +99,7 @@ class FeatureSelectionEngine:
     # Main feature selection function
     def run(
         self,
-        single_process: bool = False,
+        single_process: bool = True,
     ):
         """Function to do feature selection with a range of different feature selection technics
@@ -114,10 +110,7 @@ class FeatureSelectionEngine:
         """
         target_number = self.target_number
         target_type = self.target_type
-        if PYTHON_ENV != "Test":
-            fs_dir_target = self.fs_dir_target
-        else:
-            fs_dir_target = None
+        fs_dir_target = self.fs_dir_target
         # Create the feature selection in db
         target = Target.find_by(name=f"TARGET_{target_number}")
@@ -162,7 +155,7 @@ class FeatureSelectionEngine:
         # handling categorical features (only if classification)
         self.X_categorical, self.X_numerical = get_features_by_types(self.X)
-        if target_type == "classification":
+        if target_type == "classification" and self.X_categorical.shape[1] > 0:
             feat_scores = self.select_categorical_features(
                 percentile=percentile, save_dir=fs_dir_target
             )
@@ -292,24 +285,22 @@ class FeatureSelectionEngine:
             f"We selected {len(features_selected_list)} features and {len(features_selected_by_every_methods)} were selected unanimously:"
         )
         logger.debug(features_selected_by_every_methods)
-        if PYTHON_ENV != "Test":
-            pd.Series(features_selected_list).to_csv(
-                f"{fs_dir_target}/features_before_corr.csv",
-                index=True,
-                header=True,
-                index_label="ID",
-            )
+        pd.Series(features_selected_list).to_csv(
+            f"{fs_dir_target}/features_before_corr.csv",
+            index=True,
+            header=True,
+            index_label="ID",
+        )
         # removing correlated features
         self.X = self.X[features_selected_list]
         features, features_correlated = self.remove_correlated_features(corr_threshold)
-        if PYTHON_ENV != "Test":
-            pd.Series(features).to_csv(
-                f"{fs_dir_target}/features_before_max.csv",
-                index=True,
-                header=True,
-                index_label="ID",
-            )
+        pd.Series(features).to_csv(
+            f"{fs_dir_target}/features_before_max.csv",
+            index=True,
+            header=True,
+            index_label="ID",
+        )
         features = features[:max_features]
         # adding categorical features selected
@@ -337,8 +328,7 @@ class FeatureSelectionEngine:
         best_features_path = Path(
             f"{self.preprocessing_dir}/features_{target_number}.pkl"
         ).resolve()
-        if PYTHON_ENV != "Test":
-            joblib.dump(features, best_features_path)
+        joblib.dump(features, best_features_path)
         # save in db
         db_features = Feature.filter(name__in=features)
@@ -798,6 +788,7 @@ class PreprocessModel:
         self.dataset_dir = dataset.path
         self.data_dir = f"{self.dataset_dir}/data"
+        self.preprocessing_dir = f"{self.dataset_dir}/preprocessing"
         self.all_features = dataset.get_all_features(
             date_column=date_column, group_column=group_column
@@ -819,31 +810,23 @@ class PreprocessModel:
     def run(self):
         # save data
-        if PYTHON_ENV != "Test":
-            joblib.dump(self.train, f"{self.data_dir}/train.pkl")
-            joblib.dump(self.val, f"{self.data_dir}/val.pkl")
-            joblib.dump(self.test, f"{self.data_dir}/test.pkl")
-            preprocessing_dir = f"{self.dataset_dir}/preprocessing"
-        else:
-            preprocessing_dir = None
+        joblib.dump(self.train, f"{self.data_dir}/train.pkl")
+        joblib.dump(self.val, f"{self.data_dir}/val.pkl")
+        joblib.dump(self.test, f"{self.data_dir}/test.pkl")
         # scaling features
         if any(t not in self.target_clf for t in self.target_numbers) and any(
             all_models[i].get("need_scaling") for i in self.models_idx
         ):
             logger.info("Scaling features...")
-            train_scaled, scaler_x, scalers_y = self.scale_data(
-                self.train, save_dir=preprocessing_dir
-            )
+            train_scaled, scaler_x, scalers_y = self.scale_data(self.train)
             val_scaled, _, _ = self.scale_data(
                 self.val,
-                save_dir=preprocessing_dir,
                 scaler_x=scaler_x,
                 scalers_y=scalers_y,
             )
             test_scaled, _, _ = self.scale_data(
                 self.test,
-                save_dir=preprocessing_dir,
                 scaler_x=scaler_x,
                 scalers_y=scalers_y,
             )
@@ -853,10 +836,9 @@ class PreprocessModel:
             test_scaled = None
         # save data
-        if PYTHON_ENV != "Test":
-            joblib.dump(train_scaled, f"{self.data_dir}/train_scaled.pkl")
-            joblib.dump(val_scaled, f"{self.data_dir}/val_scaled.pkl")
-            joblib.dump(test_scaled, f"{self.data_dir}/test_scaled.pkl")
+        joblib.dump(train_scaled, f"{self.data_dir}/train_scaled.pkl")
+        joblib.dump(val_scaled, f"{self.data_dir}/val_scaled.pkl")
+        joblib.dump(test_scaled, f"{self.data_dir}/test_scaled.pkl")
         data = {
             "train": self.train,
@@ -923,7 +905,6 @@ class PreprocessModel:
     def scale_data(
         self,
         df: pd.DataFrame,
-        save_dir: str,
         scaler_x=None,
         scalers_y: Optional[list] = None,
     ):
@@ -939,8 +920,7 @@ class PreprocessModel:
             X_scaled = pd.DataFrame(
                 scaler_x.fit_transform(X), columns=list(X.columns), index=X.index
             )
-            if save_dir:
-                joblib.dump(scaler_x, f"{save_dir}/scaler_x.pkl")
+            joblib.dump(scaler_x, f"{self.preprocessing_dir}/scaler_x.pkl")
         # Determine which targets need to be scaled
         targets_numbers_to_scale = [
@@ -969,8 +949,9 @@ class PreprocessModel:
                     columns=y.columns,
                     index=y.index,
                 )
-                if save_dir:
-                    joblib.dump(scaler_y, f"{save_dir}/scaler_y_{target_number}.pkl")
+                joblib.dump(
+                    scaler_y, f"{self.preprocessing_dir}/scaler_y_{target_number}.pkl"
+                )
                 scalers_y[f"scaler_y_{target_number}"] = scaler_y
                 scaled_targets[target_number] = scaled_y

lecrapaud 0.2.0__tar.gz → 0.3.0__tar.gz

Potentially problematic release.

lecrapaud 0.2.0__tar.gz → 0.3.0__tar.gz