PyPI - lecrapaud - Versions diffs - 0.21.1__py3-none-any.whl → 0.21.2__py3-none-any.whl - Mend

lecrapaud 0.21.1__py3-none-any.whl → 0.21.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lecrapaud might be problematic. Click here for more details.

Files changed (7) hide show

lecrapaud/config.py CHANGED Viewed

@@ -34,5 +34,5 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 LECRAPAUD_LOGFILE = os.getenv("LECRAPAUD_LOGFILE")
 LECRAPAUD_TABLE_PREFIX = os.getenv("LECRAPAUD_TABLE_PREFIX", "lecrapaud")
 LECRAPAUD_OPTIMIZATION_BACKEND = os.getenv(
-    "LECRAPAUD_OPTIMIZATION_BACKEND", "ray"
+    "LECRAPAUD_OPTIMIZATION_BACKEND", "hyperopt"
 ).lower()

lecrapaud/feature_selection.py CHANGED Viewed

@@ -278,24 +278,32 @@ class FeatureSelectionEngine:
         features_selected_list = features_selected["features"].values.tolist()
-        # Save ensemble features before correlation (aggregated features)
-        logger.info("Saving ensemble features before correlation...")
-        all_features_in_data = self.X.columns.tolist()
+        # Save ensemble features for all numerical features with global ranking
+        logger.info("Saving ensemble features with global ranking for all numerical features...")
+        numerical_features_in_data = self.X_numerical.columns.tolist()
         ensemble_rows = []
-        # Add global rank for selected features
-        features_selected_with_global_rank = features_selected.copy()
-        features_selected_with_global_rank["global_rank"] = range(1, len(features_selected_with_global_rank) + 1)
+        # Create global ranking for ALL numerical features (1 to n, no null values)
+        all_numerical_scores = pd.concat(results, axis=0)
+        all_numerical_scores = all_numerical_scores.groupby("features").agg({
+            "rank": "mean"  # Average rank across all methods
+        }).reset_index()
+        all_numerical_scores.sort_values("rank", inplace=True)
+        all_numerical_scores["global_rank"] = range(1, len(all_numerical_scores) + 1)
-        for feature in all_features_in_data:
+        for feature in numerical_features_in_data:
             feature_id = feature_map.get(feature)
             if feature_id:
                 is_selected = feature in features_selected_list
-                global_rank = None
-                if is_selected:
-                    global_rank = features_selected_with_global_rank[
-                        features_selected_with_global_rank["features"] == feature
+                # Get global rank (no null values - all features get a rank)
+                if feature in all_numerical_scores["features"].values:
+                    global_rank = all_numerical_scores[
+                        all_numerical_scores["features"] == feature
                     ]["global_rank"].values[0]
+                else:
+                    # Fallback: assign last rank + position for features not in results
+                    global_rank = len(all_numerical_scores) + numerical_features_in_data.index(feature) + 1
                 ensemble_rows.append({
                     "feature_selection_id": feature_selection.id,
@@ -353,28 +361,12 @@ class FeatureSelectionEngine:
         )
         # Final update for features after max limitation (final selection)
-        logger.info("Finalizing ensemble features with categorical features...")
+        logger.info("Finalizing ensemble features...")
         for row in ensemble_rows:
             feature = Feature.get(row["feature_id"]).name
             if feature in features and row["support"] == 1:
                 row["support"] = 2  # 2 = in final selection
-        # Add categorical features to ensemble if not already present
-        if target_type == "classification":
-            for cat_feature in categorical_features_selected:
-                feature_id = feature_map.get(cat_feature)
-                if feature_id and not any(row["feature_id"] == feature_id for row in ensemble_rows):
-                    ensemble_rows.append({
-                        "feature_selection_id": feature_selection.id,
-                        "feature_id": feature_id,
-                        "method": "ensemble",
-                        "score": None,
-                        "pvalue": None,
-                        "support": 2,  # 2 = in final selection (categorical)
-                        "rank": None,  # No rank for categorical features added at the end
-                        "training_time": 0,
-                    })
         # Re-save all ensemble data with updated support values
         FeatureSelectionRank.bulk_upsert(rows=ensemble_rows)
         logger.debug(

lecrapaud/model_selection.py CHANGED Viewed

@@ -55,8 +55,7 @@ from tensorboardX import SummaryWriter
 # Optimization
 import ray
-from ray.tune import Tuner, TuneConfig, with_parameters
-from ray.train import RunConfig
+from ray.tune import Tuner, TuneConfig, with_parameters, RunConfig
 from ray.tune.search.hyperopt import HyperOptSearch
 from ray.tune.search.bayesopt import BayesOptSearch
 from ray.tune.logger import TBXLoggerCallback
@@ -1357,8 +1356,12 @@ class ModelSelectionEngine:
         """Choose between Ray Tune and HyperOpt standalone based on configuration."""
         if LECRAPAUD_OPTIMIZATION_BACKEND == "hyperopt":
             return self.hyperoptimize_hyperopt(x_train, y_train, x_val, y_val, model)
-        else:
+        elif LECRAPAUD_OPTIMIZATION_BACKEND == "ray":
             return self.hyperoptimize_ray(x_train, y_train, x_val, y_val, model)
+        else:
+            raise ValueError(
+                f"Invalid optimization backend: {LECRAPAUD_OPTIMIZATION_BACKEND}."
+            )
     def hyperoptimize_hyperopt(
         self, x_train, y_train, x_val, y_val, model: ModelEngine
@@ -1746,11 +1749,11 @@ def evaluate(
         y_pred_proba = (
             prediction[1] if num_classes == 2 else prediction.iloc[:, 2:].values
         )
-        if num_classes > 2:
-            lb = LabelBinarizer(sparse_output=False)  # Change to True for sparse matrix
-            lb.fit(labels)
-            y_true_onhot = lb.transform(y_true)
-            y_pred_onehot = lb.transform(y_pred)
+        # if num_classes > 2:
+        #     lb = LabelBinarizer(sparse_output=False)  # Change to True for sparse matrix
+        #     lb.fit(labels)
+        #     y_true_onhot = lb.transform(y_true)
+        #     y_pred_onehot = lb.transform(y_pred)
         score["LOGLOSS"] = log_loss(y_true, y_pred_proba)
         score["ACCURACY"] = accuracy_score(y_true, y_pred)

{lecrapaud-0.21.1.dist-info → lecrapaud-0.21.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lecrapaud
-Version: 0.21.1
+Version: 0.21.2
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 License-File: LICENSE
@@ -218,7 +218,11 @@ context = {
     "val_size": 0.2,
     "test_size": 0.2,
     "pca_temporal": [
-        {"name": "LAST_20_RET", "columns": [f"RET_-{i}" for i in range(1, 21)]},
+        # Old format (still supported)
+        # {"name": "LAST_20_RET", "columns": [f"RET_-{i}" for i in range(1, 21)]},
+        # New simplified format - automatically creates lag columns
+        {"name": "LAST_20_RET", "column": "RET", "lags": 20},
+        {"name": "LAST_10_VOL", "column": "VOLUME", "lags": 10},
     ],
     "pca_cross_sectional": [
         {
@@ -255,11 +259,20 @@ experiment = app.create_experiment(data=your_dataframe, **context)
 2. **Parameter Precedence**: When loading an existing experiment, the stored context takes precedence over any parameters passed to the constructor.
-3. **PCA Time Series**: For time series data with `pca_cross_sectional` where index equals `date_column`, the system automatically uses an expanding window approach to prevent data leakage.
+3. **PCA Time Series**:
+   - For time series data, both `pca_cross_sectional` and `pca_temporal` automatically use an expanding window approach with periodic refresh (default: every 90 days) to prevent data leakage.
+   - The system fits PCA only on historical data (lookback window of 365 days by default) and avoids look-ahead bias.
+   - For panel data (e.g., multiple stocks), lag features are created per group when using the simplified `pca_temporal` format.
+   - Missing PCA values are handled with forward-fill followed by zero-fill to ensure compatibility with downstream models.
-4. **OpenAI Embeddings**: If using `columns_pca` with text columns, ensure `OPENAI_API_KEY` is set as an environment variable.
+4. **PCA Temporal Simplified Format**:
+   - Instead of manually listing lag columns: `{"name": "LAST_20_RET", "columns": ["RET_-1", "RET_-2", ..., "RET_-20"]}`
+   - Use the simplified format: `{"name": "LAST_20_RET", "column": "RET", "lags": 20}`
+   - The system automatically creates the lag columns, handling panel data correctly with `group_column`.
-5. **Model Indices**: The `models_idx` parameter accepts both integer indices and string names (e.g., `'xgb'`, `'lgb'`, `'catboost'`).
+5. **OpenAI Embeddings**: If using `columns_pca` with text columns, ensure `OPENAI_API_KEY` is set as an environment variable.
+6. **Model Indices**: The `models_idx` parameter accepts both integer indices and string names (e.g., `'xgb'`, `'lgb'`, `'catboost'`).

{lecrapaud-0.21.1.dist-info → lecrapaud-0.21.2.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 lecrapaud/__init__.py,sha256=7Wp_VF08UZP8o-GkpB4_yRjP4twQmpcTc3202OkPmHs,176
 lecrapaud/api.py,sha256=7OL_wbg9hCmlZ0WI6eCDkublntES3f320OZlpuKu8f4,22376
-lecrapaud/config.py,sha256=0NEg61QdLxQ97bVFDDXa6OwlWFEo_z8VIhX5KrD1ik0,1170
+lecrapaud/config.py,sha256=7kwV9kpglFX79YC3fKcANawWJMYYi7SGaVShNsmO4EQ,1175
 lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
 lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
 lecrapaud/db/alembic/env.py,sha256=RvTTBa3bDVBxmDtapAfzUoeWBgmVQU3s9U6HmQCAP84,2421
@@ -31,7 +31,7 @@ lecrapaud/db/session.py,sha256=u9NCwUoV5VbtScRb6HOSQr4oTEjIwj0waP5mGlc1qJg,3735
 lecrapaud/directories.py,sha256=0LrANuDgbuneSLker60c6q2hmGnQ3mKHIztTGzTx6Gw,826
 lecrapaud/experiment.py,sha256=LiecZS3P4igO_3nJ4IB-2b25CttQS2RePDnhBNucvdE,2478
 lecrapaud/feature_engineering.py,sha256=SvGrJXv24rVgH0QE5mRwJITcCLfUqgbV2Ep68bBVnJs,58794
-lecrapaud/feature_selection.py,sha256=Q9xWVmZsvRjX9mJHB_PY_KLXsEAYNLX7txSe0cniY4A,47529
+lecrapaud/feature_selection.py,sha256=vzL-eklVZl-tHIwqTy4Yg9kYpwOTCoM72IrFoJyDmg8,47203
 lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
 lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
 lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
@@ -41,10 +41,10 @@ lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQ
 lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
 lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
 lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
-lecrapaud/model_selection.py,sha256=o4_hOEp91_33HtMatVHU7YPc71KZ2hK7wucN63xqWkA,88017
+lecrapaud/model_selection.py,sha256=VL-JQGY-dRsFgfuRob_-lt9bFyex_PyAUJsPHqr453c,88187
 lecrapaud/search_space.py,sha256=caCehJklD3-sgmlisJj_GmuB7LJiVvTF71gEjPGDvV4,36336
 lecrapaud/utils.py,sha256=0k76HFETO0_NgCYUv8b3RTBLgry6MsDBaHJfpAplxCY,8855
-lecrapaud-0.21.1.dist-info/METADATA,sha256=rKls8xvjhu9f72jTw2sjBYCmQPw-N02RSScSOjJ1E2g,14348
-lecrapaud-0.21.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-lecrapaud-0.21.1.dist-info/licenses/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
-lecrapaud-0.21.1.dist-info/RECORD,,
+lecrapaud-0.21.2.dist-info/METADATA,sha256=5e2V3i21uDdh9fnrdc5MZUQ7EZMJgR8mRKnvGzqwmZw,15337
+lecrapaud-0.21.2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+lecrapaud-0.21.2.dist-info/licenses/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
+lecrapaud-0.21.2.dist-info/RECORD,,

{lecrapaud-0.21.1.dist-info → lecrapaud-0.21.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{lecrapaud-0.21.1.dist-info → lecrapaud-0.21.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

lecrapaud 0.21.1__py3-none-any.whl → 0.21.2__py3-none-any.whl

Potentially problematic release.

lecrapaud 0.21.1__py3-none-any.whl → 0.21.2__py3-none-any.whl