lecrapaud 0.8.2__tar.gz → 0.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/PKG-INFO +1 -1
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/feature_selection.py +20 -23
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/pyproject.toml +1 -1
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/LICENSE +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/README.md +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/__init__.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/api.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/config.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/__init__.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/README +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/env.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/script.py.mako +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic.ini +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/__init__.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/base.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/experiment.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/feature.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/feature_selection.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/feature_selection_rank.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/model.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/model_selection.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/model_training.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/score.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/models/target.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/session.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/directories.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/experiment.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/feature_engineering.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/integrations/openai_integration.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/jobs/__init__.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/jobs/config.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/jobs/scheduler.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/jobs/tasks.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/model_selection.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/search_space.py +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/speed_tests/tests.ipynb +0 -0
- {lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/utils.py +0 -0
|
@@ -135,10 +135,9 @@ class FeatureSelectionEngine:
|
|
|
135
135
|
logger.info(f"Starting feature selection for TARGET_{target_number}...")
|
|
136
136
|
clean_directory(self.fs_dir_target)
|
|
137
137
|
|
|
138
|
-
# Let's start by removing extremely correlated features
|
|
139
|
-
# This is needed to reduce the number of features, but also for methods such as ANOVA or chi2 that require independent features
|
|
140
|
-
|
|
141
|
-
self.X = self.remove_constant_columns()
|
|
138
|
+
# Let's start by removing very low variance features and extremely correlated features
|
|
139
|
+
# This is needed to reduce the number of features, but also for methods such as ANOVA or chi2 that require independent, non-constant, non-all-zero features
|
|
140
|
+
self.X = self.remove_low_variance_columns()
|
|
142
141
|
features_uncorrelated, features_correlated = self.remove_correlated_features(
|
|
143
142
|
90, vizualize=False
|
|
144
143
|
)
|
|
@@ -353,34 +352,31 @@ class FeatureSelectionEngine:
|
|
|
353
352
|
# Remove correlation
|
|
354
353
|
# ------------------
|
|
355
354
|
|
|
356
|
-
def
|
|
357
|
-
self,
|
|
358
|
-
threshold: float = 0.99,
|
|
359
|
-
) -> pd.DataFrame:
|
|
355
|
+
def remove_low_variance_columns(self, threshold: float = 1e-10) -> pd.DataFrame:
|
|
360
356
|
"""
|
|
361
|
-
|
|
357
|
+
Removes columns with very low variance (including constant columns).
|
|
362
358
|
|
|
363
359
|
Parameters:
|
|
364
|
-
threshold (float):
|
|
360
|
+
threshold (float): Minimum variance required to keep a column.
|
|
361
|
+
Default is 1e-10 to eliminate near-constant features.
|
|
365
362
|
|
|
366
363
|
Returns:
|
|
367
|
-
pd.DataFrame: Cleaned DataFrame.
|
|
364
|
+
pd.DataFrame: Cleaned DataFrame without low-variance columns.
|
|
368
365
|
"""
|
|
366
|
+
X = self.X
|
|
369
367
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
elif value_counts.iloc[0] >= threshold:
|
|
377
|
-
to_drop.append(col)
|
|
368
|
+
low_var_cols = [
|
|
369
|
+
col
|
|
370
|
+
for col in X.columns
|
|
371
|
+
if pd.api.types.is_numeric_dtype(X[col])
|
|
372
|
+
and np.nanvar(X[col].values) < threshold
|
|
373
|
+
]
|
|
378
374
|
|
|
379
|
-
if
|
|
380
|
-
logger.info(f"
|
|
381
|
-
logger.info(
|
|
375
|
+
if low_var_cols:
|
|
376
|
+
logger.info(f"🧹 Removed {len(low_var_cols)} low-variance columns:")
|
|
377
|
+
logger.info(low_var_cols)
|
|
382
378
|
|
|
383
|
-
return
|
|
379
|
+
return X.drop(columns=low_var_cols, errors="ignore")
|
|
384
380
|
|
|
385
381
|
def remove_correlated_features(self, corr_threshold: int, vizualize: bool = False):
|
|
386
382
|
X = self.X
|
|
@@ -866,6 +862,7 @@ class PreprocessModel:
|
|
|
866
862
|
train_scaled = None
|
|
867
863
|
val_scaled = None
|
|
868
864
|
test_scaled = None
|
|
865
|
+
scalers_y = None
|
|
869
866
|
|
|
870
867
|
# save data
|
|
871
868
|
joblib.dump(train_scaled, f"{self.data_dir}/train_scaled.pkl")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py
RENAMED
|
File without changes
|
{lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py
RENAMED
|
File without changes
|
{lecrapaud-0.8.2 → lecrapaud-0.8.4}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|