lecrapaud 0.8.0__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/PKG-INFO +1 -1
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/feature_engineering.py +0 -33
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/feature_selection.py +30 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/pyproject.toml +1 -1
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/LICENSE +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/README.md +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/__init__.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/api.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/config.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/__init__.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/README +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/env.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/script.py.mako +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic.ini +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/__init__.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/base.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/experiment.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/feature.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/feature_selection.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/feature_selection_rank.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/model.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/model_selection.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/model_training.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/score.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/models/target.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/session.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/directories.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/experiment.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/integrations/openai_integration.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/jobs/__init__.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/jobs/config.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/jobs/scheduler.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/jobs/tasks.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/model_selection.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/search_space.py +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/speed_tests/tests.ipynb +0 -0
- {lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/utils.py +0 -0
|
@@ -122,41 +122,8 @@ class FeatureEngineeringEngine:
|
|
|
122
122
|
# Cyclic encode dates
|
|
123
123
|
self.data = self.cyclic_encode_date()
|
|
124
124
|
|
|
125
|
-
# Remove constant columns if training (feature selection methods need independent features)
|
|
126
|
-
if self.for_training:
|
|
127
|
-
self.data = self.remove_constant_columns()
|
|
128
|
-
|
|
129
125
|
return self.data
|
|
130
126
|
|
|
131
|
-
def remove_constant_columns(
|
|
132
|
-
self,
|
|
133
|
-
threshold: float = 0.99,
|
|
134
|
-
) -> pd.DataFrame:
|
|
135
|
-
"""
|
|
136
|
-
Removes constant or almost constant columns from a DataFrame.
|
|
137
|
-
|
|
138
|
-
Parameters:
|
|
139
|
-
threshold (float): Max proportion for a single value (default 0.99 = 99%).
|
|
140
|
-
|
|
141
|
-
Returns:
|
|
142
|
-
pd.DataFrame: Cleaned DataFrame.
|
|
143
|
-
"""
|
|
144
|
-
|
|
145
|
-
to_drop = []
|
|
146
|
-
|
|
147
|
-
for col in self.data.columns:
|
|
148
|
-
value_counts = self.data[col].value_counts(dropna=False, normalize=True)
|
|
149
|
-
if value_counts.empty:
|
|
150
|
-
to_drop.append(col)
|
|
151
|
-
elif value_counts.iloc[0] >= threshold:
|
|
152
|
-
to_drop.append(col)
|
|
153
|
-
|
|
154
|
-
if to_drop:
|
|
155
|
-
logger.info(f"🔍 Removed {len(to_drop)} constant/almost constant columns:")
|
|
156
|
-
logger.info(to_drop)
|
|
157
|
-
|
|
158
|
-
return self.data.drop(columns=to_drop, errors="ignore")
|
|
159
|
-
|
|
160
127
|
def cyclic_encode_date(self) -> pd.DataFrame:
|
|
161
128
|
"""
|
|
162
129
|
Adds cyclic (sine and cosine) encoding for common date parts: day of week, day of month, and month.
|
|
@@ -138,6 +138,7 @@ class FeatureSelectionEngine:
|
|
|
138
138
|
# Let's start by removing extremely correlated features
|
|
139
139
|
# This is needed to reduce the number of features, but also for methods such as ANOVA or chi2 that require independent features
|
|
140
140
|
# TODO: we could also remove low variance features
|
|
141
|
+
self.X = self.remove_constant_columns()
|
|
141
142
|
features_uncorrelated, features_correlated = self.remove_correlated_features(
|
|
142
143
|
90, vizualize=False
|
|
143
144
|
)
|
|
@@ -352,6 +353,35 @@ class FeatureSelectionEngine:
|
|
|
352
353
|
# Remove correlation
|
|
353
354
|
# ------------------
|
|
354
355
|
|
|
356
|
+
def remove_constant_columns(
|
|
357
|
+
self,
|
|
358
|
+
threshold: float = 0.99,
|
|
359
|
+
) -> pd.DataFrame:
|
|
360
|
+
"""
|
|
361
|
+
Removes constant or almost constant columns from a DataFrame.
|
|
362
|
+
|
|
363
|
+
Parameters:
|
|
364
|
+
threshold (float): Max proportion for a single value (default 0.99 = 99%).
|
|
365
|
+
|
|
366
|
+
Returns:
|
|
367
|
+
pd.DataFrame: Cleaned DataFrame.
|
|
368
|
+
"""
|
|
369
|
+
|
|
370
|
+
to_drop = []
|
|
371
|
+
|
|
372
|
+
for col in self.data.columns:
|
|
373
|
+
value_counts = self.data[col].value_counts(dropna=False, normalize=True)
|
|
374
|
+
if value_counts.empty:
|
|
375
|
+
to_drop.append(col)
|
|
376
|
+
elif value_counts.iloc[0] >= threshold:
|
|
377
|
+
to_drop.append(col)
|
|
378
|
+
|
|
379
|
+
if to_drop:
|
|
380
|
+
logger.info(f"🔍 Removed {len(to_drop)} constant/almost constant columns:")
|
|
381
|
+
logger.info(to_drop)
|
|
382
|
+
|
|
383
|
+
return self.data.drop(columns=to_drop, errors="ignore")
|
|
384
|
+
|
|
355
385
|
def remove_correlated_features(self, corr_threshold: int, vizualize: bool = False):
|
|
356
386
|
X = self.X
|
|
357
387
|
features = X.columns
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py
RENAMED
|
File without changes
|
{lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py
RENAMED
|
File without changes
|
{lecrapaud-0.8.0 → lecrapaud-0.8.1}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|