lecrapaud 0.8.1__tar.gz → 0.8.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (43)
  1. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/PKG-INFO +1 -1
  2. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/feature_selection.py +19 -23
  3. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/pyproject.toml +1 -1
  4. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/LICENSE +0 -0
  5. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/README.md +0 -0
  6. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/__init__.py +0 -0
  7. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/api.py +0 -0
  8. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/config.py +0 -0
  9. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/__init__.py +0 -0
  10. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/alembic/README +0 -0
  11. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/alembic/env.py +0 -0
  12. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/alembic/script.py.mako +0 -0
  13. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  14. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  15. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  16. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/alembic.ini +0 -0
  17. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/__init__.py +0 -0
  18. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/base.py +0 -0
  19. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/experiment.py +0 -0
  20. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/feature.py +0 -0
  21. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/feature_selection.py +0 -0
  22. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  23. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/model.py +0 -0
  24. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/model_selection.py +0 -0
  25. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/model_training.py +0 -0
  26. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/score.py +0 -0
  27. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/models/target.py +0 -0
  28. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/db/session.py +0 -0
  29. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/directories.py +0 -0
  30. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/experiment.py +0 -0
  31. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/feature_engineering.py +0 -0
  32. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/integrations/openai_integration.py +0 -0
  33. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/jobs/__init__.py +0 -0
  34. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/jobs/config.py +0 -0
  35. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/jobs/scheduler.py +0 -0
  36. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/jobs/tasks.py +0 -0
  37. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/model_selection.py +0 -0
  38. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/search_space.py +0 -0
  39. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
  40. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
  41. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
  42. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/speed_tests/tests.ipynb +0 -0
  43. {lecrapaud-0.8.1 → lecrapaud-0.8.3}/lecrapaud/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.8.1
3
+ Version: 0.8.3
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -135,10 +135,9 @@ class FeatureSelectionEngine:
135
135
  logger.info(f"Starting feature selection for TARGET_{target_number}...")
136
136
  clean_directory(self.fs_dir_target)
137
137
 
138
- # Let's start by removing extremly correlated features
139
- # This is needed to reduce nb of feature but also for methods such as anova or chi2 that requires independent features
140
- # TODO: we could also remove low variance features
141
- self.X = self.remove_constant_columns()
138
+ # Let's start by removing very low variance feature and extremly correlated features
139
+ # This is needed to reduce nb of feature but also for methods such as anova or chi2 that requires independent, non constant, non full 0 features
140
+ self.X = self.remove_low_variance_columns()
142
141
  features_uncorrelated, features_correlated = self.remove_correlated_features(
143
142
  90, vizualize=False
144
143
  )
@@ -353,34 +352,31 @@ class FeatureSelectionEngine:
353
352
  # Remove correlation
354
353
  # ------------------
355
354
 
356
- def remove_constant_columns(
357
- self,
358
- threshold: float = 0.99,
359
- ) -> pd.DataFrame:
355
+ def remove_low_variance_columns(self, threshold: float = 1e-10) -> pd.DataFrame:
360
356
  """
361
- Removes constant or almost constant columns from a DataFrame.
357
+ Removes columns with very low variance (including constant columns).
362
358
 
363
359
  Parameters:
364
- threshold (float): Max proportion for a single value (default 0.99 = 99%).
360
+ threshold (float): Minimum variance required to keep a column.
361
+ Default is 1e-10 to eliminate near-constant features.
365
362
 
366
363
  Returns:
367
- pd.DataFrame: Cleaned DataFrame.
364
+ pd.DataFrame: Cleaned DataFrame without low-variance columns.
368
365
  """
366
+ X = self.X
369
367
 
370
- to_drop = []
371
-
372
- for col in self.data.columns:
373
- value_counts = self.data[col].value_counts(dropna=False, normalize=True)
374
- if value_counts.empty:
375
- to_drop.append(col)
376
- elif value_counts.iloc[0] >= threshold:
377
- to_drop.append(col)
368
+ low_var_cols = [
369
+ col
370
+ for col in X.columns
371
+ if pd.api.types.is_numeric_dtype(X[col])
372
+ and np.nanvar(X[col].values) < threshold
373
+ ]
378
374
 
379
- if to_drop:
380
- logger.info(f"🔍 Removed {len(to_drop)} constant/almost constant columns:")
381
- logger.info(to_drop)
375
+ if low_var_cols:
376
+ logger.info(f"🧹 Removed {len(low_var_cols)} low-variance columns:")
377
+ logger.info(low_var_cols)
382
378
 
383
- return self.data.drop(columns=to_drop, errors="ignore")
379
+ return X.drop(columns=low_var_cols, errors="ignore")
384
380
 
385
381
  def remove_correlated_features(self, corr_threshold: int, vizualize: bool = False):
386
382
  X = self.X
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lecrapaud"
3
- version = "0.8.1"
3
+ version = "0.8.3"
4
4
  description = "Framework for machine and deep learning, with regression, classification and time series analysis"
5
5
  authors = [
6
6
  {name = "Pierre H. Gallet"}
File without changes
File without changes
File without changes
File without changes
File without changes