lecrapaud 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic (more details are linked from the original release page).

@@ -122,41 +122,8 @@ class FeatureEngineeringEngine:
122
122
  # Cyclic encode dates
123
123
  self.data = self.cyclic_encode_date()
124
124
 
125
- # Remove constant columns if training (feature selection methods need independent features)
126
- if self.for_training:
127
- self.data = self.remove_constant_columns()
128
-
129
125
  return self.data
130
126
 
131
- def remove_constant_columns(
132
- self,
133
- threshold: float = 0.99,
134
- ) -> pd.DataFrame:
135
- """
136
- Removes constant or almost constant columns from a DataFrame.
137
-
138
- Parameters:
139
- threshold (float): Max proportion for a single value (default 0.99 = 99%).
140
-
141
- Returns:
142
- pd.DataFrame: Cleaned DataFrame.
143
- """
144
-
145
- to_drop = []
146
-
147
- for col in self.data.columns:
148
- value_counts = self.data[col].value_counts(dropna=False, normalize=True)
149
- if value_counts.empty:
150
- to_drop.append(col)
151
- elif value_counts.iloc[0] >= threshold:
152
- to_drop.append(col)
153
-
154
- if to_drop:
155
- logger.info(f"🔍 Removed {len(to_drop)} constant/almost constant columns:")
156
- logger.info(to_drop)
157
-
158
- return self.data.drop(columns=to_drop, errors="ignore")
159
-
160
127
  def cyclic_encode_date(self) -> pd.DataFrame:
161
128
  """
162
129
  Adds cyclic (sine and cosine) encoding for common date parts: day of week, day of month, and month.
@@ -138,6 +138,7 @@ class FeatureSelectionEngine:
138
138
  # Let's start by removing extremly correlated features
139
139
  # This is needed to reduce nb of feature but also for methods such as anova or chi2 that requires independent features
140
140
  # TODO: we could also remove low variance features
141
+ self.X = self.remove_constant_columns()
141
142
  features_uncorrelated, features_correlated = self.remove_correlated_features(
142
143
  90, vizualize=False
143
144
  )
@@ -352,6 +353,35 @@ class FeatureSelectionEngine:
352
353
  # Remove correlation
353
354
  # ------------------
354
355
 
356
+ def remove_constant_columns(
357
+ self,
358
+ threshold: float = 0.99,
359
+ ) -> pd.DataFrame:
360
+ """
361
+ Removes constant or almost constant columns from a DataFrame.
362
+
363
+ Parameters:
364
+ threshold (float): Max proportion for a single value (default 0.99 = 99%).
365
+
366
+ Returns:
367
+ pd.DataFrame: Cleaned DataFrame.
368
+ """
369
+
370
+ to_drop = []
371
+
372
+ for col in self.data.columns:
373
+ value_counts = self.data[col].value_counts(dropna=False, normalize=True)
374
+ if value_counts.empty:
375
+ to_drop.append(col)
376
+ elif value_counts.iloc[0] >= threshold:
377
+ to_drop.append(col)
378
+
379
+ if to_drop:
380
+ logger.info(f"🔍 Removed {len(to_drop)} constant/almost constant columns:")
381
+ logger.info(to_drop)
382
+
383
+ return self.data.drop(columns=to_drop, errors="ignore")
384
+
355
385
  def remove_correlated_features(self, corr_threshold: int, vizualize: bool = False):
356
386
  X = self.X
357
387
  features = X.columns
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.8.0
3
+ Version: 0.8.1
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -23,8 +23,8 @@ lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk
23
23
  lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
24
24
  lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
25
25
  lecrapaud/experiment.py,sha256=NwwGDZqUyvvRu5EDK3_Oh0_kF29bNIPDawVFFpzFvZM,2350
26
- lecrapaud/feature_engineering.py,sha256=_KohdWnM5W4z6bDzrNrHuRb_0aY1Fc0DUpssNPi7wy0,32241
27
- lecrapaud/feature_selection.py,sha256=vgA7E4zUJpv3wNC3GA8_l-RjFdwoiIXcgoc6SKQH22o,42746
26
+ lecrapaud/feature_engineering.py,sha256=U3YOftZBB3PEqGbu2aFY_3B3Ks9Hiu04UxixOkBz0UU,31168
27
+ lecrapaud/feature_selection.py,sha256=v4azPcLs-xutvjlIWdTTgJu9vnT5Gpmg9Tb3hEeDqms,43681
28
28
  lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
29
29
  lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
30
30
  lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
@@ -37,7 +37,7 @@ lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNP
37
37
  lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
38
38
  lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
39
39
  lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
40
- lecrapaud-0.8.0.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
41
- lecrapaud-0.8.0.dist-info/METADATA,sha256=_qAznpxq2p5QHxb7FEKJoPbQQKDb0SfsTkyj6DA0diA,11623
42
- lecrapaud-0.8.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
43
- lecrapaud-0.8.0.dist-info/RECORD,,
40
+ lecrapaud-0.8.1.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
41
+ lecrapaud-0.8.1.dist-info/METADATA,sha256=-N9LIA9ueqU7d4-AlJ0Evl2KBxiZYrzGBz5oMZ9MQnY,11623
42
+ lecrapaud-0.8.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
43
+ lecrapaud-0.8.1.dist-info/RECORD,,