lecrapaud 0.7.1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- lecrapaud/feature_engineering.py +33 -0
- {lecrapaud-0.7.1.dist-info → lecrapaud-0.8.0.dist-info}/METADATA +1 -1
- {lecrapaud-0.7.1.dist-info → lecrapaud-0.8.0.dist-info}/RECORD +5 -5
- {lecrapaud-0.7.1.dist-info → lecrapaud-0.8.0.dist-info}/LICENSE +0 -0
- {lecrapaud-0.7.1.dist-info → lecrapaud-0.8.0.dist-info}/WHEEL +0 -0
lecrapaud/feature_engineering.py
CHANGED
|
@@ -122,8 +122,41 @@ class FeatureEngineeringEngine:
|
|
|
122
122
|
# Cyclic encode dates
|
|
123
123
|
self.data = self.cyclic_encode_date()
|
|
124
124
|
|
|
125
|
+
# Remove constant columns if training (feature selection methods need independent features)
|
|
126
|
+
if self.for_training:
|
|
127
|
+
self.data = self.remove_constant_columns()
|
|
128
|
+
|
|
125
129
|
return self.data
|
|
126
130
|
|
|
131
|
+
def remove_constant_columns(
|
|
132
|
+
self,
|
|
133
|
+
threshold: float = 0.99,
|
|
134
|
+
) -> pd.DataFrame:
|
|
135
|
+
"""
|
|
136
|
+
Removes constant or almost constant columns from a DataFrame.
|
|
137
|
+
|
|
138
|
+
Parameters:
|
|
139
|
+
threshold (float): Max proportion for a single value (default 0.99 = 99%).
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
pd.DataFrame: Cleaned DataFrame.
|
|
143
|
+
"""
|
|
144
|
+
|
|
145
|
+
to_drop = []
|
|
146
|
+
|
|
147
|
+
for col in self.data.columns:
|
|
148
|
+
value_counts = self.data[col].value_counts(dropna=False, normalize=True)
|
|
149
|
+
if value_counts.empty:
|
|
150
|
+
to_drop.append(col)
|
|
151
|
+
elif value_counts.iloc[0] >= threshold:
|
|
152
|
+
to_drop.append(col)
|
|
153
|
+
|
|
154
|
+
if to_drop:
|
|
155
|
+
logger.info(f"🔍 Removed {len(to_drop)} constant/almost constant columns:")
|
|
156
|
+
logger.info(to_drop)
|
|
157
|
+
|
|
158
|
+
return self.data.drop(columns=to_drop, errors="ignore")
|
|
159
|
+
|
|
127
160
|
def cyclic_encode_date(self) -> pd.DataFrame:
|
|
128
161
|
"""
|
|
129
162
|
Adds cyclic (sine and cosine) encoding for common date parts: day of week, day of month, and month.
|
|
@@ -23,7 +23,7 @@ lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk
|
|
|
23
23
|
lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
|
|
24
24
|
lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
|
|
25
25
|
lecrapaud/experiment.py,sha256=NwwGDZqUyvvRu5EDK3_Oh0_kF29bNIPDawVFFpzFvZM,2350
|
|
26
|
-
lecrapaud/feature_engineering.py,sha256=
|
|
26
|
+
lecrapaud/feature_engineering.py,sha256=_KohdWnM5W4z6bDzrNrHuRb_0aY1Fc0DUpssNPi7wy0,32241
|
|
27
27
|
lecrapaud/feature_selection.py,sha256=vgA7E4zUJpv3wNC3GA8_l-RjFdwoiIXcgoc6SKQH22o,42746
|
|
28
28
|
lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
|
|
29
29
|
lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
|
|
@@ -37,7 +37,7 @@ lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNP
|
|
|
37
37
|
lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
|
|
38
38
|
lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
|
|
39
39
|
lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
|
|
40
|
-
lecrapaud-0.7.1.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
|
|
41
|
-
lecrapaud-0.
|
|
42
|
-
lecrapaud-0.7.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
43
|
-
lecrapaud-0.7.1.dist-info/RECORD,,
|
|
40
|
+
lecrapaud-0.8.0.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
|
|
41
|
+
lecrapaud-0.8.0.dist-info/METADATA,sha256=_qAznpxq2p5QHxb7FEKJoPbQQKDb0SfsTkyj6DA0diA,11623
|
|
42
|
+
lecrapaud-0.8.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
43
|
+
lecrapaud-0.8.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|