dragon-ml-toolbox 9.1.0__tar.gz → 9.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-9.1.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-9.2.0}/PKG-INFO +1 -1
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ETL_engineering.py +42 -3
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/LICENSE +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/README.md +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_evaluation_multi.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_models.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_scaler.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/setup.cfg +0 -0
|
@@ -4,6 +4,7 @@ import re
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
|
|
6
6
|
from .path_manager import sanitize_filename, make_fullpath
|
|
7
|
+
from .utilities import save_dataframe, load_dataframe
|
|
7
8
|
from ._script_info import _script_info
|
|
8
9
|
from ._logger import _LOGGER
|
|
9
10
|
|
|
@@ -190,12 +191,13 @@ class DataFrameCleaner:
|
|
|
190
191
|
|
|
191
192
|
self.cleaners = cleaners
|
|
192
193
|
|
|
193
|
-
def clean(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
194
|
+
def clean(self, df: pl.DataFrame, clone_df: bool=True) -> pl.DataFrame:
|
|
194
195
|
"""
|
|
195
196
|
Applies all defined cleaning rules to the Polars DataFrame.
|
|
196
197
|
|
|
197
198
|
Args:
|
|
198
199
|
df (pl.DataFrame): The Polars DataFrame to clean.
|
|
200
|
+
clone_df (bool): Whether to work on a clone to prevent undesired changes.
|
|
199
201
|
|
|
200
202
|
Returns:
|
|
201
203
|
pl.DataFrame: A new, cleaned Polars DataFrame.
|
|
@@ -214,7 +216,10 @@ class DataFrameCleaner:
|
|
|
214
216
|
print(f"\t- {miss_col}")
|
|
215
217
|
raise ValueError()
|
|
216
218
|
|
|
217
|
-
|
|
219
|
+
if clone_df:
|
|
220
|
+
df_cleaned = df.clone()
|
|
221
|
+
else:
|
|
222
|
+
df_cleaned = df
|
|
218
223
|
|
|
219
224
|
# Build and apply a series of expressions for each column
|
|
220
225
|
for cleaner in self.cleaners:
|
|
@@ -226,7 +231,14 @@ class DataFrameCleaner:
|
|
|
226
231
|
# Sequentially chain 'replace_all' expressions for each rule
|
|
227
232
|
for pattern, replacement in cleaner.rules.items():
|
|
228
233
|
final_pattern = f"(?i){pattern}" if cleaner.case_insensitive else pattern
|
|
229
|
-
|
|
234
|
+
|
|
235
|
+
if replacement is None:
|
|
236
|
+
# If replacement is None, use a when/then expression to set matching values to null
|
|
237
|
+
col_expr = pl.when(col_expr.str.contains(final_pattern)) \
|
|
238
|
+
.then(None) \
|
|
239
|
+
.otherwise(col_expr)
|
|
240
|
+
else:
|
|
241
|
+
col_expr = col_expr.str.replace_all(final_pattern, replacement)
|
|
230
242
|
|
|
231
243
|
# Execute the expression chain for the column
|
|
232
244
|
df_cleaned = df_cleaned.with_columns(col_expr.alias(col_name))
|
|
@@ -234,6 +246,33 @@ class DataFrameCleaner:
|
|
|
234
246
|
_LOGGER.info(f"Cleaned {len(self.cleaners)} columns.")
|
|
235
247
|
|
|
236
248
|
return df_cleaned
|
|
249
|
+
|
|
250
|
+
def load_clean_save(self, input_filepath: Union[str,Path], output_filepath: Union[str,Path]):
    """
    Load a data file, apply every configured cleaning rule, and save the result.

    Convenience wrapper that runs the whole cleaning workflow in one call:
    the input file is read into a Polars DataFrame, passed through
    `clean()`, and the cleaned DataFrame is written to the requested
    output location.

    The file is loaded with `all_strings=True` so that every column is
    read as a string, avoiding type-inference surprises before the
    cleaning rules run.

    Args:
        input_filepath (Union[str, Path]):
            Path to the input data file.
        output_filepath (Union[str, Path]):
            Full path where the cleaned data file will be saved.
    """
    # Second element of the returned pair is not needed here.
    raw_df, _ = load_dataframe(df_path=input_filepath, kind="polars", all_strings=True)

    # The frame was just loaded and is owned by this method, so `clean()` is
    # told not to clone it first.
    cleaned_df = self.clean(df=raw_df, clone_df=False)

    # Only string destinations are passed through `make_fullpath`; a Path
    # object is used exactly as given.
    # NOTE(review): presumably `make_fullpath` returns a Path - confirm.
    destination = (
        make_fullpath(input_path=output_filepath, enforce="file")
        if isinstance(output_filepath, str)
        else output_filepath
    )

    # `save_dataframe` takes the directory and the file name separately.
    save_dataframe(df=cleaned_df, save_dir=destination.parent, filename=destination.name)

    return None
|
|
237
276
|
|
|
238
277
|
|
|
239
278
|
############ TRANSFORM MAIN ####################
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|