dragon-ml-toolbox 9.1.0__tar.gz → 9.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (40)
  1. {dragon_ml_toolbox-9.1.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-9.2.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ETL_engineering.py +42 -3
  4. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/pyproject.toml +1 -1
  5. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/LICENSE +0 -0
  6. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/LICENSE-THIRD-PARTY.md +0 -0
  7. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/README.md +0 -0
  8. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  9. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  10. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  11. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  12. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/GUI_tools.py +0 -0
  13. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/MICE_imputation.py +0 -0
  14. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_callbacks.py +0 -0
  15. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_datasetmaster.py +0 -0
  16. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_evaluation.py +0 -0
  17. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_evaluation_multi.py +0 -0
  18. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_inference.py +0 -0
  19. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_models.py +0 -0
  20. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_optimization.py +0 -0
  21. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_scaler.py +0 -0
  22. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ML_trainer.py +0 -0
  23. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/PSO_optimization.py +0 -0
  24. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/RNN_forecast.py +0 -0
  25. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/SQL.py +0 -0
  26. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/VIF_factor.py +0 -0
  27. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/__init__.py +0 -0
  28. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/_logger.py +0 -0
  29. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/_script_info.py +0 -0
  30. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/custom_logger.py +0 -0
  31. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/data_exploration.py +0 -0
  32. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ensemble_evaluation.py +0 -0
  33. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ensemble_inference.py +0 -0
  34. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/ensemble_learning.py +0 -0
  35. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/handle_excel.py +0 -0
  36. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/keys.py +0 -0
  37. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/optimization_tools.py +0 -0
  38. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/path_manager.py +0 -0
  39. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/ml_tools/utilities.py +0 -0
  40. {dragon_ml_toolbox-9.1.0 → dragon_ml_toolbox-9.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 9.1.0
3
+ Version: 9.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 9.1.0
3
+ Version: 9.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -4,6 +4,7 @@ import re
4
4
  from pathlib import Path
5
5
  from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
6
6
  from .path_manager import sanitize_filename, make_fullpath
7
+ from .utilities import save_dataframe, load_dataframe
7
8
  from ._script_info import _script_info
8
9
  from ._logger import _LOGGER
9
10
 
@@ -190,12 +191,13 @@ class DataFrameCleaner:
190
191
 
191
192
  self.cleaners = cleaners
192
193
 
193
- def clean(self, df: pl.DataFrame) -> pl.DataFrame:
194
+ def clean(self, df: pl.DataFrame, clone_df: bool=True) -> pl.DataFrame:
194
195
  """
195
196
  Applies all defined cleaning rules to the Polars DataFrame.
196
197
 
197
198
  Args:
198
199
  df (pl.DataFrame): The Polars DataFrame to clean.
200
+ clone_df (bool): Whether to work on a clone to prevent undesired changes.
199
201
 
200
202
  Returns:
201
203
  pl.DataFrame: A new, cleaned Polars DataFrame.
@@ -214,7 +216,10 @@ class DataFrameCleaner:
214
216
  print(f"\t- {miss_col}")
215
217
  raise ValueError()
216
218
 
217
- df_cleaned = df.clone()
219
+ if clone_df:
220
+ df_cleaned = df.clone()
221
+ else:
222
+ df_cleaned = df
218
223
 
219
224
  # Build and apply a series of expressions for each column
220
225
  for cleaner in self.cleaners:
@@ -226,7 +231,14 @@ class DataFrameCleaner:
226
231
  # Sequentially chain 'replace_all' expressions for each rule
227
232
  for pattern, replacement in cleaner.rules.items():
228
233
  final_pattern = f"(?i){pattern}" if cleaner.case_insensitive else pattern
229
- col_expr = col_expr.str.replace_all(final_pattern, replacement)
234
+
235
+ if replacement is None:
236
+ # If replacement is None, use a when/then expression to set matching values to null
237
+ col_expr = pl.when(col_expr.str.contains(final_pattern)) \
238
+ .then(None) \
239
+ .otherwise(col_expr)
240
+ else:
241
+ col_expr = col_expr.str.replace_all(final_pattern, replacement)
230
242
 
231
243
  # Execute the expression chain for the column
232
244
  df_cleaned = df_cleaned.with_columns(col_expr.alias(col_name))
@@ -234,6 +246,33 @@ class DataFrameCleaner:
234
246
  _LOGGER.info(f"Cleaned {len(self.cleaners)} columns.")
235
247
 
236
248
  return df_cleaned
249
+
250
+ def load_clean_save(self, input_filepath: Union[str,Path], output_filepath: Union[str,Path]):
251
+ """
252
+ This convenience method encapsulates the entire cleaning process into a
253
+ single call. It loads a DataFrame from a specified file, applies all
254
+ cleaning rules configured in the `DataFrameCleaner` instance, and saves
255
+ the resulting cleaned DataFrame to a new file.
256
+
257
+ The method ensures that all data is loaded as string types to prevent
258
+ unintended type inference issues before cleaning operations are applied.
259
+
260
+ Args:
261
+ input_filepath (Union[str, Path]):
262
+ The path to the input data file.
263
+ output_filepath (Union[str, Path]):
264
+ The full path, where the cleaned data file will be saved.
265
+ """
266
+ df, _ = load_dataframe(df_path=input_filepath, kind="polars", all_strings=True)
267
+
268
+ df_clean = self.clean(df=df, clone_df=False)
269
+
270
+ if isinstance(output_filepath, str):
271
+ output_filepath = make_fullpath(input_path=output_filepath, enforce="file")
272
+
273
+ save_dataframe(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)
274
+
275
+ return None
237
276
 
238
277
 
239
278
  ############ TRANSFORM MAIN ####################
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "9.1.0"
3
+ version = "9.2.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }