dragon-ml-toolbox 8.1.0__tar.gz → 8.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (41)
  1. {dragon_ml_toolbox-8.1.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-8.2.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ETL_engineering.py +47 -10
  4. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/pyproject.toml +1 -1
  5. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/LICENSE +0 -0
  6. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/LICENSE-THIRD-PARTY.md +0 -0
  7. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/README.md +0 -0
  8. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  9. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  10. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  11. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  12. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/GUI_tools.py +0 -0
  13. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/MICE_imputation.py +0 -0
  14. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_callbacks.py +0 -0
  15. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_datasetmaster.py +0 -0
  16. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_evaluation.py +0 -0
  17. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_evaluation_multi.py +0 -0
  18. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_inference.py +0 -0
  19. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_models.py +0 -0
  20. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_optimization.py +0 -0
  21. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_scaler.py +0 -0
  22. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ML_trainer.py +0 -0
  23. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/PSO_optimization.py +0 -0
  24. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/RNN_forecast.py +0 -0
  25. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/SQL.py +0 -0
  26. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/VIF_factor.py +0 -0
  27. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/_ML_optimization_multi.py +0 -0
  28. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/__init__.py +0 -0
  29. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/_logger.py +0 -0
  30. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/_script_info.py +0 -0
  31. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/custom_logger.py +0 -0
  32. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/data_exploration.py +0 -0
  33. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ensemble_evaluation.py +0 -0
  34. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ensemble_inference.py +0 -0
  35. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/ensemble_learning.py +0 -0
  36. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/handle_excel.py +0 -0
  37. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/keys.py +0 -0
  38. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/optimization_tools.py +0 -0
  39. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/path_manager.py +0 -0
  40. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/ml_tools/utilities.py +0 -0
  41. {dragon_ml_toolbox-8.1.0 → dragon_ml_toolbox-8.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 8.1.0
3
+ Version: 8.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 8.1.0
3
+ Version: 8.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -3,7 +3,6 @@ import re
3
3
  from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
4
4
  from ._script_info import _script_info
5
5
  from ._logger import _LOGGER
6
- import warnings
7
6
 
8
7
 
9
8
  __all__ = [
@@ -13,6 +12,7 @@ __all__ = [
13
12
  "DataProcessor",
14
13
  "BinaryTransformer",
15
14
  "MultiBinaryDummifier",
15
+ "AutoDummifier",
16
16
  "KeywordDummifier",
17
17
  "NumberExtractor",
18
18
  "MultiNumberExtractor",
@@ -277,16 +277,32 @@ class DataProcessor:
277
277
  processed_columns.append(result.alias(output_col_spec))
278
278
 
279
279
  elif isinstance(result, pl.DataFrame):
280
- if not isinstance(output_col_spec, list):
281
- raise TypeError(f"Function for '{input_col_name}' returned a DataFrame but 'output_col' is not a list.")
282
- if len(result.columns) != len(output_col_spec):
283
- raise ValueError(
284
- f"Mismatch in '{input_col_name}': function produced {len(result.columns)} columns, "
285
- f"but recipe specifies {len(output_col_spec)} output names."
286
- )
280
+ # 1. Handle list-based renaming
281
+ if isinstance(output_col_spec, list):
282
+ if len(result.columns) != len(output_col_spec):
283
+ raise ValueError(
284
+ f"Mismatch in '{input_col_name}': function produced {len(result.columns)} columns, "
285
+ f"but recipe specifies {len(output_col_spec)} output names."
286
+ )
287
+
288
+ renamed_df = result.rename(dict(zip(result.columns, output_col_spec)))
289
+ processed_columns.extend(renamed_df.get_columns())
290
+
291
+ # 2. Handle a string prefix for AutoDummifier
292
+ elif isinstance(output_col_spec, str):
293
+ prefix = output_col_spec
294
+ # Replace the original name part with the desired prefix.
295
+ new_names = {
296
+ col: f"{prefix}{col[len(input_col_name):]}" for col in result.columns
297
+ }
298
+ renamed_df = result.rename(new_names)
299
+ processed_columns.extend(renamed_df.get_columns())
287
300
 
288
- renamed_df = result.rename(dict(zip(result.columns, output_col_spec)))
289
- processed_columns.extend(renamed_df.get_columns())
301
+ else:
302
+ raise TypeError(
303
+ f"Function for '{input_col_name}' returned a DataFrame, "
304
+ f"so 'output_col' must be a list of names or a string prefix."
305
+ )
290
306
 
291
307
  else:
292
308
  raise TypeError(f"Function for '{input_col_name}' returned an unexpected type: {type(result)}.")
@@ -413,6 +429,27 @@ class BinaryTransformer:
413
429
  return (~contains_keyword).cast(pl.UInt8)
414
430
 
415
431
 
432
+ class AutoDummifier:
433
+ """
434
+ A transformer that performs one-hot encoding on a categorical column,
435
+ automatically detecting the unique categories from the data.
436
+ """
437
+ def __call__(self, column: pl.Series) -> pl.DataFrame:
438
+ """
439
+ Executes the one-hot encoding logic.
440
+
441
+ Args:
442
+ column (pl.Series): The input Polars Series of categories.
443
+
444
+ Returns:
445
+ pl.DataFrame: A DataFrame with one-hot encoded columns.
446
+ Column names are auto-generated by Polars as
447
+ '{original_col_name}_{category_value}'.
448
+ """
449
+ # Ensure the column is treated as a string before creating dummies
450
+ return column.cast(pl.Utf8).to_dummies()
451
+
452
+
416
453
  class MultiBinaryDummifier:
417
454
  """
418
455
  A one-to-many transformer that creates multiple binary columns from a single
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "8.1.0"
3
+ version = "8.2.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }