dragon-ml-toolbox 10.2.1__tar.gz → 10.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (41)
  1. {dragon_ml_toolbox-10.2.1/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-10.3.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ETL_engineering.py +130 -13
  4. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/pyproject.toml +1 -1
  5. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/LICENSE +0 -0
  6. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/LICENSE-THIRD-PARTY.md +0 -0
  7. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/README.md +0 -0
  8. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  9. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  10. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  11. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  12. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ETL_cleaning.py +0 -0
  13. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/GUI_tools.py +0 -0
  14. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/MICE_imputation.py +0 -0
  15. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_callbacks.py +0 -0
  16. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_datasetmaster.py +0 -0
  17. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_evaluation.py +0 -0
  18. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_evaluation_multi.py +0 -0
  19. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_inference.py +0 -0
  20. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_models.py +0 -0
  21. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_optimization.py +0 -0
  22. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_scaler.py +0 -0
  23. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ML_trainer.py +0 -0
  24. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/PSO_optimization.py +0 -0
  25. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/RNN_forecast.py +0 -0
  26. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/SQL.py +0 -0
  27. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/VIF_factor.py +0 -0
  28. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/__init__.py +0 -0
  29. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/_logger.py +0 -0
  30. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/_script_info.py +0 -0
  31. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/custom_logger.py +0 -0
  32. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/data_exploration.py +0 -0
  33. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ensemble_evaluation.py +0 -0
  34. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ensemble_inference.py +0 -0
  35. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/ensemble_learning.py +0 -0
  36. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/handle_excel.py +0 -0
  37. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/keys.py +0 -0
  38. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/optimization_tools.py +0 -0
  39. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/path_manager.py +0 -0
  40. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/ml_tools/utilities.py +0 -0
  41. {dragon_ml_toolbox-10.2.1 → dragon_ml_toolbox-10.3.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 10.2.1
3
+ Version: 10.3.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 10.2.1
3
+ Version: 10.3.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,9 @@
1
1
  import polars as pl
2
2
  import re
3
+ from pathlib import Path
3
4
  from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
5
+ from .utilities import load_dataframe, save_dataframe
6
+ from .path_manager import make_fullpath
4
7
  from ._script_info import _script_info
5
8
  from ._logger import _LOGGER
6
9
 
@@ -182,9 +185,28 @@ class DataProcessor:
182
185
  if not processed_columns:
183
186
  _LOGGER.error("The transformation resulted in an empty DataFrame.")
184
187
  return pl.DataFrame()
185
-
188
+
189
+ _LOGGER.info(f"Processed dataframe with {len(processed_columns)} columns.")
190
+
186
191
  return pl.DataFrame(processed_columns)
187
192
 
193
+ def load_transform_save(self, input_path: Union[str,Path], output_path: Union[str,Path]):
194
+ """
195
+ Convenience wrapper for the transform method that includes automatic dataframe loading and saving.
196
+ """
197
+ # Validate paths
198
+ in_path = make_fullpath(input_path, enforce="file")
199
+ out_path = make_fullpath(output_path, make=True, enforce="file")
200
+
201
+ # load df
202
+ df, _ = load_dataframe(df_path=in_path, kind="polars", all_strings=True)
203
+
204
+ # Process
205
+ df_processed = self.transform(df)
206
+
207
+ # save processed df
208
+ save_dataframe(df=df_processed, save_dir=out_path.parent, filename=out_path.name)
209
+
188
210
  def __str__(self) -> str:
189
211
  """
190
212
  Provides a detailed, human-readable string representation of the
@@ -620,13 +642,14 @@ class MultiNumberExtractor:
620
642
  class RatioCalculator:
621
643
  """
622
644
  A transformer that parses a string ratio (e.g., "40:5" or "30/2") and
623
- computes the result of the division. It gracefully handles strings that
624
- do not match the pattern by returning null.
645
+ computes the result of the division. Includes robust handling for
646
+ zeros and single numbers.
625
647
  """
626
648
  def __init__(
627
649
  self,
628
- # Default pattern includes the full-width colon '：'
629
- regex_pattern: str = r"(\d+\.?\d*)\s*[:：/]\s*(\d+\.?\d*)"
650
+ regex_pattern: str = r"(\d+\.?\d*)\s*[:：/]\s*(\d+\.?\d*)",
651
+ handle_zeros: bool = False,
652
+ handle_single_number: bool = False
630
653
  ):
631
654
  # --- Robust Validation ---
632
655
  try:
@@ -642,26 +665,120 @@ class RatioCalculator:
642
665
  raise ValueError()
643
666
 
644
667
  self.regex_pattern = regex_pattern
668
+ self.handle_zeros = handle_zeros
669
+ self.handle_single_number = handle_single_number
645
670
 
646
671
  def __call__(self, column: pl.Series) -> pl.Series:
647
672
  """
648
- Applies the ratio calculation logic to the input column.
649
- This version uses .str.extract() for maximum stability.
673
+ Applies the ratio calculation logic to the input column. Uses .str.extract() for maximum stability and includes optional handling for zeros and single numbers.
650
674
  """
651
675
  # Extract numerator (group 1) and denominator (group 2) separately.
652
676
  numerator_expr = column.str.extract(self.regex_pattern, 1).cast(pl.Float64, strict=False)
653
677
  denominator_expr = column.str.extract(self.regex_pattern, 2).cast(pl.Float64, strict=False)
654
678
 
655
- # Calculate the ratio, handling division by zero.
656
- final_expr = pl.when(denominator_expr != 0).then(
657
- numerator_expr / denominator_expr
658
- ).otherwise(
659
- None # Handles both null denominators and division by zero
660
- )
679
+ # --- Logic for Requirement A: Special zero handling ---
680
+ if self.handle_zeros:
681
+ ratio_expr = (
682
+ pl.when(numerator_expr.is_not_null() & denominator_expr.is_not_null())
683
+ .then(
684
+ pl.when((numerator_expr == 0) & (denominator_expr == 0)).then(pl.lit(0.0))
685
+ .when((numerator_expr != 0) & (denominator_expr == 0)).then(numerator_expr)
686
+ .when((numerator_expr == 0) & (denominator_expr != 0)).then(denominator_expr)
687
+ .otherwise(numerator_expr / denominator_expr) # Default: both are non-zero
688
+ )
689
+ )
690
+ else:
691
+ # Original logic
692
+ ratio_expr = pl.when(denominator_expr != 0).then(
693
+ numerator_expr / denominator_expr
694
+ ).otherwise(
695
+ None # Handles null denominators and division by zero
696
+ )
697
+
698
+ # --- Logic for Requirement B: Handle single numbers as a fallback ---
699
+ if self.handle_single_number:
700
+ # Regex to match a string that is ONLY a valid float/int
701
+ single_number_regex = r"^\d+\.?\d*$"
702
+ single_number_expr = (
703
+ pl.when(column.str.contains(single_number_regex))
704
+ .then(column.cast(pl.Float64, strict=False))
705
+ .otherwise(None)
706
+ )
707
+ # If ratio_expr is null, try to fill it with single_number_expr
708
+ final_expr = ratio_expr.fill_null(single_number_expr)
709
+ else:
710
+ final_expr = ratio_expr
661
711
 
662
712
  return pl.select(final_expr.round(4)).to_series()
663
713
 
664
714
 
715
+ class TriRatioCalculator:
716
+ """
717
+ A transformer that handles three-part ("A:B:C") and two-part ("A:C")
718
+ ratios, enforcing a strict output structure.
719
+
720
+ - Three-part ratios produce A/B and A/C.
721
+ - Two-part ratios are assumed to be A:C and produce None for A/B.
722
+ - Single values produce None for both outputs.
723
+ """
724
+ def __init__(self, handle_zeros: bool = False):
725
+ """
726
+ Initializes the TriRatioCalculator.
727
+
728
+ Args:
729
+ handle_zeros (bool): If True, returns a valid value if either the denominator or numerator is zero; returns zero if both are zero.
730
+ """
731
+ self.handle_zeros = handle_zeros
732
+
733
+ def _calculate_ratio(self, num: pl.Expr, den: pl.Expr) -> pl.Expr:
734
+ """Helper to contain the core division logic."""
735
+ if self.handle_zeros:
736
+ # Special handling for zeros
737
+ expr = (
738
+ pl.when((num == 0) & (den == 0)).then(pl.lit(0.0))
739
+ .when((num != 0) & (den == 0)).then(num) # Return numerator
740
+ .when((num == 0) & (den != 0)).then(den) # Return denominator
741
+ .otherwise(num / den)
742
+ )
743
+ else:
744
+ # Default behavior: return null if denominator is 0
745
+ expr = pl.when(den != 0).then(num / den).otherwise(None)
746
+
747
+ return expr.round(4)
748
+
749
+ def __call__(self, column: pl.Series) -> pl.DataFrame:
750
+ """
751
+ Applies the robust tri-ratio logic using the lazy API.
752
+ """
753
+ # Wrap the input Series in a DataFrame to use the lazy expression API
754
+ temp_df = column.to_frame()
755
+
756
+ # Define all steps as lazy expressions
757
+ all_numbers_expr = pl.col(column.name).str.extract_all(r"(\d+\.?\d*)")
758
+ num_parts_expr = all_numbers_expr.list.len()
759
+
760
+ expr_A = all_numbers_expr.list.get(0).cast(pl.Float64)
761
+ expr_B = all_numbers_expr.list.get(1).cast(pl.Float64)
762
+ expr_C = all_numbers_expr.list.get(2).cast(pl.Float64)
763
+
764
+ # Define logic for each output column using expressions
765
+ ratio_ab_expr = pl.when(num_parts_expr == 3).then(
766
+ self._calculate_ratio(expr_A, expr_B)
767
+ ).otherwise(None)
768
+
769
+ ratio_ac_expr = pl.when(num_parts_expr == 3).then(
770
+ self._calculate_ratio(expr_A, expr_C)
771
+ ).when(num_parts_expr == 2).then(
772
+ self._calculate_ratio(expr_A, expr_B) # B is actually C in this case
773
+ ).otherwise(None)
774
+
775
+ # Execute the expressions and return the final DataFrame
776
+ return temp_df.select(
777
+ A_div_B=ratio_ab_expr,
778
+ A_div_C=ratio_ac_expr
779
+ )
780
+
781
+
665
782
  class CategoryMapper:
666
783
  """
667
784
  A transformer that maps string categories to specified numerical values using a dictionary.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "10.2.1"
3
+ version = "10.3.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }