dragon-ml-toolbox 4.2.1__py3-none-any.whl → 4.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 4.2.1
3
+ Version: 4.2.2
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
- dragon_ml_toolbox-4.2.1.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
- dragon_ml_toolbox-4.2.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
3
- ml_tools/ETL_engineering.py,sha256=rlu0bUekdKREcTR0x1jn_TSEqhxgfq3QU71hy6ZyaD8,39503
1
+ dragon_ml_toolbox-4.2.2.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
+ dragon_ml_toolbox-4.2.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
3
+ ml_tools/ETL_engineering.py,sha256=P7HN_e3vfmrOqDDK-IenyRSFQPr0N3V9e2gN75QFVWs,39372
4
4
  ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
5
5
  ml_tools/MICE_imputation.py,sha256=b6ZTs8RedXFifOpuMCzr68xM16mCBVh1Ua6kcGfiVtg,11462
6
6
  ml_tools/ML_callbacks.py,sha256=0a-Rbr0Xp_B1FNopOKBBmuJ4MqazS5JgDiT7wx1dHvE,13161
@@ -24,7 +24,7 @@ ml_tools/handle_excel.py,sha256=J9iwIqMZemoxK49J5osSwp9Ge0h9YTKyYGbOm53hcno,1300
24
24
  ml_tools/keys.py,sha256=kK9UF-hek2VcPGFILCKl5geoN6flmMOu7IzhdEA6z5Y,1068
25
25
  ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
26
26
  ml_tools/utilities.py,sha256=mz-M351DzxWxnYVcLX-7ZQ6c-RGoCV9g4VTS9Qif2Es,18348
27
- dragon_ml_toolbox-4.2.1.dist-info/METADATA,sha256=mzW1BLOxrCKZAoZgqzYRcNhHpO4fTNxDGvUwuF5wG88,6572
28
- dragon_ml_toolbox-4.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- dragon_ml_toolbox-4.2.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
30
- dragon_ml_toolbox-4.2.1.dist-info/RECORD,,
27
+ dragon_ml_toolbox-4.2.2.dist-info/METADATA,sha256=11uknvnkfooZGVSt8T178Ru-PQgBb_iOp3tujK_bDQ4,6572
28
+ dragon_ml_toolbox-4.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ dragon_ml_toolbox-4.2.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
30
+ dragon_ml_toolbox-4.2.2.dist-info/RECORD,,
@@ -3,6 +3,7 @@ import re
3
3
  from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
4
4
  from ._script_info import _script_info
5
5
  from ._logger import _LOGGER
6
+ import warnings
6
7
 
7
8
 
8
9
  __all__ = [
@@ -50,7 +51,7 @@ class ColumnCleaner:
50
51
  ```python
51
52
  id_rules = {
52
53
  # Matches 'ID-12345' or 'ID 12345' and reformats to 'ID:12345'
53
- r'ID[- ](\d+)': r'ID:$1'
54
+ r'ID[- ](\\d+)': r'ID:$1'
54
55
  }
55
56
 
56
57
  id_cleaner = ColumnCleaner(column_name='user_id', rules=id_rules)
@@ -700,26 +701,28 @@ class MultiNumberExtractor:
700
701
 
701
702
  class RatioCalculator:
702
703
  """
703
- A transformer that parses a string ratio (e.g., "40:5" or "30/2") and computes the result of the division.
704
-
705
- Args:
706
- regex_pattern (str, optional):
707
- The regex pattern to find the numerator and denominator. It MUST
708
- contain exactly two capturing groups: the first for the
709
- numerator and the second for the denominator. Defaults to a
710
- pattern that handles common delimiters like ':' and '/'.
704
+ A transformer that parses a string ratio (e.g., "40:5" or "30/2") and
705
+ computes the result of the division. It gracefully handles strings that
706
+ do not match the pattern by returning null.
711
707
  """
712
708
  def __init__(
713
709
  self,
714
- regex_pattern: str = r"(\d+\.?\d*)\s*[:/]\s*(\d+\.?\d*)"
710
+ # Default pattern includes the full-width colon '：'
711
+ regex_pattern: str = r"(\d+\.?\d*)\s*[:：/]\s*(\d+\.?\d*)"
715
712
  ):
716
- # --- Validation ---
713
+ # --- Robust Validation ---
717
714
  try:
718
- if re.compile(regex_pattern).groups != 2:
715
+ compiled_pattern = re.compile(regex_pattern)
716
+ if compiled_pattern.groups != 2:
719
717
  raise ValueError(
720
- "regex_pattern must contain exactly two "
718
+ "RatioCalculator regex_pattern must contain exactly two "
721
719
  "capturing groups '(...)'."
722
720
  )
721
+ if compiled_pattern.groupindex:
722
+ raise ValueError(
723
+ "RatioCalculator must be initialized with unnamed capturing groups "
724
+ "(e.g., '(\\d+)'), not named groups (e.g., '(?P<name>\\d+)')."
725
+ )
723
726
  except re.error as e:
724
727
  raise ValueError(f"Invalid regex pattern provided: {e}") from e
725
728
 
@@ -728,27 +731,20 @@ class RatioCalculator:
728
731
  def __call__(self, column: pl.Series) -> pl.Series:
729
732
  """
730
733
  Applies the ratio calculation logic to the input column.
731
-
732
- Args:
733
- column (pl.Series): The input Polars Series of ratio strings.
734
-
735
- Returns:
736
- pl.Series: A new Series of floats containing the division result.
737
- Returns null for invalid formats or division by zero.
734
+ This version uses .str.extract() for maximum stability.
738
735
  """
739
- # .extract_groups returns a struct with a field for each capture group
740
- # e.g., {"group_1": "40", "group_2": "5"}
741
- groups = column.str.extract_groups(self.regex_pattern)
736
+ # Extract numerator (group 1) and denominator (group 2) separately.
737
+ numerator_expr = column.str.extract(self.regex_pattern, 1).cast(pl.Float64, strict=False)
738
+ denominator_expr = column.str.extract(self.regex_pattern, 2).cast(pl.Float64, strict=False)
742
739
 
743
- # Extract numerator and denominator, casting to float
744
- # strict=False ensures that non-matches become null
745
- numerator = groups.struct.field("group_1").cast(pl.Float64, strict=False)
746
- denominator = groups.struct.field("group_2").cast(pl.Float64, strict=False)
740
+ # Calculate the ratio, handling division by zero.
741
+ final_expr = pl.when(denominator_expr != 0).then(
742
+ numerator_expr / denominator_expr
743
+ ).otherwise(
744
+ None # Handles both null denominators and division by zero
745
+ )
747
746
 
748
- # Safely perform division, returning null if denominator is 0
749
- final_expr = pl.when(denominator != 0).then(numerator / denominator).otherwise(None)
750
-
751
- return pl.select(final_expr).to_series()
747
+ return pl.select(final_expr.round(4)).to_series()
752
748
 
753
749
 
754
750
  class CategoryMapper: