dragon-ml-toolbox 4.2.1__py3-none-any.whl → 4.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-4.2.1.dist-info → dragon_ml_toolbox-4.2.2.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-4.2.1.dist-info → dragon_ml_toolbox-4.2.2.dist-info}/RECORD +7 -7
- ml_tools/ETL_engineering.py +27 -31
- {dragon_ml_toolbox-4.2.1.dist-info → dragon_ml_toolbox-4.2.2.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-4.2.1.dist-info → dragon_ml_toolbox-4.2.2.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-4.2.1.dist-info → dragon_ml_toolbox-4.2.2.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-4.2.1.dist-info → dragon_ml_toolbox-4.2.2.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
dragon_ml_toolbox-4.2.
|
|
2
|
-
dragon_ml_toolbox-4.2.
|
|
3
|
-
ml_tools/ETL_engineering.py,sha256=
|
|
1
|
+
dragon_ml_toolbox-4.2.2.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
|
|
2
|
+
dragon_ml_toolbox-4.2.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
|
|
3
|
+
ml_tools/ETL_engineering.py,sha256=P7HN_e3vfmrOqDDK-IenyRSFQPr0N3V9e2gN75QFVWs,39372
|
|
4
4
|
ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
|
|
5
5
|
ml_tools/MICE_imputation.py,sha256=b6ZTs8RedXFifOpuMCzr68xM16mCBVh1Ua6kcGfiVtg,11462
|
|
6
6
|
ml_tools/ML_callbacks.py,sha256=0a-Rbr0Xp_B1FNopOKBBmuJ4MqazS5JgDiT7wx1dHvE,13161
|
|
@@ -24,7 +24,7 @@ ml_tools/handle_excel.py,sha256=J9iwIqMZemoxK49J5osSwp9Ge0h9YTKyYGbOm53hcno,1300
|
|
|
24
24
|
ml_tools/keys.py,sha256=kK9UF-hek2VcPGFILCKl5geoN6flmMOu7IzhdEA6z5Y,1068
|
|
25
25
|
ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
|
|
26
26
|
ml_tools/utilities.py,sha256=mz-M351DzxWxnYVcLX-7ZQ6c-RGoCV9g4VTS9Qif2Es,18348
|
|
27
|
-
dragon_ml_toolbox-4.2.
|
|
28
|
-
dragon_ml_toolbox-4.2.
|
|
29
|
-
dragon_ml_toolbox-4.2.
|
|
30
|
-
dragon_ml_toolbox-4.2.
|
|
27
|
+
dragon_ml_toolbox-4.2.2.dist-info/METADATA,sha256=11uknvnkfooZGVSt8T178Ru-PQgBb_iOp3tujK_bDQ4,6572
|
|
28
|
+
dragon_ml_toolbox-4.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
29
|
+
dragon_ml_toolbox-4.2.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
30
|
+
dragon_ml_toolbox-4.2.2.dist-info/RECORD,,
|
ml_tools/ETL_engineering.py
CHANGED
|
@@ -3,6 +3,7 @@ import re
|
|
|
3
3
|
from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
|
|
4
4
|
from ._script_info import _script_info
|
|
5
5
|
from ._logger import _LOGGER
|
|
6
|
+
import warnings
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
@@ -50,7 +51,7 @@ class ColumnCleaner:
|
|
|
50
51
|
```python
|
|
51
52
|
id_rules = {
|
|
52
53
|
# Matches 'ID-12345' or 'ID 12345' and reformats to 'ID:12345'
|
|
53
|
-
r'ID[- ](
|
|
54
|
+
r'ID[- ](\\d+)': r'ID:$1'
|
|
54
55
|
}
|
|
55
56
|
|
|
56
57
|
id_cleaner = ColumnCleaner(column_name='user_id', rules=id_rules)
|
|
@@ -700,26 +701,28 @@ class MultiNumberExtractor:
|
|
|
700
701
|
|
|
701
702
|
class RatioCalculator:
|
|
702
703
|
"""
|
|
703
|
-
A transformer that parses a string ratio (e.g., "40:5" or "30/2") and
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
regex_pattern (str, optional):
|
|
707
|
-
The regex pattern to find the numerator and denominator. It MUST
|
|
708
|
-
contain exactly two capturing groups: the first for the
|
|
709
|
-
numerator and the second for the denominator. Defaults to a
|
|
710
|
-
pattern that handles common delimiters like ':' and '/'.
|
|
704
|
+
A transformer that parses a string ratio (e.g., "40:5" or "30/2") and
|
|
705
|
+
computes the result of the division. It gracefully handles strings that
|
|
706
|
+
do not match the pattern by returning null.
|
|
711
707
|
"""
|
|
712
708
|
def __init__(
|
|
713
709
|
self,
|
|
714
|
-
|
|
710
|
+
# Default pattern includes the full-width colon '：' (U+FF1A)
|
|
711
|
+
regex_pattern: str = r"(\d+\.?\d*)\s*[:：/]\s*(\d+\.?\d*)"
|
|
715
712
|
):
|
|
716
|
-
# --- Validation ---
|
|
713
|
+
# --- Robust Validation ---
|
|
717
714
|
try:
|
|
718
|
-
|
|
715
|
+
compiled_pattern = re.compile(regex_pattern)
|
|
716
|
+
if compiled_pattern.groups != 2:
|
|
719
717
|
raise ValueError(
|
|
720
|
-
"regex_pattern must contain exactly two "
|
|
718
|
+
"RatioCalculator regex_pattern must contain exactly two "
|
|
721
719
|
"capturing groups '(...)'."
|
|
722
720
|
)
|
|
721
|
+
if compiled_pattern.groupindex:
|
|
722
|
+
raise ValueError(
|
|
723
|
+
"RatioCalculator must be initialized with unnamed capturing groups "
|
|
724
|
+
"(e.g., '(\\d+)'), not named groups (e.g., '(?P<name>\\d+)')."
|
|
725
|
+
)
|
|
723
726
|
except re.error as e:
|
|
724
727
|
raise ValueError(f"Invalid regex pattern provided: {e}") from e
|
|
725
728
|
|
|
@@ -728,27 +731,20 @@ class RatioCalculator:
|
|
|
728
731
|
def __call__(self, column: pl.Series) -> pl.Series:
|
|
729
732
|
"""
|
|
730
733
|
Applies the ratio calculation logic to the input column.
|
|
731
|
-
|
|
732
|
-
Args:
|
|
733
|
-
column (pl.Series): The input Polars Series of ratio strings.
|
|
734
|
-
|
|
735
|
-
Returns:
|
|
736
|
-
pl.Series: A new Series of floats containing the division result.
|
|
737
|
-
Returns null for invalid formats or division by zero.
|
|
734
|
+
This version uses .str.extract() for maximum stability.
|
|
738
735
|
"""
|
|
739
|
-
#
|
|
740
|
-
|
|
741
|
-
|
|
736
|
+
# Extract numerator (group 1) and denominator (group 2) separately.
|
|
737
|
+
numerator_expr = column.str.extract(self.regex_pattern, 1).cast(pl.Float64, strict=False)
|
|
738
|
+
denominator_expr = column.str.extract(self.regex_pattern, 2).cast(pl.Float64, strict=False)
|
|
742
739
|
|
|
743
|
-
#
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
740
|
+
# Calculate the ratio, handling division by zero.
|
|
741
|
+
final_expr = pl.when(denominator_expr != 0).then(
|
|
742
|
+
numerator_expr / denominator_expr
|
|
743
|
+
).otherwise(
|
|
744
|
+
None # Handles both null denominators and division by zero
|
|
745
|
+
)
|
|
747
746
|
|
|
748
|
-
|
|
749
|
-
final_expr = pl.when(denominator != 0).then(numerator / denominator).otherwise(None)
|
|
750
|
-
|
|
751
|
-
return pl.select(final_expr).to_series()
|
|
747
|
+
return pl.select(final_expr.round(4)).to_series()
|
|
752
748
|
|
|
753
749
|
|
|
754
750
|
class CategoryMapper:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|