dragon-ml-toolbox 12.1.0__py3-none-any.whl → 12.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.1.0
3
+ Version: 12.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,5 @@
1
- dragon_ml_toolbox-12.1.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
- dragon_ml_toolbox-12.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
1
+ dragon_ml_toolbox-12.2.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
+ dragon_ml_toolbox-12.2.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
3
3
  ml_tools/ETL_cleaning.py,sha256=PLRSR-VYnt1nNT9XrcWq40SE0VzHCw7DQ8v9czfSQsU,20366
4
4
  ml_tools/ETL_engineering.py,sha256=l0I6Og9o4s6EODdk0kZXjbbC-a3vVPYy1FopP2BkQSQ,54909
5
5
  ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
@@ -24,7 +24,7 @@ ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
24
24
  ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
25
25
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
26
26
  ml_tools/custom_logger.py,sha256=OZqG7FR_UE6byzY3RDmlj08a336ZU-4DzNBMPLr_d5c,5881
27
- ml_tools/data_exploration.py,sha256=is9P4c4orIKW6gRhTeScZlCGYH9ODguxMtVlrVubb4E,42515
27
+ ml_tools/data_exploration.py,sha256=H-cHp6jL4u4Kl2L_fktcCdQWRdAzTC6kwFCrOHnzLNA,46549
28
28
  ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
29
29
  ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
30
30
  ml_tools/ensemble_learning.py,sha256=aTPeKthO4zRWBEaQJOUj8jEqVHiHjjOMXuiEWjI9NxM,21946
@@ -35,7 +35,7 @@ ml_tools/optimization_tools.py,sha256=bkKrTjukNOpxgVDMW5mUX5vQ72ckBcS5VA4eG8uZsO
35
35
  ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
36
36
  ml_tools/serde.py,sha256=k0qAwfMf13lVBQSgq5u9MSXEoo31iOA2-Ncm8XgMCMI,3974
37
37
  ml_tools/utilities.py,sha256=gef62GLK7ev5BWkkQekeJoVZqwf2mIuOlOfyCw6WdtE,13882
38
- dragon_ml_toolbox-12.1.0.dist-info/METADATA,sha256=PJbBSG9h6juu_srL07VVhgOIGqebQwn_rlI1RgZdTwo,6166
39
- dragon_ml_toolbox-12.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- dragon_ml_toolbox-12.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
- dragon_ml_toolbox-12.1.0.dist-info/RECORD,,
38
+ dragon_ml_toolbox-12.2.0.dist-info/METADATA,sha256=WS3Im1AwRObhKUkNPDkW0xRM8gdrylqavE9svIVRFKY,6166
39
+ dragon_ml_toolbox-12.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ dragon_ml_toolbox-12.2.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
+ dragon_ml_toolbox-12.2.0.dist-info/RECORD,,
@@ -3,7 +3,7 @@ from pandas.api.types import is_numeric_dtype
3
3
  import numpy as np
4
4
  import matplotlib.pyplot as plt
5
5
  import seaborn as sns
6
- from typing import Union, Literal, Dict, Tuple, List, Optional
6
+ from typing import Union, Literal, Dict, Tuple, List, Optional, Any
7
7
  from pathlib import Path
8
8
  import re
9
9
 
@@ -33,7 +33,8 @@ __all__ = [
33
33
  "match_and_filter_columns_by_regex",
34
34
  "standardize_percentages",
35
35
  "create_transformer_categorical_map",
36
- "reconstruct_one_hot"
36
+ "reconstruct_one_hot",
37
+ "reconstruct_binary"
37
38
  ]
38
39
 
39
40
 
@@ -1081,7 +1082,110 @@ def reconstruct_one_hot(
1081
1082
  unique_cols_to_drop = list(set(all_ohe_cols_to_drop))
1082
1083
  new_df.drop(columns=unique_cols_to_drop, inplace=True)
1083
1084
  _LOGGER.info(f"Dropped {len(unique_cols_to_drop)} original one-hot encoded columns.")
1084
-
1085
+
1086
+ _LOGGER.info(f"Successfully reconstructed {reconstructed_count} feature(s).")
1087
+
1088
+ return new_df
1089
+
1090
+
1091
def reconstruct_binary(
    df: pd.DataFrame,
    reconstruction_map: Dict[str, Tuple[str, Any, Any]],
    drop_original: bool = True,
    verbose: bool = True
) -> pd.DataFrame:
    """
    Reconstructs new categorical columns from existing binary (0/1) columns.

    Used to reverse a binary encoding by mapping 0 and 1 back to
    descriptive categorical labels.

    Args:
        df (pd.DataFrame):
            The input DataFrame.
        reconstruction_map (Dict[str, Tuple[str, Any, Any]]):
            A dictionary defining the reconstructions.
            Format:
            { "new_col_name": ("source_col_name", "label_for_0", "label_for_1") }
            Example:
            {
                "Sex": ("Sex_male", "Female", "Male"),
                "Smoker": ("Is_Smoker", "No", "Yes")
            }
        drop_original (bool):
            If True, the original binary source columns (e.g., "Sex_male")
            will be dropped from the returned DataFrame.
        verbose (bool):
            If True, prints the details of each reconstruction.

    Returns:
        pd.DataFrame:
            A new DataFrame with the reconstructed categorical columns.

    Raises:
        TypeError: If `df` is not a pandas DataFrame.
        ValueError: If `reconstruction_map` is not a dictionary or a
            configuration is invalid (e.g., column name collision).

    Notes:
        - The function operates on a copy of the DataFrame.
        - Rows with `NaN` in the source column will have `NaN` in the
          new column.
        - Values in the source column other than 0 or 1 (e.g., 2) will
          result in `NaN` in the new column.
    """
    if not isinstance(df, pd.DataFrame):
        _LOGGER.error("Input must be a pandas DataFrame.")
        raise TypeError()

    if not isinstance(reconstruction_map, dict):
        _LOGGER.error("`reconstruction_map` must be a dictionary with the required format.")
        raise ValueError()

    new_df = df.copy()
    source_cols_to_drop: List[str] = []
    reconstructed_count = 0

    _LOGGER.info(f"Attempting to reconstruct {len(reconstruction_map)} binary feature(s).")

    for new_col_name, config in reconstruction_map.items():

        # --- 1. Validation ---
        # BUG FIX: these error messages previously ended with "Skipping.",
        # but the function aborts by raising ValueError rather than skipping
        # the entry — the wording contradicted the actual control flow.
        if not (isinstance(config, tuple) and len(config) == 3):
            _LOGGER.error(f"Config for '{new_col_name}' is invalid. Must be a 3-item tuple.")
            raise ValueError()

        source_col, label_for_0, label_for_1 = config

        if source_col not in new_df.columns:
            _LOGGER.error(f"Source column '{source_col}' for new column '{new_col_name}' not found.")
            raise ValueError()

        if new_col_name in new_df.columns and verbose:
            _LOGGER.warning(f"New column '{new_col_name}' already exists and will be overwritten.")

        # A self-referencing mapping would drop the freshly built column
        # when drop_original=True, so it is rejected outright.
        if new_col_name == source_col:
            _LOGGER.error(f"New column name '{new_col_name}' cannot be the same as source column '{source_col}'.")
            raise ValueError()

        # --- 2. Reconstruction ---
        # .map() handles 0, 1, preserves NaNs, and converts any other value to NaN.
        mapping_dict = {0: label_for_0, 1: label_for_1}
        new_df[new_col_name] = new_df[source_col].map(mapping_dict)

        # --- 3. Logging/Tracking ---
        source_cols_to_drop.append(source_col)
        reconstructed_count += 1
        if verbose:
            print(f"  - Reconstructed '{new_col_name}' from '{source_col}' (0='{label_for_0}', 1='{label_for_1}').")

    # --- 4. Cleanup ---
    if drop_original and source_cols_to_drop:
        # Use set() to avoid duplicates if the same source col was used
        unique_cols_to_drop = list(set(source_cols_to_drop))
        new_df.drop(columns=unique_cols_to_drop, inplace=True)
        _LOGGER.info(f"Dropped {len(unique_cols_to_drop)} original binary source column(s).")

    _LOGGER.info(f"Successfully reconstructed {reconstructed_count} feature(s).")

    return new_df