PyPI - dragon-ml-toolbox - Versions diffs - 12.9.0__py3-none-any.whl → 12.9.2__py3-none-any.whl - Mend

dragon-ml-toolbox 12.9.0__py3-none-any.whl → 12.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (7) hide show

{dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.9.0
+Version: 12.9.2
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-dragon_ml_toolbox-12.9.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-12.9.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+dragon_ml_toolbox-12.9.2.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-12.9.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
 ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
 ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
@@ -24,7 +24,7 @@ ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/custom_logger.py,sha256=xot-VeZFigKjcVxADgzvI54vZO_MqMMejo7JmDED8Xo,5892
-ml_tools/data_exploration.py,sha256=bERsPSmV8h5YtJEzkVVYXkOyvdq60g-3cvJRhFjnI_A,50270
+ml_tools/data_exploration.py,sha256=haddQFsXAWzuf84NLItcZ4Q7vzN3YWjFoh7lPlWUczo,50679
 ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
 ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
 ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
@@ -35,7 +35,7 @@ ml_tools/optimization_tools.py,sha256=P074YCuZzkqkONnAsM-Zb9DTX_i8cRkkJLpwAWz6CR
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=UIshIesHRFmxr8F6B3LxGG8bYc1HHK-nlE3kENSZL18,5288
 ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
-dragon_ml_toolbox-12.9.0.dist-info/METADATA,sha256=AAbJFe1QFwEU3uFZV9mUOhXubpSBSRXAHO6zOtJiX10,6166
-dragon_ml_toolbox-12.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-12.9.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-12.9.0.dist-info/RECORD,,
+dragon_ml_toolbox-12.9.2.dist-info/METADATA,sha256=vwKDioQfPVheuLmZasMsZGFynib5C8FMc52Tn1Ql7k0,6166
+dragon_ml_toolbox-12.9.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-12.9.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-12.9.2.dist-info/RECORD,,

ml_tools/data_exploration.py CHANGED Viewed

@@ -364,7 +364,6 @@ def encode_categorical_features(
             - If True, encodes Null values as a distinct category 'null_label' with a value of 0. Other categories start from 1.
             - If False, Nulls are ignored and categories start from 0.
         null_label (str): Category to encode Nulls to if `encode_nulls` is True. If a name collision with `null_label` occurs, the fallback key will be "__NULL__".
         split_resulting_dataset (bool):
             - If True, returns two separate DataFrames, one with non-categorical columns and one with the encoded columns.
@@ -1025,6 +1024,7 @@ def reconstruct_one_hot(
     df: pd.DataFrame,
     features_to_reconstruct: List[Union[str, Tuple[str, Optional[str]]]],
     separator: str = '_',
+    baseline_category_name: Optional[str] = "Other",
     drop_original: bool = True,
     verbose: bool = True
 ) -> pd.DataFrame:
@@ -1042,19 +1042,22 @@ def reconstruct_one_hot(
             A list defining the features to reconstruct. This list can contain:
             - A string: (e.g., "Color")
-              This reconstructs the feature 'Color' and assumes all-zero rows represent missing data NaN.
+              This reconstructs the feature 'Color' and assumes all-zero rows represent the baseline category ("Other" by default).
             - A tuple: (e.g., ("Pet", "Dog"))
-              This reconstructs 'Pet' and maps all-zero rows to the baseline category "Dog" (handling 'drop_first=True' scenarios).
+              This reconstructs 'Pet' and maps all-zero rows to the baseline category "Dog".
             - A tuple with None: (e.g., ("Size", None))
-              This is explicit and behaves identically to just passing "Size". All-zero rows will be mapped to NaN.
+              This reconstructs 'Size' and maps all-zero rows to the NaN value.
             Example:
             [
-                "Mood",                      # All-zeros -> NaN
+                "Mood",                      # All-zeros -> "Other"
                 ("Color", "Red"),            # All-zeros -> "Red"
+                ("Size", None)               # All-zeros -> NaN
             ]
         separator (str):
             The character separating the base name from the categorical value in
             the column names (e.g., '_' in 'B_a').
+        baseline_category_name (str | None):
+            The baseline category name to use by default if it is not explicitly provided.
         drop_original (bool):
             If True, the original one-hot encoded columns will be dropped from
             the returned DataFrame.
@@ -1077,6 +1080,10 @@ def reconstruct_one_hot(
     if not isinstance(df, pd.DataFrame):
         _LOGGER.error("Input must be a pandas DataFrame.")
         raise TypeError()
+    if not (baseline_category_name is None or isinstance(baseline_category_name, str)):
+        _LOGGER.error("The baseline_category must be None or a string.")
+        raise TypeError()
     new_df = df.copy()
     all_ohe_cols_to_drop = []
@@ -1090,7 +1097,7 @@ def reconstruct_one_hot(
             if isinstance(item, str):
                 # Case 1: "Color"
                 base_name = item
-                baseline_val = None
+                baseline_val = baseline_category_name
             elif isinstance(item, tuple) and len(item) == 2:
                 # Case 2: ("Pet", "dog") or ("Size", None)
                 base_name, baseline_val = item

{dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-12.9.0.dist-info → dragon_ml_toolbox-12.9.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 12.9.0__py3-none-any.whl → 12.9.2__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 12.9.0__py3-none-any.whl → 12.9.2__py3-none-any.whl