PyPI - dragon-ml-toolbox - Versions diffs - 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl - Mend

dragon-ml-toolbox 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (8)

{dragon_ml_toolbox-3.5.0.dist-info → dragon_ml_toolbox-3.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 3.5.0
+Version: 3.6.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-3.5.0.dist-info → dragon_ml_toolbox-3.6.0.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
-dragon_ml_toolbox-3.5.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
-dragon_ml_toolbox-3.5.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
-ml_tools/ETL_engineering.py,sha256=9dojA9RqE7bq1A70tegsERszoqll7UmcJelVeQeefik,39519
+dragon_ml_toolbox-3.6.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-3.6.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
+ml_tools/ETL_engineering.py,sha256=URol7s45fVIdLqnhyOU1Etbi-D7MksFg-qtNwsKiunY,39488
 ml_tools/GUI_tools.py,sha256=uFx6zIrQZzDPSTtOSHz8ptz-fxZiQz-lXHcrqwuYV_E,20385
 ml_tools/MICE_imputation.py,sha256=ed-YeQkEAeHxTNkWIHs09T4YeYNF0aqAnrUTcdIEp9E,11372
 ml_tools/ML_callbacks.py,sha256=gHZk-lyzAax6iEtG26zHuoobdAZCFJ6BmI6pWoXkOrw,13189
@@ -13,13 +13,13 @@ ml_tools/VIF_factor.py,sha256=4b3HmrrolN7ZIAo16TWwLlExqj_xaa8MxbkXD1xPCys,10295
 ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ml_tools/_particle_swarm_optimization.py,sha256=b_eNNkA89Y40hj76KauivT8KLScH1B9wF2IXptOqkOw,22220
 ml_tools/_pytorch_models.py,sha256=bpWZsrSwCvHJQkR6UfoPpElsMv9AvmiNErNHC8NYB_I,10132
-ml_tools/data_exploration.py,sha256=IkpOyIRPKdu4qKeUdvvMvNPelSVWegNEKMqX3IInmpw,25003
+ml_tools/data_exploration.py,sha256=Ne6LHtHJFkv5SVBvTK-kmSS9OD-N1tT6TacXRVxyI9A,25220
 ml_tools/datasetmaster.py,sha256=N-uwfzWnl_qnoAqjbfS98I1pVNra5u6rhKLdWbFIReA,30122
 ml_tools/ensemble_learning.py,sha256=PPtBBLgLvaYOdY-MlcjXuxWWXf3JQavLNEysFgzjc_s,37470
 ml_tools/handle_excel.py,sha256=lwds7rDLlGSCWiWGI7xNg-Z7kxAepogp0lstSFa0590,12949
 ml_tools/logger.py,sha256=jC4Q2OqmDm8ZO9VpuZqBSWdXryqaJvLscqVJ6caNMOk,6009
 ml_tools/utilities.py,sha256=7cVWXjdxgSoIbZunuxJEOnJDSYp29liYsZexbrVDabs,23132
-dragon_ml_toolbox-3.5.0.dist-info/METADATA,sha256=yxz0lVEDn3k40f3nZk_pocW8WCOXifF6ItuUMJ8LFsM,3273
-dragon_ml_toolbox-3.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-3.5.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-3.5.0.dist-info/RECORD,,
+dragon_ml_toolbox-3.6.0.dist-info/METADATA,sha256=B_xp13t4M52KwRnUUv2AedBXAKBqHL04MBFha1eAYsI,3273
+dragon_ml_toolbox-3.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-3.6.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-3.6.0.dist-info/RECORD,,

ml_tools/ETL_engineering.py CHANGED Viewed

@@ -48,13 +48,12 @@ class ColumnCleaner:
     ## Usage Example
     ```python
-    phone_rules = {
-        # Matches (123) 456-7890 and reformats to 123-456-7890
-        r'\((\d{3})\)\s*(\d{3})-(\d{4})': r'$1-$2-$3'
+    id_rules = {
+        # Matches 'ID-12345' or 'ID 12345' and reformats to 'ID:12345'
+        r'ID[- ](\d+)': r'ID:$1'
     }
-    phone_cleaner = ColumnCleaner(column_name='phone_number', rules=phone_rules)
+    id_cleaner = ColumnCleaner(column_name='user_id', rules=id_rules)
     # This object would then be passed to a DataFrameCleaner.
     ```
     """
@@ -529,7 +528,7 @@ class KeywordDummifier:
         categorize_expr = categorize_expr.otherwise(None).alias("category")
-        temp_df = pl.DataFrame(categorize_expr)
+        temp_df = pl.select(categorize_expr)
         df_with_dummies = temp_df.to_dummies(columns=["category"])
         final_columns = []

ml_tools/data_exploration.py CHANGED Viewed

@@ -15,7 +15,7 @@ import re
 # Keep track of all available tools, show using `info()`
 __all__ = [
     "summarize_dataframe",
-    "drop_zero_only_columns",
+    "drop_constant_columns",
     "drop_rows_with_missing_data",
     "split_features_targets",
     "show_null_columns",
@@ -62,44 +62,49 @@ def summarize_dataframe(df: pd.DataFrame, round_digits: int = 2):
     return summary
-def drop_zero_only_columns(df: pd.DataFrame, verbose: bool=True) -> pd.DataFrame:
+def drop_constant_columns(df: pd.DataFrame, verbose: bool = True) -> pd.DataFrame:
     """
-    Removes columns from a pandas DataFrame that contain only zeros and null/NaN values.
+    Removes columns from a pandas DataFrame that contain only a single unique
+    value or are entirely null/NaN.
-    This utility is useful for cleaning data after dummification steps that may result in empty columns.
+    This utility is useful for cleaning data by removing constant features that
+    have no predictive value.
     Args:
         df (pd.DataFrame):
             The pandas DataFrame to clean.
+        verbose (bool):
+            If True, prints the names of the columns that were dropped.
+            Defaults to True.
     Returns:
         pd.DataFrame:
-            A new DataFrame with the empty columns removed.
+            A new DataFrame with the constant columns removed.
     """
     if not isinstance(df, pd.DataFrame):
         raise TypeError("Input must be a pandas DataFrame.")
     original_columns = set(df.columns)
     cols_to_keep = []
     for col_name in df.columns:
         column = df[col_name]
-        # Keep any column that is not numeric by default
-        if not is_numeric_dtype(column):
+        # We can apply this logic to all columns or only focus on numeric ones.
+        # if not is_numeric_dtype(column):
+        #     cols_to_keep.append(col_name)
+        #     continue
+        # Keep a column if it has more than one unique value (nunique ignores NaNs by default)
+        if column.nunique(dropna=True) > 1:
             cols_to_keep.append(col_name)
-            continue
-        # For numeric columns, check if there's at least one non-zero value.
-        if (column != 0).any():
-            cols_to_keep.append(col_name)
-    dropped_columns = original_columns - set(cols_to_keep)
+    dropped_columns = original_columns - set(cols_to_keep)
     if dropped_columns and verbose:
-        print(f"Dropped {len(dropped_columns)} columns:")
+        print(f"Dropped {len(dropped_columns)} constant columns:")
         for dropped_column in dropped_columns:
             print(f"    {dropped_column}")
     return df[cols_to_keep]

{dragon_ml_toolbox-3.5.0.dist-info → dragon_ml_toolbox-3.6.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-3.5.0.dist-info → dragon_ml_toolbox-3.6.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-3.5.0.dist-info → dragon_ml_toolbox-3.6.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-3.5.0.dist-info → dragon_ml_toolbox-3.6.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 3.5.0__py3-none-any.whl → 3.6.0__py3-none-any.whl