PyPI - dragon-ml-toolbox - Versions diffs - 3.2.0__tar.gz → 3.2.1__tar.gz - Mend

dragon-ml-toolbox 3.2.0 (tar.gz) → 3.2.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (30) [hide / show]

{dragon_ml_toolbox-3.2.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-3.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 3.2.0
+Version: 3.2.1
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1/dragon_ml_toolbox.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 3.2.0
+Version: 3.2.1
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/ml_tools/ETL_engineering.py RENAMED Viewed

@@ -417,14 +417,17 @@ class KeywordDummifier:
             `group_name` at the same index and contains the keywords to search for.
         case_insensitive (bool):
             If True, keyword matching ignores case.
+        drop_empty (bool):
+            If True, columns that contain no positive matches (all zeros) will be dropped from the final output.
     """
-    def __init__(self, group_names: List[str], group_keywords: List[List[str]], case_insensitive: bool = True):
+    def __init__(self, group_names: List[str], group_keywords: List[List[str]], case_insensitive: bool = True, drop_empty: bool = True):
         if len(group_names) != len(group_keywords):
             raise ValueError("Initialization failed: 'group_names' and 'group_keywords' must have the same length.")
         self.group_names = group_names
         self.group_keywords = group_keywords
         self.case_insensitive = case_insensitive
+        self.drop_empty = drop_empty
     def __call__(self, column: pl.Series) -> pl.DataFrame:
         """
@@ -471,7 +474,16 @@ class KeywordDummifier:
                 # If a group had no matches, create a column of zeros
                 final_columns.append(pl.lit(0, dtype=pl.UInt8).alias(name))
-        return pl.DataFrame(final_columns)
+        # First, create a full DataFrame with all potential columns
+        result_df = pl.DataFrame(final_columns)
+        # If drop_empty is True, filter out all-zero columns
+        if self.drop_empty:
+            # A column is kept if its sum is greater than 0
+            cols_to_keep = [col for col in result_df if col.sum() > 0]
+            return result_df.select(cols_to_keep)
+        return result_df
 class NumberExtractor:

{dragon_ml_toolbox-3.2.0 → dragon_ml_toolbox-3.2.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "3.2.0"
+version = "3.2.1"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl Loza", email = "luigiloza@gmail.com" }