PyPI - dragon-ml-toolbox - Versions diffs - 20.13.0__py3-none-any.whl → 20.14.0__py3-none-any.whl - Mend

dragon-ml-toolbox 20.13.0__py3-none-any.whl → 20.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

{dragon_ml_toolbox-20.13.0.dist-info → dragon_ml_toolbox-20.14.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 20.13.0
+Version: 20.14.0
 Summary: Complete pipelines and helper tools for data science and machine learning projects.
 Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-20.13.0.dist-info → dragon_ml_toolbox-20.14.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-dragon_ml_toolbox-20.13.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-20.13.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+dragon_ml_toolbox-20.14.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-20.14.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
 ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/ETL_cleaning/__init__.py,sha256=gLRHF-qzwpqKTvbbn9chIQELeUDh_XGpBRX28j-5IqI,545
@@ -80,7 +80,7 @@ ml_tools/ML_optimization/__init__.py,sha256=No18Dsw6Q9zPt8B9fpG0bWomuXmwDC7Dioki
 ml_tools/ML_optimization/_multi_dragon.py,sha256=zQhDxFY8FNxUlcbSnHMVArfojzYjgNa21jSE3pJmRW0,38956
 ml_tools/ML_optimization/_single_dragon.py,sha256=jh5-SK6NKAzbheQhquiYoROozk-RzUv1jiFkIzK_AFg,7288
 ml_tools/ML_optimization/_single_manual.py,sha256=h-_k9JmRqPkjTra1nu7AyYbSyWkYZ1R3utiNmW06WFs,21809
-ml_tools/ML_scaler/_ML_scaler.py,sha256=P75X0Sx8N-VxC2Qy8aG7mWaZlkTfjspiZDi1YiMQD1I,8872
+ml_tools/ML_scaler/_ML_scaler.py,sha256=NcwprqrAHMIKpkzMdExk99I2QpfTSbiJH8rDqmOlnkU,8870
 ml_tools/ML_scaler/__init__.py,sha256=SHDNyLsoOLl2OtkIb3pGg-JRs3E2bYJBgnHwH3vw_Tk,172
 ml_tools/ML_trainer/__init__.py,sha256=42kueHa7Z0b_yLbywNCgIxlW6WmgLBqkTFwKH7vFLXw,379
 ml_tools/ML_trainer/_base_trainer.py,sha256=0ATm672NRsjJ6nv_NEl6-OEd9Bst1-s5OPxfG4qe8Lg,18075
@@ -104,10 +104,10 @@ ml_tools/_core/__init__.py,sha256=m-VP0RW0tOTm9N5NI3kFNcpM7WtVgs0RK9pK3ZJRZQQ,14
 ml_tools/_core/_logger.py,sha256=xzhn_FouMDRVNwXGBGlPC9Ruq6i5uCrmNaS5jesguMU,4972
 ml_tools/_core/_schema_load_ops.py,sha256=KLs9vBzANz5ESe2wlP-C41N4VlgGil-ywcfvWKSOGss,1551
 ml_tools/_core/_script_info.py,sha256=LtFGt10gEvCnhIRMKJPi2yXkiGLcdr7lE-oIP2XGHzQ,234
-ml_tools/data_exploration/__init__.py,sha256=efUBsruHL56B429tUadl3PdG73zAF639Y430uMQRfko,1917
-ml_tools/data_exploration/_analysis.py,sha256=PJNrEBz5ZZXHoUlQ6fh9Y86nzPQrLpVPv2Ye4NfOxgs,14181
+ml_tools/data_exploration/__init__.py,sha256=XNA8gcRx5ifrv092HA7HSpek8havlk_3RZi9aq9dSjg,1957
+ml_tools/data_exploration/_analysis.py,sha256=JSoFJSkv4-_v9YxxmjHZ_PeFRneDENjSEo2sy_uC4oY,14196
 ml_tools/data_exploration/_cleaning.py,sha256=pAZOXgGK35j7O8q6cnyTwYK1GLNnD04A8p2fSyMB1mg,20906
-ml_tools/data_exploration/_features.py,sha256=Z1noJfDxBzFRfusFp6NlpLF2NItuZuzFHq4ssWFqny4,26273
+ml_tools/data_exploration/_features.py,sha256=twJ6OixU4ItRXA8rPJRfg2N9QVsbn38CFqJiLcXav1A,28664
 ml_tools/data_exploration/_plotting.py,sha256=zH1dPcIoAlOuww23xIoBCsQOAshPPv9OyGposOA2RvI,19883
 ml_tools/data_exploration/_schema_ops.py,sha256=Fd6fBGGv4OpxmJ1HG9pith6QL90z0tzssCvzkQxlEEQ,11083
 ml_tools/ensemble_evaluation/__init__.py,sha256=t4Gr8EGEk8RLatyc92-S0BzbQvdvodzoF-qDAH2qjVg,546
@@ -143,7 +143,7 @@ ml_tools/utilities/__init__.py,sha256=h4lE3SQstg-opcQj6QSKhu-HkqSbmHExsWoM9vC5D9
 ml_tools/utilities/_translate.py,sha256=U8hRPa3PmTpIf9n9yR3gBGmp_hkcsjQLwjAHSHc0WHs,10325
 ml_tools/utilities/_utility_save_load.py,sha256=EFvFaTaHahDQWdJWZr-j7cHqRbG_Xrpc96228JhV-bs,16773
 ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
-dragon_ml_toolbox-20.13.0.dist-info/METADATA,sha256=bTnTpMlvOFu2IlYpmc0QphbYeqbslxzptluUbEWaO-s,7889
-dragon_ml_toolbox-20.13.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-dragon_ml_toolbox-20.13.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-20.13.0.dist-info/RECORD,,
+dragon_ml_toolbox-20.14.0.dist-info/METADATA,sha256=32IleSQa7t7E42ZB5rM32Lf1MlSAMtKkU-TFky3VckA,7889
+dragon_ml_toolbox-20.14.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+dragon_ml_toolbox-20.14.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-20.14.0.dist-info/RECORD,,

ml_tools/ML_scaler/_ML_scaler.py CHANGED Viewed

@@ -99,7 +99,7 @@ class DragonScaler:
             std = torch.sqrt(torch.clamp(variance, min=1e-8))
         if verbose >= 2:
-            _LOGGER.info(f"Scaler fitted on {n_total} samples for {num_continuous_features} features (Welford's).")
+            _LOGGER.info(f"Scaler fitted on {n_total} samples for {num_continuous_features} columns (Welford's).")
         return cls(mean=mean_global, std=std, continuous_feature_indices=continuous_feature_indices)
     @classmethod
@@ -121,7 +121,7 @@ class DragonScaler:
         std = torch.where(std == 0, torch.tensor(1.0, device=data.device), std)
         if verbose >= 2:
-            _LOGGER.info(f"Scaler fitted on tensor with {data.shape[0]} samples for {num_features} features.")
+            _LOGGER.info(f"Scaler fitted on tensor with {data.shape[0]} samples for {num_features} columns.")
         return cls(mean=mean, std=std, continuous_feature_indices=indices)

ml_tools/data_exploration/__init__.py CHANGED Viewed

@@ -33,6 +33,7 @@ from ._features import (
     reconstruct_one_hot,
     reconstruct_binary,
     reconstruct_multibinary,
+    filter_subset,
 )
 from ._schema_ops import (
@@ -51,6 +52,7 @@ __all__ = [
     "drop_columns_with_missing_data",
     "drop_macro",
     "clean_column_names",
+    "filter_subset",
     "plot_value_distributions",
     "split_features_targets",
     "split_continuous_binary",

ml_tools/data_exploration/_analysis.py CHANGED Viewed

@@ -34,7 +34,7 @@ def summarize_dataframe(df: pd.DataFrame, round_digits: int = 2):
     """
     summary = pd.DataFrame({
         'Data Type': df.dtypes,
-        'Completeness %': (df.notnull().mean() * 100).round(2),
+        'Completeness %': (df.notnull().mean() * 100).round(2), # type: ignore
         'Unique Values': df.nunique(),
         # 'Missing %': (df.isnull().mean() * 100).round(2)
     })

ml_tools/data_exploration/_features.py CHANGED Viewed

@@ -657,3 +657,66 @@ def reconstruct_multibinary(
         _LOGGER.info(f"Reconstructed {converted_count} binary columns matching '{pattern}'.")
     return new_df, target_columns
+def filter_subset(
+    df: pd.DataFrame,
+    filters: Union[dict[str, Any], dict[str, list[Any]]],
+    drop_filter_cols: bool = True,
+    reset_index: bool = True,
+    verbose: int = 3
+) -> pd.DataFrame:
+    """
+    Filters the DataFrame based on a dictionary of column-value conditions.
+    Supports:
+    - Single value matching (e.g., {"Color": "Blue"})
+    - Multiple value matching (e.g., {"Color": ["Blue", "Red"]}) -> OR logic within column.
+    - Multiple column filtering (e.g., {"Color": "Blue", "Size": "Large"}) -> AND logic between columns.
+    Args:
+        df (pd.DataFrame): Input DataFrame.
+        filters (dict[str, Any] | dict[str, list[Any]]): Dictionary where keys are column names and values are the target values (scalar or list).
+        drop_filter_cols (bool): If True, drops the columns used for filtering from the result.
+        reset_index (bool): If True, resets the index of the resulting DataFrame.
+        verbose (int): Verbosity level.
+    Returns:
+        pd.DataFrame: The filtered DataFrame.
+    """
+    df_filtered = df.copy()
+    # Validate columns exist
+    missing_cols = [col for col in filters.keys() if col not in df.columns]
+    if missing_cols:
+        _LOGGER.error(f"Filter columns not found: {missing_cols}")
+        raise ValueError()
+    if verbose >= 2:
+        _LOGGER.info(f"Original shape: {df.shape}")
+    for col, value in filters.items():
+        # Handle list of values (OR logic within column)
+        if isinstance(value, list):
+            df_filtered = df_filtered[df_filtered[col].isin(value)]
+        # Handle single value
+        else:
+            # Warn if the value is a floating point due to potential precision issues
+            if isinstance(value, float) and verbose >= 1:
+                _LOGGER.warning(f"Filtering on column '{col}' with float value '{value}'.")
+            df_filtered = df_filtered[df_filtered[col] == value]
+    if drop_filter_cols:
+        if verbose >= 3:
+            _LOGGER.info(f"Dropping filter columns: {list(filters.keys())}")
+        df_filtered.drop(columns=list(filters.keys()), inplace=True)
+    if reset_index:
+        if verbose >= 3:
+            _LOGGER.info("Resetting index of the filtered DataFrame.")
+        df_filtered.reset_index(drop=True, inplace=True)
+    if verbose >= 2:
+        _LOGGER.info(f"Filtered shape: {df_filtered.shape}")
+    return df_filtered

{dragon_ml_toolbox-20.13.0.dist-info → dragon_ml_toolbox-20.14.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.13.0.dist-info → dragon_ml_toolbox-20.14.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.13.0.dist-info → dragon_ml_toolbox-20.14.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.13.0.dist-info → dragon_ml_toolbox-20.14.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 20.13.0__py3-none-any.whl → 20.14.0__py3-none-any.whl

dragon-ml-toolbox 20.13.0__py3-none-any.whl → 20.14.0__py3-none-any.whl