dragon-ml-toolbox 8.2.0__py3-none-any.whl → 9.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/METADATA +5 -1
- dragon_ml_toolbox-9.0.0.dist-info/RECORD +35 -0
- ml_tools/ETL_engineering.py +177 -79
- ml_tools/GUI_tools.py +5 -5
- ml_tools/MICE_imputation.py +12 -8
- ml_tools/ML_callbacks.py +6 -3
- ml_tools/ML_datasetmaster.py +37 -20
- ml_tools/ML_evaluation.py +4 -4
- ml_tools/ML_evaluation_multi.py +26 -17
- ml_tools/ML_inference.py +30 -23
- ml_tools/ML_models.py +14 -14
- ml_tools/ML_optimization.py +4 -3
- ml_tools/ML_scaler.py +7 -7
- ml_tools/ML_trainer.py +17 -15
- ml_tools/PSO_optimization.py +16 -8
- ml_tools/RNN_forecast.py +1 -1
- ml_tools/SQL.py +22 -13
- ml_tools/VIF_factor.py +7 -6
- ml_tools/_logger.py +105 -7
- ml_tools/custom_logger.py +12 -8
- ml_tools/data_exploration.py +20 -15
- ml_tools/ensemble_evaluation.py +10 -6
- ml_tools/ensemble_inference.py +18 -18
- ml_tools/ensemble_learning.py +8 -5
- ml_tools/handle_excel.py +15 -11
- ml_tools/optimization_tools.py +3 -4
- ml_tools/path_manager.py +21 -15
- ml_tools/utilities.py +35 -26
- dragon_ml_toolbox-8.2.0.dist-info/RECORD +0 -36
- ml_tools/_ML_optimization_multi.py +0 -231
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/top_level.txt +0 -0
{dragon_ml_toolbox-8.2.0.dist-info → dragon_ml_toolbox-9.0.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 8.2.0
+Version: 9.0.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -17,6 +17,7 @@ Requires-Dist: pandas; extra == "base"
 Requires-Dist: numpy; extra == "base"
 Requires-Dist: polars; extra == "base"
 Requires-Dist: joblib; extra == "base"
+Requires-Dist: colorlog; extra == "base"
 Provides-Extra: ml
 Requires-Dist: numpy>=2.0; extra == "ml"
 Requires-Dist: pandas; extra == "ml"
@@ -37,6 +38,7 @@ Requires-Dist: shap; extra == "ml"
 Requires-Dist: tqdm; extra == "ml"
 Requires-Dist: Pillow; extra == "ml"
 Requires-Dist: evotorch; extra == "ml"
+Requires-Dist: colorlog; extra == "ml"
 Provides-Extra: mice
 Requires-Dist: numpy<2.0; extra == "mice"
 Requires-Dist: pandas; extra == "mice"
@@ -48,6 +50,7 @@ Requires-Dist: matplotlib; extra == "mice"
 Requires-Dist: statsmodels; extra == "mice"
 Requires-Dist: lightgbm<=4.5.0; extra == "mice"
 Requires-Dist: shap; extra == "mice"
+Requires-Dist: colorlog; extra == "mice"
 Provides-Extra: pytorch
 Requires-Dist: torch; extra == "pytorch"
 Requires-Dist: torchvision; extra == "pytorch"
@@ -59,6 +62,7 @@ Requires-Dist: ipykernel; extra == "excel"
 Requires-Dist: notebook; extra == "excel"
 Requires-Dist: jupyterlab; extra == "excel"
 Requires-Dist: ipywidgets; extra == "excel"
+Requires-Dist: colorlog; extra == "excel"
 Provides-Extra: gui-boost
 Requires-Dist: numpy; extra == "gui-boost"
 Requires-Dist: joblib; extra == "gui-boost"
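
Note: colorlog is added to every dependency group above, which matches the rewrite of ml_tools/_logger.py (+105 -7) listed in the file summary. The diff does not show the new logger implementation itself; the following is a minimal sketch of a colorlog-backed module logger, assuming a hypothetical logger name, format, and color scheme:

import logging
import colorlog

# Sketch only: the handler format, colors, and logger name below are
# illustrative assumptions, not the actual contents of ml_tools/_logger.py.
handler = colorlog.StreamHandler()
handler.setFormatter(colorlog.ColoredFormatter(
    "%(log_color)s[%(levelname)s]%(reset)s %(message)s",
    log_colors={
        "DEBUG": "cyan",
        "INFO": "green",
        "WARNING": "yellow",
        "ERROR": "red",
        "CRITICAL": "bold_red",
    },
))
_LOGGER = logging.getLogger("dragon_ml_toolbox")  # hypothetical name
_LOGGER.addHandler(handler)
_LOGGER.setLevel(logging.INFO)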
dragon_ml_toolbox-9.0.0.dist-info/RECORD
ADDED
@@ -0,0 +1,35 @@
+dragon_ml_toolbox-9.0.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-9.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
+ml_tools/ETL_engineering.py,sha256=SH8b9BSR79cib49YpIixjayaruD0qftnW7FV3xskoOs,44876
+ml_tools/GUI_tools.py,sha256=kEQWg-bog3pB5tI22gMGKWaCGHnz9TB2Lvvfhf5F2CI,45412
+ml_tools/MICE_imputation.py,sha256=kVSythWfxJFR4-2mtcYCWQaQ1Oz5yyx_SJu5gjnS7H8,11670
+ml_tools/ML_callbacks.py,sha256=JPvEw_cW5tYNJ2rMSgnNrKLuni_UrmuhDFaOw-u2SvA,13926
+ml_tools/ML_datasetmaster.py,sha256=CBZFpvm0qiY-8gP89iKTkd7jvU-rGQcJwk-_mBJmRSg,29273
+ml_tools/ML_evaluation.py,sha256=28JJ2M71p4pxniwav2Hv3b1a5dsvaoIYNLm-UJQuXvY,16002
+ml_tools/ML_evaluation_multi.py,sha256=2jTSNFCu8cz5C05EusnrDyffs59M2Fq3UXSHxo2TR1A,12515
+ml_tools/ML_inference.py,sha256=SGDPiPxs_OYDKKRZziFMyaWcC8A37c70W9t-dMP5niI,23066
+ml_tools/ML_models.py,sha256=Dl2mTMgVCtnNCSRlyqvMnInsKJVldS7vnBPimD-TnHo,27999
+ml_tools/ML_optimization.py,sha256=a2Uxe1g-y4I-gFa8ENIM8QDS-Pz3hoPRRaVXAWMbyQA,13491
+ml_tools/ML_scaler.py,sha256=O8JzHr2551zPpKRRReEIMvq0lNAAPau6hV59KUMAySg,7420
+ml_tools/ML_trainer.py,sha256=xM-o-gbPhWXm2lOVXbeaTFotgJSDRSHyE7H0-9OOij4,23712
+ml_tools/PSO_optimization.py,sha256=q0VYpssQGbPum7xdnkDXlJQKhZMYZo8acHpKhajPK3c,22954
+ml_tools/RNN_forecast.py,sha256=8rNZr-eWOBXMiDQV22e_tQTPM5LM2IFggEAa1FaoXaI,1965
+ml_tools/SQL.py,sha256=WDgdZUYuLBUpv-4Am9XjVY_Aq_jxBWdLrbcgAIEwefI,10704
+ml_tools/VIF_factor.py,sha256=MkMh_RIdsN2XUPzKNGRiEcmB17R_MmvGV4ezpL5zD2E,10403
+ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
+ml_tools/_logger.py,sha256=wcImAiXEZKPNcwM30qBh3t7HvoPURonJY0nrgMGF0sM,4719
+ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
+ml_tools/custom_logger.py,sha256=ry43hk54K6xKo8jRAgq1sFxUpOA9T0LIJ7sw0so2BW0,5880
+ml_tools/data_exploration.py,sha256=hKA_3U-piJ8TtDWhzX_T2Awkg-25e0DC5E8qloqPo6w,27206
+ml_tools/ensemble_evaluation.py,sha256=xMEMfXJ5MjTkTfr1LkFOeD7iUtnVDCW3S9lm3zT-6tY,24778
+ml_tools/ensemble_inference.py,sha256=EFHnbjbu31fcVp88NBx8lWAVdu2Gpg9MY9huVZJHFfM,9350
+ml_tools/ensemble_learning.py,sha256=3s0kH4i_naj0IVl_T4knst-Hwg4TScWjEdsXX5KAi7I,21929
+ml_tools/handle_excel.py,sha256=p5BpBS9vhBhz3lqkk_WQ9Ef7EGedf2dp2cl0yekeRy4,13065
+ml_tools/keys.py,sha256=HtPG8-MWh89C32A7eIlfuuA-DLwkxGkoDfwR2TGN9CQ,1074
+ml_tools/optimization_tools.py,sha256=P3I6lIpvZ8Xf2kX5FvvBKBmrK2pB6idBpkTzfUJxTeE,5073
+ml_tools/path_manager.py,sha256=TJgoqMAryc5F0dal8W_zvJgE1TpOzlskIyYJk614WW4,13809
+ml_tools/utilities.py,sha256=zzfYR7SUSb2rZILTNoCjl_pfLlPdHf4263atXuEb3iE,19341
+dragon_ml_toolbox-9.0.0.dist-info/METADATA,sha256=FWDN8U9RARbPxbCBVrv4ZHqJys-LVo7M3dlyVwKdh74,6941
+dragon_ml_toolbox-9.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-9.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-9.0.0.dist-info/RECORD,,
ml_tools/ETL_engineering.py
CHANGED
@@ -1,11 +1,15 @@
 import polars as pl
+import pandas as pd
 import re
+from pathlib import Path
 from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
+from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
 
 
 __all__ = [
+    "save_unique_values",
     "ColumnCleaner",
     "DataFrameCleaner",
     "TransformationRecipe",
@@ -23,6 +27,80 @@ __all__ = [
     "DateFeatureExtractor"
 ]
 
+################ Unique Values per column #################
+def save_unique_values(csv_path: Union[str, Path], output_dir: Union[str, Path]) -> None:
+    """
+    Loads a CSV file, then analyzes it and saves the unique non-null values
+    from each column into a separate text file exactly as they appear.
+
+    This is useful for understanding the raw categories or range of values
+    within a dataset before cleaning.
+
+    Args:
+        csv_path (Union[str, Path]):
+            The file path to the input CSV file.
+        output_dir (Union[str, Path]):
+            The path to the directory where the .txt files will be saved.
+            The directory will be created if it does not exist.
+    """
+    # --- 1. Input Validation ---
+    csv_path = make_fullpath(input_path=csv_path, enforce="file")
+    output_dir = make_fullpath(input_path=output_dir, make=True)
+
+    # --- 2. Load Data ---
+    try:
+        # Load all columns as strings to preserve original formatting
+        df = pd.read_csv(csv_path, dtype=str, encoding='utf-8')
+    except FileNotFoundError as e:
+        _LOGGER.error(f"The file was not found at '{csv_path}'.")
+        raise e
+    except Exception as e2:
+        _LOGGER.error(f"An error occurred while reading the CSV file.")
+        raise e2
+    else:
+        _LOGGER.info(f"Data loaded from '{csv_path}'")
+
+    # --- 3. Process Each Column ---
+    for i, column_name in enumerate(df.columns):
+        _LOGGER.info(f"Processing column: '{column_name}'...")
+
+        # --- Get unique values AS IS ---
+        try:
+            # Drop nulls, get unique values, and sort them.
+            # The values are preserved exactly as they are in the cells.
+            unique_values = df[column_name].dropna().unique()
+            sorted_uniques = sorted(unique_values)
+        except Exception:
+            _LOGGER.exception(f"Could not process column '{column_name}'.")
+            continue
+
+        if not sorted_uniques:
+            _LOGGER.warning(f"Column '{column_name}' has no unique non-null values. Skipping.")
+            continue
+
+        # --- Sanitize column name to create a valid filename ---
+        sanitized_name = sanitize_filename(column_name)
+        if not sanitized_name.strip('_'):
+            sanitized_name = f'column_{i}'
+        file_path = output_dir / f"{sanitized_name}_unique_values.txt"
+
+        # --- Write to file ---
+        try:
+            with open(file_path, 'w', encoding='utf-8') as f:
+                f.write(f"# Unique values for column: '{column_name}'\n")
+                f.write(f"# Total unique non-null values: {len(sorted_uniques)}\n")
+                f.write("-" * 30 + "\n")
+                for value in sorted_uniques:
+                    f.write(f"{value}\n")
+                f.write("-" * 30 + "\n")
+        except IOError:
+            _LOGGER.exception(f"Error writing to file {file_path}.")
+        else:
+            _LOGGER.info(f"Successfully saved {len(sorted_uniques)} unique values to '{file_path}'")
+
+    _LOGGER.info("Process complete.")
+
+
 ########## EXTRACT and CLEAN ##########
 class ColumnCleaner:
     """
@@ -60,16 +138,19 @@ class ColumnCleaner:
     """
     def __init__(self, column_name: str, rules: Dict[str, str], case_insensitive: bool = True):
        if not isinstance(column_name, str) or not column_name:
-
+            _LOGGER.error("The 'column_name' must be a non-empty string.")
+            raise TypeError()
         if not isinstance(rules, dict):
-
+            _LOGGER.error("The 'rules' argument must be a dictionary.")
+            raise TypeError()
 
         # Validate each regex pattern for correctness
         for pattern in rules.keys():
             try:
                 re.compile(pattern)
-            except re.error
-
+            except re.error:
+                _LOGGER.error(f"Invalid regex pattern '{pattern}'.")
+                raise
 
         self.column_name = column_name
         self.rules = rules
@@ -94,20 +175,17 @@ class DataFrameCleaner:
     """
     def __init__(self, cleaners: List[ColumnCleaner]):
         if not isinstance(cleaners, list):
-
+            _LOGGER.error("The 'cleaners' argument must be a list of ColumnCleaner objects.")
+            raise TypeError()
 
         seen_columns = set()
         for cleaner in cleaners:
             if not isinstance(cleaner, ColumnCleaner):
-
-
-                    f"but found an object of type {type(cleaner).__name__}."
-                )
+                _LOGGER.error(f"All items in 'cleaners' list must be ColumnCleaner objects, but found an object of type {type(cleaner).__name__}.")
+                raise TypeError()
             if cleaner.column_name in seen_columns:
-
-
-                    "Each column should only have one cleaner."
-                )
+                _LOGGER.error(f"Duplicate ColumnCleaner found for column '{cleaner.column_name}'. Each column should only have one cleaner.")
+                raise ValueError()
             seen_columns.add(cleaner.column_name)
 
         self.cleaners = cleaners
@@ -131,10 +209,10 @@ class DataFrameCleaner:
         missing_columns = rule_columns - df_columns
 
         if missing_columns:
-
-
-                f"
-            )
+            _LOGGER.error("The following columns specified in cleaning rules were not found in the DataFrame:")
+            for miss_col in sorted(list(missing_columns)):
+                print(f"\t- {miss_col}")
+            raise ValueError()
 
         df_cleaned = df.clone()
 
@@ -153,7 +231,7 @@ class DataFrameCleaner:
             # Execute the expression chain for the column
             df_cleaned = df_cleaned.with_columns(col_expr.alias(col_name))
 
-
+        _LOGGER.info(f"Cleaned {len(self.cleaners)} columns.")
 
         return df_cleaned
 
@@ -199,16 +277,20 @@ class TransformationRecipe:
         """
         # --- Validation ---
         if not isinstance(input_col_name, str) or not input_col_name:
-
+            _LOGGER.error("'input_col' must be a non-empty string.")
+            raise TypeError()
 
         if transform == _RENAME:
            if not isinstance(output_col_names, str):
-
+                _LOGGER.error("For a RENAME operation, 'output_col' must be a string.")
+                raise TypeError()
         elif not isinstance(transform, Callable):
-
+            _LOGGER.error(f"'transform' must be a callable function or the string '{_RENAME}'.")
+            raise TypeError()
 
         if isinstance(output_col_names, list) and transform == _RENAME:
-
+            _LOGGER.error("A RENAME operation cannot have a list of output columns.")
+            raise ValueError()
 
         # --- Add Step ---
         step = {
@@ -243,9 +325,11 @@ class DataProcessor:
        been populated with transformation steps.
        """
        if not isinstance(recipe, TransformationRecipe):
-
+            _LOGGER.error("The recipe must be an instance of TransformationRecipe.")
+            raise TypeError()
        if len(recipe) == 0:
-
+            _LOGGER.error("The recipe cannot be empty.")
+            raise ValueError()
        self._recipe = recipe
 
    def transform(self, df: pl.DataFrame) -> pl.DataFrame:
@@ -260,7 +344,8 @@ class DataProcessor:
            transform_action = step["transform"]
 
            if input_col_name not in df.columns:
-
+                _LOGGER.error(f"Input column '{input_col_name}' not found in DataFrame.")
+                raise ValueError()
 
            input_series = df.get_column(input_col_name)
 
@@ -273,17 +358,16 @@ class DataProcessor:
 
                if isinstance(result, pl.Series):
                    if not isinstance(output_col_spec, str):
-
+                        _LOGGER.error(f"Function for '{input_col_name}' returned a Series but 'output_col' is not a string.")
+                        raise TypeError()
                    processed_columns.append(result.alias(output_col_spec))
 
                elif isinstance(result, pl.DataFrame):
                    # 1. Handle list-based renaming
                    if isinstance(output_col_spec, list):
                        if len(result.columns) != len(output_col_spec):
-
-
-                                f"but recipe specifies {len(output_col_spec)} output names."
-                            )
+                            _LOGGER.error(f"Mismatch in '{input_col_name}': function produced {len(result.columns)} columns, but recipe specifies {len(output_col_spec)} output names.")
+                            raise ValueError()
 
                        renamed_df = result.rename(dict(zip(result.columns, output_col_spec)))
                        processed_columns.extend(renamed_df.get_columns())
@@ -299,19 +383,19 @@ class DataProcessor:
                        processed_columns.extend(renamed_df.get_columns())
 
                    else:
-
-
-                            f"so 'output_col' must be a list of names or a string prefix."
-                        )
+                        _LOGGER.error(f"Function for '{input_col_name}' returned a DataFrame, so 'output_col' must be a list of names or a string prefix.")
+                        raise TypeError()
 
                else:
-
+                    _LOGGER.error(f"Function for '{input_col_name}' returned an unexpected type: {type(result)}.")
+                    raise TypeError()
 
-            else: # This case is
-
+            else: # This case is unlikely due to builder validation.
+                _LOGGER.error(f"Invalid 'transform' action for '{input_col_name}': {transform_action}")
+                raise TypeError()
 
        if not processed_columns:
-            _LOGGER.
+            _LOGGER.error("The transformation resulted in an empty DataFrame.")
            return pl.DataFrame()
 
        return pl.DataFrame(processed_columns)
@@ -381,18 +465,17 @@ class BinaryTransformer:
    ):
        # --- Validation: Enforce one and only one option ---
        if true_keywords is not None and false_keywords is not None:
-
-
-            )
+            _LOGGER.error("Provide either 'true_keywords' or 'false_keywords', but not both.")
+            raise ValueError()
        if true_keywords is None and false_keywords is None:
-
-
-            )
+            _LOGGER.error("You must provide either 'true_keywords' or 'false_keywords'.")
+            raise ValueError()
 
        # --- Configuration ---
        self.keywords: List[str] = true_keywords if true_keywords is not None else false_keywords # type: ignore
        if not self.keywords:
-
+            _LOGGER.error("Keyword list cannot be empty.")
+            raise ValueError()
 
        self.mode: str = "true_mode" if true_keywords is not None else "false_mode"
 
@@ -468,9 +551,11 @@ class MultiBinaryDummifier:
    """
    def __init__(self, keywords: List[str], case_insensitive: bool = True):
        if not isinstance(keywords, list) or not all(isinstance(k, str) for k in keywords):
-
+            _LOGGER.error("The 'keywords' argument must be a list of strings.")
+            raise TypeError()
        if not keywords:
-
+            _LOGGER.error("The 'keywords' list cannot be empty.")
+            raise ValueError()
 
        self.keywords = keywords
        self.case_insensitive = case_insensitive
@@ -530,7 +615,8 @@ class KeywordDummifier:
    """
    def __init__(self, group_names: List[str], group_keywords: List[List[str]], case_insensitive: bool = True):
        if len(group_names) != len(group_keywords):
-
+            _LOGGER.error("Initialization failed: 'group_names' and 'group_keywords' must have the same length.")
+            raise ValueError()
 
        self.group_names = group_names
        self.group_keywords = group_keywords
@@ -610,23 +696,28 @@ class NumberExtractor:
    ):
        # --- Validation ---
        if not isinstance(regex_pattern, str):
-
+            _LOGGER.error("regex_pattern must be a string.")
+            raise TypeError()
 
        # Validate that the regex has exactly one capturing group
        try:
            if re.compile(regex_pattern).groups != 1:
-
+                _LOGGER.error("regex_pattern must contain exactly one capturing group '(...)'")
+                raise ValueError()
        except re.error as e:
-
+            _LOGGER.error(f"Invalid regex pattern provided: {e}")
+            raise ValueError()
 
        if dtype not in ["float", "int"]:
-
+            _LOGGER.error("dtype must be either 'float' or 'int'.")
+            raise ValueError()
 
        if round_digits is not None:
            if not isinstance(round_digits, int):
-
+                _LOGGER.error("round_digits must be an integer.")
+                raise TypeError()
            if dtype == "int":
-                _LOGGER.warning(f"
+                _LOGGER.warning(f"'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")
 
        self.regex_pattern = regex_pattern
        self.dtype = dtype
@@ -684,21 +775,26 @@ class MultiNumberExtractor:
    ):
        # --- Validation ---
        if not isinstance(num_outputs, int) or num_outputs <= 0:
-
+            _LOGGER.error("num_outputs must be a positive integer.")
+            raise ValueError()
 
        if not isinstance(regex_pattern, str):
-
+            _LOGGER.error("regex_pattern must be a string.")
+            raise TypeError()
 
        # Validate that the regex has exactly one capturing group
        try:
            if re.compile(regex_pattern).groups != 1:
-
+                _LOGGER.error("regex_pattern must contain exactly one capturing group '(...)'")
+                raise ValueError()
        except re.error as e:
-
+            _LOGGER.error(f"Invalid regex pattern provided: {e}")
+            raise ValueError()
 
        # Validate dtype
        if dtype not in ["float", "int"]:
-
+            _LOGGER.error("dtype must be either 'float' or 'int'.")
+            raise ValueError()
 
        self.num_outputs = num_outputs
        self.regex_pattern = regex_pattern
@@ -751,17 +847,14 @@ class RatioCalculator:
        try:
            compiled_pattern = re.compile(regex_pattern)
            if compiled_pattern.groups != 2:
-
-
-                    "capturing groups '(...)'."
-                )
+                _LOGGER.error("RatioCalculator regex_pattern must contain exactly two capturing groups '(...)'.")
+                raise ValueError()
            if compiled_pattern.groupindex:
-
-
-                    "(e.g., '(\\d+)'), not named groups (e.g., '(?P<name>\\d+)')."
-                )
+                _LOGGER.error("RatioCalculator must be initialized with unnamed capturing groups (e.g., '(\\d+)'), not named groups (e.g., '(?P<name>\\d+)').")
+                raise ValueError()
        except re.error as e:
-
+            _LOGGER.error(f"Invalid regex pattern provided: {e}")
+            raise ValueError()
 
        self.regex_pattern = regex_pattern
 
@@ -805,7 +898,8 @@ class CategoryMapper:
        unseen_value: Optional[Union[int, float]] = None,
    ):
        if not isinstance(mapping, dict):
-
+            _LOGGER.error("The 'mapping' argument must be a dictionary.")
+            raise TypeError()
 
        self.mapping = mapping
        self.default_value = unseen_value
@@ -866,7 +960,8 @@ class RegexMapper:
    ):
        # --- Validation ---
        if not isinstance(mapping, dict):
-
+            _LOGGER.error("The 'mapping' argument must be a dictionary.")
+            raise TypeError()
 
        self.unseen_value = unseen_value
 
@@ -880,9 +975,11 @@ class RegexMapper:
            try:
                re.compile(final_pattern)
            except re.error as e:
-
+                _LOGGER.error(f"Invalid regex pattern '{final_pattern}': {e}")
+                raise ValueError()
            if not isinstance(value, (int, float)):
-
+                _LOGGER.error(f"Mapping values must be int or float, but got {type(value)} for pattern '{pattern}'.")
+                raise TypeError()
 
            self.processed_mapping.append((final_pattern, value))
 
@@ -937,11 +1034,13 @@ class ValueBinner:
    ):
        # --- Validation ---
        if not isinstance(breaks, list) or len(breaks) < 2:
-
+            _LOGGER.error("The 'breaks' argument must be a list of at least two numbers.")
+            raise ValueError()
 
        # Check if the list is sorted
        if not all(breaks[i] <= breaks[i+1] for i in range(len(breaks)-1)):
-
+            _LOGGER.error("The 'breaks' list must be sorted in ascending order.")
+            raise ValueError()
 
        self.breaks = breaks
        self.left_closed = left_closed
@@ -1001,14 +1100,13 @@ class DateFeatureExtractor:
    ):
        # --- Validation ---
        if not isinstance(features, list) or not features:
-
+            _LOGGER.error("'features' must be a non-empty list of strings.")
+            raise ValueError()
 
        for feature in features:
            if feature not in self.ALLOWED_FEATURES:
-
-
-                    f"Allowed features are: {self.ALLOWED_FEATURES}"
-                )
+                _LOGGER.error(f"Feature '{feature}' is not supported. Allowed features are: {self.ALLOWED_FEATURES}")
+                raise ValueError()
 
        self.features = features
        self.format = format
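
The new save_unique_values helper above is self-contained; a minimal usage sketch follows (the file paths are hypothetical):

from pathlib import Path
from ml_tools.ETL_engineering import save_unique_values

# Writes one '<column>_unique_values.txt' file per column, listing the raw
# unique non-null values exactly as they appear in the CSV.
save_unique_values(
    csv_path=Path("data/raw_survey.csv"),      # hypothetical input file
    output_dir=Path("reports/unique_values"),  # created if it does not exist
)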
ml_tools/GUI_tools.py
CHANGED
@@ -88,7 +88,7 @@ class ConfigManager:
 
        path = Path(file_path)
        if path.exists():
-            _LOGGER.warning(f"
+            _LOGGER.warning(f"Configuration file already exists at {path}, or wrong path provided. Aborting.")
            return
 
        config = configparser.ConfigParser()
@@ -150,7 +150,7 @@ class ConfigManager:
 
        with open(path, 'w') as configfile:
            config.write(configfile)
-        _LOGGER.info(f"
+        _LOGGER.info(f"Successfully generated config template at: '{path}'")
 
 
 # --- GUI Factory ---
@@ -442,14 +442,14 @@ def catch_exceptions(show_popup: bool = True):
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
-            except Exception
+            except Exception:
                # Format the full traceback to give detailed error info
-                error_msg = traceback.format_exc()
                if show_popup:
+                    error_msg = traceback.format_exc()
                    sg.popup_error("An error occurred:", error_msg, title="Error")
                else:
                    # Fallback for non-GUI contexts or if popup is disabled
-                    _LOGGER.error
+                    _LOGGER.exception("An error occurred.")
        return wrapper
    return decorator
 
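
With this change, the traceback is only formatted when a popup will actually display it; otherwise the decorator defers to _LOGGER.exception, which records the traceback itself. A minimal usage sketch (the decorated function is hypothetical):

from ml_tools.GUI_tools import catch_exceptions

@catch_exceptions(show_popup=False)  # headless mode: errors are logged, no popup
def risky_update():
    return 1 / 0  # deliberately fails

risky_update()  # the ZeroDivisionError is caught and logged with its traceback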
ml_tools/MICE_imputation.py
CHANGED
@@ -38,13 +38,14 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
    imputed_datasets = [kernel.complete_data(dataset=i) for i in range(resulting_datasets)]
 
    if imputed_datasets is None or len(imputed_datasets) == 0:
-
+        _LOGGER.error("No imputed datasets were generated. Check the MICE process.")
+        raise ValueError()
 
    # threshold binary columns
    if binary_columns is not None:
        invalid_binary_columns = set(binary_columns) - set(df.columns)
        if invalid_binary_columns:
-            _LOGGER.warning(f"
+            _LOGGER.warning(f"These 'binary columns' are not in the dataset:")
            for invalid_binary_col in invalid_binary_columns:
                print(f" - {invalid_binary_col}")
            valid_binary_columns = [col for col in binary_columns if col not in invalid_binary_columns]
@@ -63,7 +64,7 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
        assert all(imputed_df.index == df.index), f"❌ Index mismatch in dataset {subname}" # type: ignore
        # print("✅ All imputed datasets match the original DataFrame indexes.")
 
-    _LOGGER.info("
+    _LOGGER.info("MICE imputation complete.")
 
    return kernel, imputed_datasets, imputed_dataset_names
 
@@ -95,7 +96,8 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
    dataset_count = kernel.num_datasets
 
    if dataset_count != len(imputed_dataset_names):
-
+        _LOGGER.error(f"Expected {dataset_count} names in imputed_dataset_names, got {len(imputed_dataset_names)}")
+        raise ValueError()
 
    # Check path
    root_path = make_fullpath(root_dir, make=True)
@@ -133,7 +135,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
        plt.savefig(save_path, bbox_inches='tight', format="svg")
        plt.close()
 
-    _LOGGER.info(f"
+    _LOGGER.info(f"{dataset_file_dir} process completed.")
 
 
 # Imputed distributions
@@ -157,7 +159,8 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
        """Helper function to add labels and legends to a figure"""
 
        if not isinstance(fig, ggplot):
-
+            _LOGGER.error(f"Expected a plotnine.ggplot object, received {type(fig)}.")
+            raise TypeError()
 
        # Edit labels and title
        fig = fig + theme(
@@ -171,7 +174,8 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
        fig = fig.draw()
 
        if not hasattr(fig, 'axes') or len(fig.axes) == 0:
-
+            _LOGGER.error("Rendered figure has no axes to modify.")
+            raise RuntimeError()
 
        if filename == "Combined_Distributions":
            custom_xlabel = "Feature Values"
@@ -218,7 +222,7 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
        fig = kernel.plot_imputed_distributions(variables=[feature])
        _process_figure(fig, feature)
 
-    _LOGGER.info(f"
+    _LOGGER.info(f"{local_dir_name} completed.")
 
 
 def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str],
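
The validation changes above surface through the normal entry points. A minimal usage sketch of apply_mice, based only on the parameters and return values visible in this diff (other parameters may exist, and the input file is hypothetical):

import pandas as pd
from ml_tools.MICE_imputation import apply_mice

df = pd.read_csv("data/study.csv")  # hypothetical dataset
kernel, imputed_datasets, imputed_dataset_names = apply_mice(
    df=df,
    df_name="study",
    # Columns thresholded back to binary after imputation; a warning is
    # logged for any listed column that is missing from df.
    binary_columns=["smoker", "treated"],
)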
|