dragon-ml-toolbox 2.4.0__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic; consult the package registry's advisory page for details.

Files changed (33)
  1. {dragon_ml_toolbox-2.4.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-3.1.0}/PKG-INFO +7 -4
  2. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/README.md +5 -2
  3. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +7 -4
  4. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +7 -4
  5. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/ETL_engineering.py +49 -19
  6. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/GUI_tools.py +24 -25
  7. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/MICE_imputation.py +8 -4
  8. dragon_ml_toolbox-3.1.0/ml_tools/ML_callbacks.py +341 -0
  9. dragon_ml_toolbox-3.1.0/ml_tools/ML_evaluation.py +255 -0
  10. dragon_ml_toolbox-3.1.0/ml_tools/ML_trainer.py +344 -0
  11. dragon_ml_toolbox-3.1.0/ml_tools/ML_tutorial.py +300 -0
  12. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/PSO_optimization.py +27 -20
  13. dragon_ml_toolbox-3.1.0/ml_tools/RNN_forecast.py +49 -0
  14. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/VIF_factor.py +6 -5
  15. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/data_exploration.py +2 -2
  16. dragon_ml_toolbox-3.1.0/ml_tools/datasetmaster.py +681 -0
  17. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/ensemble_learning.py +12 -9
  18. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/handle_excel.py +9 -10
  19. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/logger.py +45 -8
  20. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/utilities.py +18 -1
  21. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/pyproject.toml +2 -2
  22. dragon_ml_toolbox-2.4.0/ml_tools/datasetmaster.py +0 -607
  23. dragon_ml_toolbox-2.4.0/ml_tools/trainer.py +0 -346
  24. dragon_ml_toolbox-2.4.0/ml_tools/vision_helpers.py +0 -231
  25. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/LICENSE +0 -0
  26. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/LICENSE-THIRD-PARTY.md +0 -0
  27. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  28. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  29. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  30. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/__init__.py +0 -0
  31. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/_particle_swarm_optimization.py +0 -0
  32. /dragon_ml_toolbox-2.4.0/ml_tools/pytorch_models.py → /dragon_ml_toolbox-3.1.0/ml_tools/_pytorch_models.py +0 -0
  33. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/setup.cfg +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 2.4.0
4
- Summary: A collection of tools for data science and machine learning projects
3
+ Version: 3.1.0
4
+ Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
@@ -125,9 +125,12 @@ GUI_tools
125
125
  handle_excel
126
126
  logger
127
127
  MICE_imputation
128
+ ML_callbacks
129
+ ML_evaluation
130
+ ML_trainer
131
+ ML_tutorial
128
132
  PSO_optimization
129
- trainer
133
+ RNN_forecast
130
134
  utilities
131
135
  VIF_factor
132
- vision_helpers
133
136
  ```
@@ -82,9 +82,12 @@ GUI_tools
82
82
  handle_excel
83
83
  logger
84
84
  MICE_imputation
85
+ ML_callbacks
86
+ ML_evaluation
87
+ ML_trainer
88
+ ML_tutorial
85
89
  PSO_optimization
86
- trainer
90
+ RNN_forecast
87
91
  utilities
88
92
  VIF_factor
89
- vision_helpers
90
93
  ```
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 2.4.0
4
- Summary: A collection of tools for data science and machine learning projects
3
+ Version: 3.1.0
4
+ Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
@@ -125,9 +125,12 @@ GUI_tools
125
125
  handle_excel
126
126
  logger
127
127
  MICE_imputation
128
+ ML_callbacks
129
+ ML_evaluation
130
+ ML_trainer
131
+ ML_tutorial
128
132
  PSO_optimization
129
- trainer
133
+ RNN_forecast
130
134
  utilities
131
135
  VIF_factor
132
- vision_helpers
133
136
  ```
@@ -10,16 +10,19 @@ dragon_ml_toolbox.egg-info/top_level.txt
10
10
  ml_tools/ETL_engineering.py
11
11
  ml_tools/GUI_tools.py
12
12
  ml_tools/MICE_imputation.py
13
+ ml_tools/ML_callbacks.py
14
+ ml_tools/ML_evaluation.py
15
+ ml_tools/ML_trainer.py
16
+ ml_tools/ML_tutorial.py
13
17
  ml_tools/PSO_optimization.py
18
+ ml_tools/RNN_forecast.py
14
19
  ml_tools/VIF_factor.py
15
20
  ml_tools/__init__.py
16
21
  ml_tools/_particle_swarm_optimization.py
22
+ ml_tools/_pytorch_models.py
17
23
  ml_tools/data_exploration.py
18
24
  ml_tools/datasetmaster.py
19
25
  ml_tools/ensemble_learning.py
20
26
  ml_tools/handle_excel.py
21
27
  ml_tools/logger.py
22
- ml_tools/pytorch_models.py
23
- ml_tools/trainer.py
24
- ml_tools/utilities.py
25
- ml_tools/vision_helpers.py
28
+ ml_tools/utilities.py
@@ -3,17 +3,18 @@ import re
3
3
  from typing import Literal, Union, Optional, Any, Callable, List, Dict
4
4
  from .utilities import _script_info
5
5
  import pandas as pd
6
+ from .logger import _LOGGER
6
7
 
7
8
 
8
9
  __all__ = [
9
10
  "ColumnCleaner",
10
- "DataFrameCleaner"
11
+ "DataFrameCleaner",
11
12
  "TransformationRecipe",
12
13
  "DataProcessor",
13
14
  "KeywordDummifier",
14
15
  "NumberExtractor",
15
16
  "MultiNumberExtractor",
16
- "RatioCalculator"
17
+ "RatioCalculator",
17
18
  "CategoryMapper",
18
19
  "RegexMapper",
19
20
  "ValueBinner",
@@ -24,18 +25,26 @@ __all__ = [
24
25
 
25
26
  class ColumnCleaner:
26
27
  """
27
- Cleans and standardizes a single pandas Series based on a dictionary of regex-to-value replacement rules.
28
+ Cleans and standardizes a pandas Series by applying regex-to-replacement rules.
29
+ Supports sub-string replacements and case-insensitivity.
30
+
31
+ Notes:
32
+ - Write separate, specific rules for each case. Don't combine patterns with an "OR".
33
+ - Define rules from most specific to more general to create a fallback system.
34
+ - Beware of chain replacements (rules matching strings that have already been changed by a previous rule).
28
35
 
29
36
  Args:
30
37
  rules (Dict[str, str]):
31
- A dictionary where each key is a regular expression pattern and
32
- each value is the standardized string to replace matches with.
38
+ A dictionary of regex patterns to replacement strings. Can use
39
+ backreferences in the replacement statement (e.g., r'\\1 \\2 \\3 \\4 \\5') for captured groups.
40
+ case_insensitive (bool):
41
+ If True, regex matching ignores case.
33
42
  """
34
- def __init__(self, rules: Dict[str, str]):
43
+ def __init__(self, rules: Dict[str, str], case_insensitive: bool = True):
35
44
  if not isinstance(rules, dict):
36
45
  raise TypeError("The 'rules' argument must be a dictionary.")
37
46
 
38
- # Validate that all keys are valid regular expressions
47
+ # Validate regex patterns
39
48
  for pattern in rules.keys():
40
49
  try:
41
50
  re.compile(pattern)
@@ -43,32 +52,52 @@ class ColumnCleaner:
43
52
  raise ValueError(f"Invalid regex pattern '{pattern}': {e}") from e
44
53
 
45
54
  self.rules = rules
55
+ self.case_insensitive = case_insensitive
46
56
 
47
57
  def clean(self, series: pd.Series) -> pd.Series:
48
58
  """
49
- Applies the standardization rules to the provided Series (requires string data).
59
+ Applies the standardization rules sequentially to the provided Series.
50
60
 
51
- Non-matching values are kept as they are.
52
-
53
61
  Args:
54
62
  series (pd.Series): The pandas Series to clean.
55
63
 
56
64
  Returns:
57
- pd.Series: A new Series with the values cleaned and standardized.
65
+ pd.Series: A new Series with the regex replacements applied.
58
66
  """
59
- return series.astype(str).replace(self.rules, regex=True)
67
+ cleaned_series = series.astype(str)
68
+
69
+ # Set the regex flags based on the case_insensitive setting
70
+ flags = re.IGNORECASE if self.case_insensitive else 0
71
+
72
+ # Sequentially apply each regex rule
73
+ for pattern, replacement in self.rules.items():
74
+ cleaned_series = cleaned_series.str.replace(
75
+ pattern,
76
+ replacement,
77
+ regex=True,
78
+ flags=flags
79
+ )
80
+
81
+ return cleaned_series
60
82
 
61
83
 
62
84
  class DataFrameCleaner:
63
85
  """
64
86
  Orchestrates the cleaning of multiple columns in a pandas DataFrame using a nested dictionary of rules and `ColumnCleaner` objects.
87
+
88
+ Chosen case-sensitivity is applied to all columns.
89
+
90
+ Notes:
91
+ - Write separate, specific rules for each case. Don't combine patterns with an "OR".
92
+ - Define rules from most specific to more general to create a fallback system.
93
+ - Beware of chain replacements (rules matching strings that have already been changed by a previous rule).
65
94
 
66
95
  Args:
67
96
  rules (Dict[str, Dict[str, str]]):
68
97
  A nested dictionary where each top-level key is a column name,
69
98
  and its value is a dictionary of regex rules for that column, as expected by `ColumnCleaner`.
70
99
  """
71
- def __init__(self, rules: Dict[str, Dict[str, str]]):
100
+ def __init__(self, rules: Dict[str, Dict[str, str]], case_insensitive: bool = True):
72
101
  if not isinstance(rules, dict):
73
102
  raise TypeError("The 'rules' argument must be a nested dictionary.")
74
103
 
@@ -80,6 +109,7 @@ class DataFrameCleaner:
80
109
  )
81
110
 
82
111
  self.rules = rules
112
+ self.case_insensitive = case_insensitive
83
113
 
84
114
  def clean(self, df: pd.DataFrame) -> pd.DataFrame:
85
115
  """
@@ -108,7 +138,7 @@ class DataFrameCleaner:
108
138
 
109
139
  for column_name, column_rules in self.rules.items():
110
140
  # Create and apply the specific cleaner for the column
111
- cleaner = ColumnCleaner(rules=column_rules)
141
+ cleaner = ColumnCleaner(rules=column_rules, case_insensitive=self.case_insensitive)
112
142
  df_cleaned[column_name] = cleaner.clean(df_cleaned[column_name])
113
143
 
114
144
  return df_cleaned
@@ -251,7 +281,7 @@ class DataProcessor:
251
281
  raise TypeError(f"Invalid 'transform' action for '{input_col_name}': {transform_action}")
252
282
 
253
283
  if not processed_columns:
254
- print("Warning: The transformation resulted in an empty DataFrame.")
284
+ _LOGGER.warning("The transformation resulted in an empty DataFrame.")
255
285
  return pl.DataFrame()
256
286
 
257
287
  return pl.DataFrame(processed_columns)
@@ -403,7 +433,7 @@ class NumberExtractor:
403
433
  if not isinstance(round_digits, int):
404
434
  raise TypeError("round_digits must be an integer.")
405
435
  if dtype == "int":
406
- print(f"Warning: 'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")
436
+ _LOGGER.warning(f"'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")
407
437
 
408
438
  self.regex_pattern = regex_pattern
409
439
  self.dtype = dtype
@@ -561,9 +591,9 @@ class RatioCalculator:
561
591
  denominator = groups.struct.field("group_2").cast(pl.Float64, strict=False)
562
592
 
563
593
  # Safely perform division, returning null if denominator is 0
564
- return pl.when(denominator != 0).then(
565
- numerator / denominator
566
- ).otherwise(None)
594
+ final_expr = pl.when(denominator != 0).then(numerator / denominator).otherwise(None)
595
+
596
+ return pl.select(final_expr).to_series()
567
597
 
568
598
 
569
599
  class CategoryMapper:
@@ -7,6 +7,7 @@ from functools import wraps
7
7
  from typing import Any, Dict, Tuple, List
8
8
  from .utilities import _script_info
9
9
  import numpy as np
10
+ from .logger import _LOGGER
10
11
 
11
12
 
12
13
  __all__ = [
@@ -46,7 +47,7 @@ class PathManager:
46
47
  if self._is_bundled:
47
48
  # In a Briefcase bundle, resource_path gives an absolute path
48
49
  # to the resource directory.
49
- self.package_root = self._resource_path_func(self.package_name, "")
50
+ self.package_root = self._resource_path_func(self.package_name, "") # type: ignore
50
51
  else:
51
52
  # In development mode, the package root is the directory
52
53
  # containing the anchor file.
@@ -56,7 +57,7 @@ class PathManager:
56
57
  """Checks if the app is running in a bundled environment."""
57
58
  try:
58
59
  # This is the function Briefcase provides in a bundled app
59
- from briefcase.platforms.base import resource_path
60
+ from briefcase.platforms.base import resource_path # type: ignore
60
61
  return True, resource_path
61
62
  except ImportError:
62
63
  return False, None
@@ -147,7 +148,7 @@ class ConfigManager:
147
148
  """
148
149
  path = Path(file_path)
149
150
  if path.exists() and not force_overwrite:
150
- print(f"Configuration file already exists at {path}. Aborting.")
151
+ _LOGGER.warning(f"Configuration file already exists at {path}. Aborting.")
151
152
  return
152
153
 
153
154
  config = configparser.ConfigParser()
@@ -205,7 +206,7 @@ class ConfigManager:
205
206
 
206
207
  with open(path, 'w') as configfile:
207
208
  config.write(configfile)
208
- print(f"Successfully generated config template at: '{path}'")
209
+ _LOGGER.info(f"Successfully generated config template at: '{path}'")
209
210
 
210
211
 
211
212
  # --- GUI Factory ---
@@ -219,8 +220,8 @@ class GUIFactory:
219
220
  Initializes the factory with a configuration object.
220
221
  """
221
222
  self.config = config
222
- sg.theme(self.config.general.theme)
223
- sg.set_options(font=(self.config.general.font_family, 12))
223
+ sg.theme(self.config.general.theme) # type: ignore
224
+ sg.set_options(font=(self.config.general.font_family, 12)) # type: ignore
224
225
 
225
226
  # --- Atomic Element Generators ---
226
227
  def make_button(self, text: str, key: str, **kwargs) -> sg.Button:
@@ -234,13 +235,13 @@ class GUIFactory:
234
235
  (e.g., `tooltip='Click me'`, `disabled=True`).
235
236
  """
236
237
  cfg = self.config
237
- font = (cfg.fonts.font_family, cfg.fonts.button_size, cfg.fonts.button_style)
238
+ font = (cfg.fonts.font_family, cfg.fonts.button_size, cfg.fonts.button_style) # type: ignore
238
239
 
239
240
  style_args = {
240
- "size": cfg.layout.button_size,
241
+ "size": cfg.layout.button_size, # type: ignore
241
242
  "font": font,
242
- "button_color": (cfg.colors.button_text, cfg.colors.button_background),
243
- "mouseover_colors": (cfg.colors.button_text, cfg.colors.button_background_hover),
243
+ "button_color": (cfg.colors.button_text, cfg.colors.button_background), # type: ignore
244
+ "mouseover_colors": (cfg.colors.button_text, cfg.colors.button_background_hover), # type: ignore
244
245
  "border_width": 0,
245
246
  **kwargs
246
247
  }
@@ -257,7 +258,7 @@ class GUIFactory:
257
258
  (e.g., `title_color='red'`, `relief=sg.RELIEF_SUNKEN`).
258
259
  """
259
260
  cfg = self.config
260
- font = (cfg.fonts.font_family, cfg.fonts.frame_size)
261
+ font = (cfg.fonts.font_family, cfg.fonts.frame_size) # type: ignore
261
262
 
262
263
  style_args = {
263
264
  "font": font,
@@ -289,7 +290,7 @@ class GUIFactory:
289
290
  """
290
291
  cfg = self.config
291
292
  bg_color = sg.theme_background_color()
292
- label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style)
293
+ label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style) # type: ignore
293
294
 
294
295
  columns = []
295
296
  for name, (val_min, val_max) in data_dict.items():
@@ -298,21 +299,21 @@ class GUIFactory:
298
299
 
299
300
  label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
300
301
 
301
- input_style = {"size": cfg.layout.input_size_cont, "justification": "center"}
302
+ input_style = {"size": cfg.layout.input_size_cont, "justification": "center"} # type: ignore
302
303
  if is_target:
303
- input_style["text_color"] = cfg.colors.target_text
304
- input_style["disabled_readonly_background_color"] = cfg.colors.target_background
304
+ input_style["text_color"] = cfg.colors.target_text # type: ignore
305
+ input_style["disabled_readonly_background_color"] = cfg.colors.target_background # type: ignore
305
306
 
306
307
  element = sg.Input(default_text, key=key, disabled=is_target, **input_style)
307
308
 
308
309
  if is_target:
309
310
  layout = [[label], [element]]
310
311
  else:
311
- range_font = (cfg.fonts.font_family, cfg.fonts.range_size)
312
+ range_font = (cfg.fonts.font_family, cfg.fonts.range_size) # type: ignore
312
313
  range_text = sg.Text(f"Range: {int(val_min)}-{int(val_max)}", font=range_font, background_color=bg_color)
313
314
  layout = [[label], [element], [range_text]]
314
315
 
315
- layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)])
316
+ layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
316
317
  columns.append(sg.Column(layout, background_color=bg_color))
317
318
 
318
319
  if layout_mode == 'row':
@@ -340,17 +341,17 @@ class GUIFactory:
340
341
  """
341
342
  cfg = self.config
342
343
  bg_color = sg.theme_background_color()
343
- label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style)
344
+ label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style) # type: ignore
344
345
 
345
346
  columns = []
346
347
  for name, values in data_dict.items():
347
348
  label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
348
349
  element = sg.Combo(
349
350
  values, default_value=values[0], key=name,
350
- size=cfg.layout.input_size_binary, readonly=True
351
+ size=cfg.layout.input_size_binary, readonly=True # type: ignore
351
352
  )
352
353
  layout = [[label], [element]]
353
- layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)])
354
+ layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
354
355
  columns.append(sg.Column(layout, background_color=bg_color))
355
356
 
356
357
  if layout_mode == 'row':
@@ -370,8 +371,8 @@ class GUIFactory:
370
371
  **kwargs: Additional arguments to pass to the sg.Window constructor
371
372
  (e.g., `location=(100, 100)`, `keep_on_top=True`).
372
373
  """
373
- cfg = self.config.general
374
- version = getattr(self.config.meta, 'version', None)
374
+ cfg = self.config.general # type: ignore
375
+ version = getattr(self.config.meta, 'version', None) # type: ignore
375
376
  full_title = f"{title} v{version}" if version else title
376
377
 
377
378
  window_args = {
@@ -406,9 +407,7 @@ def catch_exceptions(show_popup: bool = True):
406
407
  sg.popup_error("An error occurred:", error_msg, title="Error")
407
408
  else:
408
409
  # Fallback for non-GUI contexts or if popup is disabled
409
- print("--- An exception occurred ---")
410
- print(error_msg)
411
- print("-----------------------------")
410
+ _LOGGER.error(error_msg)
412
411
  return wrapper
413
412
  return decorator
414
413
 
@@ -6,6 +6,7 @@ import numpy as np
6
6
  from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values, make_fullpath
7
7
  from plotnine import ggplot, labs, theme, element_blank # type: ignore
8
8
  from typing import Optional, Union
9
+ from .logger import _LOGGER
9
10
 
10
11
 
11
12
  __all__ = [
@@ -40,7 +41,9 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
40
41
  if binary_columns is not None:
41
42
  invalid_binary_columns = set(binary_columns) - set(df.columns)
42
43
  if invalid_binary_columns:
43
- print(f"⚠️ These 'binary columns' are not in the dataset: {invalid_binary_columns}")
44
+ _LOGGER.warning(f"⚠️ These 'binary columns' are not in the dataset:")
45
+ for invalid_binary_col in invalid_binary_columns:
46
+ print(f" - {invalid_binary_col}")
44
47
  valid_binary_columns = [col for col in binary_columns if col not in invalid_binary_columns]
45
48
  for imputed_df in imputed_datasets:
46
49
  for binary_column_name in valid_binary_columns:
@@ -125,7 +128,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
125
128
  plt.savefig(save_path, bbox_inches='tight', format="svg")
126
129
  plt.close()
127
130
 
128
- print(f"{dataset_file_dir} completed.")
131
+ _LOGGER.info(f"{dataset_file_dir} completed.")
129
132
 
130
133
 
131
134
  # Imputed distributions
@@ -210,7 +213,7 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
210
213
  fig = kernel.plot_imputed_distributions(variables=[feature])
211
214
  _process_figure(fig, feature)
212
215
 
213
- print(f"{local_dir_name} completed.")
216
+ _LOGGER.info(f"{local_dir_name} completed.")
214
217
 
215
218
 
216
219
  def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str],
@@ -240,7 +243,8 @@ def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str]
240
243
  all_file_paths = list(list_csv_paths(input_path).values())
241
244
 
242
245
  for df_path in all_file_paths:
243
- df, df_name = load_dataframe(df_path=df_path)
246
+ df: pd.DataFrame
247
+ df, df_name = load_dataframe(df_path=df_path, kind="pandas") # type: ignore
244
248
 
245
249
  df, df_targets = _skip_targets(df, target_columns)
246
250