dragon-ml-toolbox 2.4.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-2.4.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/METADATA +7 -4
- dragon_ml_toolbox-3.0.0.dist-info/RECORD +25 -0
- ml_tools/ETL_engineering.py +8 -7
- ml_tools/GUI_tools.py +24 -25
- ml_tools/MICE_imputation.py +8 -4
- ml_tools/ML_callbacks.py +341 -0
- ml_tools/ML_evaluation.py +255 -0
- ml_tools/ML_trainer.py +344 -0
- ml_tools/ML_tutorial.py +300 -0
- ml_tools/PSO_optimization.py +27 -20
- ml_tools/RNN_forecast.py +49 -0
- ml_tools/VIF_factor.py +6 -5
- ml_tools/datasetmaster.py +601 -527
- ml_tools/ensemble_learning.py +12 -9
- ml_tools/handle_excel.py +9 -10
- ml_tools/logger.py +45 -8
- ml_tools/utilities.py +18 -1
- dragon_ml_toolbox-2.4.0.dist-info/RECORD +0 -22
- ml_tools/trainer.py +0 -346
- ml_tools/vision_helpers.py +0 -231
- {dragon_ml_toolbox-2.4.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-2.4.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-2.4.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-2.4.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/top_level.txt +0 -0
- /ml_tools/{pytorch_models.py → _pytorch_models.py} +0 -0
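
One rename worth flagging: `ml_tools/pytorch_models.py` becomes `ml_tools/_pytorch_models.py`, the leading-underscore convention for a private module, so any direct import of the old path breaks in 3.0.0. A quick way to check which spelling a given installation exposes (illustrative snippet, not part of the package):

```python
import importlib.util

# find_spec returns a ModuleSpec if the module exists under that name, else None.
old = importlib.util.find_spec("ml_tools.pytorch_models")    # None on 3.0.0
new = importlib.util.find_spec("ml_tools._pytorch_models")   # ModuleSpec on 3.0.0
print("old module present:", old is not None)
print("new module present:", new is not None)
```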
{dragon_ml_toolbox-2.4.0.dist-info → dragon_ml_toolbox-3.0.0.dist-info}/METADATA
CHANGED

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 2.4.0
-Summary: A collection of tools for data science and machine learning projects
+Version: 3.0.0
+Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
 Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools

@@ -125,9 +125,12 @@ GUI_tools
 handle_excel
 logger
 MICE_imputation
+ML_callbacks
+ML_evaluation
+ML_trainer
+ML_tutorial
 PSO_optimization
-
+RNN_forecast
 utilities
 VIF_factor
-vision_helpers
 ```
dragon_ml_toolbox-3.0.0.dist-info/RECORD
ADDED

@@ -0,0 +1,25 @@
+dragon_ml_toolbox-3.0.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-3.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
+ml_tools/ETL_engineering.py,sha256=SRiloWhSpopS4ay8mzUu0H4e9-37Ox_jDHzODqsQ8pc,31642
+ml_tools/GUI_tools.py,sha256=uFx6zIrQZzDPSTtOSHz8ptz-fxZiQz-lXHcrqwuYV_E,20385
+ml_tools/MICE_imputation.py,sha256=ed-YeQkEAeHxTNkWIHs09T4YeYNF0aqAnrUTcdIEp9E,11372
+ml_tools/ML_callbacks.py,sha256=gHZk-lyzAax6iEtG26zHuoobdAZCFJ6BmI6pWoXkOrw,13189
+ml_tools/ML_evaluation.py,sha256=3xOqVXLJDhbioKZ922yxFnSuO4VDQ-HFzZyZZ1MskVM,10054
+ml_tools/ML_trainer.py,sha256=zRs3crz_z4B285iJhmY7m4AFwnvvq4urOyl4zDuCLtA,14456
+ml_tools/ML_tutorial.py,sha256=-9tJO9ISPxEjRINVaF_Bu7tiiJ2W3zznQ4gNlZeP1HQ,12238
+ml_tools/PSO_optimization.py,sha256=RCvIFGyf28voo2mpbRKC6LfDzKslzY-aYoPwgv9F4Bg,25458
+ml_tools/RNN_forecast.py,sha256=IZLcPs3by0Chei7ill_Grjxs7BBUnzau0Oavi3dWiyE,1886
+ml_tools/VIF_factor.py,sha256=5GVAldH69Vkei3WRUZN1uPBMzGoOOeEOA-bgmZXbbUw,10301
+ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ml_tools/_particle_swarm_optimization.py,sha256=b_eNNkA89Y40hj76KauivT8KLScH1B9wF2IXptOqkOw,22220
+ml_tools/_pytorch_models.py,sha256=bpWZsrSwCvHJQkR6UfoPpElsMv9AvmiNErNHC8NYB_I,10132
+ml_tools/data_exploration.py,sha256=Fzbz_DKZ7F2e3-JbahLqKr3aP6lt9aCK9rNOHvR7nlA,23665
+ml_tools/datasetmaster.py,sha256=N-uwfzWnl_qnoAqjbfS98I1pVNra5u6rhKLdWbFIReA,30122
+ml_tools/ensemble_learning.py,sha256=PPtBBLgLvaYOdY-MlcjXuxWWXf3JQavLNEysFgzjc_s,37470
+ml_tools/handle_excel.py,sha256=lwds7rDLlGSCWiWGI7xNg-Z7kxAepogp0lstSFa0590,12949
+ml_tools/logger.py,sha256=jC4Q2OqmDm8ZO9VpuZqBSWdXryqaJvLscqVJ6caNMOk,6009
+ml_tools/utilities.py,sha256=opNR-ACH6BnLkWAKcb19ef5tFxfx22TI6E2o0RYwiGA,21021
+dragon_ml_toolbox-3.0.0.dist-info/METADATA,sha256=nmhUu0bwN4z1letePaDzGIQlmDUaBQ32esqGB-OasU4,3273
+dragon_ml_toolbox-3.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-3.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-3.0.0.dist-info/RECORD,,
ml_tools/ETL_engineering.py
CHANGED

@@ -3,17 +3,18 @@ import re
 from typing import Literal, Union, Optional, Any, Callable, List, Dict
 from .utilities import _script_info
 import pandas as pd
+from .logger import _LOGGER


 __all__ = [
     "ColumnCleaner",
-    "DataFrameCleaner"
+    "DataFrameCleaner",
     "TransformationRecipe",
     "DataProcessor",
     "KeywordDummifier",
     "NumberExtractor",
     "MultiNumberExtractor",
-    "RatioCalculator"
+    "RatioCalculator",
     "CategoryMapper",
     "RegexMapper",
     "ValueBinner",
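
The two added commas in `__all__` are bug fixes, not style: adjacent string literals in Python are implicitly concatenated, so the old list silently fused each comma-less pair into a single bogus export name. A minimal illustration (toy list, not the package's code):

```python
# Without a comma, adjacent string literals merge into one list element.
exports = [
    "DataFrameCleaner"       # missing comma here...
    "TransformationRecipe",  # ...fuses the two names into one string
]
assert exports == ["DataFrameCleanerTransformationRecipe"]
assert len(exports) == 1  # one fused export instead of two
```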
@@ -251,7 +252,7 @@ class DataProcessor:
             raise TypeError(f"Invalid 'transform' action for '{input_col_name}': {transform_action}")

         if not processed_columns:
-
+            _LOGGER.warning("The transformation resulted in an empty DataFrame.")
             return pl.DataFrame()

         return pl.DataFrame(processed_columns)
@@ -403,7 +404,7 @@ class NumberExtractor:
         if not isinstance(round_digits, int):
             raise TypeError("round_digits must be an integer.")
         if dtype == "int":
-
+            _LOGGER.warning(f"'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")

         self.regex_pattern = regex_pattern
         self.dtype = dtype
@@ -561,9 +562,9 @@ class RatioCalculator:
         denominator = groups.struct.field("group_2").cast(pl.Float64, strict=False)

         # Safely perform division, returning null if denominator is 0
-
-
-        ).
+        final_expr = pl.when(denominator != 0).then(numerator / denominator).otherwise(None)
+
+        return pl.select(final_expr).to_series()


 class CategoryMapper:
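
The rewritten `RatioCalculator` body builds a Polars when/then/otherwise expression so a zero denominator yields null instead of raising. A self-contained sketch of the same pattern (toy column names; the class's regex-extraction step is omitted):

```python
import polars as pl

df = pl.DataFrame({"num": [10.0, 5.0, 3.0], "den": [2.0, 0.0, 1.5]})

# Null out the ratio wherever the denominator is zero.
safe_ratio = (
    pl.when(pl.col("den") != 0)
    .then(pl.col("num") / pl.col("den"))
    .otherwise(None)
    .alias("ratio")
)
print(df.select(safe_ratio).to_series().to_list())  # [5.0, None, 2.0]
```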
ml_tools/GUI_tools.py
CHANGED

@@ -7,6 +7,7 @@ from functools import wraps
 from typing import Any, Dict, Tuple, List
 from .utilities import _script_info
 import numpy as np
+from .logger import _LOGGER


 __all__ = [

@@ -46,7 +47,7 @@ class PathManager:
         if self._is_bundled:
             # In a Briefcase bundle, resource_path gives an absolute path
             # to the resource directory.
-            self.package_root = self._resource_path_func(self.package_name, "")
+            self.package_root = self._resource_path_func(self.package_name, "") # type: ignore
         else:
             # In development mode, the package root is the directory
             # containing the anchor file.

@@ -56,7 +57,7 @@
         """Checks if the app is running in a bundled environment."""
         try:
             # This is the function Briefcase provides in a bundled app
-            from briefcase.platforms.base import resource_path
+            from briefcase.platforms.base import resource_path # type: ignore
             return True, resource_path
         except ImportError:
             return False, None

@@ -147,7 +148,7 @@ class ConfigManager:
         """
         path = Path(file_path)
         if path.exists() and not force_overwrite:
-
+            _LOGGER.warning(f"Configuration file already exists at {path}. Aborting.")
             return

         config = configparser.ConfigParser()

@@ -205,7 +206,7 @@

         with open(path, 'w') as configfile:
             config.write(configfile)
-
+        _LOGGER.info(f"Successfully generated config template at: '{path}'")


 # --- GUI Factory ---

@@ -219,8 +220,8 @@ class GUIFactory:
         Initializes the factory with a configuration object.
         """
         self.config = config
-        sg.theme(self.config.general.theme)
-        sg.set_options(font=(self.config.general.font_family, 12))
+        sg.theme(self.config.general.theme) # type: ignore
+        sg.set_options(font=(self.config.general.font_family, 12)) # type: ignore

     # --- Atomic Element Generators ---
     def make_button(self, text: str, key: str, **kwargs) -> sg.Button:

@@ -234,13 +235,13 @@
             (e.g., `tooltip='Click me'`, `disabled=True`).
         """
         cfg = self.config
-        font = (cfg.fonts.font_family, cfg.fonts.button_size, cfg.fonts.button_style)
+        font = (cfg.fonts.font_family, cfg.fonts.button_size, cfg.fonts.button_style) # type: ignore

         style_args = {
-            "size": cfg.layout.button_size,
+            "size": cfg.layout.button_size, # type: ignore
             "font": font,
-            "button_color": (cfg.colors.button_text, cfg.colors.button_background),
-            "mouseover_colors": (cfg.colors.button_text, cfg.colors.button_background_hover),
+            "button_color": (cfg.colors.button_text, cfg.colors.button_background), # type: ignore
+            "mouseover_colors": (cfg.colors.button_text, cfg.colors.button_background_hover), # type: ignore
             "border_width": 0,
             **kwargs
         }

@@ -257,7 +258,7 @@
             (e.g., `title_color='red'`, `relief=sg.RELIEF_SUNKEN`).
         """
         cfg = self.config
-        font = (cfg.fonts.font_family, cfg.fonts.frame_size)
+        font = (cfg.fonts.font_family, cfg.fonts.frame_size) # type: ignore

         style_args = {
             "font": font,

@@ -289,7 +290,7 @@
         """
         cfg = self.config
         bg_color = sg.theme_background_color()
-        label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style)
+        label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style) # type: ignore

         columns = []
         for name, (val_min, val_max) in data_dict.items():

@@ -298,21 +299,21 @@

             label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")

-            input_style = {"size": cfg.layout.input_size_cont, "justification": "center"}
+            input_style = {"size": cfg.layout.input_size_cont, "justification": "center"} # type: ignore
             if is_target:
-                input_style["text_color"] = cfg.colors.target_text
-                input_style["disabled_readonly_background_color"] = cfg.colors.target_background
+                input_style["text_color"] = cfg.colors.target_text # type: ignore
+                input_style["disabled_readonly_background_color"] = cfg.colors.target_background # type: ignore

             element = sg.Input(default_text, key=key, disabled=is_target, **input_style)

             if is_target:
                 layout = [[label], [element]]
             else:
-                range_font = (cfg.fonts.font_family, cfg.fonts.range_size)
+                range_font = (cfg.fonts.font_family, cfg.fonts.range_size) # type: ignore
                 range_text = sg.Text(f"Range: {int(val_min)}-{int(val_max)}", font=range_font, background_color=bg_color)
                 layout = [[label], [element], [range_text]]

-            layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)])
+            layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
             columns.append(sg.Column(layout, background_color=bg_color))

         if layout_mode == 'row':

@@ -340,17 +341,17 @@
         """
         cfg = self.config
         bg_color = sg.theme_background_color()
-        label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style)
+        label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style) # type: ignore

         columns = []
         for name, values in data_dict.items():
             label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
             element = sg.Combo(
                 values, default_value=values[0], key=name,
-                size=cfg.layout.input_size_binary, readonly=True
+                size=cfg.layout.input_size_binary, readonly=True # type: ignore
             )
             layout = [[label], [element]]
-            layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)])
+            layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
             columns.append(sg.Column(layout, background_color=bg_color))

         if layout_mode == 'row':

@@ -370,8 +371,8 @@
         **kwargs: Additional arguments to pass to the sg.Window constructor
             (e.g., `location=(100, 100)`, `keep_on_top=True`).
         """
-        cfg = self.config.general
-        version = getattr(self.config.meta, 'version', None)
+        cfg = self.config.general # type: ignore
+        version = getattr(self.config.meta, 'version', None) # type: ignore
         full_title = f"{title} v{version}" if version else title

         window_args = {

@@ -406,9 +407,7 @@ def catch_exceptions(show_popup: bool = True):
                 sg.popup_error("An error occurred:", error_msg, title="Error")
             else:
                 # Fallback for non-GUI contexts or if popup is disabled
-
-                print(error_msg)
-                print("-----------------------------")
+                _LOGGER.error(error_msg)
         return wrapper
     return decorator
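
After this change, `catch_exceptions` reports through the shared `_LOGGER` when popups are disabled. A hypothetical usage sketch; only the decorator factory and its `show_popup` parameter come from the hunk above, the decorated function is invented:

```python
from ml_tools.GUI_tools import catch_exceptions

@catch_exceptions(show_popup=False)  # errors go to _LOGGER.error, not a popup
def parse_threshold(values: dict) -> float:
    # Deliberately fails on empty input to exercise the fallback path.
    return float(values["threshold"])

parse_threshold({})  # KeyError is caught and logged; the app keeps running
```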
ml_tools/MICE_imputation.py
CHANGED

@@ -6,6 +6,7 @@ import numpy as np
 from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values, make_fullpath
 from plotnine import ggplot, labs, theme, element_blank # type: ignore
 from typing import Optional, Union
+from .logger import _LOGGER


 __all__ = [

@@ -40,7 +41,9 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
     if binary_columns is not None:
         invalid_binary_columns = set(binary_columns) - set(df.columns)
         if invalid_binary_columns:
-
+            _LOGGER.warning(f"⚠️ These 'binary columns' are not in the dataset:")
+            for invalid_binary_col in invalid_binary_columns:
+                print(f"  - {invalid_binary_col}")
         valid_binary_columns = [col for col in binary_columns if col not in invalid_binary_columns]
         for imputed_df in imputed_datasets:
             for binary_column_name in valid_binary_columns:

@@ -125,7 +128,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
     plt.savefig(save_path, bbox_inches='tight', format="svg")
     plt.close()

-
+    _LOGGER.info(f"{dataset_file_dir} completed.")


 # Imputed distributions

@@ -210,7 +213,7 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
         fig = kernel.plot_imputed_distributions(variables=[feature])
         _process_figure(fig, feature)

-
+    _LOGGER.info(f"{local_dir_name} completed.")


 def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str],

@@ -240,7 +243,8 @@ def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str]
     all_file_paths = list(list_csv_paths(input_path).values())

     for df_path in all_file_paths:
-        df
+        df: pd.DataFrame
+        df, df_name = load_dataframe(df_path=df_path, kind="pandas") # type: ignore

         df, df_targets = _skip_targets(df, target_columns)
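
The new warning path in `apply_mice` hinges on a set difference between the requested binary columns and the DataFrame's actual columns. A tiny standalone illustration of that validation (toy names):

```python
requested = ["sex", "smoker", "typo_col"]
present = ["sex", "smoker", "age"]

# Columns asked for but absent from the data.
invalid = set(requested) - set(present)
# Keep the valid ones in their original order, as the hunk above does.
valid = [col for col in requested if col not in invalid]

print(invalid)  # {'typo_col'}
print(valid)    # ['sex', 'smoker']
```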
ml_tools/ML_callbacks.py
ADDED

@@ -0,0 +1,341 @@
+import numpy as np
+import torch
+from tqdm.auto import tqdm
+from .utilities import make_fullpath, LogKeys
+from .logger import _LOGGER
+from typing import Optional
+
+
+__all__ = [
+    "Callback",
+    "History",
+    "TqdmProgressBar",
+    "EarlyStopping",
+    "ModelCheckpoint",
+    "LRScheduler"
+]
+
+
+class Callback:
+    """
+    Abstract base class used to build new callbacks.
+
+    The methods of this class are automatically called by the Trainer at different
+    points during training. Subclasses can override these methods to implement
+    custom logic.
+    """
+    def __init__(self):
+        self.trainer = None
+
+    def set_trainer(self, trainer):
+        """This is called by the Trainer to associate itself with the callback."""
+        self.trainer = trainer
+
+    def on_train_begin(self, logs=None):
+        """Called at the beginning of training."""
+        pass
+
+    def on_train_end(self, logs=None):
+        """Called at the end of training."""
+        pass
+
+    def on_epoch_begin(self, epoch, logs=None):
+        """Called at the beginning of an epoch."""
+        pass
+
+    def on_epoch_end(self, epoch, logs=None):
+        """Called at the end of an epoch."""
+        pass
+
+    def on_batch_begin(self, batch, logs=None):
+        """Called at the beginning of a training batch."""
+        pass
+
+    def on_batch_end(self, batch, logs=None):
+        """Called at the end of a training batch."""
+        pass
+
+
+class History(Callback):
+    """
+    Callback that records events into a `history` dictionary.
+
+    This callback is automatically applied to every MyTrainer model.
+    The `history` attribute is a dictionary mapping metric names (e.g., 'val_loss')
+    to a list of metric values.
+    """
+    def on_train_begin(self, logs=None):
+        # Clear history at the beginning of training
+        self.trainer.history = {} # type: ignore
+
+    def on_epoch_end(self, epoch, logs=None):
+        logs = logs or {}
+        for k, v in logs.items():
+            # Append new log values to the history dictionary
+            self.trainer.history.setdefault(k, []).append(v) # type: ignore
+
+
+class TqdmProgressBar(Callback):
+    """Callback that provides a tqdm progress bar for training."""
+    def __init__(self):
+        self.epoch_bar = None
+        self.batch_bar = None
+
+    def on_train_begin(self, logs=None):
+        self.epochs = self.trainer.epochs # type: ignore
+        self.epoch_bar = tqdm(total=self.epochs, desc="Training Progress")
+
+    def on_epoch_begin(self, epoch, logs=None):
+        total_batches = len(self.trainer.train_loader) # type: ignore
+        self.batch_bar = tqdm(total=total_batches, desc=f"Epoch {epoch}/{self.epochs}", leave=False)
+
+    def on_batch_end(self, batch, logs=None):
+        self.batch_bar.update(1) # type: ignore
+        if logs:
+            self.batch_bar.set_postfix(loss=f"{logs.get(LogKeys.BATCH_LOSS, 0):.4f}") # type: ignore
+
+    def on_epoch_end(self, epoch, logs=None):
+        self.batch_bar.close() # type: ignore
+        self.epoch_bar.update(1) # type: ignore
+        if logs:
+            train_loss_str = f"{logs.get(LogKeys.TRAIN_LOSS, 0):.4f}"
+            val_loss_str = f"{logs.get(LogKeys.VAL_LOSS, 0):.4f}"
+            self.epoch_bar.set_postfix_str(f"Train Loss: {train_loss_str}, Val Loss: {val_loss_str}") # type: ignore
+
+    def on_train_end(self, logs=None):
+        self.epoch_bar.close() # type: ignore
+
+
+class EarlyStopping(Callback):
+    """
+    Stop training when a monitored metric has stopped improving.
+
+    Args:
+        monitor (str): Quantity to be monitored. Defaults to 'val_loss'.
+        min_delta (float): Minimum change in the monitored quantity to qualify as an improvement.
+        patience (int): Number of epochs with no improvement after which training will be stopped.
+        mode (str): One of {'auto', 'min', 'max'}. In 'min' mode, training will stop when the quantity
+            monitored has stopped decreasing; in 'max' mode it will stop when the quantity
+            monitored has stopped increasing; in 'auto' mode, the direction is automatically
+            inferred from the name of the monitored quantity.
+        verbose (int): Verbosity mode.
+    """
+    def __init__(self, monitor: str=LogKeys.VAL_LOSS, min_delta=0.0, patience=3, mode='auto', verbose=1):
+        super().__init__()
+        self.monitor = monitor
+        self.patience = patience
+        self.min_delta = min_delta
+        self.wait = 0
+        self.stopped_epoch = 0
+        self.verbose = verbose
+
+        if mode not in ['auto', 'min', 'max']:
+            raise ValueError(f"EarlyStopping mode {mode} is unknown, choose one of ('auto', 'min', 'max')")
+        self.mode = mode
+
+        # Determine the comparison operator based on the mode
+        if self.mode == 'min':
+            self.monitor_op = np.less
+        elif self.mode == 'max':
+            self.monitor_op = np.greater
+        else: # auto mode
+            if 'acc' in self.monitor.lower():
+                self.monitor_op = np.greater
+            else: # Default to min mode for loss or other metrics
+                self.monitor_op = np.less
+
+        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
+
+    def on_train_begin(self, logs=None):
+        # Reset state at the beginning of training
+        self.wait = 0
+        self.stopped_epoch = 0
+        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
+
+    def on_epoch_end(self, epoch, logs=None):
+        current = logs.get(self.monitor) # type: ignore
+        if current is None:
+            return
+
+        # Determine the comparison threshold based on the mode
+        if self.monitor_op == np.less:
+            # For 'min' mode, we need to be smaller than 'best' by at least 'min_delta'
+            # Correct check: current < self.best - self.min_delta
+            is_improvement = self.monitor_op(current, self.best - self.min_delta)
+        else:
+            # For 'max' mode, we need to be greater than 'best' by at least 'min_delta'
+            # Correct check: current > self.best + self.min_delta
+            is_improvement = self.monitor_op(current, self.best + self.min_delta)
+
+        if is_improvement:
+            if self.verbose > 1:
+                _LOGGER.info(f"EarlyStopping: {self.monitor} improved from {self.best:.4f} to {current:.4f}")
+            self.best = current
+            self.wait = 0
+        else:
+            self.wait += 1
+            if self.wait >= self.patience:
+                self.stopped_epoch = epoch
+                self.trainer.stop_training = True # type: ignore
+                if self.verbose > 0:
+                    print("")
+                    _LOGGER.info(f"Epoch {epoch+1}: early stopping after {self.wait} epochs with no improvement.")
+
+
+class ModelCheckpoint(Callback):
+    """
+    Saves the model to a directory with automated filename generation and rotation. The filename includes the epoch and score.
+
+    - If `save_best_only` is True, it saves the single best model, deleting the
+      previous best.
+    - If `save_best_only` is False, it keeps the 3 most recent checkpoints,
+      deleting the oldest ones automatically.
+
+    Args:
+        save_dir (str): Directory where checkpoint files will be saved.
+        monitor (str): Metric to monitor for `save_best_only=True`.
+        save_best_only (bool): If true, save only the best model.
+        mode (str): One of {'auto', 'min', 'max'}.
+        verbose (int): Verbosity mode.
+    """
+    def __init__(self, save_dir: str, monitor: str = LogKeys.VAL_LOSS,
+                 save_best_only: bool = False, mode: str = 'auto', verbose: int = 1):
+        super().__init__()
+        self.save_dir = make_fullpath(save_dir, make=True)
+        if not self.save_dir.is_dir():
+            _LOGGER.error(f"{save_dir} is not a valid directory.")
+            raise IOError()
+
+        self.monitor = monitor
+        self.save_best_only = save_best_only
+        self.verbose = verbose
+
+        # State variables to be managed during training
+        self.saved_checkpoints = []
+        self.last_best_filepath = None
+
+        if mode not in ['auto', 'min', 'max']:
+            raise ValueError(f"ModelCheckpoint mode {mode} is unknown.")
+        self.mode = mode
+
+        if self.mode == 'min':
+            self.monitor_op = np.less
+        elif self.mode == 'max':
+            self.monitor_op = np.greater
+        else:
+            self.monitor_op = np.less if 'loss' in self.monitor else np.greater
+
+        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
+
+    def on_train_begin(self, logs=None):
+        """Reset state when training starts."""
+        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
+        self.saved_checkpoints = []
+        self.last_best_filepath = None
+
+    def on_epoch_end(self, epoch, logs=None):
+        logs = logs or {}
+        self.save_dir.mkdir(parents=True, exist_ok=True)
+
+        if self.save_best_only:
+            self._save_best_model(epoch, logs)
+        else:
+            self._save_rolling_checkpoints(epoch, logs)
+
+    def _save_best_model(self, epoch, logs):
+        """Saves a single best model and deletes the previous one."""
+        current = logs.get(self.monitor)
+        if current is None:
+            return
+
+        if self.monitor_op(current, self.best):
+            old_best_str = f"{self.best:.4f}" if self.best not in [np.Inf, -np.Inf] else "inf"
+
+            # Create a descriptive filename
+            filename = f"epoch_{epoch}-{self.monitor}_{current:.4f}.pth"
+            new_filepath = self.save_dir / filename
+
+            if self.verbose > 0:
+                print("")
+                _LOGGER.info(f"Epoch {epoch}: {self.monitor} improved from {old_best_str} to {current:.4f}, saving model to {new_filepath}")
+
+            # Save the new best model
+            torch.save(self.trainer.model.state_dict(), new_filepath) # type: ignore
+
+            # Delete the old best model file
+            if self.last_best_filepath and self.last_best_filepath.exists():
+                self.last_best_filepath.unlink()
+
+            # Update state
+            self.best = current
+            self.last_best_filepath = new_filepath
+
+    def _save_rolling_checkpoints(self, epoch, logs):
+        """Saves the latest model and keeps only the last 5."""
+        filename = f"epoch_{epoch}.pth"
+        filepath = self.save_dir / filename
+
+        if self.verbose > 0:
+            print("")
+            _LOGGER.info(f'Epoch {epoch}: saving model to {filepath}')
+        torch.save(self.trainer.model.state_dict(), filepath) # type: ignore
+
+        self.saved_checkpoints.append(filepath)
+
+        # If we have more than n checkpoints, remove the oldest one
+        if len(self.saved_checkpoints) > 3:
+            file_to_delete = self.saved_checkpoints.pop(0)
+            if file_to_delete.exists():
+                if self.verbose > 0:
+                    _LOGGER.info(f"  -> Deleting old checkpoint: {file_to_delete.name}")
+                file_to_delete.unlink()
+
+
+class LRScheduler(Callback):
+    """
+    Callback to manage a PyTorch learning rate scheduler.
+
+    This callback automatically calls the scheduler's `step()` method at the
+    end of each epoch. It also logs a message when the learning rate changes.
+
+    Args:
+        scheduler: An initialized PyTorch learning rate scheduler.
+        monitor (str, optional): The metric to monitor for schedulers that
+            require it, like `ReduceLROnPlateau`.
+            Should match a key in the logs (e.g., 'val_loss').
+    """
+    def __init__(self, scheduler, monitor: Optional[str] = None):
+        super().__init__()
+        self.scheduler = scheduler
+        self.monitor = monitor
+        self.previous_lr = None
+
+    def on_train_begin(self, logs=None):
+        """Store the initial learning rate."""
+        self.previous_lr = self.trainer.optimizer.param_groups[0]['lr'] # type: ignore
+
+    def on_epoch_end(self, epoch, logs=None):
+        """Step the scheduler and log any change in learning rate."""
+        # For schedulers that need a metric (e.g., val_loss)
+        if isinstance(self.scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
+            if self.monitor is None:
+                raise ValueError("LRScheduler needs a `monitor` metric for ReduceLROnPlateau.")
+
+            metric_val = logs.get(self.monitor) # type: ignore
+            if metric_val is not None:
+                self.scheduler.step(metric_val)
+            else:
+                print("")
+                _LOGGER.warning(f"LRScheduler could not find metric '{self.monitor}' in logs.")
+
+        # For all other schedulers
+        else:
+            self.scheduler.step()
+
+        # Log the change if the LR was updated
+        current_lr = self.trainer.optimizer.param_groups[0]['lr'] # type: ignore
+        if current_lr != self.previous_lr:
+            print("")
+            _LOGGER.info(f"Epoch {epoch}: Learning rate changed to {current_lr:.6f}")
+            self.previous_lr = current_lr
+
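
ML_trainer.py itself is not shown in this section, so the exact training API is an assumption; the sketch below wires the new callbacks together using only what ML_callbacks.py relies on (a trainer exposing `model`, `optimizer`, `train_loader`, `epochs`, `stop_training`, and `history`, named `MyTrainer` per the History docstring):

```python
import torch
from ml_tools.ML_callbacks import EarlyStopping, ModelCheckpoint, LRScheduler

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2)

callbacks = [
    EarlyStopping(patience=5),                            # stop once val loss plateaus
    ModelCheckpoint("checkpoints", save_best_only=True),  # keep the single best .pth
    LRScheduler(plateau, monitor="val_loss"),             # ReduceLROnPlateau needs a metric
]

# Hypothetical wiring; the constructor and fit signature are assumptions:
# trainer = MyTrainer(model, optimizer, train_loader, ..., callbacks=callbacks)
# trainer.fit(epochs=50)
```

Note the division of labor this implies: the trainer only emits events and a `logs` dict; stopping, checkpoint rotation, and LR stepping are each owned by one callback.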