congrads 1.0.6-py3-none-any.whl → 1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
congrads/__init__.py CHANGED
@@ -1,6 +1,4 @@
-# pylint: skip-file
-
-try:
+try:  # noqa: D104
     from importlib.metadata import version as get_version  # Python 3.8+
 except ImportError:
     from pkg_resources import (
@@ -25,5 +23,6 @@ __all__ = [
     "descriptor",
     "metrics",
     "networks",
+    "transformations",
     "utils",
 ]
congrads/checkpoints.py CHANGED
@@ -1,130 +1,113 @@
-"""
-This module provides a `CheckpointManager` class for managing the saving and
-loading of checkpoints during PyTorch model training.
-
-The `CheckpointManager` handles:
-
-- Saving and loading the state of models, optimizers, and metrics.
-- Registering and evaluating performance criteria to determine if a model's
-  performance has improved, enabling automated saving of the best-performing
-  model checkpoints.
-- Resuming training from a specific checkpoint.
-
-Usage:
-    1. Initialize the `CheckpointManager` with a PyTorch model, optimizer,
-       and metric manager.
-    2. Register criteria for tracking and evaluating metrics.
-    3. Use the `save` and `load` methods to manage checkpoints during training.
-    4. Call `evaluate_criteria` to automatically evaluate and save the
-       best-performing checkpoints.
-
-Dependencies:
-    - PyTorch (`torch`)
+"""Module for managing PyTorch model checkpoints.
+
+Provides the `CheckpointManager` class to save and load model and optimizer
+states during training, track the best metric values, and optionally report
+checkpoint events.
 """
 
 import os
+from collections.abc import Callable
 from pathlib import Path
-from typing import Callable
 
-from torch import Tensor, gt, load, save
+from torch import Tensor, load, save
 from torch.nn import Module
 from torch.optim import Optimizer
 
 from .metrics import MetricManager
-from .utils import validate_comparator_pytorch, validate_type
+from .utils import validate_callable, validate_type
 
 
 class CheckpointManager:
-    """
-    A class to handle saving and loading checkpoints for
-    PyTorch models and optimizers.
-
-    Args:
-        network (torch.nn.Module): The network (model) to save/load.
-        optimizer (torch.optim.Optimizer): The optimizer to save/load.
-        metric_manager (MetricManager): The metric manager to restore saved
-            metric states.
-        save_dir (str): Directory where checkpoints will be saved. Defaults
-            to 'checkpoints'.
-        create_dir (bool): Whether to create the save_dir if it does not exist.
-            Defaults to False.
-
-    Raises:
-        TypeError: If a provided attribute has an incompatible type.
-        FileNotFoundError: If the save directory does not exist and create_dir
-            is set to False.
+    """Manage saving and loading checkpoints for PyTorch models and optimizers.
+
+    Handles checkpointing based on a criteria function, restores metric
+    states, and optionally reports when a checkpoint is saved.
     """
 
     def __init__(
         self,
+        criteria_function: Callable[[dict[str, Tensor], dict[str, Tensor]], bool],
         network: Module,
         optimizer: Optimizer,
         metric_manager: MetricManager,
         save_dir: str = "checkpoints",
         create_dir: bool = False,
+        report_save: bool = False,
     ):
-        """
-        Initialize the checkpoint manager.
-        """
+        """Initialize the CheckpointManager.
+
+        Args:
+            criteria_function (Callable[[dict[str, Tensor], dict[str, Tensor]], bool]):
+                Function that determines if the current checkpoint should be
+                saved based on the current and best metric values.
+            network (torch.nn.Module): The model to save/load.
+            optimizer (torch.optim.Optimizer): The optimizer to save/load.
+            metric_manager (MetricManager): Manages metric states for checkpointing.
+            save_dir (str, optional): Directory to save checkpoints. Defaults to 'checkpoints'.
+            create_dir (bool, optional): Whether to create `save_dir` if it does not exist.
+                Defaults to False.
+            report_save (bool, optional): Whether to report when a checkpoint is saved.
+                Defaults to False.
 
+        Raises:
+            TypeError: If any provided attribute has an incompatible type.
+            FileNotFoundError: If `save_dir` does not exist and `create_dir` is False.
+        """
         # Type checking
+        validate_callable("criteria_function", criteria_function)
         validate_type("network", network, Module)
         validate_type("optimizer", optimizer, Optimizer)
         validate_type("metric_manager", metric_manager, MetricManager)
         validate_type("create_dir", create_dir, bool)
+        validate_type("report_save", report_save, bool)
 
         # Create path or raise error if create_dir is not found
         if not os.path.exists(save_dir):
             if not create_dir:
                 raise FileNotFoundError(
-                    f"Save directory '{str(save_dir)}' configured in "
-                    "checkpoint manager is not found."
+                    f"Save directory '{save_dir}' configured in checkpoint manager is not found."
                 )
             Path(save_dir).mkdir(parents=True, exist_ok=True)
 
         # Initialize objects variables
+        self.criteria_function = criteria_function
         self.network = network
         self.optimizer = optimizer
         self.metric_manager = metric_manager
         self.save_dir = save_dir
+        self.report_save = report_save
 
-        self.criteria: dict[str, Callable[[Tensor, Tensor], Tensor]] = {}
-        self.best_metrics: dict[str, Tensor] = {}
+        self.best_metric_values: dict[str, Tensor] = {}
 
-    def register(
-        self,
-        metric_name: str,
-        comparator: Callable[[Tensor, Tensor], Tensor] = gt,
-    ):
-        """
-        Register a criterion for evaluating a performance metric
-        during training.
+    def evaluate_criteria(self, epoch: int, metric_group: str = "during_training"):
+        """Evaluate the criteria function to determine if a better model is found.
 
-        Stores the comparator to determine whether the current metric has
-        improved relative to the previous best metric value.
+        Aggregates the current metric values during training and applies the
+        criteria function. If the criteria function indicates improvement, the
+        best metric values are updated, a checkpoint is saved, and a message is
+        optionally printed.
 
         Args:
-            metric_name (str): The name of the metric to evaluate.
-            comparator (Callable[[Tensor, Tensor], Tensor], optional):
-                A function that compares the current metric value against the
-                previous best value. Defaults to a greater-than (gt) comparison.
-
-        Raises:
-            TypeError: If a provided attribute has an incompatible type.
-
+            epoch (int): The current epoch number.
+            metric_group (str, optional): The metric group to evaluate. Defaults to 'during_training'.
         """
+        current_metric_values = self.metric_manager.aggregate(metric_group)
+        if self.criteria_function is not None and self.criteria_function(
+            current_metric_values, self.best_metric_values
+        ):
+            # Print message if a new checkpoint is saved
+            if self.report_save:
+                print(f"New checkpoint saved at epoch {epoch}.")
 
-        validate_type("metric_name", metric_name, str)
-        validate_comparator_pytorch("comparator", comparator)
-        validate_comparator_pytorch("comparator", comparator)
+            # Update current best metric values
+            for metric_name, metric_value in current_metric_values.items():
+                self.best_metric_values[metric_name] = metric_value
 
-        self.criteria[metric_name] = comparator
+            # Save the current state
+            self.save(epoch)
 
-    def resume(
-        self, filename: str = "checkpoint.pth", ignore_missing: bool = False
-    ) -> int:
-        """
-        Resumes training from a saved checkpoint file.
+    def resume(self, filename: str = "checkpoint.pth", ignore_missing: bool = False) -> int:
+        """Resumes training from a saved checkpoint file.
 
         Args:
             filename (str): The name of the checkpoint file to load.
@@ -141,7 +124,6 @@ class CheckpointManager:
             TypeError: If a provided attribute has an incompatible type.
             FileNotFoundError: If the specified checkpoint file does not exist.
         """
-
         # Type checking
         validate_type("filename", filename, str)
         validate_type("ignore_missing", ignore_missing, bool)
@@ -149,84 +131,48 @@ class CheckpointManager:
         # Return starting epoch, either from checkpoint file or default
        filepath = os.path.join(self.save_dir, filename)
         if os.path.exists(filepath):
-            checkpoint = self.load("checkpoint.pth")
+            checkpoint = self.load(filename)
             return checkpoint["epoch"]
         elif ignore_missing:
             return 0
         else:
-            raise FileNotFoundError(
-                f"A checkpoint was not found at {filepath} to resume training."
-            )
-
-    def evaluate_criteria(self, epoch: int):
-        """
-        Evaluate the defined criteria for model performance metrics
-        during training.
-
-        Args:
-            epoch (int): The current epoch number.
-
-        Compares the current metrics against the previous best metrics using
-        predefined comparators. If a criterion is met, saves the model and
-        the corresponding best metric values.
-        """
-
-        for metric_name, comparator in self.criteria.items():
+            raise FileNotFoundError(f"A checkpoint was not found at {filepath} to resume training.")
 
-            current_metric_value = self.metric_manager.metrics[
-                metric_name
-            ].aggregate()
-            best_metric_value = self.best_metrics.get(metric_name)
-
-            # TODO improve efficiency by not checking is None each iteration
-            if best_metric_value is None or comparator(
-                current_metric_value,
-                best_metric_value,
-            ):
-                self.save(epoch)
-                self.best_metrics[metric_name] = current_metric_value
-
-    def save(
-        self,
-        epoch: int,
-        filename: str = "checkpoint.pth",
-    ):
-        """
-        Save a checkpoint.
+    def save(self, epoch: int, filename: str = "checkpoint.pth"):
+        """Save a checkpoint.
 
         Args:
             epoch (int): Current epoch number.
             filename (str): Name of the checkpoint file. Defaults to
                 'checkpoint.pth'.
         """
-
         state = {
             "epoch": epoch,
             "network_state": self.network.state_dict(),
             "optimizer_state": self.optimizer.state_dict(),
-            "best_metrics": self.best_metrics,
+            "best_metrics": self.best_metric_values,
         }
         filepath = os.path.join(self.save_dir, filename)
         save(state, filepath)
 
     def load(self, filename: str):
-        """
-        Load a checkpoint and restores the state of the network, optimizer
-        and best_metrics.
+        """Load a checkpoint and restore the training state.
+
+        Loads the checkpoint from the specified file and restores the network
+        weights, optimizer state, and best metric values.
 
         Args:
             filename (str): Name of the checkpoint file.
 
         Returns:
-            dict: A dictionary containing the loaded checkpoint
-                information (epoch, loss, etc.).
+            dict: A dictionary containing the loaded checkpoint information,
+                including epoch, loss, and other relevant training state.
         """
-
         filepath = os.path.join(self.save_dir, filename)
 
-        checkpoint = load(filepath)
+        checkpoint = load(filepath, weights_only=True)
         self.network.load_state_dict(checkpoint["network_state"])
         self.optimizer.load_state_dict(checkpoint["optimizer_state"])
-        self.best_metrics = checkpoint["best_metrics"]
+        self.best_metric_values = checkpoint["best_metrics"]
 
         return checkpoint
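
The diff above replaces the old register/comparator workflow with a single criteria function passed to the constructor. As a rough illustration only, the sketch below shows how the 1.1.0 CheckpointManager might be wired into a training loop; the loss_improved helper, the "loss" metric name, the train_with_checkpoints wrapper, and the way metrics are recorded into the MetricManager are assumptions for this example and are not part of the diff.

from torch import Tensor
from torch.nn import Module
from torch.optim import Optimizer

from congrads.checkpoints import CheckpointManager
from congrads.metrics import MetricManager


def loss_improved(current: dict[str, Tensor], best: dict[str, Tensor]) -> bool:
    # Save on the first evaluation, then whenever the aggregated "loss"
    # value (an assumed metric name) decreases.
    return "loss" not in best or bool(current["loss"] < best["loss"])


def train_with_checkpoints(
    network: Module,
    optimizer: Optimizer,
    metric_manager: MetricManager,
    num_epochs: int,
) -> None:
    manager = CheckpointManager(
        criteria_function=loss_improved,
        network=network,
        optimizer=optimizer,
        metric_manager=metric_manager,
        save_dir="checkpoints",
        create_dir=True,
        report_save=True,
    )

    # Pick up from an existing checkpoint.pth if present, otherwise start at 0.
    start_epoch = manager.resume(ignore_missing=True)

    for epoch in range(start_epoch, num_epochs):
        ...  # training/validation steps that record values into metric_manager
        # Aggregates the "during_training" metric group and saves a checkpoint
        # when loss_improved() returns True.
        manager.evaluate_criteria(epoch, metric_group="during_training")

Compared with 1.0.6, the improvement check is now centralized in one callable over the full metric dictionaries rather than one comparator per registered metric, and load() passes weights_only=True to torch.load.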