congrads 0.2.0-py3-none-any.whl → 1.0.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
congrads/datasets.py CHANGED
@@ -1,16 +1,81 @@
+"""
+This module defines several PyTorch dataset classes for loading and
+working with various datasets. Each dataset class extends the
+`torch.utils.data.Dataset` class and provides functionality for
+downloading, loading, and transforming specific datasets.
+
+Classes:
+
+- BiasCorrection: A dataset class for the Bias Correction dataset
+  focused on temperature forecast data.
+- FamilyIncome: A dataset class for the Family Income and
+  Expenditure dataset.
+- NoisySines: A dataset class that generates noisy sine wave
+  samples with added Gaussian noise.
+
+Each dataset class provides methods for downloading the data
+(if not already available), checking the integrity of the dataset, loading
+the data from CSV files or generating synthetic data, and applying
+transformations to the data.
+
+Key Methods:
+
+- `__init__`: Initializes the dataset by specifying the root directory,
+  transformation function, and optional download flag.
+- `__getitem__`: Retrieves a specific data point given its index,
+  returning input-output pairs.
+- `__len__`: Returns the total number of examples in the dataset.
+- `download`: Downloads and extracts the dataset from
+  the specified mirrors.
+- `_load_data`: Loads the dataset from CSV files and
+  applies transformations.
+- `_check_exists`: Checks if the dataset is already
+  downloaded and verified.
+
+Each dataset class accepts a custom transformation through the
+`transform` argument to allow pre-processing, and offers the ability
+to download the dataset if it is not already present on the local disk.
+"""
+
 import os
-from urllib.error import URLError
-import numpy as np
 from pathlib import Path
 from typing import Callable, Union
+from urllib.error import URLError
+
+import numpy as np
 import pandas as pd
-from torch.utils.data import Dataset
 import torch
-
-from torchvision.datasets.utils import check_integrity, download_and_extract_archive
+from torch.utils.data import Dataset
+from torchvision.datasets.utils import (
+    check_integrity,
+    download_and_extract_archive,
+)
 
 
 class BiasCorrection(Dataset):
+    """
+    A dataset class for accessing the Bias Correction dataset.
+
+    This class extends the `Dataset` class and provides functionality for
+    downloading, loading, and transforming the Bias Correction dataset.
+    The dataset is focused on temperature forecast data and is made available
+    for use with PyTorch. If `download` is set to True, the dataset will be
+    downloaded if it is not already available. The data is then loaded,
+    and a transformation function is applied to it.
+
+    Args:
+        root (Union[str, Path]): The root directory where the dataset
+            will be stored or loaded from.
+        transform (Callable): A function to transform the dataset
+            (e.g., preprocessing).
+        download (bool, optional): Whether to download the dataset if it's
+            not already present. Defaults to False.
+
+    Raises:
+        RuntimeError: If the dataset is not found and `download`
+            is not set to True, or if all mirrors fail to provide the dataset.
+    """
 
     mirrors = [
         "https://archive.ics.uci.edu/static/public/514/",
@@ -18,6 +83,7 @@ class BiasCorrection(Dataset):
 
     resources = [
         (
+            # pylint: disable-next=line-too-long
            "bias+correction+of+numerical+prediction+model+temperature+forecast.zip",
            "3deee56d461a2686887c4ae38fe3ccf3",
         ),
@@ -29,6 +95,9 @@
         transform: Callable,
         download: bool = False,
     ) -> None:
+        """
+        Constructor method to initialize the dataset.
+        """
 
         super().__init__()
         self.root = root
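For reference, a minimal usage sketch of the constructor documented above. The root path and the min-max transform are illustrative, and the exact contract of `transform` is an assumption here (the diff shows `_load_data` applying it to the loaded CSV data, but not its exact inputs and outputs):

    from torch.utils.data import DataLoader
    from congrads.datasets import BiasCorrection

    # Hypothetical transform; a DataFrame-in/DataFrame-out scaler is assumed.
    def min_max_scale(df):
        return (df - df.min()) / (df.max() - df.min())

    dataset = BiasCorrection(root="./data", transform=min_max_scale, download=True)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)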
@@ -45,6 +114,17 @@
         self.data_input, self.data_output = self._load_data()
 
     def _load_data(self):
+        """
+        Loads the dataset from the CSV file and applies the transformation.
+
+        The data is read from the `Bias_correction_ucl.csv` file, and the
+        transformation function is applied to it. The input and output
+        data are separated and returned as numpy arrays.
+
+        Returns:
+            Tuple[numpy.ndarray, numpy.ndarray]: A tuple containing the input
+            and output data as numpy arrays.
+        """
 
         data: pd.DataFrame = pd.read_csv(
             os.path.join(self.data_folder, "Bias_correction_ucl.csv")
@@ -56,10 +136,27 @@
         return data_input, data_output
 
     def __len__(self):
+        """
+        Returns the number of examples in the dataset.
+
+        Returns:
+            int: The number of examples in the dataset
+            (i.e., the number of rows in the input data).
+        """
 
         return self.data_input.shape[0]
 
     def __getitem__(self, idx):
+        """
+        Returns the input-output pair for a given index.
+
+        Args:
+            idx (int): The index of the example to retrieve.
+
+        Returns:
+            Tuple[torch.Tensor, torch.Tensor]: The input-output pair
+            as PyTorch tensors.
+        """
 
         example = self.data_input[idx, :]
         target = self.data_output[idx, :]
@@ -69,16 +166,44 @@
 
     @property
     def data_folder(self) -> str:
+        """
+        Returns the path to the folder where the dataset is stored.
+
+        Returns:
+            str: The path to the dataset folder.
+        """
 
         return os.path.join(self.root, self.__class__.__name__)
 
     def _check_exists(self) -> bool:
+        """
+        Checks if the dataset is already downloaded and verified.
+
+        This method checks that all required files exist and
+        their integrity is validated via MD5 checksums.
+
+        Returns:
+            bool: True if all resources exist and their
+            integrity is valid, False otherwise.
+        """
+
         return all(
             check_integrity(os.path.join(self.data_folder, file_path), checksum)
             for file_path, checksum in self.resources
         )
 
     def download(self) -> None:
+        """
+        Downloads and extracts the dataset.
+
+        This method attempts to download the dataset from the mirrors and
+        extract it into the appropriate folder. If any error occurs during
+        downloading, it will try each mirror in sequence.
+
+        Raises:
+            RuntimeError: If all mirrors fail to provide the dataset.
+        """
+
         if self._check_exists():
             return
 
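As a sketch of what `_check_exists` does under the hood: torchvision's `check_integrity` returns True only when the file exists and, if an MD5 is supplied, its checksum matches. The `./data` root below is an assumption; the `BiasCorrection` subfolder follows the `data_folder` property shown above:

    import os
    from torchvision.datasets.utils import check_integrity

    filename = (
        "bias+correction+of+numerical+prediction+model"
        "+temperature+forecast.zip"
    )
    md5 = "3deee56d461a2686887c4ae38fe3ccf3"
    # True only if ./data/BiasCorrection/<filename> exists and matches the MD5.
    print(check_integrity(os.path.join("./data", "BiasCorrection", filename), md5))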
@@ -91,7 +216,10 @@
                 url = f"{mirror}{filename}"
                 try:
                     download_and_extract_archive(
-                        url, download_root=self.data_folder, filename=filename, md5=md5
+                        url,
+                        download_root=self.data_folder,
+                        filename=filename,
+                        md5=md5,
                     )
                 except URLError as e:
                     errors.append(e)
@@ -104,15 +232,40 @@
                 raise RuntimeError(s)
 
 
-class FiniteIncome(Dataset):
+class FamilyIncome(Dataset):
+    """
+    A dataset class for accessing the Family Income and Expenditure dataset.
+
+    This class extends the `Dataset` class and provides functionality for
+    downloading, loading, and transforming the Family Income and
+    Expenditure dataset for use with PyTorch. If `download` is set to True,
+    the dataset will be downloaded if it is not already available. The data
+    is then loaded, and a user-defined transformation function is applied
+    to it.
+
+    Args:
+        root (Union[str, Path]): The root directory where the dataset will
+            be stored or loaded from.
+        transform (Callable): A function to transform the dataset
+            (e.g., preprocessing).
+        download (bool, optional): Whether to download the dataset if it's
+            not already present. Defaults to False.
+
+    Raises:
+        RuntimeError: If the dataset is not found and `download`
+            is not set to True, or if all mirrors fail to provide the dataset.
+    """
 
     mirrors = [
-        "https://www.kaggle.com/api/v1/datasets/download/grosvenpaul/",
+        # pylint: disable-next=line-too-long
+        "https://www.kaggle.com/api/v1/datasets/download/grosvenpaul/family-income-and-expenditure",
     ]
 
     resources = [
         (
-            "family-income-and-expenditure",
+            "archive.zip",
            "7d74bc7facc3d7c07c4df1c1c6ac563e",
         ),
     ]
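Note the rename: 0.2.0's `FiniteIncome` is now `FamilyIncome`, the Kaggle mirror URL embeds the full dataset path, and the expected archive name is `archive.zip`. Downstream code only needs the import updated; `my_transform` below is a placeholder:

    from congrads.datasets import FamilyIncome  # was FiniteIncome in 0.2.0

    dataset = FamilyIncome(root="./data", transform=my_transform, download=True)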
@@ -123,6 +276,10 @@ class FiniteIncome(Dataset):
         transform: Callable,
         download: bool = False,
     ) -> None:
+        """
+        Constructor method to initialize the dataset.
+        """
+
         super().__init__()
         self.root = root
         self.transform = transform
@@ -138,6 +295,18 @@ class FiniteIncome(Dataset):
         self.data_input, self.data_output = self._load_data()
 
     def _load_data(self):
+        """
+        Loads the Family Income and Expenditure dataset from the CSV file
+        and applies the transformation.
+
+        The data is read from the `Family Income and Expenditure.csv` file,
+        and the transformation function is applied to it. The input and
+        output data are separated and returned as numpy arrays.
+
+        Returns:
+            Tuple[numpy.ndarray, numpy.ndarray]: A tuple containing the input
+            and output data as numpy arrays.
+        """
 
         data: pd.DataFrame = pd.read_csv(
             os.path.join(self.data_folder, "Family Income and Expenditure.csv")
@@ -149,9 +318,28 @@ class FiniteIncome(Dataset):
         return data_input, data_output
 
     def __len__(self):
+        """
+        Returns the number of examples in the dataset.
+
+        Returns:
+            int: The number of examples in the dataset
+            (i.e., the number of rows in the input data).
+        """
+
         return self.data_input.shape[0]
 
     def __getitem__(self, idx):
+        """
+        Returns the input-output pair for a given index.
+
+        Args:
+            idx (int): The index of the example to retrieve.
+
+        Returns:
+            Tuple[torch.Tensor, torch.Tensor]: The input-output pair
+            as PyTorch tensors.
+        """
+
         example = self.data_input[idx, :]
         target = self.data_output[idx, :]
         example = torch.tensor(example)
@@ -160,15 +348,43 @@
 
     @property
     def data_folder(self) -> str:
+        """
+        Returns the path to the folder where the dataset is stored.
+
+        Returns:
+            str: The path to the dataset folder.
+        """
+
         return os.path.join(self.root, self.__class__.__name__)
 
     def _check_exists(self) -> bool:
+        """
+        Checks if the dataset is already downloaded and verified.
+
+        This method checks that all required files exist and
+        their integrity is validated via MD5 checksums.
+
+        Returns:
+            bool: True if all resources exist and their
+            integrity is valid, False otherwise.
+        """
+
         return all(
             check_integrity(os.path.join(self.data_folder, file_path), checksum)
             for file_path, checksum in self.resources
         )
 
     def download(self) -> None:
+        """
+        Downloads and extracts the dataset.
+
+        This method attempts to download the dataset from the mirrors
+        and extract it into the appropriate folder. If any error occurs
+        during downloading, it will try each mirror in sequence.
+
+        Raises:
+            RuntimeError: If all mirrors fail to provide the dataset.
+        """
 
         if self._check_exists():
             return
@@ -179,10 +395,13 @@ class FiniteIncome(Dataset):
         for filename, md5 in self.resources:
             errors = []
             for mirror in self.mirrors:
-                url = f"{mirror}{filename}"
+                url = f"{mirror}"
                 try:
                     download_and_extract_archive(
-                        url, download_root=self.data_folder, filename=filename, md5=md5
+                        url,
+                        download_root=self.data_folder,
+                        filename=filename,
+                        md5=md5,
                     )
                 except URLError as e:
                     errors.append(e)
@@ -193,3 +412,88 @@ class FiniteIncome(Dataset):
                 for mirror, err in zip(self.mirrors, errors):
                     s += f"Tried {mirror}, got:\n{str(err)}\n"
                 raise RuntimeError(s)
+
+
+class NoisySines(Dataset):
+    """
+    A PyTorch dataset generating noisy sine and cosine wave samples.
+
+    Args:
+        length (int): Number of data points in the dataset.
+        amplitude (float): Amplitude of the sine wave.
+        frequency (float): Frequency of the sine wave in Hz.
+        noise_std (float): Standard deviation of the Gaussian noise.
+        bias (float): Offset from zero.
+
+    The sine and cosine signals are constant (equal to `bias`) over the
+    first half of the time range and follow standard sine and cosine
+    waves over the second half, with Gaussian noise added to all points.
+    """
+
+    def __init__(
+        self,
+        length,
+        amplitude=1,
+        frequency=10.0,
+        noise_std=0.05,
+        bias=0,
+        random_seed=42,
+    ):
+        """
+        Initializes the NoisySines dataset.
+        """
+        self.length = length
+        self.amplitude = amplitude
+        self.frequency = frequency
+        self.noise_std = noise_std
+        self.bias = bias
+        self.random_seed = random_seed
+
+        np.random.seed(self.random_seed)
+        self.time = np.linspace(0, 1, length)
+        self.noise = np.random.normal(0, self.noise_std, length)
+
+    def __getitem__(self, idx):
+        """
+        Returns the time and noisy sine/cosine values for a given index.
+
+        Args:
+            idx (int): Index of the data point to retrieve.
+
+        Returns:
+            Tuple[torch.Tensor, torch.Tensor]: A tuple containing the
+            time value and the noisy sine and cosine values.
+        """
+
+        t = self.time[idx]
+        if idx < self.length // 2:
+            sine_value = self.bias
+            cosine_value = self.bias
+        else:
+            sine_value = (
+                self.amplitude * np.sin(2 * np.pi * self.frequency * t)
+                + self.bias
+            )
+            cosine_value = (
+                self.amplitude * np.cos(2 * np.pi * self.frequency * t)
+                + self.bias
+            )
+
+        # Add noise to the signals
+        noisy_sine = sine_value + self.noise[idx]
+        noisy_cosine = cosine_value + self.noise[idx]
+
+        # Convert to tensor
+        example, target = torch.tensor([t], dtype=torch.float32), torch.tensor(
+            [noisy_sine, noisy_cosine], dtype=torch.float32
+        )
+        return example, target
+
+    def __len__(self):
+        """
+        Returns the total number of data points in the dataset.
+
+        Returns:
+            int: The length of the dataset.
+        """
+        return self.length
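Since `NoisySines` is fully synthetic, it needs no root directory or download step. A minimal sketch using only the constructor arguments shown above:

    from torch.utils.data import DataLoader
    from congrads.datasets import NoisySines

    dataset = NoisySines(length=1000, frequency=5.0, noise_std=0.1)
    t, target = dataset[750]  # t: shape (1,); target: noisy (sine, cosine) pair
    loader = DataLoader(dataset, batch_size=64, shuffle=True)
    for inputs, targets in loader:  # inputs: (64, 1), targets: (64, 2)
        break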
congrads/descriptor.py CHANGED
@@ -1,17 +1,63 @@
+"""
+This module defines the `Descriptor` class, which is designed to manage
+the mapping between neuron names, their corresponding layers, and additional
+properties such as constant or variable status. It provides a way to easily
+place constraints on parts of your network by referencing neuron names
+instead of indices.
+
+The `Descriptor` class supports registering neurons with associated layers,
+indices, and optional attributes, such as whether the layer is constant
+or variable.
+
+Key Methods:
+
+- `__init__`: Initializes the `Descriptor` object with empty mappings
+  and sets for managing neurons and layers.
+- `add`: Registers a neuron with its associated layer, index, and
+  optional constant status.
+"""
+
+from .utils import validate_type
+
+
 class Descriptor:
-    # TODO regenerate documentation
+    """
+    A class to manage the mapping between neuron names, their corresponding
+    layers, and additional properties (such as constant or variable status).
+
+    This class is designed to track the relationship between neurons and
+    layers in a neural network. It allows each neuron to be assigned a
+    layer and an index, and each layer to be marked as constant or
+    variable. The data is stored in dictionaries and sets for
+    efficient lookups.
+
+    Attributes:
+        neuron_to_layer (dict): A dictionary mapping neuron names to
+            their corresponding layer names.
+        neuron_to_index (dict): A dictionary mapping neuron names to
+            their corresponding indices in the layers.
+        constant_layers (set): A set of layer names that represent
+            constant layers.
+        variable_layers (set): A set of layer names that represent
+            variable layers.
+    """
 
     def __init__(
         self,
     ):
+        """
+        Initializes the Descriptor object.
+        """
 
-        # Define dictionaries that will translate neuron names to layer and index
+        # Define dictionaries that will translate neuron
+        # names to layer and index
         self.neuron_to_layer: dict[str, str] = {}
         self.neuron_to_index: dict[str, int] = {}
-        self.neuron_to_minmax: dict[str, tuple[float, float]] = {}
 
         # Define sets that will hold the layers based on which type
-        self.output_layers: set[str] = set()
         self.constant_layers: set[str] = set()
         self.variable_layers: set[str] = set()
 
@@ -20,31 +66,65 @@ class Descriptor:
         layer_name: str,
         index: int,
         neuron_name: str,
-        min: float = 0,
-        max: float = 1,
-        output: bool = False,
         constant: bool = False,
     ):
+        """
+        Adds a neuron to the descriptor with its associated layer,
+        index, and properties.
 
-        if output:
-            self.output_layers.add(layer_name)
+        This method registers a neuron name and associates it with a
+        layer, its index, and optional properties such as whether
+        the layer is a constant layer.
 
-        if constant:
-            self.constant_layers.add(layer_name)
-        else:
-            self.variable_layers.add(layer_name)
+        Args:
+            layer_name (str): The name of the layer where the neuron is located.
+            index (int): The index of the neuron within the layer.
+            neuron_name (str): The name of the neuron.
+            constant (bool, optional): Whether the layer is a constant layer.
+                Defaults to False.
 
-        self.neuron_to_layer[neuron_name] = layer_name
-        self.neuron_to_index[neuron_name] = index
+        Raises:
+            TypeError: If a provided attribute has an incompatible type.
+            ValueError: If a layer or index is already assigned for a neuron
+                or a duplicate index is used within a layer.
+        """
 
-        if min != None and max == None:
+        # Type checking
+        validate_type("layer_name", layer_name, str)
+        validate_type("index", index, int)
+        validate_type("neuron_name", neuron_name, str)
+        validate_type("constant", constant, bool)
+
+        # Other validations
+        if neuron_name in self.neuron_to_layer:
             raise ValueError(
-                f"The min parameter was set without setting the max parameter. Either set both or set none."
+                "There already is a layer registered for the neuron with name "
+                f"'{neuron_name}'. Please use a unique name for each neuron."
             )
 
-        if max != None and min == None:
+        if neuron_name in self.neuron_to_index:
             raise ValueError(
-                f"The max parameter was set without setting the min parameter. Either set both or set none."
+                "There already is an index registered for the neuron with name "
+                f"'{neuron_name}'. Please use a unique name for each neuron."
             )
 
-        self.neuron_to_minmax[neuron_name] = (min, max)
+        for existing_neuron, assigned_index in self.neuron_to_index.items():
+            if (
+                assigned_index == index
+                and self.neuron_to_layer[existing_neuron] == layer_name
+            ):
+                raise ValueError(
+                    f"The index {index} in layer {layer_name} is already "
+                    "assigned. Every neuron must be assigned a different "
+                    "index that matches the network's output."
+                )
+
+        # Add to dictionaries and sets
+        if constant:
+            self.constant_layers.add(layer_name)
+        else:
+            self.variable_layers.add(layer_name)
+
+        self.neuron_to_layer[neuron_name] = layer_name
+        self.neuron_to_index[neuron_name] = index
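To illustrate the validation added to `add`, a short sketch; the layer and neuron names are made up, and indices must match the network's output layout:

    from congrads.descriptor import Descriptor

    descriptor = Descriptor()
    descriptor.add("output", 0, "temperature")               # variable layer
    descriptor.add("output", 1, "humidity")
    descriptor.add("inputs", 0, "latitude", constant=True)   # constant layer

    # Both of these now raise ValueError instead of silently overwriting:
    # descriptor.add("output", 2, "temperature")  # duplicate neuron name
    # descriptor.add("output", 1, "pressure")     # duplicate index in layer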