PyPI - chemotools - Versions diffs - 0.1.7__tar.gz → 0.1.9__tar.gz - Mend

chemotools-0.1.7.tar.gz → chemotools-0.1.9.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (55)

{chemotools-0.1.7 → chemotools-0.1.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: chemotools
-Version: 0.1.7
+Version: 0.1.9
 Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
 License: MIT
 Author: Pau Cabaneros

chemotools-0.1.9/chemotools/augmentation/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+from ._add_noise import AddNoise
+from ._baseline_shift import BaselineShift
+from ._fractional_shift import FractionalShift
+from ._index_shift import IndexShift
+from ._spectrum_scale import SpectrumScale
+__all__ = [
+    "AddNoise",
+    "BaselineShift",
+    "FractionalShift",
+    "IndexShift",
+    "SpectrumScale",
+]

chemotools-0.1.9/chemotools/augmentation/_add_noise.py ADDED Viewed

@@ -0,0 +1,135 @@
+from typing import Literal, Optional
+import numpy as np
+from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+from sklearn.utils.validation import check_is_fitted, validate_data
+class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
+    """Add noise to input data from various probability distributions.
+    This transformer adds random noise from specified probability distributions
+    to the input data. Supported distributions include Gaussian, Poisson, and
+    exponential.
+    Parameters
+    ----------
+    distribution : {'gaussian', 'poisson', 'exponential'}, default='gaussian'
+        The probability distribution to sample noise from.
+    scale : float, default=0.0
+        Scale parameter for the noise distribution:
+        - For gaussian: standard deviation
+        - For poisson: multiplication factor for sampled values
+        - For exponential: scale parameter (1/λ)
+    random_state : int, optional
+        Random seed for reproducibility.
+    Attributes
+    ----------
+    n_features_in_ : int
+        Number of features in the training data.
+    """
+    def __init__(
+        self,
+        distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian",
+        scale: float = 0.0,
+        random_state: Optional[int] = None,
+    ):
+        self.distribution = distribution
+        self.scale = scale
+        self.random_state = random_state
+    def fit(self, X: np.ndarray, y=None) -> "AddNoise":
+        """Fit the transformer to the input data.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+        y : None
+            Ignored. Present for API consistency.
+        Returns
+        -------
+        self : AddNoise
+            Fitted transformer.
+        Raises
+        ------
+        ValueError
+            If X is not a 2D array or contains non-finite values.
+        """
+        # Check that X is a 2D array and has only finite values
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
+        # Instantiate the random number generator
+        self._rng = np.random.default_rng(self.random_state)
+        return self
+    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+        """Transform the input data by adding random noise.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data to transform.
+        y : None
+            Ignored. Present for API consistency.
+        Returns
+        -------
+        X_noisy : ndarray of shape (n_samples, n_features)
+            Transformed data with added noise.
+        Raises
+        ------
+        ValueError
+            If X has different number of features than the training data,
+            or if an invalid noise distribution is specified.
+        """
+        # Check that the estimator is fitted
+        check_is_fitted(self, "n_features_in_")
+        # Check that X is a 2D array and has only finite values
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
+        # Select the noise function based on the selected distribution
+        noise_func = {
+            "gaussian": self._add_gaussian_noise,
+            "poisson": self._add_poisson_noise,
+            "exponential": self._add_exponential_noise,
+        }.get(self.distribution)
+        if noise_func is None:
+            raise ValueError(
+                f"Invalid noise distribution: {self.distribution}. "
+                "Expected one of: gaussian, poisson, exponential"
+            )
+        return noise_func(X_)
+    def _add_gaussian_noise(self, X: np.ndarray) -> np.ndarray:
+        """Add Gaussian noise to the input array."""
+        return X + self._rng.normal(0, self.scale, size=X.shape)
+    def _add_poisson_noise(self, X: np.ndarray) -> np.ndarray:
+        """Add Poisson noise to the input array."""
+        return X + self._rng.poisson(X, size=X.shape) * self.scale
+    def _add_exponential_noise(self, X: np.ndarray) -> np.ndarray:
+        """Add exponential noise to the input array."""
+        return X + self._rng.exponential(self.scale, size=X.shape)

chemotools-0.1.9/chemotools/augmentation/_fractional_shift.py ADDED Viewed

@@ -0,0 +1,203 @@
+from typing import Literal, Optional
+import numpy as np
+from scipy.interpolate import CubicSpline
+from scipy import stats
+from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+from sklearn.utils.validation import check_is_fitted, validate_data
+class FractionalShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
+    """
+    Shift the spectrum by a fractional amount, allowing shifts below one index.
+    Parameters
+    ----------
+    shift : float, default=0.0
+        Maximum amount by which the data is randomly shifted.
+        The actual shift is a random float between -shift and shift.
+    padding_mode : {'zeros', 'constant', 'wrap', 'extend', 'mirror', 'linear'}, default='linear'
+        Specifies how to handle padding when shifting the data:
+            - 'zeros': Pads with zeros.
+            - 'constant': Pads with a constant value defined by `pad_value`.
+            - 'wrap': Circular shift (wraps around).
+            - 'extend': Extends using edge values.
+            - 'mirror': Mirrors the signal.
+            - 'linear': Uses linear regression on 5 points to extrapolate values.
+    pad_value : float, default=0.0
+        The value used for padding when `padding_mode='constant'`.
+    random_state : int, optional, default=None
+        The random seed for reproducibility.
+    """
+    def __init__(
+        self,
+        shift: float = 0.0,
+        padding_mode: Literal[
+            "zeros", "constant", "extend", "mirror", "linear"
+        ] = "linear",
+        pad_value: float = 0.0,
+        random_state: Optional[int] = None,
+    ):
+        self.shift = shift
+        self.padding_mode = padding_mode
+        self.pad_value = pad_value
+        self.random_state = random_state
+    def fit(self, X: np.ndarray, y=None) -> "FractionalShift":
+        """
+        Fit the transformer to the input data.
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_features)
+            The input data to fit the transformer to.
+        y : None
+            Ignored.
+        Returns
+        -------
+        self : FractionalShift
+            The fitted transformer.
+        """
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
+        self._rng = np.random.default_rng(self.random_state)
+        return self
+    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+        """
+        Transform the input data by shifting the spectrum.
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_features)
+            The input data to transform.
+        y : None
+            Ignored.
+        Returns
+        -------
+        X_ : np.ndarray of shape (n_samples, n_features)
+            The transformed data with the applied shifts.
+        """
+        check_is_fitted(self, "n_features_in_")
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
+        for i, x in enumerate(X_):
+            X_[i] = self._shift_signal(x)
+        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+    def _shift_signal(self, x: np.ndarray) -> np.ndarray:
+        """
+        Shifts a signal by a fractional amount using cubic spline interpolation.
+        Parameters
+        ----------
+        x : np.ndarray of shape (n_features,)
+            The input signal to shift.
+        Returns
+        -------
+        shifted_signal : np.ndarray of shape (n_features,)
+            The shifted signal.
+        """
+        shift = self._rng.uniform(-self.shift, self.shift)
+        n = len(x)
+        indices = np.arange(n)
+        shifted_indices = indices + shift
+        # Create cubic spline interpolator
+        spline = CubicSpline(indices, x, bc_type="not-a-knot")
+        shifted_signal = spline(shifted_indices)
+        # Determine padding direction and length
+        if shift >= 0:
+            pad_length = len(shifted_indices[shifted_indices >= n - 1])
+            pad_left = False
+        else:
+            pad_length = len(shifted_indices[shifted_indices < 0])
+            pad_left = True
+        # Handle padding based on mode
+        if self.padding_mode == "zeros":
+            shifted_signal[shifted_indices < 0] = 0
+            shifted_signal[shifted_indices >= n - 1] = 0
+        elif self.padding_mode == "constant":
+            shifted_signal[shifted_indices < 0] = self.pad_value
+            shifted_signal[shifted_indices >= n - 1] = self.pad_value
+        elif self.padding_mode == "mirror":
+            if pad_left:
+                pad_values = x[pad_length - 1 :: -1]
+                shifted_signal[shifted_indices < 0] = pad_values[:pad_length]
+            else:
+                pad_values = x[:-1][::-1]
+                shifted_signal[shifted_indices >= n - 1] = pad_values[:pad_length]
+        elif self.padding_mode == "extend":
+            if pad_left:
+                shifted_signal[shifted_indices < 0] = x[0]
+            else:
+                shifted_signal[shifted_indices >= n - 1] = x[-1]
+        elif self.padding_mode == "linear":
+            if pad_left:
+                # Use first 5 points for regression
+                if len(x) < 5:
+                    points = x[: len(x)]  # Use all points if less than 5
+                else:
+                    points = x[:5]
+                x_coords = np.arange(len(points))
+                # Reshape arrays for linregress
+                x_coords = x_coords.reshape(-1)
+                points = points.reshape(-1)
+                # Perform regression
+                slope, intercept, _, _, _ = stats.linregress(x_coords, points)
+                # Generate new points using linear regression
+                new_x = np.arange(-pad_length, 0)
+                extrapolated = slope * new_x + intercept
+                shifted_signal[shifted_indices < 0] = extrapolated
+            else:
+                # Use last 5 points for regression
+                if len(x) < 5:
+                    points = x[-len(x) :]  # Use all points if less than 5
+                else:
+                    points = x[-5:]
+                x_coords = np.arange(len(points))
+                # Reshape arrays for linregress
+                x_coords = x_coords.reshape(-1)
+                points = points.reshape(-1)
+                # Perform regression
+                slope, intercept, _, _, _ = stats.linregress(x_coords, points)
+                # Generate new points using linear regression
+                new_x = np.arange(len(points), len(points) + pad_length)
+                extrapolated = slope * new_x + intercept
+                shifted_signal[shifted_indices >= n] = extrapolated
+        else:
+            raise ValueError(f"Unknown padding mode: {self.padding_mode}")
+        return shifted_signal

chemotools-0.1.9/chemotools/augmentation/_index_shift.py ADDED Viewed

@@ -0,0 +1,214 @@
+from typing import Literal, Optional
+import numpy as np
+from scipy.signal import convolve
+from scipy import stats
+from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+from sklearn.utils.validation import check_is_fitted, validate_data
+class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
+    """
+    Shift the spectrum a given number of indices between -shift and +shift drawn
+    from a discrete uniform distribution.
+    Parameters
+    ----------
+    shift : int, default=0
+        Maximum number of indices by which the data is randomly shifted.
+        The actual shift is a random integer between -shift and shift (inclusive).
+    padding_mode : {'zeros', 'constant', 'wrap', 'extend', 'mirror', 'linear'}, default='linear'
+        Specifies how to handle padding when shifting the data:
+            - 'zeros': Pads with zeros.
+            - 'constant': Pads with a constant value defined by `pad_value`.
+            - 'wrap': Circular shift (wraps around).
+            - 'extend': Extends using edge values.
+            - 'mirror': Mirrors the signal.
+            - 'linear': Uses linear regression to extrapolate values.
+    pad_value : float, default=0.0
+        The value used for padding when `padding_mode='constant'`.
+    random_state : int, optional, default=None
+        The random seed for reproducibility.
+    Attributes
+    ----------
+    n_features_in_ : int
+        The number of features in the input data.
+    _is_fitted : bool
+        Whether the transformer has been fitted to data.
+    _rng : numpy.random.Generator
+        Random number generator instance used for shifting.
+    """
+    def __init__(
+        self,
+        shift: int = 0,
+        padding_mode: Literal[
+            "zeros", "constant", "wrap", "extend", "mirror", "linear"
+        ] = "linear",
+        pad_value: float = 0.0,
+        random_state: Optional[int] = None,
+    ):
+        self.shift = shift
+        self.padding_mode = padding_mode
+        self.pad_value = pad_value
+        self.random_state = random_state
+    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
+        """
+        Fit the transformer to the input data.
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_features)
+            The input data to fit the transformer to.
+        y : None
+            Ignored.
+        Returns
+        -------
+        self : IndexShift
+            The fitted transformer.
+        """
+        # Check that X is a 2D array and has only finite values
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
+        # Instantiate the random number generator
+        self._rng = np.random.default_rng(self.random_state)
+        return self
+    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+        """
+        Transform the input data by shifting the spectrum.
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_features)
+            The input data to transform.
+        y : None
+            Ignored.
+        Returns
+        -------
+        X_ : np.ndarray of shape (n_samples, n_features)
+            The transformed data with the applied shifts.
+        """
+        # Check that the estimator is fitted
+        check_is_fitted(self, "n_features_in_")
+        # Check that X is a 2D array and has only finite values
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
+        # Calculate the standard normal variate
+        for i, x in enumerate(X_):
+            X_[i] = self._shift_signal(x)
+        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+    def _shift_signal(self, x: np.ndarray):
+        """
+        Shifts a discrete signal using convolution with a Dirac delta kernel.
+        Parameters
+        ----------
+        x : np.ndarray of shape (n_features,)
+            The input signal to shift.
+        Returns
+        -------
+        result : np.ndarray of shape (n_features,)
+            The shifted signal.
+        """
+        shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
+        if self.padding_mode == "wrap":
+            return np.roll(x, shift)
+        # Create Dirac delta kernel with proper dimensions
+        if shift >= 0:
+            kernel = np.zeros(shift + 1)
+            kernel[-1] = 1
+        else:
+            kernel = np.zeros(-shift + 1)
+            kernel[0] = 1
+        # Convolve signal with kernel
+        shifted = convolve(x, kernel, mode="full")
+        if shift >= 0:
+            result = shifted[: len(x)] if x.ndim == 1 else shifted[: x.shape[0]]
+            pad_length = shift
+            pad_left = True
+        else:
+            result = shifted[-len(x) :] if x.ndim == 1 else shifted[-x.shape[0] :]
+            pad_length = -shift
+            pad_left = False
+        if self.padding_mode == "zeros":
+            return result
+        elif self.padding_mode == "constant":
+            mask = np.abs(result) < 1e-10
+            result[mask] = self.pad_value
+            return result
+        elif self.padding_mode == "mirror":
+            if pad_left:
+                pad_values = x[pad_length - 1 :: -1]
+                result[:pad_length] = pad_values[-pad_length:]
+            else:
+                pad_values = x[:-1][::-1]
+                result[-pad_length:] = pad_values[:pad_length]
+            return result
+        elif self.padding_mode == "extend":
+            if pad_left:
+                result[:pad_length] = x[0]
+            else:
+                result[-pad_length:] = x[-1]
+            return result
+        elif self.padding_mode == "linear":
+            # Get points for linear regression
+            if pad_left:
+                points = x[: pad_length + 1]  # Take first pad_length+1 points
+                x_coords = np.arange(len(points))
+                slope, intercept, _, _, _ = stats.linregress(x_coords, points)
+                # Generate new points using linear regression
+                new_x = np.arange(-pad_length, 0)
+                extrapolated = slope * new_x + intercept
+                result[:pad_length] = extrapolated
+            else:
+                points = x[-pad_length - 1 :]  # Take last pad_length+1 points
+                x_coords = np.arange(len(points))
+                slope, intercept, _, _, _ = stats.linregress(x_coords, points)
+                # Generate new points using linear regression
+                new_x = np.arange(len(points), len(points) + pad_length)
+                extrapolated = slope * new_x + intercept
+                result[-pad_length:] = extrapolated
+            return result
+        else:
+            raise ValueError(f"Unknown padding mode: {self.padding_mode}")

{chemotools-0.1.7 → chemotools-0.1.9}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "chemotools"
-version = "0.1.7"
+version = "0.1.9"
 description = "chemotools: A Python Package that Integrates Chemometrics and scikit-learn"
 authors = ["Pau Cabaneros"]
 license = "MIT License"

chemotools-0.1.7/chemotools/augmentation/__init__.py DELETED Viewed

@@ -1,16 +0,0 @@
-from .baseline_shift import BaselineShift
-from .exponential_noise import ExponentialNoise
-from .normal_noise import NormalNoise
-from .index_shift import IndexShift
-from .spectrum_scale import SpectrumScale
-from .uniform_noise import UniformNoise
-__all__ = [
-    "BaselineShift",
-    "ExponentialNoise",
-    "NormalNoise",
-    "IndexShift",
-    "SpectrumScale",
-    "UniformNoise",
-]

chemotools-0.1.7/chemotools/augmentation/exponential_noise.py DELETED Viewed

@@ -1,117 +0,0 @@
-from typing import Optional
-import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted, validate_data
-class ExponentialNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
-    """
-    Add exponential noise to the input data.
-    Parameters
-    ----------
-    scale: float, default=0.0
-        The scale of the noise to add to the input data.
-    random_state : int, default=None
-        The random state to use for the random number generator.
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-    Methods
-    -------
-    fit(X, y=None)
-        Fit the transformer to the input data.
-    transform(X, y=0, copy=True)
-        Transform the input data by adding random noise.
-    """
-    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
-        self.scale = scale
-        self.random_state = random_state
-    def fit(self, X: np.ndarray, y=None) -> "ExponentialNoise":
-        """
-        Fit the transformer to the input data.
-        Parameters
-        ----------
-        X : np.ndarray of shape (n_samples, n_features)
-            The input data to fit the transformer to.
-        y : None
-            Ignored.
-        Returns
-        -------
-        self : ExponentialNoise
-            The fitted transformer.
-        """
-        # Check that X is a 2D array and has only finite values
-        X = validate_data(
-            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
-        )
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
-        # Instantiate the random number generator
-        self._rng = np.random.default_rng(self.random_state)
-        return self
-    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
-        """
-        Transform the input data by adding random exponential noise.
-        Parameters
-        ----------
-        X : np.ndarray of shape (n_samples, n_features)
-            The input data to transform.
-        y : None
-            Ignored.
-        Returns
-        -------
-        X_ : np.ndarray of shape (n_samples, n_features)
-            The transformed data.
-        """
-        # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
-        # Check that X is a 2D array and has only finite values
-        X_ = validate_data(
-            self,
-            X,
-            y="no_validation",
-            ensure_2d=True,
-            copy=True,
-            reset=False,
-            dtype=np.float64,
-        )
-        # Check that the number of features is the same as the fitted data
-        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(
-                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
-            )
-        # Calculate the standard exponential variate
-        for i, x in enumerate(X_):
-            X_[i] = self._add_random_noise(x)
-        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-    def _add_random_noise(self, x) -> np.ndarray:
-        return x + self._rng.exponential(self.scale, size=x.shape)

chemotools-0.1.7/chemotools/augmentation/index_shift.py DELETED Viewed

@@ -1,199 +0,0 @@
-from typing import Literal, Optional
-import numpy as np
-from numpy.polynomial import polynomial as poly
-from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted, validate_data
-class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
-    """
-    Shift the spectrum a given number of indices between - shift and + shift drawn
-    from a discrete uniform distribution.
-    Parameters
-    ----------
-    shift : float, default=0.0
-        Shifts the data by a random integer between -shift and shift.
-    random_state : int, default=None
-        The random state to use for the random number generator.
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-    Methods
-    -------
-    fit(X, y=None)
-        Fit the transformer to the input data.
-    transform(X, y=0, copy=True)
-        Transform the input data by shifting the spectrum.
-    """
-    def __init__(
-        self,
-        shift: int = 0,
-        fill_method: Literal["constant", "linear", "quadratic"] = "constant",
-        random_state: Optional[int] = None,
-    ):
-        self.shift = shift
-        self.fill_method = fill_method
-        self.random_state = random_state
-    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
-        """
-        Fit the transformer to the input data.
-        Parameters
-        ----------
-        X : np.ndarray of shape (n_samples, n_features)
-            The input data to fit the transformer to.
-        y : None
-            Ignored.
-        Returns
-        -------
-        self : IndexShift
-            The fitted transformer.
-        """
-        # Check that X is a 2D array and has only finite values
-        X = validate_data(
-            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
-        )
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
-        # Instantiate the random number generator
-        self._rng = np.random.default_rng(self.random_state)
-        return self
-    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
-        """
-        Transform the input data by shifting the spectrum.
-        Parameters
-        ----------
-        X : np.ndarray of shape (n_samples, n_features)
-            The input data to transform.
-        y : None
-            Ignored.
-        Returns
-        -------
-        X_ : np.ndarray of shape (n_samples, n_features)
-            The transformed data.
-        """
-        # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
-        # Check that X is a 2D array and has only finite values
-        X_ = validate_data(
-            self,
-            X,
-            y="no_validation",
-            ensure_2d=True,
-            copy=True,
-            reset=False,
-            dtype=np.float64,
-        )
-        # Check that the number of features is the same as the fitted data
-        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(
-                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
-            )
-        # Calculate the standard normal variate
-        for i, x in enumerate(X_):
-            X_[i] = self._shift_vector(x)
-        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-    def _shift_spectrum(self, x) -> np.ndarray:
-        shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
-        return np.roll(x, shift_amount)
-    def _shift_vector(
-        self,
-        x: np.ndarray,
-    ) -> np.ndarray:
-        """
-        Shift vector with option to fill missing values.
-        Args:
-            arr: Input numpy array
-            shift: Number of positions to shift
-            fill_method: Method to fill missing values
-                'constant': fill with first/last value
-                'linear': fill using linear regression
-                'quadratic': fill using quadratic regression
-        Returns:
-            Shifted numpy array
-        """
-        shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
-        result = np.roll(x, shift)
-        if self.fill_method == "constant":
-            if shift > 0:
-                result[:shift] = x[0]
-            elif shift < 0:
-                result[shift:] = x[-1]
-        elif self.fill_method == "linear":
-            if shift > 0:
-                x_ = np.arange(5)
-                coeffs = poly.polyfit(x_, x[:5], 1)
-                extrapolate_x = np.arange(-shift, 0)
-                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
-                result[:shift] = extrapolated_values
-            elif shift < 0:
-                x_ = np.arange(5)
-                coeffs = poly.polyfit(x_, x[-5:], 1)
-                extrapolate_x = np.arange(len(x_), len(x_) - shift)
-                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
-                result[shift:] = extrapolated_values
-        elif self.fill_method == "quadratic":
-            if shift > 0:
-                # Use first 3 values for quadratic regression
-                x_ = np.arange(5)
-                coeffs = poly.polyfit(x_, x[:5], 2)
-                # Extrapolate to fill shifted region
-                extrapolate_x = np.arange(-shift, 0)
-                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
-                result[:shift] = extrapolated_values
-            elif shift < 0:
-                # Use last 3 values for quadratic regression
-                x_ = np.arange(5)
-                coeffs = poly.polyfit(x_, x[-5:], 2)
-                # Extrapolate to fill shifted region
-                extrapolate_x = np.arange(len(x_), len(x_) - shift)
-                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
-                result[shift:] = extrapolated_values
-        return result

chemotools-0.1.7/chemotools/augmentation/normal_noise.py DELETED Viewed

@@ -1,118 +0,0 @@
-from typing import Optional
-import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted, validate_data
-class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
-    """
-    Add normal noise to the input data.
-    Parameters
-    ----------
-    scale : float, default=0.0
-        The scale of the noise to add to the input data.
-    random_state : int, default=None
-        The random state to use for the random number generator.
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-    Methods
-    -------
-    fit(X, y=None)
-        Fit the transformer to the input data.
-    transform(X, y=None)
-        Transform the input data by adding random noise.
-    """
-    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
-        self.scale = scale
-        self.random_state = random_state
-    def fit(self, X: np.ndarray, y=None) -> "NormalNoise":
-        """
-        Fit the transformer to the input data.
-        Parameters
-        ----------
-        X : np.ndarray of shape (n_samples, n_features)
-            The input data to fit the transformer to.
-        y : None
-            Ignored.
-        Returns
-        -------
-        self : NormalNoise
-            The fitted transformer.
-        """
-        # Check that X is a 2D array and has only finite values
-        X = validate_data(
-            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
-        )
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
-        # Instantiate the random number generator
-        self._rng = np.random.default_rng(self.random_state)
-        return self
-    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
-        """
-        Transform the input data by adding random normal noise.
-        Parameters
-        ----------
-        X : np.ndarray of shape (n_samples, n_features)
-            The input data to transform.
-        y : None
-            Ignored.
-        Returns
-        -------
-        X_ : np.ndarray of shape (n_samples, n_features)
-            The transformed data.
-        """
-        # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
-        # Check that X is a 2D array and has only finite values
-        X_ = validate_data(
-            self,
-            X,
-            y="no_validation",
-            ensure_2d=True,
-            copy=True,
-            reset=False,
-            dtype=np.float64,
-        )
-        # Check that the number of features is the same as the fitted data
-        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(
-                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
-            )
-        # Add normal noise to each sample (row) in turn
-        for i, x in enumerate(X_):
-            X_[i] = self._add_random_noise(x)
-        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-    def _add_random_noise(self, x) -> np.ndarray:
-        return x + self._rng.normal(0, self.scale, size=x.shape)

chemotools-0.1.7/chemotools/augmentation/uniform_noise.py DELETED Viewed

@@ -1,124 +0,0 @@
-from typing import Optional
-import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted, validate_data
-class UniformNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
-    """
-    Add uniform noise to the input data.
-    Parameters
-    ----------
-    min : float, default=0.0
-        The lower bound of the uniform distribution.
-    max : float, default=0.0
-        The upper bound of the uniform distribution.
-    random_state : int, default=None
-        The random state to use for the random number generator.
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-    Methods
-    -------
-    fit(X, y=None)
-        Fit the transformer to the input data.
-    transform(X, y=None)
-        Transform the input data by adding random noise.
-    """
-    def __init__(
-        self, min: float = 0.0, max: float = 0.0, random_state: Optional[int] = None
-    ):
-        self.min = min
-        self.max = max
-        self.random_state = random_state
-    def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
-        """
-        Fit the transformer to the input data.
-        Parameters
-        ----------
-        X : np.ndarray of shape (n_samples, n_features)
-            The input data to fit the transformer to.
-        y : None
-            Ignored.
-        Returns
-        -------
-        self : UniformNoise
-            The fitted transformer.
-        """
-        # Check that X is a 2D array and has only finite values
-        X = validate_data(
-            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
-        )
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
-        # Instantiate the random number generator
-        self._rng = np.random.default_rng(self.random_state)
-        return self
-    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
-        """
-        Transform the input data by adding random uniform noise.
-        Parameters
-        ----------
-        X : np.ndarray of shape (n_samples, n_features)
-            The input data to transform.
-        y : None
-            Ignored.
-        Returns
-        -------
-        X_ : np.ndarray of shape (n_samples, n_features)
-            The transformed data.
-        """
-        # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
-        # Check that X is a 2D array and has only finite values
-        X_ = validate_data(
-            self,
-            X,
-            y="no_validation",
-            ensure_2d=True,
-            copy=True,
-            reset=False,
-            dtype=np.float64,
-        )
-        # Check that the number of features is the same as the fitted data
-        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(
-                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
-            )
-        # Add uniform noise to each sample (row) in turn
-        for i, x in enumerate(X_):
-            X_[i] = self._add_random_noise(x)
-        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-    def _add_random_noise(self, x) -> np.ndarray:
-        return x + self._rng.uniform(self.min, self.max, size=x.shape)