PyPI - chemotools - Versions diffs - 0.1.5__tar.gz → 0.1.7__tar.gz - Mend

chemotools 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

{chemotools-0.1.5 → chemotools-0.1.7}/PKG-INFO RENAMED Viewed

@@ -1,23 +1,22 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: chemotools
-Version: 0.1.5
-Summary: Package to integrate chemometrics in scikit-learn pipelines
-Home-page: https://github.com/paucablop/chemotools
-Author: Pau Cabaneros Lopez
-Author-email: pau.cabaneros@gmail.com
-Project-URL: Bug Tracker, https://github.com/paucablop/chemotools/issues/
-Classifier: Programming Language :: Python :: 3
+Version: 0.1.7
+Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
+License: MIT
+Author: Pau Cabaneros
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.9
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: numpy (>=2.0.0,<3.0.0)
+Requires-Dist: pandas (>=2.0.0,<3.0.0)
+Requires-Dist: polars (>=1.17.0,<2.0.0)
+Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
+Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
 Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: numpy
-Requires-Dist: pandas
-Requires-Dist: polars
-Requires-Dist: pyarrow
-Requires-Dist: scipy
-Requires-Dist: scikit-learn>=1.4.0
 ![chemotools](assets/images/logo_pixel.png)
@@ -27,6 +26,8 @@ Requires-Dist: scikit-learn>=1.4.0
 [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
 [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
 [![Downloads](https://static.pepy.tech/badge/chemotools)](https://pepy.tech/project/chemotools)
+[![DOI](https://joss.theoj.org/papers/10.21105/joss.06802/status.svg)](https://doi.org/10.21105/joss.06802)
 # __chemotools__

{chemotools-0.1.5 → chemotools-0.1.7}/README.md RENAMED Viewed

@@ -6,6 +6,8 @@
 [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
 [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
 [![Downloads](https://static.pepy.tech/badge/chemotools)](https://pepy.tech/project/chemotools)
+[![DOI](https://joss.theoj.org/papers/10.21105/joss.06802/status.svg)](https://doi.org/10.21105/joss.06802)
 # __chemotools__

{chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/__init__.py RENAMED Viewed

@@ -4,3 +4,13 @@ from .normal_noise import NormalNoise
 from .index_shift import IndexShift
 from .spectrum_scale import SpectrumScale
 from .uniform_noise import UniformNoise
+__all__ = [
+    "BaselineShift",
+    "ExponentialNoise",
+    "NormalNoise",
+    "IndexShift",
+    "SpectrumScale",
+    "UniformNoise",
+]

{chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/baseline_shift.py RENAMED Viewed

@@ -1,11 +1,11 @@
+from typing import Optional
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data
-class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class BaselineShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     Adds a constant baseline to the data. The baseline is drawn from a one-sided
     uniform distribution between 0 and 0 + scale.
@@ -17,7 +17,7 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     random_state : int, default=None
         The random state to use for the random number generator.
     Attributes
     ----------
     n_features_in_ : int
@@ -25,7 +25,7 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     _is_fitted : bool
         Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -35,15 +35,14 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         Transform the input data by adding a baseline the spectrum.
     """
-    def __init__(self, scale: int = 0.0, random_state: int = None):
+    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
         self.scale = scale
         self.random_state = random_state
     def fit(self, X: np.ndarray, y=None) -> "BaselineShift":
         """
         Fit the transformer to the input data.
         Parameters
         ----------
         X : np.ndarray of shape (n_samples, n_features)
@@ -58,8 +57,9 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         # Set the number of features
         self.n_features_in_ = X.shape[1]
@@ -92,12 +92,21 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         check_is_fitted(self, "_is_fitted")
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
         # Calculate the scaled spectrum
         for i, x in enumerate(X_):
@@ -108,4 +117,3 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     def _add_baseline(self, x) -> np.ndarray:
         adding_factor = self._rng.uniform(low=0, high=self.scale)
         return np.add(x, adding_factor)

{chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/exponential_noise.py RENAMED Viewed

@@ -1,11 +1,11 @@
+from typing import Optional
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data
-class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class ExponentialNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     Add exponential noise to the input data.
@@ -16,7 +16,7 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     random_state : int, default=None
         The random state to use for the random number generator.
     Attributes
     ----------
     n_features_in_ : int
@@ -24,7 +24,7 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     _is_fitted : bool
         Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -34,15 +34,14 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         Transform the input data by adding random noise.
     """
-    def __init__(self, scale: float = 0.0, random_state: int = None):
+    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
         self.scale = scale
         self.random_state = random_state
     def fit(self, X: np.ndarray, y=None) -> "ExponentialNoise":
         """
         Fit the transformer to the input data.
         Parameters
         ----------
         X : np.ndarray of shape (n_samples, n_features)
@@ -57,8 +56,9 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         # Set the number of features
         self.n_features_in_ = X.shape[1]
@@ -91,12 +91,21 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         check_is_fitted(self, "_is_fitted")
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
         # Calculate the standard exponential variate
         for i, x in enumerate(X_):
@@ -105,4 +114,4 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
     def _add_random_noise(self, x) -> np.ndarray:
-        return x + self._rng.exponential(self.scale, size=x.shape)
+        return x + self._rng.exponential(self.scale, size=x.shape)

chemotools-0.1.7/chemotools/augmentation/index_shift.py ADDED Viewed

@@ -0,0 +1,199 @@
+from typing import Literal, Optional
+import numpy as np
+from numpy.polynomial import polynomial as poly
+from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+from sklearn.utils.validation import check_is_fitted, validate_data
+class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
+    """
+    Shift each spectrum by a random number of indices drawn from a discrete
+    uniform distribution over [-shift, shift] (both ends included), and fill
+    the positions vacated by the shift according to ``fill_method``.
+    Parameters
+    ----------
+    shift : int, default=0
+        Maximum shift in indices. Each spectrum is shifted by a random integer
+        between -shift and shift (inclusive).
+    fill_method : {"constant", "linear", "quadratic"}, default="constant"
+        How the vacated positions are filled after shifting. "constant"
+        repeats the first/last value of the spectrum; "linear" and "quadratic"
+        extrapolate a degree-1 / degree-2 polynomial fitted to the first/last
+        five points of the spectrum.
+    random_state : int, default=None
+        The random state to use for the random number generator.
+    Attributes
+    ----------
+    n_features_in_ : int
+        The number of features in the input data.
+    _is_fitted : bool
+        Whether the transformer has been fitted to data.
+    _rng : numpy.random.Generator
+        Random number generator created in ``fit`` from ``random_state``.
+    Methods
+    -------
+    fit(X, y=None)
+        Fit the transformer to the input data.
+    transform(X, y=None)
+        Transform the input data by shifting the spectrum.
+    """
+    def __init__(
+        self,
+        shift: int = 0,
+        fill_method: Literal["constant", "linear", "quadratic"] = "constant",
+        random_state: Optional[int] = None,
+    ):
+        self.shift = shift
+        self.fill_method = fill_method
+        self.random_state = random_state
+    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
+        """
+        Fit the transformer to the input data.
+        Nothing is learned from the values of X: fitting records the number of
+        features, marks the transformer as fitted and creates the random
+        number generator from ``random_state``.
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_features)
+            The input data to fit the transformer to.
+        y : None
+            Ignored.
+        Returns
+        -------
+        self : IndexShift
+            The fitted transformer.
+        """
+        # Validate X as a 2D float64 array (y is not validated)
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
+        # Set the number of features
+        self.n_features_in_ = X.shape[1]
+        # Set the fitted attribute to True
+        self._is_fitted = True
+        # Instantiate the random number generator
+        self._rng = np.random.default_rng(self.random_state)
+        return self
+    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+        """
+        Transform the input data by shifting the spectrum.
+        Every row of X is shifted independently, each with its own randomly
+        drawn shift. The input is copied during validation.
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_features)
+            The input data to transform.
+        y : None
+            Ignored.
+        Returns
+        -------
+        X_ : np.ndarray of shape (n_samples, n_features)
+            The transformed data.
+        Raises
+        ------
+        ValueError
+            If the number of features in X differs from the number seen in
+            ``fit``.
+        """
+        # Check that the estimator is fitted
+        check_is_fitted(self, "_is_fitted")
+        # Validate X as a 2D float64 array and work on a copy of it
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
+        # Check that the number of features is the same as the fitted data
+        if X_.shape[1] != self.n_features_in_:
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
+        # Shift every spectrum (row); each call draws a new random shift
+        for i, x in enumerate(X_):
+            X_[i] = self._shift_vector(x)
+        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+    # Circular shift without any filling: values rolled off one end reappear at
+    # the other end.
+    # NOTE(review): not called anywhere in this class as shown here; transform
+    # uses _shift_vector instead - confirm whether this is still needed.
+    def _shift_spectrum(self, x) -> np.ndarray:
+        shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
+        return np.roll(x, shift_amount)
+    def _shift_vector(
+        self,
+        x: np.ndarray,
+    ) -> np.ndarray:
+        """
+        Shift a single spectrum by a random number of indices and fill the
+        vacated positions.
+        The shift is drawn from ``self._rng`` as an integer in
+        [-self.shift, self.shift]. The spectrum is rolled with ``np.roll`` and
+        the values that wrapped around are then overwritten according to
+        ``self.fill_method``:
+            'constant': fill with first/last value
+            'linear': fill by extrapolating a linear fit of the first/last 5 values
+            'quadratic': fill by extrapolating a quadratic fit of the first/last 5 values
+        Any other ``fill_method`` value leaves the wrapped-around values in place.
+        Args:
+            x: Input numpy array (one spectrum, i.e. one row of the input data)
+        Returns:
+            Shifted numpy array
+        """
+        shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
+        # np.roll wraps values around; the wrapped part is replaced below
+        result = np.roll(x, shift)
+        if self.fill_method == "constant":
+            if shift > 0:
+                result[:shift] = x[0]
+            elif shift < 0:
+                result[shift:] = x[-1]
+        elif self.fill_method == "linear":
+            # NOTE(review): the fits below pair np.arange(5) with x[:5] / x[-5:],
+            # so this presumably requires at least 5 features - confirm
+            if shift > 0:
+                # Fit the first 5 values at positions 0..4
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[:5], 1)
+                # Evaluate the fit at positions -shift..-1 (left of the data)
+                extrapolate_x = np.arange(-shift, 0)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+                result[:shift] = extrapolated_values
+            elif shift < 0:
+                # Fit the last 5 values at positions 0..4
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[-5:], 1)
+                # Evaluate the fit at positions 5..5+|shift|-1 (right of the data)
+                extrapolate_x = np.arange(len(x_), len(x_) - shift)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+                result[shift:] = extrapolated_values
+        elif self.fill_method == "quadratic":
+            if shift > 0:
+                # Use first 5 values for quadratic regression
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[:5], 2)
+                # Extrapolate to fill shifted region
+                extrapolate_x = np.arange(-shift, 0)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+                result[:shift] = extrapolated_values
+            elif shift < 0:
+                # Use last 5 values for quadratic regression
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[-5:], 2)
+                # Extrapolate to fill shifted region
+                extrapolate_x = np.arange(len(x_), len(x_) - shift)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+                result[shift:] = extrapolated_values
+        return result

{chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/normal_noise.py RENAMED Viewed

@@ -1,11 +1,11 @@
+from typing import Optional
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data
-class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     Add normal noise to the input data.
@@ -16,7 +16,7 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     random_state : int, default=None
         The random state to use for the random number generator.
     Attributes
     ----------
     n_features_in_ : int
@@ -24,7 +24,7 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     _is_fitted : bool
         Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -34,15 +34,14 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         Transform the input data by adding random noise.
     """
-    def __init__(self, scale: float = 0.0, random_state: int = None):
+    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
         self.scale = scale
         self.random_state = random_state
     def fit(self, X: np.ndarray, y=None) -> "NormalNoise":
         """
         Fit the transformer to the input data.
         Parameters
         ----------
         X : np.ndarray of shape (n_samples, n_features)
@@ -57,7 +56,9 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         # Set the number of features
         self.n_features_in_ = X.shape[1]
@@ -91,12 +92,21 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         check_is_fitted(self, "_is_fitted")
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
         # Calculate the standard normal variate
         for i, x in enumerate(X_):
@@ -105,4 +115,4 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
     def _add_random_noise(self, x) -> np.ndarray:
-        return x + self._rng.normal(0, self.scale, size=x.shape)
+        return x + self._rng.normal(0, self.scale, size=x.shape)

{chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/spectrum_scale.py RENAMED Viewed

@@ -1,11 +1,11 @@
+from typing import Optional
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data
-class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class SpectrumScale(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     Scales the data by a value drawn from the uniform distribution centered
     around 1.0.
@@ -17,7 +17,7 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     random_state : int, default=None
         The random state to use for the random number generator.
     Attributes
     ----------
     n_features_in_ : int
@@ -25,7 +25,7 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     _is_fitted : bool
         Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -35,15 +35,14 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         Transform the input data by scaling the spectrum.
     """
-    def __init__(self, scale: int = 0.0, random_state: int = None):
+    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
         self.scale = scale
         self.random_state = random_state
     def fit(self, X: np.ndarray, y=None) -> "SpectrumScale":
         """
         Fit the transformer to the input data.
         Parameters
         ----------
         X : np.ndarray of shape (n_samples, n_features)
@@ -58,7 +57,9 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         # Set the number of features
         self.n_features_in_ = X.shape[1]
@@ -92,12 +93,21 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         check_is_fitted(self, "_is_fitted")
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
         # Calculate the scaled spectrum
         for i, x in enumerate(X_):
@@ -106,6 +116,5 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
     def _scale_spectrum(self, x) -> np.ndarray:
-        scaling_factor = self._rng.uniform(low=1-self.scale, high=1+self.scale)
+        scaling_factor = self._rng.uniform(low=1 - self.scale, high=1 + self.scale)
         return np.multiply(x, scaling_factor)

chemotools 0.1.5__tar.gz → 0.1.7__tar.gz

chemotools 0.1.5__tar.gz → 0.1.7__tar.gz