chemotools 0.1.6__tar.gz → 0.1.7__tar.gz
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- {chemotools-0.1.6 → chemotools-0.1.7}/PKG-INFO +1 -1
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/index_shift.py +82 -3
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_extended_multiplicative_scatter_correction.py +17 -12
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_multiplicative_scatter_correction.py +16 -9
- {chemotools-0.1.6 → chemotools-0.1.7}/pyproject.toml +1 -1
- {chemotools-0.1.6 → chemotools-0.1.7}/LICENSE +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/README.md +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/baseline_shift.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/exponential_noise.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/normal_noise.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/spectrum_scale.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/uniform_noise.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_air_pls.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_ar_pls.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_constant_baseline_correction.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_cubic_spline_correction.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_linear_correction.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_non_negative.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_polynomial_correction.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_subtract_reference.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/_base.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/coffee_labels.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/coffee_spectra.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/fermentation_hplc.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/fermentation_spectra.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/train_hplc.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/train_spectra.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/_norris_william.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/_savitzky_golay.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/_index_selector.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/_range_cut.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_min_max_scaler.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_norm_scaler.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_point_scaler.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_robust_normal_variate.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_standard_normal_variate.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_mean_filter.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_median_filter.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_savitzky_golay_filter.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_whittaker_smooth.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/utils/__init__.py +0 -0
{chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/index_shift.py RENAMED

@@ -1,6 +1,7 @@
-from typing import Optional
+from typing import Literal, Optional
 
 import numpy as np
+from numpy.polynomial import polynomial as poly
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
 from sklearn.utils.validation import check_is_fitted, validate_data
 
@@ -35,8 +36,14 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     Transform the input data by shifting the spectrum.
     """
 
-    def __init__(
+    def __init__(
+        self,
+        shift: int = 0,
+        fill_method: Literal["constant", "linear", "quadratic"] = "constant",
+        random_state: Optional[int] = None,
+    ):
         self.shift = shift
+        self.fill_method = fill_method
         self.random_state = random_state
 
     def fit(self, X: np.ndarray, y=None) -> "IndexShift":
@@ -111,10 +118,82 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
 
         # Calculate the standard normal variate
         for i, x in enumerate(X_):
-            X_[i] = self.
+            X_[i] = self._shift_vector(x)
 
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
 
     def _shift_spectrum(self, x) -> np.ndarray:
         shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
         return np.roll(x, shift_amount)
+
+    def _shift_vector(
+        self,
+        x: np.ndarray,
+    ) -> np.ndarray:
+        """
+        Shift vector with option to fill missing values.
+
+        Args:
+            arr: Input numpy array
+            shift: Number of positions to shift
+            fill_method: Method to fill missing values
+                'constant': fill with first/last value
+                'linear': fill using linear regression
+                'quadratic': fill using quadratic regression
+
+        Returns:
+            Shifted numpy array
+        """
+        shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
+
+        result = np.roll(x, shift)
+
+        if self.fill_method == "constant":
+            if shift > 0:
+                result[:shift] = x[0]
+            elif shift < 0:
+                result[shift:] = x[-1]
+
+        elif self.fill_method == "linear":
+            if shift > 0:
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[:5], 1)
+
+                extrapolate_x = np.arange(-shift, 0)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+
+                result[:shift] = extrapolated_values
+
+            elif shift < 0:
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[-5:], 1)
+
+                extrapolate_x = np.arange(len(x_), len(x_) - shift)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+
+                result[shift:] = extrapolated_values
+
+        elif self.fill_method == "quadratic":
+            if shift > 0:
+                # Use first 3 values for quadratic regression
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[:5], 2)
+
+                # Extrapolate to fill shifted region
+                extrapolate_x = np.arange(-shift, 0)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+
+                result[:shift] = extrapolated_values
+
+            elif shift < 0:
+                # Use last 3 values for quadratic regression
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[-5:], 2)
+
+                # Extrapolate to fill shifted region
+                extrapolate_x = np.arange(len(x_), len(x_) - shift)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+
+                result[shift:] = extrapolated_values
+
+        return result
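In practice, the new `fill_method` option controls how `IndexShift` fills the edge that `np.roll` would otherwise wrap around circularly: repeat the edge value ("constant") or extrapolate it from a small polynomial fit ("linear", "quadratic"). A minimal standalone sketch of the linear variant, using only NumPy (the function name `shift_with_linear_fill` is illustrative, not part of the package API):

```python
import numpy as np
from numpy.polynomial import polynomial as poly


def shift_with_linear_fill(x: np.ndarray, shift: int) -> np.ndarray:
    """Roll a 1-D spectrum and fill the vacated edge by linear extrapolation."""
    result = np.roll(x, shift)
    if shift > 0:
        # Fit a line to the first 5 points and extrapolate backwards.
        coeffs = poly.polyfit(np.arange(5), x[:5], 1)
        result[:shift] = poly.polyval(np.arange(-shift, 0), coeffs)
    elif shift < 0:
        # Fit a line to the last 5 points and extrapolate forwards.
        coeffs = poly.polyfit(np.arange(5), x[-5:], 1)
        result[shift:] = poly.polyval(np.arange(5, 5 - shift), coeffs)
    return result


spectrum = np.linspace(0.0, 1.0, 20) ** 2
print(shift_with_linear_fill(spectrum, 3))
```

With the estimator itself, the equivalent behaviour would be requested as something like `IndexShift(shift=3, fill_method="linear", random_state=42)` followed by `fit_transform`, per the signature added above.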
{chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_extended_multiplicative_scatter_correction.py RENAMED

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Literal, Optional
 
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -46,18 +46,20 @@ class ExtendedMultiplicativeScatterCorrection(
     model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
     """
 
+    ALLOWED_METHODS = ["mean", "median"]
+
+    # TODO: Check method is valid in instantiation. Right now it is check on fit because it breaks the scikitlearn check_estimator()
+
     def __init__(
         self,
-
-        use_mean: bool = True,
-        use_median: bool = False,
+        method: Literal["mean", "median"] = "mean",
         order: int = 2,
+        reference: Optional[np.ndarray] = None,
         weights: Optional[np.ndarray] = None,
     ):
-        self.
-        self.use_mean = use_mean
-        self.use_median = use_median
+        self.method = method
         self.order = order
+        self.reference = reference
         self.weights = weights
 
     def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
@@ -104,21 +106,24 @@ class ExtendedMultiplicativeScatterCorrection(
         self.weights_ = np.array(self.weights)
         return self
 
-        if self.
-            self.reference_ =
+        if self.method == "mean":
+            self.reference_ = X.mean(axis=0)
             self.indices_ = self._calculate_indices(X[0])
             self.A_ = self._calculate_A(self.indices_, self.reference_)
             self.weights_ = np.array(self.weights)
             return self
 
-
-            self.reference_ =
+        elif self.method == "median":
+            self.reference_ = np.median(X, axis=0)
             self.indices_ = self._calculate_indices(X[0])
             self.A_ = self._calculate_A(self.indices_, self.reference_)
             self.weights_ = np.array(self.weights)
             return self
 
-
+        else:
+            raise ValueError(
+                f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
+            )
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
         """
{chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_multiplicative_scatter_correction.py RENAMED

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Literal, Optional
 
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -37,16 +37,18 @@ class MultiplicativeScatterCorrection(
 
     """
 
+    ALLOWED_METHODS = ["mean", "median"]
+
+    # TODO: Check method is valid in instantiation. Right now it is check on fit because it breaks the scikitlearn check_estimator()
+
     def __init__(
         self,
+        method: Literal["mean", "median"] = "mean",
         reference: Optional[np.ndarray] = None,
-        use_mean: bool = True,
-        use_median: bool = False,
         weights: Optional[np.ndarray] = None,
     ):
+        self.method = method
         self.reference = reference
-        self.use_mean = use_mean
-        self.use_median = use_median
         self.weights = weights
 
     def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
@@ -91,18 +93,23 @@ class MultiplicativeScatterCorrection(
         self.weights_ = np.array(self.weights)
         return self
 
-        if self.
-            self.reference_ =
+        if self.method == "mean":
+            self.reference_ = X.mean(axis=0)
             self.A_ = self._calculate_A(self.reference_)
             self.weights_ = np.array(self.weights)
             return self
 
-
-            self.reference_ =
+        elif self.method == "median":
+            self.reference_ = np.median(X, axis=0)
             self.A_ = self._calculate_A(self.reference_)
             self.weights_ = np.array(self.weights)
             return self
 
+        else:
+            raise ValueError(
+                f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
+            )
+
         raise ValueError("No reference was provided")
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
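`MultiplicativeScatterCorrection` follows the same pattern: when no explicit `reference` is supplied, `method` decides whether the internal reference spectrum is the column-wise mean or median of the training spectra, and any other value raises a `ValueError`. A plain NumPy sketch of just that choice (illustrative only; `build_reference` is not a package function):

```python
import numpy as np


def build_reference(X: np.ndarray, method: str = "mean") -> np.ndarray:
    """Column-wise reference spectrum, mirroring the fit() branches above."""
    allowed = ["mean", "median"]
    if method == "mean":
        return X.mean(axis=0)
    elif method == "median":
        return np.median(X, axis=0)
    raise ValueError(f"Invalid method: {method}. Must be one of {allowed}")


X = np.random.default_rng(1).random((8, 50))   # 8 spectra, 50 wavelengths
reference = build_reference(X, method="median")
print(reference.shape)  # (50,)
```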