PyPI - chemotools - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

chemotools 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

chemotools/{variable_selection/select_features.py → feature_selection/_index_selector.py} RENAMED Viewed

@@ -1,11 +1,13 @@
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+from sklearn.base import BaseEstimator
+from sklearn.feature_selection._base import SelectorMixin
 from sklearn.utils.validation import check_is_fitted
 from chemotools.utils.check_inputs import check_input
-class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class IndexSelector(BaseEstimator, SelectorMixin):
     """
     A transformer that Selects the spectral data to a specified array of features. This
     array can be continuous or discontinuous. The array of features is specified by:
@@ -29,12 +31,6 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     features_index_ : int
         The index of the features to select.
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -52,7 +48,7 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         self.features = features
         self.wavenumbers = wavenumbers
-    def fit(self, X: np.ndarray, y=None) -> "SelectFeatures":
+    def fit(self, X: np.ndarray, y=None) -> "IndexSelector":
         """
         Fit the transformer to the input data.
@@ -66,14 +62,11 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         Returns
         -------
-        self : SelectFeatures
+        self : IndexSelector
             The fitted transformer.
         """
-        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
+        # validate that X is a 2D array and has only finite values
+        X = self._validate_data(X)
         # Set the fitted attribute to True
         self._is_fitted = True
@@ -91,41 +84,23 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         return self
-    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+    def _get_support_mask(self):
         """
-        Transform the input data by cutting it to the specified range.
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            The input data to transform.
-        y : None
-            Ignored.
+        Get the boolean mask indicating which features are selected.
         Returns
         -------
-        X_ : np.ndarray of shape (n_samples, n_features)
-            The transformed data.
+        mask : ndarray of shape (n_features_in_,)
+            The mask indicating the selected features.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self)
-        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
-        # Check that the number of features is the same as the fitted data
-        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(
-                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
-            )
-        # Select the features
-        if self.features is None:
-            return X_
+        # Create the mask
+        mask = np.zeros(self.n_features_in_, dtype=bool)
+        mask[self.features_index_] = True
-        return X_[:, self.features_index_]
+        return mask
     def _find_index(self, target: float) -> int:
         if self.wavenumbers is None:

chemotools/{variable_selection/range_cut.py → feature_selection/_range_cut.py} RENAMED Viewed

@@ -1,13 +1,12 @@
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+from sklearn.base import BaseEstimator
+from sklearn.feature_selection._base import SelectorMixin
 from sklearn.utils.validation import check_is_fitted
-from chemotools.utils.check_inputs import check_input
-class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class RangeCut(BaseEstimator, SelectorMixin):
     """
-    A transformer that cuts the input data to a specified range. The range is specified:
+    A selector that cuts the input data to a specified range. The range is specified:
     - by the indices of the start and end of the range,
     - by the wavenumbers of the start and end of the range. In this case, the wavenumbers
         must be provided to the transformer when it is initialised. If the wavenumbers
@@ -35,19 +34,11 @@ class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     end_index_ : int
         The index of the end of the range. It is -1 if the wavenumbers are not provided.
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
         Fit the transformer to the input data.
-    transform(X, y=0, copy=True)
-        Transform the input data by cutting it to the specified range.
     """
     def __init__(
@@ -78,13 +69,7 @@ class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         # Set the start and end indices
         if self.wavenumbers is None:
@@ -95,39 +80,25 @@ class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             self.end_index_ = self._find_index(self.end)
         return self
-    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+    def _get_support_mask(self):
         """
-        Transform the input data by cutting it to the specified range.
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            The input data to transform.
-        y : None
-            Ignored.
+        Get the boolean mask indicating which features are selected.
         Returns
         -------
-        X_ : np.ndarray of shape (n_samples, n_features)
-            The transformed data.
+        mask : np.ndarray of shape (n_features,)
+            The boolean mask indicating which features are selected.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
-        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        check_is_fitted(self, ["start_index_", "end_index_"])
-        # Check that the number of features is the same as the fitted data
-        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(
-                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
-            )
+        # Create the mask
+        mask = np.zeros(self.n_features_in_, dtype=bool)
+        mask[self.start_index_ : self.end_index_] = True
-        # Range cut the spectra
-        return X_[:, self.start_index_ : self.end_index_]
+        return mask
     def _find_index(self, target: float) -> int:
         wavenumbers = np.array(self.wavenumbers)

chemotools/scale/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from .min_max_scaler import MinMaxScaler
-from .norm_scaler import NormScaler
-from .point_scaler import PointScaler
+from ._min_max_scaler import MinMaxScaler
+from ._norm_scaler import NormScaler
+from ._point_scaler import PointScaler

chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} RENAMED Viewed

@@ -8,23 +8,15 @@ from chemotools.utils.check_inputs import check_input
 class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     """
     A transformer that scales the input data by subtracting the minimum and dividing by
-    the difference between the maximum and the minimum. When the use_min parameter is False,
+    the difference between the maximum and the minimum. When the use_min parameter is False,
     the data is scaled by the maximum.
     Parameters
     ----------
     use_min : bool, default=True
-        The normalization to use. If True, the data is subtracted by the minimum and
+        The normalization to use. If True, the data is subtracted by the minimum and
         scaled by the maximum. If False, the data is scaled by the maximum.
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -55,13 +47,7 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         return self
@@ -83,7 +69,7 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
         # Check that X is a 2D array and has only finite values
         X = check_input(X)
@@ -97,8 +83,9 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         # Normalize the data by the maximum value
         if self.use_min:
-            X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (np.max(
-                X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True))
+            X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (
+                np.max(X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True)
+            )
         else:
             X_ = X_ / np.max(X_, axis=1, keepdims=True)

chemotools/scale/{norm_scaler.py → _norm_scaler.py} RENAMED Viewed

@@ -12,15 +12,7 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     Parameters
     ----------
     l_norm : int, optional
-        The L-norm to use. Default is 2.
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
+        The L-norm to use. Default is 2.
     Methods
     -------
@@ -30,13 +22,14 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     transform(X, y=0, copy=True)
         Transform the input data by scaling by the L-norm.
     """
     def __init__(self, l_norm: int = 2):
         self.l_norm = l_norm
     def fit(self, X: np.ndarray, y=None) -> "NormScaler":
         """
         Fit the transformer to the input data.
         Parameters
         ----------
         X : np.ndarray of shape (n_samples, n_features)
@@ -51,13 +44,7 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         return self
@@ -79,7 +66,7 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
         # Check that X is a 2D array and has only finite values
         X = check_input(X)

chemotools/scale/{point_scaler.py → _point_scaler.py} RENAMED Viewed

@@ -7,12 +7,12 @@ from chemotools.utils.check_inputs import check_input
 class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     """
-    A transformer that scales the input data by the intensity value at a given point.
+    A transformer that scales the input data by the intensity value at a given point.
     The point can be specified by an index or by a wavenumber.
     Parameters
     ----------
-    point : int,
+    point : int,
         The point to scale the data by. It can be an index or a wavenumber.
     wavenumber : array-like, optional
@@ -25,12 +25,6 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     point_index_ : int
         The index of the point to scale the data by. It is 0 if the wavenumbers are not provided.
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -39,11 +33,11 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     transform(X, y=0, copy=True)
         Transform the input data by scaling by the value at a given Point.
     """
     def __init__(self, point: int = 0, wavenumbers: np.ndarray = None):
         self.point = point
         self.wavenumbers = wavenumbers
     def fit(self, X: np.ndarray, y=None) -> "PointScaler":
         """
         Fit the transformer to the input data.
@@ -62,13 +56,7 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         # Set the point index
         if self.wavenumbers is None:
@@ -76,7 +64,6 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         else:
             self.point_index_ = self._find_index(self.point)
         return self
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -97,7 +84,7 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "point_index_")
         # Check that X is a 2D array and has only finite values
         X = check_input(X)
@@ -105,14 +92,16 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
         # Scale the data by Point
         for i, x in enumerate(X_):
             X_[i] = x / x[self.point_index_]
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
     def _find_index(self, target: float) -> int:
         wavenumbers = np.array(self.wavenumbers)
-        return np.argmin(np.abs(wavenumbers - target))
+        return np.argmin(np.abs(wavenumbers - target))

chemotools/scatter/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from .extended_multiplicative_scatter_correction import ExtendedMultiplicativeScatterCorrection
-from .multiplicative_scatter_correction import MultiplicativeScatterCorrection
-from .robust_normal_variate import RobustNormalVariate
-from .standard_normal_variate import StandardNormalVariate
+from ._extended_multiplicative_scatter_correction import ExtendedMultiplicativeScatterCorrection
+from ._multiplicative_scatter_correction import MultiplicativeScatterCorrection
+from ._robust_normal_variate import RobustNormalVariate
+from ._standard_normal_variate import StandardNormalVariate

chemotools/scatter/{extended_multiplicative_scatter_correction.py → _extended_multiplicative_scatter_correction.py} RENAMED Viewed

@@ -37,8 +37,6 @@ class ExtendedMultiplicativeScatterCorrection(
     ----------
     reference_ : np.ndarray
         The reference spectrum used for the correction.
-    n_features_in_ : int
-        The number of features in the training data.
     References
     ----------
@@ -82,13 +80,7 @@ class ExtendedMultiplicativeScatterCorrection(
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         # Check that the length of the reference is the same as the number of features
         if self.reference is not None:
@@ -146,7 +138,7 @@ class ExtendedMultiplicativeScatterCorrection(
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
         # Check that X is a 2D array and has only finite values
         X = check_input(X)

chemotools/scatter/{multiplicative_scatter_correction.py → _multiplicative_scatter_correction.py} RENAMED Viewed

@@ -68,13 +68,7 @@ class MultiplicativeScatterCorrection(
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         # Check that the length of the reference is the same as the number of features
         if self.reference is not None:
@@ -129,7 +123,7 @@ class MultiplicativeScatterCorrection(
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
         # Check that X is a 2D array and has only finite values
         X = check_input(X)

chemotools/scatter/{robust_normal_variate.py → _robust_normal_variate.py} RENAMED Viewed

@@ -15,14 +15,6 @@ class RobustNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
         The percentile to use for the robust normal variate. The value should be
         between 0 and 100. The default is 25.
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -58,13 +50,7 @@ class RobustNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         return self
@@ -86,7 +72,7 @@ class RobustNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
         # Check that X is a 2D array and has only finite values
         X = check_input(X)

chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} RENAMED Viewed

@@ -9,14 +9,6 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
     """
     A transformer that calculates the standard normal variate of the input data.
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -25,10 +17,11 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
     transform(X, y=0, copy=True)
         Transform the input data by calculating the standard normal variate.
     """
     def fit(self, X: np.ndarray, y=None) -> "StandardNormalVariate":
         """
         Fit the transformer to the input data.
         Parameters
         ----------
         X : np.ndarray of shape (n_samples, n_features)
@@ -43,13 +36,7 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         return self
@@ -71,7 +58,7 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
         # Check that X is a 2D array and has only finite values
         X = check_input(X)
@@ -79,7 +66,9 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
         # Calculate the standard normal variate
         for i, x in enumerate(X_):
@@ -88,4 +77,4 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
     def _calculate_standard_normal_variate(self, x) -> np.ndarray:
-        return (x - x.mean()) / x.std()
+        return (x - x.mean()) / x.std()

chemotools/smooth/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from .mean_filter import MeanFilter
-from .median_filter import MedianFilter
-from .savitzky_golay_filter import SavitzkyGolayFilter
-from .whittaker_smooth import WhittakerSmooth
+from ._mean_filter import MeanFilter
+from ._median_filter import MedianFilter
+from ._savitzky_golay_filter import SavitzkyGolayFilter
+from ._whittaker_smooth import WhittakerSmooth

chemotools/smooth/{mean_filter.py → _mean_filter.py} RENAMED Viewed

@@ -14,19 +14,11 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     ----------
     window_size : int, optional
         The size of the window to use for the mean filter. Must be odd. Default is 3.
     mode : str, optional
         The mode to use for the mean filter. Can be "nearest", "constant", "reflect",
         "wrap", "mirror" or "interp". Default is "nearest".
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
     Methods
     -------
     fit(X, y=None)
@@ -35,7 +27,8 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     transform(X, y=0, copy=True)
         Transform the input data by calculating the mean filter.
     """
-    def __init__(self, window_size: int = 3, mode='nearest') -> None:
+    def __init__(self, window_size: int = 3, mode="nearest") -> None:
         self.window_size = window_size
         self.mode = mode
@@ -57,13 +50,7 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-        # Set the fitted attribute to True
-        self._is_fitted = True
+        X = self._validate_data(X)
         return self
@@ -85,7 +72,7 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
         # Check that X is a 2D array and has only finite values
         X = check_input(X)

chemotools 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

chemotools 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl