chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. chemotools/augmentation/__init__.py +16 -0
  2. chemotools/augmentation/baseline_shift.py +119 -0
  3. chemotools/augmentation/exponential_noise.py +117 -0
  4. chemotools/augmentation/index_shift.py +120 -0
  5. chemotools/augmentation/normal_noise.py +118 -0
  6. chemotools/augmentation/spectrum_scale.py +120 -0
  7. chemotools/augmentation/uniform_noise.py +124 -0
  8. chemotools/baseline/__init__.py +20 -8
  9. chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
  10. chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
  11. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
  12. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
  13. chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
  14. chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
  15. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
  16. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
  17. chemotools/datasets/__init__.py +5 -0
  18. chemotools/datasets/_base.py +122 -0
  19. chemotools/datasets/data/coffee_labels.csv +61 -0
  20. chemotools/datasets/data/coffee_spectra.csv +61 -0
  21. chemotools/datasets/data/fermentation_hplc.csv +35 -0
  22. chemotools/datasets/data/fermentation_spectra.csv +1630 -0
  23. chemotools/datasets/data/train_hplc.csv +22 -0
  24. chemotools/datasets/data/train_spectra.csv +22 -0
  25. chemotools/derivative/__init__.py +4 -2
  26. chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
  27. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
  28. chemotools/feature_selection/__init__.py +4 -0
  29. chemotools/feature_selection/_index_selector.py +113 -0
  30. chemotools/feature_selection/_range_cut.py +111 -0
  31. chemotools/scale/__init__.py +5 -3
  32. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
  33. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
  34. chemotools/scale/_point_scaler.py +115 -0
  35. chemotools/scatter/__init__.py +13 -2
  36. chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
  37. chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
  38. chemotools/scatter/_robust_normal_variate.py +101 -0
  39. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
  40. chemotools/smooth/__init__.py +6 -4
  41. chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
  42. chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
  43. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
  44. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
  45. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
  46. chemotools-0.1.6.dist-info/RECORD +51 -0
  47. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
  48. chemotools/scale/index_scaler.py +0 -97
  49. chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
  50. chemotools/scatter/multiplicative_scatter_correction.py +0 -123
  51. chemotools/utils/check_inputs.py +0 -14
  52. chemotools/variable_selection/__init__.py +0 -1
  53. chemotools/variable_selection/range_cut.py +0 -121
  54. chemotools-0.0.22.dist-info/RECORD +0 -39
  55. chemotools-0.0.22.dist-info/top_level.txt +0 -2
  56. tests/fixtures.py +0 -89
  57. tests/test_functionality.py +0 -397
  58. tests/test_sklearn_compliance.py +0 -192
  59. {tests → chemotools/datasets/data}/__init__.py +0 -0
  60. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
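The file list shows the release restructuring the package layout: implementation modules move to underscore-prefixed private files, variable_selection is replaced by feature_selection, and the chemotools.utils.check_inputs helper is dropped in favour of scikit-learn's own validation. A minimal import sketch, assuming the public classes are re-exported from each subpackage's __init__.py (consistent with the __init__.py changes listed above) and that the RangeCut name carries over from the old variable_selection module:

    # Old layout (0.0.22) -- shown for illustration only
    # from chemotools.variable_selection import RangeCut

    # New layout (0.1.6): module files are private (_range_cut.py, _non_negative.py, ...),
    # so classes are imported from the subpackage itself (assumed re-exports)
    from chemotools.feature_selection import RangeCut
    from chemotools.baseline import NonNegative, PolynomialCorrection, SubtractReference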
chemotools/baseline/{non_negative.py → _non_negative.py}
@@ -1,11 +1,9 @@
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data
 
-from chemotools.utils.check_inputs import check_input
 
-
-class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class NonNegative(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that sets all negative values to zero or to abs.
 
@@ -14,14 +12,6 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     mode : str, optional
         The mode to use for the non-negative values. Can be "zero" or "abs".
 
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -52,14 +42,9 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -80,11 +65,18 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
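A minimal usage sketch of NonNegative after this change, assuming the class is re-exported from chemotools.baseline; the mode values follow the docstring above:

    import numpy as np
    from chemotools.baseline import NonNegative  # assumed public import path

    X = np.array([[0.2, -0.1, 1.5],
                  [-0.3, 0.0, 0.8]])

    # mode="zero" clips negative values to zero; mode="abs" keeps their magnitude
    nn = NonNegative(mode="zero")
    X_nonneg = nn.fit_transform(X)  # [[0.2, 0.0, 1.5], [0.0, 0.0, 0.8]]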
chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py}
@@ -1,12 +1,13 @@
+from typing import Optional
+
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data
 
-from chemotools.utils.check_inputs import check_input
 
-class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class PolynomialCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
-    A transformer that subtracts a polynomial baseline from the input data. The polynomial is 
+    A transformer that subtracts a polynomial baseline from the input data. The polynomial is
     fitted to the points in the spectrum specified by the indices parameter.
 
     Parameters
@@ -18,14 +19,6 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
         The indices of the points in the spectrum to fit the polynomial to. Defaults to None,
         which fits the polynomial to all points in the spectrum (equivalent to detrend).
 
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
    -------
     fit(X, y=None)
@@ -37,7 +30,8 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
     _baseline_correct_spectrum(x)
         Subtract the polynomial baseline from a single spectrum.
     """
-    def __init__(self, order: int = 1, indices: list = None) -> None:
+
+    def __init__(self, order: int = 1, indices: Optional[list] = None) -> None:
        self.order = order
        self.indices = indices
 
@@ -59,22 +53,17 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
        if self.indices is None:
-            self.indices_ = range(0, len(X[0]))
+            self.indices_ = list(range(0, len(X[0])))
        else:
            self.indices_ = self.indices
 
        return self
-
-    def transform(self, X: np.ndarray, y:int=0, copy:bool=True) -> np.ndarray:
+
+    def transform(self, X: np.ndarray, y: int = 0, copy: bool = True) -> np.ndarray:
        """
        Transform the input data by subtracting the polynomial baseline.
 
@@ -95,21 +84,30 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
            The transformed data.
        """
        # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
 
        # Calculate polynomial baseline correction
        for i, x in enumerate(X_):
            X_[i] = self._baseline_correct_spectrum(x)
        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-
+
    def _baseline_correct_spectrum(self, x: np.ndarray) -> np.ndarray:
        """
        Subtract the polynomial baseline from a single spectrum.
@@ -126,5 +124,5 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
        """
        intensity = x[self.indices_]
        poly = np.polyfit(self.indices_, intensity, self.order)
-        baseline = [np.polyval(poly, i) for i in range(0, len(x))]
-        return x - baseline
+        baseline = [np.polyval(poly, i) for i in range(0, len(x))]
+        return x - baseline
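A usage sketch of PolynomialCorrection, again assuming the chemotools.baseline re-export; per _baseline_correct_spectrum above, a polynomial of the given order is fitted to the listed indices and subtracted from every spectrum:

    import numpy as np
    from chemotools.baseline import PolynomialCorrection  # assumed public import path

    x = np.arange(200, dtype=float)
    # synthetic spectrum: a Gaussian peak sitting on a linear drift
    spectrum = np.exp(-0.5 * ((x - 100.0) / 8.0) ** 2) + 0.002 * x + 0.1

    # fit a first-order baseline through peak-free regions at both ends and subtract it
    baseline_points = list(range(0, 20)) + list(range(180, 200))
    pc = PolynomialCorrection(order=1, indices=baseline_points)
    corrected = pc.fit_transform(spectrum.reshape(1, -1))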
chemotools/baseline/{subtract_reference.py → _subtract_reference.py}
@@ -1,11 +1,11 @@
+from typing import Optional
+
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
-
-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data
 
 
-class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class SubtractReference(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that subtracts a reference spectrum from the input data.
 
@@ -15,14 +15,6 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         The reference spectrum to subtract from the input data. If None, the original spectrum
         is returned.
 
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -34,9 +26,10 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     _subtract_reference(x)
         Subtract the reference spectrum from a single spectrum.
     """
+
    def __init__(
        self,
-        reference: np.ndarray = None,
+        reference: Optional[np.ndarray] = None,
    ):
        self.reference = reference
 
@@ -58,20 +51,14 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
        # Set the reference
-
        if self.reference is not None:
            self.reference_ = self.reference.copy()
            return self
-
+
        return self
 
    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -92,15 +79,24 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
            The transformed data.
        """
        # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
 
        if self.reference is None:
            return X_.reshape(-1, 1) if X_.ndim == 1 else X_
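A usage sketch of SubtractReference, assuming the chemotools.baseline re-export; with reference=None the transform returns the input unchanged, as the branch above shows:

    import numpy as np
    from chemotools.baseline import SubtractReference  # assumed public import path

    reference = np.linspace(0.0, 1.0, 10)             # e.g. a background/solvent spectrum
    X = np.vstack([reference + 0.05, reference + 0.10])

    sr = SubtractReference(reference=reference)
    residuals = sr.fit_transform(X)                   # rows are ~0.05 and ~0.10 offsets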
chemotools/datasets/__init__.py
@@ -0,0 +1,5 @@
+from ._base import load_coffee
+from ._base import load_fermentation_train
+from ._base import load_fermentation_test
+
+__all__ = ["load_coffee", "load_fermentation_train", "load_fermentation_test"]
chemotools/datasets/_base.py
@@ -0,0 +1,122 @@
+import os
+
+
+import pandas as pd
+import polars as pl
+
+PACKAGE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
+
+
+def load_fermentation_train(set_output="pandas"):
+    """
+    Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
+    off-line. This dataset is designed to represent the variability of real fermentation data.
+
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+
+    Returns
+    -------
+    train_spectra: pd.DataFrame A pandas DataFrame containing the synthetic spectra measured to train the model.
+    train_hplc: pd.DataFrame A pandas DataFrame containing the corresponding reference measurements analyzed with HPLC.
+
+    References
+    -------
+    - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
+      Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
+      A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
+    """
+    if set_output == "pandas":
+        train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
+        train_spectra.columns = train_spectra.columns.astype(float)
+        train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+        return train_spectra, train_hplc
+
+    if set_output == "polars":
+        train_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
+        train_hplc = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+        return train_spectra, train_hplc
+
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
+
+
+def load_fermentation_test(set_output="pandas"):
+    """
+    Loads the testing data of the fermentation dataset. This data corresponds to real fermentation data measured
+    on-line during a fermentation process.
+
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+
+    Returns
+    -------
+    test_spectra: pd.DataFrame A pandas DataFrame containing the on-line spectra measured to train the model.
+    test_hplc: pd.DataFrame A pandas DataFrame containing the corresponding HPLC measurements.
+
+    References
+    -------
+    - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
+      Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
+      A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
+    """
+    if set_output == "pandas":
+        fermentation_spectra = pd.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
+        )
+        fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
+        fermentation_hplc = pd.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
+        )
+        return fermentation_spectra, fermentation_hplc
+
+    if set_output == "polars":
+        fermentation_spectra = pl.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
+        )
+        fermentation_hplc = pl.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
+        )
+        return fermentation_spectra, fermentation_hplc
+
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
+
+
+def load_coffee(set_output="pandas"):
+    """
+    Loads the coffee dataset. This data corresponds to a coffee spectra from three different origins
+    measured off-line using attenuated total reflectance Fourier transform infrared spectroscopy (ATR-FTIR).
+
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+
+    Returns
+    -------
+    coffee_spectra: pd.DataFrame A pandas DataFrame containing the coffee spectra.
+    coffee_labels: pd.DataFrame A pandas DataFrame containing the corresponding labels.
+    """
+    if set_output == "pandas":
+        coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
+        coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
+        return coffee_spectra, coffee_labels
+
+    if set_output == "polars":
+        coffee_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
+        coffee_labels = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
+        return coffee_spectra, coffee_labels
+
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
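A short loading sketch for the new datasets module; the function names and the set_output switch come directly from the code above:

    from chemotools.datasets import load_coffee, load_fermentation_train, load_fermentation_test

    # pandas DataFrames by default; spectra column names are cast to float wavenumbers
    train_spectra, train_hplc = load_fermentation_train()
    test_spectra, test_hplc = load_fermentation_test()

    # polars output is selected with set_output="polars"
    coffee_spectra, coffee_labels = load_coffee(set_output="polars")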
chemotools/datasets/data/coffee_labels.csv
@@ -0,0 +1,61 @@
+labels
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam