PyPI - chemotools - Versions diffs - 0.1.3__tar.gz → 0.1.5__tar.gz - Mend

chemotools-0.1.3.tar.gz → chemotools-0.1.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

{chemotools-0.1.3 → chemotools-0.1.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: chemotools
-Version: 0.1.3
+Version: 0.1.5
 Summary: Package to integrate chemometrics in scikit-learn pipelines
 Home-page: https://github.com/paucablop/chemotools
 Author: Pau Cabaneros Lopez
@@ -14,8 +14,10 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy
 Requires-Dist: pandas
+Requires-Dist: polars
+Requires-Dist: pyarrow
 Requires-Dist: scipy
-Requires-Dist: scikit-learn
+Requires-Dist: scikit-learn>=1.4.0
 ![chemotools](assets/images/logo_pixel.png)

{chemotools-0.1.3 → chemotools-0.1.5}/chemotools/augmentation/uniform_noise.py RENAMED Viewed

@@ -11,10 +11,10 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     Parameters
     ----------
-    low : float, default=0.0
+    min : float, default=0.0
         The lower bound of the uniform distribution.
-    high : float, default=0.0
+    max : float, default=0.0
         The upper bound of the uniform distribution.
     random_state : int, default=None
@@ -38,9 +38,9 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     """
-    def __init__(self, low: float = 0.0, high: float = 0.0, random_state: int = None):
-        self.low = low
-        self.high = high
+    def __init__(self, min: float = 0.0, max: float = 0.0, random_state: int = None):
+        self.min = min
+        self.max = max
         self.random_state = random_state
     def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
@@ -109,4 +109,4 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
     def _add_random_noise(self, x) -> np.ndarray:
-        return x + self._rng.uniform(self.low, self.high, size=x.shape)
+        return x + self._rng.uniform(self.min, self.max, size=x.shape)

chemotools-0.1.5/chemotools/datasets/_base.py ADDED Viewed

@@ -0,0 +1,122 @@
+import os
+import pandas as pd
+import polars as pl
+PACKAGE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
+def load_fermentation_train(set_output="pandas"):
+    """
+    Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
+    off-line. This dataset is designed to represent the variability of real fermentation data.
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+    Returns
+    -------
+    train_spectra: pd.DataFrame or pl.DataFrame (depending on set_output) containing the synthetic spectra measured to train the model.
+    train_hplc: pd.DataFrame or pl.DataFrame (depending on set_output) containing the corresponding reference measurements analyzed with HPLC.
+    References
+    -------
+    - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
+    Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
+    A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
+    """
+    if set_output == "pandas":
+        train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
+        train_spectra.columns = train_spectra.columns.astype(float)
+        train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+        return train_spectra, train_hplc
+    if set_output == "polars":
+        train_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
+        train_hplc = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+        return train_spectra, train_hplc
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
+def load_fermentation_test(set_output="pandas"):
+    """
+    Loads the testing data of the fermentation dataset. This data corresponds to real fermentation data measured
+    on-line during a fermentation process.
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+    Returns
+    -------
+    test_spectra: pd.DataFrame or pl.DataFrame (depending on set_output) containing the on-line spectra measured to test the model.
+    test_hplc: pd.DataFrame or pl.DataFrame (depending on set_output) containing the corresponding HPLC measurements.
+    References
+    -------
+    - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
+    Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
+    A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
+    """
+    if set_output == "pandas":
+        fermentation_spectra = pd.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
+        )
+        fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
+        fermentation_hplc = pd.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
+        )
+        return fermentation_spectra, fermentation_hplc
+    if set_output == "polars":
+        fermentation_spectra = pl.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
+        )
+        fermentation_hplc = pl.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
+        )
+        return fermentation_spectra, fermentation_hplc
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
+def load_coffee(set_output="pandas"):
+    """
+    Loads the coffee dataset. This data corresponds to coffee spectra from three different origins
+    measured off-line using attenuated total reflectance Fourier transform infrared spectroscopy (ATR-FTIR).
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+    Returns
+    -------
+    coffee_spectra: pd.DataFrame or pl.DataFrame (depending on set_output) containing the coffee spectra.
+    coffee_labels: pd.DataFrame or pl.DataFrame (depending on set_output) containing the corresponding labels.
+    """
+    if set_output == "pandas":
+        coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
+        coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
+        return coffee_spectra, coffee_labels
+    if set_output == "polars":
+        coffee_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
+        coffee_labels = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
+        return coffee_spectra, coffee_labels
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )

{chemotools-0.1.3 → chemotools-0.1.5}/chemotools/feature_selection/_range_cut.py RENAMED Viewed

@@ -34,6 +34,8 @@ class RangeCut(BaseEstimator, SelectorMixin):
     end_index_ : int
         The index of the end of the range. It is -1 if the wavenumbers are not provided.
+    wavenumbers_ : array-like
+        The cut wavenumbers of the input data.
     Methods
     -------
@@ -75,9 +77,11 @@ class RangeCut(BaseEstimator, SelectorMixin):
         if self.wavenumbers is None:
             self.start_index_ = self.start
             self.end_index_ = self.end
+            self.wavenumbers_ = None
         else:
             self.start_index_ = self._find_index(self.start)
             self.end_index_ = self._find_index(self.end)
+            self.wavenumbers_ = self.wavenumbers[self.start_index_ : self.end_index_]
         return self

{chemotools-0.1.3 → chemotools-0.1.5}/chemotools.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: chemotools
-Version: 0.1.3
+Version: 0.1.5
 Summary: Package to integrate chemometrics in scikit-learn pipelines
 Home-page: https://github.com/paucablop/chemotools
 Author: Pau Cabaneros Lopez
@@ -14,8 +14,10 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy
 Requires-Dist: pandas
+Requires-Dist: polars
+Requires-Dist: pyarrow
 Requires-Dist: scipy
-Requires-Dist: scikit-learn
+Requires-Dist: scikit-learn>=1.4.0
 ![chemotools](assets/images/logo_pixel.png)

chemotools-0.1.5/chemotools.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,6 @@
+numpy
+pandas
+polars
+pyarrow
+scipy
+scikit-learn>=1.4.0

{chemotools-0.1.3 → chemotools-0.1.5}/setup.py RENAMED Viewed

@@ -27,8 +27,10 @@ setuptools.setup(
     install_requires=[
         "numpy",
         "pandas",
+        "polars",
+        "pyarrow",
         "scipy",
-        "scikit-learn",
+        "scikit-learn>=1.4.0",
     ],
     include_package_data=True,
     package_data={'': ['tests/resources/*.csv',

chemotools-0.1.5/tests/test_datasets.py ADDED Viewed

@@ -0,0 +1,111 @@
+import pandas as pd
+import polars as pl
+import pytest
+from chemotools.datasets import (
+    load_coffee,
+    load_fermentation_test,
+    load_fermentation_train,
+)
+def test_load_coffee_pandas():
+    # Arrange
+    # Act
+    coffee_spectra, coffee_labels = load_coffee()
+    # Assert
+    assert coffee_spectra.shape == (60, 1841)
+    assert coffee_labels.shape == (60, 1)
+    assert isinstance(coffee_spectra, pd.DataFrame)
+    assert isinstance(coffee_labels, pd.DataFrame)
+def test_load_coffee_polars():
+    # Arrange
+    # Act
+    coffee_spectra, coffee_labels = load_coffee(set_output="polars")
+    # Assert
+    assert coffee_spectra.shape == (60, 1841)
+    assert coffee_labels.shape == (60, 1)
+    assert isinstance(coffee_spectra, pl.DataFrame)
+    assert isinstance(coffee_labels, pl.DataFrame)
+def test_load_coffee_exception():
+    # Arrange
+    # Act and Assert
+    with pytest.raises(ValueError):
+        coffee_spectra, coffee_labels = load_coffee(set_output="plars")
+def test_load_fermentation_test_pandas():
+    # Arrange
+    # Act
+    test_spectra, test_hplc = load_fermentation_test()
+    # Assert
+    assert test_spectra.shape == (1629, 1047)
+    assert test_hplc.shape == (34, 6)
+    assert isinstance(test_spectra, pd.DataFrame)
+    assert isinstance(test_hplc, pd.DataFrame)
+def test_load_fermentation_test_polars():
+    # Arrange
+    # Act
+    test_spectra, test_hplc = load_fermentation_test(set_output="polars")
+    # Assert
+    assert test_spectra.shape == (1629, 1047)
+    assert test_hplc.shape == (34, 6)
+    assert isinstance(test_spectra, pl.DataFrame)
+    assert isinstance(test_hplc, pl.DataFrame)
+def test_load_fermentation_test_exception():
+    # Arrange
+    # Act and Assert
+    with pytest.raises(ValueError):
+        test_spectra, test_hplc = load_fermentation_test(set_output="plars")
+def test_load_fermentation_train_pandas():
+    # Arrange
+    # Act
+    train_spectra, train_hplc = load_fermentation_train()
+    # Assert
+    assert train_spectra.shape == (21, 1047)
+    assert train_hplc.shape == (21, 1)
+    assert isinstance(train_spectra, pd.DataFrame)
+    assert isinstance(train_hplc, pd.DataFrame)
+def test_load_fermentation_train_polars():
+    # Arrange
+    # Act
+    train_spectra, train_hplc = load_fermentation_train(set_output="polars")
+    # Assert
+    assert train_spectra.shape == (21, 1047)
+    assert train_hplc.shape == (21, 1)
+    assert isinstance(train_spectra, pl.DataFrame)
+    assert isinstance(train_hplc, pl.DataFrame)
+def test_load_fermentation_train_exception():
+    # Arrange
+    # Act and Assert
+    with pytest.raises(ValueError):
+        train_spectra, train_hplc = load_fermentation_train(set_output="plars")

{chemotools-0.1.3 → chemotools-0.1.5}/tests/test_functionality.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+import polars as pl
 import pytest
 from chemotools.augmentation import (
@@ -622,9 +623,10 @@ def test_range_cut_by_wavenumber_with_list():
     # Assert
     assert np.allclose(spectrum_corrected[0], spectrum[0][1:7], atol=1e-8)
+    assert range_cut.wavenumbers_ == [2, 3, 4, 5, 6, 7]
-def test_range_cut_by_wavenumber_with_dataframe():
+def test_range_cut_by_wavenumber_with_pandas_dataframe():
     # Arrange
     wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
     spectrum = pd.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
@@ -637,6 +639,19 @@ def test_range_cut_by_wavenumber_with_dataframe():
     assert type(spectrum_corrected) == pd.DataFrame
+def test_range_cut_by_wavenumber_with_polars_dataframe():
+    # Arrange
+    wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    spectrum = pl.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
+    range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output(transform='polars')
+    # Act
+    spectrum_corrected = range_cut.fit_transform(spectrum)
+    # Assert
+    assert type(spectrum_corrected) == pl.DataFrame
 def test_robust_normal_variate():
     # Arrange
     spectrum = np.array([2, 3.5, 5, 27, 8, 9]).reshape(1, -1)
@@ -740,7 +755,7 @@ def test_subtract_reference_without_reference(spectrum):
 def test_uniform_noise():
     # Arrange
     spectrum = np.ones(10000).reshape(1, -1)
-    uniform_noise = UniformNoise(low=-1, high=1, random_state=42)
+    uniform_noise = UniformNoise(min=-1, max=1, random_state=42)
     # Act
     spectrum_corrected = uniform_noise.fit_transform(spectrum)

chemotools-0.1.3/chemotools/datasets/_base.py DELETED Viewed

@@ -1,69 +0,0 @@
-import pandas as pd
-import os
-PACKAGE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
-def load_fermentation_train():
-    """
-    Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
-    off-line. This dataset is designed to represent the variability of real fermentation data.
-    Returns
-    -------
-    train_spectra: pd.DataFrame A pandas DataFrame containing the synthetic spectra measured to train the model.
-    train_hplc: pd.DataFrame A pandas DataFrame containing the corresponding reference measurements analyzed with HPLC.
-    References
-    -------
-    - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
-    Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
-    A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
-    """
-    train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
-    train_spectra.columns = train_spectra.columns.astype(float)
-    train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
-    return train_spectra, train_hplc
-def load_fermentation_test():
-    """
-    Loads the testing data of the fermentation dataset. This data corresponds to real fermentation data measured
-    on-line during a fermentation process.
-    Returns
-    -------
-    test_spectra: pd.DataFrame A pandas DataFrame containing the on-line spectra measured to train the model.
-    test_hplc: pd.DataFrame A pandas DataFrame containing the corresponding HPLC measurements.
-    References
-    -------
-    - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
-    Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
-    A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
-    """
-    fermentation_spectra = pd.read_csv(
-        PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
-    )
-    fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
-    fermentation_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv")
-    return fermentation_spectra, fermentation_hplc
-def load_coffee():
-    """
-    Loads the coffee dataset. This data corresponds to a coffee spectra from three different origins
-    measured off-line using attenuated total reflectance Fourier transform infrared spectroscopy (ATR-FTIR).
-    Returns
-    -------
-    coffee_spectra: pd.DataFrame A pandas DataFrame containing the coffee spectra.
-    coffee_labels: pd.DataFrame A pandas DataFrame containing the corresponding labels.
-    """
-    coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
-    coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
-    return coffee_spectra, coffee_labels

chemotools-0.1.3/chemotools.egg-info/requires.txt DELETED Viewed

@@ -1,4 +0,0 @@
-numpy
-pandas
-scipy
-scikit-learn

chemotools-0.1.3/tests/test_datasets.py DELETED Viewed

@@ -1,43 +0,0 @@
-import pandas as pd
-from chemotools.datasets import load_coffee, load_fermentation_test, load_fermentation_train
-def test_load_coffee():
-    # Arrange
-    # Act
-    coffee_spectra, coffee_labels = load_coffee()
-    # Assert
-    assert coffee_spectra.shape == (60, 1841)
-    assert coffee_labels.shape == (60, 1)
-    assert isinstance(coffee_spectra, pd.DataFrame)
-    assert isinstance(coffee_labels, pd.DataFrame)
-def test_load_fermentation_test():
-    # Arrange
-    # Act
-    test_spectra, test_hplc = load_fermentation_test()
-    # Assert
-    assert test_spectra.shape == (1629, 1047)
-    assert test_hplc.shape == (34, 6)
-    assert isinstance(test_spectra, pd.DataFrame)
-    assert isinstance(test_hplc, pd.DataFrame)
-def test_load_fermentation_train():
-    # Arrange
-    # Act
-    train_spectra, train_hplc = load_fermentation_train()
-    # Assert
-    assert train_spectra.shape == (21, 1047)
-    assert train_hplc.shape == (21, 1)
-    assert isinstance(train_spectra, pd.DataFrame)
-    assert isinstance(train_hplc, pd.DataFrame)