PyPI - chemotools - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

chemotools 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

chemotools/datasets/_base.py CHANGED Viewed

@@ -1,14 +1,22 @@
-import pandas as pd
 import os
+import pandas as pd
+import polars as pl
 PACKAGE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
-def load_fermentation_train():
+def load_fermentation_train(set_output="pandas"):
     """
-    Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
+    Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
     off-line. This dataset is designed to represent the variability of real fermentation data.
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
     Returns
     -------
     train_spectra: pd.DataFrame A pandas DataFrame containing the synthetic spectra measured to train the model.
@@ -20,17 +28,32 @@ def load_fermentation_train():
     Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
     A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
     """
-    train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
-    train_spectra.columns = train_spectra.columns.astype(float)
-    train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+    if set_output == "pandas":
+        train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
+        train_spectra.columns = train_spectra.columns.astype(float)
+        train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+        return train_spectra, train_hplc
-    return train_spectra, train_hplc
+    if set_output == "polars":
+        train_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
+        train_hplc = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+        return train_spectra, train_hplc
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
-def load_fermentation_test():
+def load_fermentation_test(set_output="pandas"):
     """
     Loads the testing data of the fermentation dataset. This data corresponds to real fermentation data measured
-    on-line during a fermentation process.
+    on-line during a fermentation process.
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
     Returns
     -------
@@ -43,27 +66,57 @@ def load_fermentation_test():
     Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
     A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
     """
-    fermentation_spectra = pd.read_csv(
-        PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
-    )
-    fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
-    fermentation_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv")
-    return fermentation_spectra, fermentation_hplc
-def load_coffee():
+    if set_output == "pandas":
+        fermentation_spectra = pd.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
+        )
+        fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
+        fermentation_hplc = pd.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
+        )
+        return fermentation_spectra, fermentation_hplc
+    if set_output == "polars":
+        fermentation_spectra = pl.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
+        )
+        fermentation_hplc = pl.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
+        )
+        return fermentation_spectra, fermentation_hplc
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
+def load_coffee(set_output="pandas"):
     """
-    Loads the coffee dataset. This data corresponds to a coffee spectra from three different origins
+    Loads the coffee dataset. This data corresponds to a coffee spectra from three different origins
     measured off-line using attenuated total reflectance Fourier transform infrared spectroscopy (ATR-FTIR).
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
     Returns
     -------
     coffee_spectra: pd.DataFrame A pandas DataFrame containing the coffee spectra.
     coffee_labels: pd.DataFrame A pandas DataFrame containing the corresponding labels.
     """
-    coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
-    coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
-    return coffee_spectra, coffee_labels
+    if set_output == "pandas":
+        coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
+        coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
+        return coffee_spectra, coffee_labels
+    if set_output == "polars":
+        coffee_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
+        coffee_labels = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
+        return coffee_spectra, coffee_labels
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )

chemotools/feature_selection/_range_cut.py CHANGED Viewed

@@ -34,6 +34,8 @@ class RangeCut(BaseEstimator, SelectorMixin):
     end_index_ : int
         The index of the end of the range. It is -1 if the wavenumbers are not provided.
+    wavenumbers_ : array-like
+        The cut wavenumbers of the input data.
     Methods
     -------
@@ -75,6 +77,7 @@ class RangeCut(BaseEstimator, SelectorMixin):
         if self.wavenumbers is None:
             self.start_index_ = self.start
             self.end_index_ = self.end
+            self.wavenumbers_ = None
         else:
             self.start_index_ = self._find_index(self.start)
             self.end_index_ = self._find_index(self.end)

{chemotools-0.1.4.dist-info → chemotools-0.1.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: chemotools
-Version: 0.1.4
+Version: 0.1.5
 Summary: Package to integrate chemometrics in scikit-learn pipelines
 Home-page: https://github.com/paucablop/chemotools
 Author: Pau Cabaneros Lopez
@@ -14,8 +14,10 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy
 Requires-Dist: pandas
+Requires-Dist: polars
+Requires-Dist: pyarrow
 Requires-Dist: scipy
-Requires-Dist: scikit-learn
+Requires-Dist: scikit-learn >=1.4.0
 ![chemotools](assets/images/logo_pixel.png)

{chemotools-0.1.4.dist-info → chemotools-0.1.5.dist-info}/RECORD RENAMED Viewed

@@ -16,7 +16,7 @@ chemotools/baseline/_non_negative.py,sha256=SyiS_-cfnypLXY3gC80oo7doqXUlHAAgmwrk
 chemotools/baseline/_polynomial_correction.py,sha256=0w9qA_w5dc9IIv5KMmAOZ06hWDuk-uyealsTaZX2qgw,3749
 chemotools/baseline/_subtract_reference.py,sha256=vfre6Z-bgDCwwl3VnpahmGJTBFJVK9HGBrUsjfl2O9o,3135
 chemotools/datasets/__init__.py,sha256=ojqxb-C_eDmizwUqVCJ8BqJxwULD7_hWCyVIA1uRO0c,116
-chemotools/datasets/_base.py,sha256=Z174CaIlpx17Yu8Pg1qZPuHWkS3BYWn7gtOYsoe8zNk,2895
+chemotools/datasets/_base.py,sha256=ftAmf2jHWUW_YQobXCsIFC617PeXwsmZIwAgab9EvL8,4890
 chemotools/datasets/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 chemotools/datasets/data/coffee_labels.csv,sha256=ZXQWQIf8faLHjdnHfRoXfxMR56kq9Q1BGPZBkQyhGlY,487
 chemotools/datasets/data/coffee_spectra.csv,sha256=VA-sN4u0hC5iALlRxxkj-K87Lz3b3mmUHBJPoDXychI,2206147
@@ -29,7 +29,7 @@ chemotools/derivative/_norris_william.py,sha256=NKmuo95vNWHQOdcww7APU9Z4s1wWExIR
 chemotools/derivative/_savitzky_golay.py,sha256=5At4sexJH0RvjkrvVfJvhIfaxXD3vE4Ozq1VClb3qlU,3417
 chemotools/feature_selection/__init__.py,sha256=p47SuyI7jMpV7kiaAsv2hA20smKf5Yo6447LfrNdDhY,76
 chemotools/feature_selection/_index_selector.py,sha256=2z2aAyMUOuP7x1n19RV5JGf6ZcM3mtJZby8tEgBOix4,3379
-chemotools/feature_selection/_range_cut.py,sha256=gcKjmCGn0SwKRHck3QIrqWN3q-S9qRgGlSbqzaOxG7Y,3309
+chemotools/feature_selection/_range_cut.py,sha256=ikWW9FhsbyzijSUYTcx048eOyK65mdbfOuFRF_Ee3rk,3424
 chemotools/scale/__init__.py,sha256=CQPUPx-8pUeHHbN9p5smFro3xtl_UEE0YeXHLVd7Lfk,118
 chemotools/scale/_min_max_scaler.py,sha256=-Wnr7zW-zmW6nR5J5yPdBm1KNuQDa9w27Un7rAr-s8E,2806
 chemotools/scale/_norm_scaler.py,sha256=bjMg1-x2I1xZmmbIgl4vXZZweJV-w3Euta0KGff_2Gk,2363
@@ -48,11 +48,11 @@ chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 chemotools/utils/check_inputs.py,sha256=fRAV4HIaGamdj_PNXSNnl7LurXytACNTGO51rhPpMUY,512
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/fixtures.py,sha256=Xa-Vd62Kd1fyWg3PLUSP6iIkOK8etrbyOkMJTn3dvX8,1933
-tests/test_datasets.py,sha256=_3mMDYC-vUnb5BenMqvuhmkHI2PPIdsyq_nNu2ggH20,1055
-tests/test_functionality.py,sha256=cWFWSVTaEkMoZD1tB6-wfEXX59bEDzE8EVo0NcmEABw,21237
+tests/test_datasets.py,sha256=ZdyjSJVX-iJyz8SoRgFfRLP9-ajNEyqWxs00ZfIv0eo,2712
+tests/test_functionality.py,sha256=v8dH7TPA2D-5byl1nwpPW9ejx1Fzd5QsKuQQ4aouCjo,21707
 tests/test_sklearn_compliance.py,sha256=CRB_0X9HRGj0pOpUCmiSHwJkCsVB-yK_apsyUONmfmw,5856
-chemotools-0.1.4.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
-chemotools-0.1.4.dist-info/METADATA,sha256=TawdLG6hGhatxlxsWZ4ZoQO3FZIBhJdcgBfi6_P4CxA,5018
-chemotools-0.1.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-chemotools-0.1.4.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
-chemotools-0.1.4.dist-info/RECORD,,
+chemotools-0.1.5.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
+chemotools-0.1.5.dist-info/METADATA,sha256=s3KJEhQ3jgq6DPl7PW5Hl3x9f5kKyCDi-Cedon48DDA,5071
+chemotools-0.1.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+chemotools-0.1.5.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
+chemotools-0.1.5.dist-info/RECORD,,

tests/test_datasets.py CHANGED Viewed

@@ -1,9 +1,15 @@
 import pandas as pd
+import polars as pl
+import pytest
-from chemotools.datasets import load_coffee, load_fermentation_test, load_fermentation_train
+from chemotools.datasets import (
+    load_coffee,
+    load_fermentation_test,
+    load_fermentation_train,
+)
-def test_load_coffee():
+def test_load_coffee_pandas():
     # Arrange
     # Act
@@ -16,7 +22,28 @@ def test_load_coffee():
     assert isinstance(coffee_labels, pd.DataFrame)
-def test_load_fermentation_test():
+def test_load_coffee_polars():
+    # Arrange
+    # Act
+    coffee_spectra, coffee_labels = load_coffee(set_output="polars")
+    # Assert
+    assert coffee_spectra.shape == (60, 1841)
+    assert coffee_labels.shape == (60, 1)
+    assert isinstance(coffee_spectra, pl.DataFrame)
+    assert isinstance(coffee_labels, pl.DataFrame)
+def test_load_coffee_exception():
+    # Arrange
+    # Act and Assert
+    with pytest.raises(ValueError):
+        coffee_spectra, coffee_labels = load_coffee(set_output="plars")
+def test_load_fermentation_test_pandas():
     # Arrange
     # Act
@@ -28,7 +55,29 @@ def test_load_fermentation_test():
     assert isinstance(test_spectra, pd.DataFrame)
     assert isinstance(test_hplc, pd.DataFrame)
-def test_load_fermentation_train():
+def test_load_fermentation_test_polars():
+    # Arrange
+    # Act
+    test_spectra, test_hplc = load_fermentation_test(set_output="polars")
+    # Assert
+    assert test_spectra.shape == (1629, 1047)
+    assert test_hplc.shape == (34, 6)
+    assert isinstance(test_spectra, pl.DataFrame)
+    assert isinstance(test_hplc, pl.DataFrame)
+def test_load_fermentation_test_exception():
+    # Arrange
+    # Act and Assert
+    with pytest.raises(ValueError):
+        test_spectra, test_hplc = load_fermentation_test(set_output="plars")
+def test_load_fermentation_train_pandas():
     # Arrange
     # Act
@@ -40,4 +89,23 @@ def test_load_fermentation_train():
     assert isinstance(train_spectra, pd.DataFrame)
     assert isinstance(train_hplc, pd.DataFrame)
+def test_load_fermentation_train_polars():
+    # Arrange
+    # Act
+    train_spectra, train_hplc = load_fermentation_train(set_output="polars")
+    # Assert
+    assert train_spectra.shape == (21, 1047)
+    assert train_hplc.shape == (21, 1)
+    assert isinstance(train_spectra, pl.DataFrame)
+    assert isinstance(train_hplc, pl.DataFrame)
+def test_load_fermentation_train_exception():
+    # Arrange
+    # Act and Assert
+    with pytest.raises(ValueError):
+        train_spectra, train_hplc = load_fermentation_train(set_output="plars")

tests/test_functionality.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+import polars as pl
 import pytest
 from chemotools.augmentation import (
@@ -625,7 +626,7 @@ def test_range_cut_by_wavenumber_with_list():
     assert range_cut.wavenumbers_ == [2, 3, 4, 5, 6, 7]
-def test_range_cut_by_wavenumber_with_dataframe():
+def test_range_cut_by_wavenumber_with_pandas_dataframe():
     # Arrange
     wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
     spectrum = pd.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
@@ -638,6 +639,19 @@ def test_range_cut_by_wavenumber_with_dataframe():
     assert type(spectrum_corrected) == pd.DataFrame
+def test_range_cut_by_wavenumber_with_polars_dataframe():
+    # Arrange
+    wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    spectrum = pl.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
+    range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output(transform='polars')
+    # Act
+    spectrum_corrected = range_cut.fit_transform(spectrum)
+    # Assert
+    assert type(spectrum_corrected) == pl.DataFrame
 def test_robust_normal_variate():
     # Arrange
     spectrum = np.array([2, 3.5, 5, 27, 8, 9]).reshape(1, -1)

{chemotools-0.1.4.dist-info → chemotools-0.1.5.dist-info}/LICENSE RENAMED Viewed

File without changes

{chemotools-0.1.4.dist-info → chemotools-0.1.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{chemotools-0.1.4.dist-info → chemotools-0.1.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

chemotools 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

chemotools 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl