chemotools 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,10 +11,10 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
11
11
 
12
12
  Parameters
13
13
  ----------
14
- low : float, default=0.0
14
+ min : float, default=0.0
15
15
  The lower bound of the uniform distribution.
16
16
 
17
- high : float, default=0.0
17
+ max : float, default=0.0
18
18
  The upper bound of the uniform distribution.
19
19
 
20
20
  random_state : int, default=None
@@ -38,9 +38,9 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
38
38
  """
39
39
 
40
40
 
41
- def __init__(self, low: float = 0.0, high: float = 0.0, random_state: int = None):
42
- self.low = low
43
- self.high = high
41
+ def __init__(self, min: float = 0.0, max: float = 0.0, random_state: int = None):
42
+ self.min = min
43
+ self.max = max
44
44
  self.random_state = random_state
45
45
 
46
46
  def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
@@ -109,4 +109,4 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
109
109
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
110
110
 
111
111
  def _add_random_noise(self, x) -> np.ndarray:
112
- return x + self._rng.uniform(self.low, self.high, size=x.shape)
112
+ return x + self._rng.uniform(self.min, self.max, size=x.shape)
@@ -1,14 +1,22 @@
1
- import pandas as pd
2
1
  import os
3
2
 
3
+
4
+ import pandas as pd
5
+ import polars as pl
6
+
4
7
  PACKAGE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
5
8
 
6
9
 
7
- def load_fermentation_train():
10
+ def load_fermentation_train(set_output="pandas"):
8
11
  """
9
- Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
12
+ Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
10
13
  off-line. This dataset is designed to represent the variability of real fermentation data.
11
14
 
15
+ Arguments
16
+ -------
17
+ set_output: str, default='pandas'
18
+ The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
19
+
12
20
  Returns
13
21
  -------
14
22
  train_spectra: pd.DataFrame A pandas DataFrame containing the synthetic spectra measured to train the model.
@@ -20,17 +28,32 @@ def load_fermentation_train():
20
28
  Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
21
29
  A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
22
30
  """
23
- train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
24
- train_spectra.columns = train_spectra.columns.astype(float)
25
- train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
31
+ if set_output == "pandas":
32
+ train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
33
+ train_spectra.columns = train_spectra.columns.astype(float)
34
+ train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
35
+ return train_spectra, train_hplc
26
36
 
27
- return train_spectra, train_hplc
37
+ if set_output == "polars":
38
+ train_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
39
+ train_hplc = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
40
+ return train_spectra, train_hplc
28
41
 
42
+ else:
43
+ raise ValueError(
44
+ "Invalid value for set_output. Please use 'pandas' or 'polars'."
45
+ )
29
46
 
30
- def load_fermentation_test():
47
+
48
+ def load_fermentation_test(set_output="pandas"):
31
49
  """
32
50
  Loads the testing data of the fermentation dataset. This data corresponds to real fermentation data measured
33
- on-line during a fermentation process.
51
+ on-line during a fermentation process.
52
+
53
+ Arguments
54
+ -------
55
+ set_output: str, default='pandas'
56
+ The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
34
57
 
35
58
  Returns
36
59
  -------
@@ -43,27 +66,57 @@ def load_fermentation_test():
43
66
  Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
44
67
  A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
45
68
  """
46
- fermentation_spectra = pd.read_csv(
47
- PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
48
- )
49
- fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
50
- fermentation_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv")
51
-
52
- return fermentation_spectra, fermentation_hplc
53
-
54
-
55
- def load_coffee():
69
+ if set_output == "pandas":
70
+ fermentation_spectra = pd.read_csv(
71
+ PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
72
+ )
73
+ fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
74
+ fermentation_hplc = pd.read_csv(
75
+ PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
76
+ )
77
+ return fermentation_spectra, fermentation_hplc
78
+
79
+ if set_output == "polars":
80
+ fermentation_spectra = pl.read_csv(
81
+ PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
82
+ )
83
+ fermentation_hplc = pl.read_csv(
84
+ PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
85
+ )
86
+ return fermentation_spectra, fermentation_hplc
87
+
88
+ else:
89
+ raise ValueError(
90
+ "Invalid value for set_output. Please use 'pandas' or 'polars'."
91
+ )
92
+
93
+
94
+ def load_coffee(set_output="pandas"):
56
95
  """
57
- Loads the coffee dataset. This data corresponds to a coffee spectra from three different origins
96
+ Loads the coffee dataset. This data corresponds to coffee spectra from three different origins
58
97
  measured off-line using attenuated total reflectance Fourier transform infrared spectroscopy (ATR-FTIR).
59
98
 
99
+ Arguments
100
+ -------
101
+ set_output: str, default='pandas'
102
+ The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
103
+
60
104
  Returns
61
105
  -------
62
106
  coffee_spectra: pd.DataFrame A pandas DataFrame containing the coffee spectra.
63
107
  coffee_labels: pd.DataFrame A pandas DataFrame containing the corresponding labels.
64
108
  """
65
-
66
- coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
67
- coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
68
-
69
- return coffee_spectra, coffee_labels
109
+ if set_output == "pandas":
110
+ coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
111
+ coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
112
+ return coffee_spectra, coffee_labels
113
+
114
+ if set_output == "polars":
115
+ coffee_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
116
+ coffee_labels = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
117
+ return coffee_spectra, coffee_labels
118
+
119
+ else:
120
+ raise ValueError(
121
+ "Invalid value for set_output. Please use 'pandas' or 'polars'."
122
+ )
@@ -34,6 +34,8 @@ class RangeCut(BaseEstimator, SelectorMixin):
34
34
  end_index_ : int
35
35
  The index of the end of the range. It is -1 if the wavenumbers are not provided.
36
36
 
37
+ wavenumbers_ : array-like
38
+ The cut wavenumbers of the input data.
37
39
 
38
40
  Methods
39
41
  -------
@@ -75,9 +77,11 @@ class RangeCut(BaseEstimator, SelectorMixin):
75
77
  if self.wavenumbers is None:
76
78
  self.start_index_ = self.start
77
79
  self.end_index_ = self.end
80
+ self.wavenumbers_ = None
78
81
  else:
79
82
  self.start_index_ = self._find_index(self.start)
80
83
  self.end_index_ = self._find_index(self.end)
84
+ self.wavenumbers_ = self.wavenumbers[self.start_index_ : self.end_index_]
81
85
 
82
86
  return self
83
87
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chemotools
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Package to integrate chemometrics in scikit-learn pipelines
5
5
  Home-page: https://github.com/paucablop/chemotools
6
6
  Author: Pau Cabaneros Lopez
@@ -14,8 +14,10 @@ Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: numpy
16
16
  Requires-Dist: pandas
17
+ Requires-Dist: polars
18
+ Requires-Dist: pyarrow
17
19
  Requires-Dist: scipy
18
- Requires-Dist: scikit-learn
20
+ Requires-Dist: scikit-learn >=1.4.0
19
21
 
20
22
  ![chemotools](assets/images/logo_pixel.png)
21
23
 
@@ -5,7 +5,7 @@ chemotools/augmentation/exponential_noise.py,sha256=X2HTpL9zoiu0cFq3VsTxS3x_IO_t
5
5
  chemotools/augmentation/index_shift.py,sha256=7ujZ_sz4mWEUJMDCHyaLxhTZ5-_K3nQPwtk6y6SLR9Q,3198
6
6
  chemotools/augmentation/normal_noise.py,sha256=NmzTuIJKyk6tfDJgmeX9iAzsKlJJk3984tS8nLLG9dg,3051
7
7
  chemotools/augmentation/spectrum_scale.py,sha256=WgMw_bCxWbyAYgYBO3q4PbbzcTDyBvVD73kxPfj3cdY,3174
8
- chemotools/augmentation/uniform_noise.py,sha256=gc0WdREItRiPHjNiZg79n0yK6bfntXkcImrEjkoRdus,3180
8
+ chemotools/augmentation/uniform_noise.py,sha256=szGhk9T7SDe_6v5N8n8ztf7lxHVMiqqzrgL0JGHystw,3175
9
9
  chemotools/baseline/__init__.py,sha256=LFhsmzqv9RYxDS5-vK9jIf3ArNUSZ6yOF4SeUyVF6iA,381
10
10
  chemotools/baseline/_air_pls.py,sha256=bYAjemEWZr7oiYJegO0r5gtO16zr0BdJYjmEikA1yBc,5116
11
11
  chemotools/baseline/_ar_pls.py,sha256=tZi-89GMIStZUufz9AXVHU6TC1J6fAX4M1rAaIqgSvE,4431
@@ -16,7 +16,7 @@ chemotools/baseline/_non_negative.py,sha256=SyiS_-cfnypLXY3gC80oo7doqXUlHAAgmwrk
16
16
  chemotools/baseline/_polynomial_correction.py,sha256=0w9qA_w5dc9IIv5KMmAOZ06hWDuk-uyealsTaZX2qgw,3749
17
17
  chemotools/baseline/_subtract_reference.py,sha256=vfre6Z-bgDCwwl3VnpahmGJTBFJVK9HGBrUsjfl2O9o,3135
18
18
  chemotools/datasets/__init__.py,sha256=ojqxb-C_eDmizwUqVCJ8BqJxwULD7_hWCyVIA1uRO0c,116
19
- chemotools/datasets/_base.py,sha256=Z174CaIlpx17Yu8Pg1qZPuHWkS3BYWn7gtOYsoe8zNk,2895
19
+ chemotools/datasets/_base.py,sha256=ftAmf2jHWUW_YQobXCsIFC617PeXwsmZIwAgab9EvL8,4890
20
20
  chemotools/datasets/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  chemotools/datasets/data/coffee_labels.csv,sha256=ZXQWQIf8faLHjdnHfRoXfxMR56kq9Q1BGPZBkQyhGlY,487
22
22
  chemotools/datasets/data/coffee_spectra.csv,sha256=VA-sN4u0hC5iALlRxxkj-K87Lz3b3mmUHBJPoDXychI,2206147
@@ -29,7 +29,7 @@ chemotools/derivative/_norris_william.py,sha256=NKmuo95vNWHQOdcww7APU9Z4s1wWExIR
29
29
  chemotools/derivative/_savitzky_golay.py,sha256=5At4sexJH0RvjkrvVfJvhIfaxXD3vE4Ozq1VClb3qlU,3417
30
30
  chemotools/feature_selection/__init__.py,sha256=p47SuyI7jMpV7kiaAsv2hA20smKf5Yo6447LfrNdDhY,76
31
31
  chemotools/feature_selection/_index_selector.py,sha256=2z2aAyMUOuP7x1n19RV5JGf6ZcM3mtJZby8tEgBOix4,3379
32
- chemotools/feature_selection/_range_cut.py,sha256=HI2OoeQYNph9uBICSA1cF2C_u-0UjTf0FDv5093tTnU,3223
32
+ chemotools/feature_selection/_range_cut.py,sha256=ikWW9FhsbyzijSUYTcx048eOyK65mdbfOuFRF_Ee3rk,3424
33
33
  chemotools/scale/__init__.py,sha256=CQPUPx-8pUeHHbN9p5smFro3xtl_UEE0YeXHLVd7Lfk,118
34
34
  chemotools/scale/_min_max_scaler.py,sha256=-Wnr7zW-zmW6nR5J5yPdBm1KNuQDa9w27Un7rAr-s8E,2806
35
35
  chemotools/scale/_norm_scaler.py,sha256=bjMg1-x2I1xZmmbIgl4vXZZweJV-w3Euta0KGff_2Gk,2363
@@ -48,11 +48,11 @@ chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
48
48
  chemotools/utils/check_inputs.py,sha256=fRAV4HIaGamdj_PNXSNnl7LurXytACNTGO51rhPpMUY,512
49
49
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
50
  tests/fixtures.py,sha256=Xa-Vd62Kd1fyWg3PLUSP6iIkOK8etrbyOkMJTn3dvX8,1933
51
- tests/test_datasets.py,sha256=_3mMDYC-vUnb5BenMqvuhmkHI2PPIdsyq_nNu2ggH20,1055
52
- tests/test_functionality.py,sha256=UhOYEShJZJOwROjNMf3UtXl5MrQBeQQbEMEt0ph7yQ0,21182
51
+ tests/test_datasets.py,sha256=ZdyjSJVX-iJyz8SoRgFfRLP9-ajNEyqWxs00ZfIv0eo,2712
52
+ tests/test_functionality.py,sha256=v8dH7TPA2D-5byl1nwpPW9ejx1Fzd5QsKuQQ4aouCjo,21707
53
53
  tests/test_sklearn_compliance.py,sha256=CRB_0X9HRGj0pOpUCmiSHwJkCsVB-yK_apsyUONmfmw,5856
54
- chemotools-0.1.3.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
55
- chemotools-0.1.3.dist-info/METADATA,sha256=K_8Kuy1_hHBEK3p1WSMLfR0NfHuptAzCa5uijUT6RLc,5018
56
- chemotools-0.1.3.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
57
- chemotools-0.1.3.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
58
- chemotools-0.1.3.dist-info/RECORD,,
54
+ chemotools-0.1.5.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
55
+ chemotools-0.1.5.dist-info/METADATA,sha256=s3KJEhQ3jgq6DPl7PW5Hl3x9f5kKyCDi-Cedon48DDA,5071
56
+ chemotools-0.1.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
57
+ chemotools-0.1.5.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
58
+ chemotools-0.1.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.3)
2
+ Generator: bdist_wheel (0.42.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
tests/test_datasets.py CHANGED
@@ -1,9 +1,15 @@
1
1
  import pandas as pd
2
+ import polars as pl
3
+ import pytest
2
4
 
3
- from chemotools.datasets import load_coffee, load_fermentation_test, load_fermentation_train
5
+ from chemotools.datasets import (
6
+ load_coffee,
7
+ load_fermentation_test,
8
+ load_fermentation_train,
9
+ )
4
10
 
5
11
 
6
- def test_load_coffee():
12
+ def test_load_coffee_pandas():
7
13
  # Arrange
8
14
 
9
15
  # Act
@@ -16,7 +22,28 @@ def test_load_coffee():
16
22
  assert isinstance(coffee_labels, pd.DataFrame)
17
23
 
18
24
 
19
- def test_load_fermentation_test():
25
+ def test_load_coffee_polars():
26
+ # Arrange
27
+
28
+ # Act
29
+ coffee_spectra, coffee_labels = load_coffee(set_output="polars")
30
+
31
+ # Assert
32
+ assert coffee_spectra.shape == (60, 1841)
33
+ assert coffee_labels.shape == (60, 1)
34
+ assert isinstance(coffee_spectra, pl.DataFrame)
35
+ assert isinstance(coffee_labels, pl.DataFrame)
36
+
37
+
38
+ def test_load_coffee_exception():
39
+ # Arrange
40
+
41
+ # Act and Assert
42
+ with pytest.raises(ValueError):
43
+ coffee_spectra, coffee_labels = load_coffee(set_output="plars")
44
+
45
+
46
+ def test_load_fermentation_test_pandas():
20
47
  # Arrange
21
48
 
22
49
  # Act
@@ -28,7 +55,29 @@ def test_load_fermentation_test():
28
55
  assert isinstance(test_spectra, pd.DataFrame)
29
56
  assert isinstance(test_hplc, pd.DataFrame)
30
57
 
31
- def test_load_fermentation_train():
58
+
59
+ def test_load_fermentation_test_polars():
60
+ # Arrange
61
+
62
+ # Act
63
+ test_spectra, test_hplc = load_fermentation_test(set_output="polars")
64
+
65
+ # Assert
66
+ assert test_spectra.shape == (1629, 1047)
67
+ assert test_hplc.shape == (34, 6)
68
+ assert isinstance(test_spectra, pl.DataFrame)
69
+ assert isinstance(test_hplc, pl.DataFrame)
70
+
71
+
72
+ def test_load_fermentation_test_exception():
73
+ # Arrange
74
+
75
+ # Act and Assert
76
+ with pytest.raises(ValueError):
77
+ test_spectra, test_hplc = load_fermentation_test(set_output="plars")
78
+
79
+
80
+ def test_load_fermentation_train_pandas():
32
81
  # Arrange
33
82
 
34
83
  # Act
@@ -40,4 +89,23 @@ def test_load_fermentation_train():
40
89
  assert isinstance(train_spectra, pd.DataFrame)
41
90
  assert isinstance(train_hplc, pd.DataFrame)
42
91
 
43
-
92
+
93
+ def test_load_fermentation_train_polars():
94
+ # Arrange
95
+
96
+ # Act
97
+ train_spectra, train_hplc = load_fermentation_train(set_output="polars")
98
+
99
+ # Assert
100
+ assert train_spectra.shape == (21, 1047)
101
+ assert train_hplc.shape == (21, 1)
102
+ assert isinstance(train_spectra, pl.DataFrame)
103
+ assert isinstance(train_hplc, pl.DataFrame)
104
+
105
+
106
+ def test_load_fermentation_train_exception():
107
+ # Arrange
108
+
109
+ # Act and Assert
110
+ with pytest.raises(ValueError):
111
+ train_spectra, train_hplc = load_fermentation_train(set_output="plars")
@@ -1,5 +1,6 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
+ import polars as pl
3
4
  import pytest
4
5
 
5
6
  from chemotools.augmentation import (
@@ -622,9 +623,10 @@ def test_range_cut_by_wavenumber_with_list():
622
623
 
623
624
  # Assert
624
625
  assert np.allclose(spectrum_corrected[0], spectrum[0][1:7], atol=1e-8)
626
+ assert range_cut.wavenumbers_ == [2, 3, 4, 5, 6, 7]
625
627
 
626
628
 
627
- def test_range_cut_by_wavenumber_with_dataframe():
629
+ def test_range_cut_by_wavenumber_with_pandas_dataframe():
628
630
  # Arrange
629
631
  wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
630
632
  spectrum = pd.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
@@ -637,6 +639,19 @@ def test_range_cut_by_wavenumber_with_dataframe():
637
639
  assert type(spectrum_corrected) == pd.DataFrame
638
640
 
639
641
 
642
+ def test_range_cut_by_wavenumber_with_polars_dataframe():
643
+ # Arrange
644
+ wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
645
+ spectrum = pl.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
646
+ range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output(transform='polars')
647
+
648
+ # Act
649
+ spectrum_corrected = range_cut.fit_transform(spectrum)
650
+
651
+ # Assert
652
+ assert type(spectrum_corrected) == pl.DataFrame
653
+
654
+
640
655
  def test_robust_normal_variate():
641
656
  # Arrange
642
657
  spectrum = np.array([2, 3.5, 5, 27, 8, 9]).reshape(1, -1)
@@ -740,7 +755,7 @@ def test_subtract_reference_without_reference(spectrum):
740
755
  def test_uniform_noise():
741
756
  # Arrange
742
757
  spectrum = np.ones(10000).reshape(1, -1)
743
- uniform_noise = UniformNoise(low=-1, high=1, random_state=42)
758
+ uniform_noise = UniformNoise(min=-1, max=1, random_state=42)
744
759
 
745
760
  # Act
746
761
  spectrum_corrected = uniform_noise.fit_transform(spectrum)