chemotools 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {chemotools-0.1.3 → chemotools-0.1.5}/PKG-INFO +4 -2
  2. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/augmentation/uniform_noise.py +6 -6
  3. chemotools-0.1.5/chemotools/datasets/_base.py +122 -0
  4. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/feature_selection/_range_cut.py +4 -0
  5. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools.egg-info/PKG-INFO +4 -2
  6. chemotools-0.1.5/chemotools.egg-info/requires.txt +6 -0
  7. {chemotools-0.1.3 → chemotools-0.1.5}/setup.py +3 -1
  8. chemotools-0.1.5/tests/test_datasets.py +111 -0
  9. {chemotools-0.1.3 → chemotools-0.1.5}/tests/test_functionality.py +17 -2
  10. chemotools-0.1.3/chemotools/datasets/_base.py +0 -69
  11. chemotools-0.1.3/chemotools.egg-info/requires.txt +0 -4
  12. chemotools-0.1.3/tests/test_datasets.py +0 -43
  13. {chemotools-0.1.3 → chemotools-0.1.5}/LICENSE +0 -0
  14. {chemotools-0.1.3 → chemotools-0.1.5}/README.md +0 -0
  15. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/__init__.py +0 -0
  16. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/augmentation/__init__.py +0 -0
  17. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/augmentation/baseline_shift.py +0 -0
  18. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/augmentation/exponential_noise.py +0 -0
  19. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/augmentation/index_shift.py +0 -0
  20. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/augmentation/normal_noise.py +0 -0
  21. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/augmentation/spectrum_scale.py +0 -0
  22. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/__init__.py +0 -0
  23. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/_air_pls.py +0 -0
  24. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/_ar_pls.py +0 -0
  25. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/_constant_baseline_correction.py +0 -0
  26. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/_cubic_spline_correction.py +0 -0
  27. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/_linear_correction.py +0 -0
  28. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/_non_negative.py +0 -0
  29. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/_polynomial_correction.py +0 -0
  30. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/baseline/_subtract_reference.py +0 -0
  31. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/datasets/__init__.py +0 -0
  32. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/datasets/data/__init__.py +0 -0
  33. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/datasets/data/coffee_labels.csv +0 -0
  34. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/datasets/data/coffee_spectra.csv +0 -0
  35. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/datasets/data/fermentation_hplc.csv +0 -0
  36. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/datasets/data/fermentation_spectra.csv +0 -0
  37. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/datasets/data/train_hplc.csv +0 -0
  38. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/datasets/data/train_spectra.csv +0 -0
  39. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/derivative/__init__.py +0 -0
  40. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/derivative/_norris_william.py +0 -0
  41. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/derivative/_savitzky_golay.py +0 -0
  42. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/feature_selection/__init__.py +0 -0
  43. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/feature_selection/_index_selector.py +0 -0
  44. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scale/__init__.py +0 -0
  45. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scale/_min_max_scaler.py +0 -0
  46. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scale/_norm_scaler.py +0 -0
  47. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scale/_point_scaler.py +0 -0
  48. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scatter/__init__.py +0 -0
  49. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scatter/_extended_multiplicative_scatter_correction.py +0 -0
  50. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scatter/_multiplicative_scatter_correction.py +0 -0
  51. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scatter/_robust_normal_variate.py +0 -0
  52. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/scatter/_standard_normal_variate.py +0 -0
  53. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/smooth/__init__.py +0 -0
  54. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/smooth/_mean_filter.py +0 -0
  55. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/smooth/_median_filter.py +0 -0
  56. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/smooth/_savitzky_golay_filter.py +0 -0
  57. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/smooth/_whittaker_smooth.py +0 -0
  58. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/utils/__init__.py +0 -0
  59. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools/utils/check_inputs.py +0 -0
  60. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools.egg-info/SOURCES.txt +0 -0
  61. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools.egg-info/dependency_links.txt +0 -0
  62. {chemotools-0.1.3 → chemotools-0.1.5}/chemotools.egg-info/top_level.txt +0 -0
  63. {chemotools-0.1.3 → chemotools-0.1.5}/pyproject.toml +0 -0
  64. {chemotools-0.1.3 → chemotools-0.1.5}/setup.cfg +0 -0
  65. {chemotools-0.1.3 → chemotools-0.1.5}/tests/__init__.py +0 -0
  66. {chemotools-0.1.3 → chemotools-0.1.5}/tests/fixtures.py +0 -0
  67. {chemotools-0.1.3 → chemotools-0.1.5}/tests/test_sklearn_compliance.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chemotools
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Package to integrate chemometrics in scikit-learn pipelines
5
5
  Home-page: https://github.com/paucablop/chemotools
6
6
  Author: Pau Cabaneros Lopez
@@ -14,8 +14,10 @@ Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: numpy
16
16
  Requires-Dist: pandas
17
+ Requires-Dist: polars
18
+ Requires-Dist: pyarrow
17
19
  Requires-Dist: scipy
18
- Requires-Dist: scikit-learn
20
+ Requires-Dist: scikit-learn>=1.4.0
19
21
 
20
22
  ![chemotools](assets/images/logo_pixel.png)
21
23
 
@@ -11,10 +11,10 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
11
11
 
12
12
  Parameters
13
13
  ----------
14
- low : float, default=0.0
14
+ min : float, default=0.0
15
15
  The lower bound of the uniform distribution.
16
16
 
17
- high : float, default=0.0
17
+ max : float, default=0.0
18
18
  The upper bound of the uniform distribution.
19
19
 
20
20
  random_state : int, default=None
@@ -38,9 +38,9 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
38
38
  """
39
39
 
40
40
 
41
- def __init__(self, low: float = 0.0, high: float = 0.0, random_state: int = None):
42
- self.low = low
43
- self.high = high
41
+ def __init__(self, min: float = 0.0, max: float = 0.0, random_state: int = None):
42
+ self.min = min
43
+ self.max = max
44
44
  self.random_state = random_state
45
45
 
46
46
  def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
@@ -109,4 +109,4 @@ class UniformNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
109
109
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
110
110
 
111
111
  def _add_random_noise(self, x) -> np.ndarray:
112
- return x + self._rng.uniform(self.low, self.high, size=x.shape)
112
+ return x + self._rng.uniform(self.min, self.max, size=x.shape)
@@ -0,0 +1,122 @@
1
+ import os
2
+
3
+
4
+ import pandas as pd
5
+ import polars as pl
6
+
7
+ PACKAGE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
8
+
9
+
10
+ def load_fermentation_train(set_output="pandas"):
11
+ """
12
+ Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
13
+ off-line. This dataset is designed to represent the variability of real fermentation data.
14
+
15
+ Arguments
16
+ -------
17
+ set_output: str, default='pandas'
18
+ The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
19
+
20
+ Returns
21
+ -------
22
+ train_spectra: pd.DataFrame A pandas DataFrame containing the synthetic spectra measured to train the model.
23
+ train_hplc: pd.DataFrame A pandas DataFrame containing the corresponding reference measurements analyzed with HPLC.
24
+
25
+ References
26
+ -------
27
+ - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
28
+ Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
29
+ A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
30
+ """
31
+ if set_output == "pandas":
32
+ train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
33
+ train_spectra.columns = train_spectra.columns.astype(float)
34
+ train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
35
+ return train_spectra, train_hplc
36
+
37
+ if set_output == "polars":
38
+ train_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
39
+ train_hplc = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
40
+ return train_spectra, train_hplc
41
+
42
+ else:
43
+ raise ValueError(
44
+ "Invalid value for set_output. Please use 'pandas' or 'polars'."
45
+ )
46
+
47
+
48
+ def load_fermentation_test(set_output="pandas"):
49
+ """
50
+ Loads the testing data of the fermentation dataset. This data corresponds to real fermentation data measured
51
+ on-line during a fermentation process.
52
+
53
+ Arguments
54
+ -------
55
+ set_output: str, default='pandas'
56
+ The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
57
+
58
+ Returns
59
+ -------
60
+ test_spectra: pd.DataFrame A pandas DataFrame containing the on-line spectra measured to test the model.
61
+ test_hplc: pd.DataFrame A pandas DataFrame containing the corresponding HPLC measurements.
62
+
63
+ References
64
+ -------
65
+ - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
66
+ Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
67
+ A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
68
+ """
69
+ if set_output == "pandas":
70
+ fermentation_spectra = pd.read_csv(
71
+ PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
72
+ )
73
+ fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
74
+ fermentation_hplc = pd.read_csv(
75
+ PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
76
+ )
77
+ return fermentation_spectra, fermentation_hplc
78
+
79
+ if set_output == "polars":
80
+ fermentation_spectra = pl.read_csv(
81
+ PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
82
+ )
83
+ fermentation_hplc = pl.read_csv(
84
+ PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
85
+ )
86
+ return fermentation_spectra, fermentation_hplc
87
+
88
+ else:
89
+ raise ValueError(
90
+ "Invalid value for set_output. Please use 'pandas' or 'polars'."
91
+ )
92
+
93
+
94
+ def load_coffee(set_output="pandas"):
95
+ """
96
+ Loads the coffee dataset. This data corresponds to coffee spectra from three different origins
97
+ measured off-line using attenuated total reflectance Fourier transform infrared spectroscopy (ATR-FTIR).
98
+
99
+ Arguments
100
+ -------
101
+ set_output: str, default='pandas'
102
+ The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
103
+
104
+ Returns
105
+ -------
106
+ coffee_spectra: pd.DataFrame A pandas DataFrame containing the coffee spectra.
107
+ coffee_labels: pd.DataFrame A pandas DataFrame containing the corresponding labels.
108
+ """
109
+ if set_output == "pandas":
110
+ coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
111
+ coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
112
+ return coffee_spectra, coffee_labels
113
+
114
+ if set_output == "polars":
115
+ coffee_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
116
+ coffee_labels = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
117
+ return coffee_spectra, coffee_labels
118
+
119
+ else:
120
+ raise ValueError(
121
+ "Invalid value for set_output. Please use 'pandas' or 'polars'."
122
+ )
@@ -34,6 +34,8 @@ class RangeCut(BaseEstimator, SelectorMixin):
34
34
  end_index_ : int
35
35
  The index of the end of the range. It is -1 if the wavenumbers are not provided.
36
36
 
37
+ wavenumbers_ : array-like
38
+ The cut wavenumbers of the input data.
37
39
 
38
40
  Methods
39
41
  -------
@@ -75,9 +77,11 @@ class RangeCut(BaseEstimator, SelectorMixin):
75
77
  if self.wavenumbers is None:
76
78
  self.start_index_ = self.start
77
79
  self.end_index_ = self.end
80
+ self.wavenumbers_ = None
78
81
  else:
79
82
  self.start_index_ = self._find_index(self.start)
80
83
  self.end_index_ = self._find_index(self.end)
84
+ self.wavenumbers_ = self.wavenumbers[self.start_index_ : self.end_index_]
81
85
 
82
86
  return self
83
87
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chemotools
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Package to integrate chemometrics in scikit-learn pipelines
5
5
  Home-page: https://github.com/paucablop/chemotools
6
6
  Author: Pau Cabaneros Lopez
@@ -14,8 +14,10 @@ Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: numpy
16
16
  Requires-Dist: pandas
17
+ Requires-Dist: polars
18
+ Requires-Dist: pyarrow
17
19
  Requires-Dist: scipy
18
- Requires-Dist: scikit-learn
20
+ Requires-Dist: scikit-learn>=1.4.0
19
21
 
20
22
  ![chemotools](assets/images/logo_pixel.png)
21
23
 
@@ -0,0 +1,6 @@
1
+ numpy
2
+ pandas
3
+ polars
4
+ pyarrow
5
+ scipy
6
+ scikit-learn>=1.4.0
@@ -27,8 +27,10 @@ setuptools.setup(
27
27
  install_requires=[
28
28
  "numpy",
29
29
  "pandas",
30
+ "polars",
31
+ "pyarrow",
30
32
  "scipy",
31
- "scikit-learn",
33
+ "scikit-learn>=1.4.0",
32
34
  ],
33
35
  include_package_data=True,
34
36
  package_data={'': ['tests/resources/*.csv',
@@ -0,0 +1,111 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+ import pytest
4
+
5
+ from chemotools.datasets import (
6
+ load_coffee,
7
+ load_fermentation_test,
8
+ load_fermentation_train,
9
+ )
10
+
11
+
12
+ def test_load_coffee_pandas():
13
+ # Arrange
14
+
15
+ # Act
16
+ coffee_spectra, coffee_labels = load_coffee()
17
+
18
+ # Assert
19
+ assert coffee_spectra.shape == (60, 1841)
20
+ assert coffee_labels.shape == (60, 1)
21
+ assert isinstance(coffee_spectra, pd.DataFrame)
22
+ assert isinstance(coffee_labels, pd.DataFrame)
23
+
24
+
25
+ def test_load_coffee_polars():
26
+ # Arrange
27
+
28
+ # Act
29
+ coffee_spectra, coffee_labels = load_coffee(set_output="polars")
30
+
31
+ # Assert
32
+ assert coffee_spectra.shape == (60, 1841)
33
+ assert coffee_labels.shape == (60, 1)
34
+ assert isinstance(coffee_spectra, pl.DataFrame)
35
+ assert isinstance(coffee_labels, pl.DataFrame)
36
+
37
+
38
+ def test_load_coffee_exception():
39
+ # Arrange
40
+
41
+ # Act and Assert
42
+ with pytest.raises(ValueError):
43
+ coffee_spectra, coffee_labels = load_coffee(set_output="plars")
44
+
45
+
46
+ def test_load_fermentation_test_pandas():
47
+ # Arrange
48
+
49
+ # Act
50
+ test_spectra, test_hplc = load_fermentation_test()
51
+
52
+ # Assert
53
+ assert test_spectra.shape == (1629, 1047)
54
+ assert test_hplc.shape == (34, 6)
55
+ assert isinstance(test_spectra, pd.DataFrame)
56
+ assert isinstance(test_hplc, pd.DataFrame)
57
+
58
+
59
+ def test_load_fermentation_test_polars():
60
+ # Arrange
61
+
62
+ # Act
63
+ test_spectra, test_hplc = load_fermentation_test(set_output="polars")
64
+
65
+ # Assert
66
+ assert test_spectra.shape == (1629, 1047)
67
+ assert test_hplc.shape == (34, 6)
68
+ assert isinstance(test_spectra, pl.DataFrame)
69
+ assert isinstance(test_hplc, pl.DataFrame)
70
+
71
+
72
+ def test_load_fermentation_test_exception():
73
+ # Arrange
74
+
75
+ # Act and Assert
76
+ with pytest.raises(ValueError):
77
+ test_spectra, test_hplc = load_fermentation_test(set_output="plars")
78
+
79
+
80
+ def test_load_fermentation_train_pandas():
81
+ # Arrange
82
+
83
+ # Act
84
+ train_spectra, train_hplc = load_fermentation_train()
85
+
86
+ # Assert
87
+ assert train_spectra.shape == (21, 1047)
88
+ assert train_hplc.shape == (21, 1)
89
+ assert isinstance(train_spectra, pd.DataFrame)
90
+ assert isinstance(train_hplc, pd.DataFrame)
91
+
92
+
93
+ def test_load_fermentation_train_polars():
94
+ # Arrange
95
+
96
+ # Act
97
+ train_spectra, train_hplc = load_fermentation_train(set_output="polars")
98
+
99
+ # Assert
100
+ assert train_spectra.shape == (21, 1047)
101
+ assert train_hplc.shape == (21, 1)
102
+ assert isinstance(train_spectra, pl.DataFrame)
103
+ assert isinstance(train_hplc, pl.DataFrame)
104
+
105
+
106
+ def test_load_fermentation_train_exception():
107
+ # Arrange
108
+
109
+ # Act and Assert
110
+ with pytest.raises(ValueError):
111
+ train_spectra, train_hplc = load_fermentation_train(set_output="plars")
@@ -1,5 +1,6 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
+ import polars as pl
3
4
  import pytest
4
5
 
5
6
  from chemotools.augmentation import (
@@ -622,9 +623,10 @@ def test_range_cut_by_wavenumber_with_list():
622
623
 
623
624
  # Assert
624
625
  assert np.allclose(spectrum_corrected[0], spectrum[0][1:7], atol=1e-8)
626
+ assert range_cut.wavenumbers_ == [2, 3, 4, 5, 6, 7]
625
627
 
626
628
 
627
- def test_range_cut_by_wavenumber_with_dataframe():
629
+ def test_range_cut_by_wavenumber_with_pandas_dataframe():
628
630
  # Arrange
629
631
  wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
630
632
  spectrum = pd.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
@@ -637,6 +639,19 @@ def test_range_cut_by_wavenumber_with_dataframe():
637
639
  assert type(spectrum_corrected) == pd.DataFrame
638
640
 
639
641
 
642
+ def test_range_cut_by_wavenumber_with_polars_dataframe():
643
+ # Arrange
644
+ wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
645
+ spectrum = pl.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
646
+ range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output(transform='polars')
647
+
648
+ # Act
649
+ spectrum_corrected = range_cut.fit_transform(spectrum)
650
+
651
+ # Assert
652
+ assert type(spectrum_corrected) == pl.DataFrame
653
+
654
+
640
655
  def test_robust_normal_variate():
641
656
  # Arrange
642
657
  spectrum = np.array([2, 3.5, 5, 27, 8, 9]).reshape(1, -1)
@@ -740,7 +755,7 @@ def test_subtract_reference_without_reference(spectrum):
740
755
  def test_uniform_noise():
741
756
  # Arrange
742
757
  spectrum = np.ones(10000).reshape(1, -1)
743
- uniform_noise = UniformNoise(low=-1, high=1, random_state=42)
758
+ uniform_noise = UniformNoise(min=-1, max=1, random_state=42)
744
759
 
745
760
  # Act
746
761
  spectrum_corrected = uniform_noise.fit_transform(spectrum)
@@ -1,69 +0,0 @@
1
- import pandas as pd
2
- import os
3
-
4
- PACKAGE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
5
-
6
-
7
- def load_fermentation_train():
8
- """
9
- Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
10
- off-line. This dataset is designed to represent the variability of real fermentation data.
11
-
12
- Returns
13
- -------
14
- train_spectra: pd.DataFrame A pandas DataFrame containing the synthetic spectra measured to train the model.
15
- train_hplc: pd.DataFrame A pandas DataFrame containing the corresponding reference measurements analyzed with HPLC.
16
-
17
- References
18
- -------
19
- - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
20
- Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
21
- A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
22
- """
23
- train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
24
- train_spectra.columns = train_spectra.columns.astype(float)
25
- train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
26
-
27
- return train_spectra, train_hplc
28
-
29
-
30
- def load_fermentation_test():
31
- """
32
- Loads the testing data of the fermentation dataset. This data corresponds to real fermentation data measured
33
- on-line during a fermentation process.
34
-
35
- Returns
36
- -------
37
- test_spectra: pd.DataFrame A pandas DataFrame containing the on-line spectra measured to train the model.
38
- test_hplc: pd.DataFrame A pandas DataFrame containing the corresponding HPLC measurements.
39
-
40
- References
41
- -------
42
- - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
43
- Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
44
- A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
45
- """
46
- fermentation_spectra = pd.read_csv(
47
- PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
48
- )
49
- fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
50
- fermentation_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv")
51
-
52
- return fermentation_spectra, fermentation_hplc
53
-
54
-
55
- def load_coffee():
56
- """
57
- Loads the coffee dataset. This data corresponds to a coffee spectra from three different origins
58
- measured off-line using attenuated total reflectance Fourier transform infrared spectroscopy (ATR-FTIR).
59
-
60
- Returns
61
- -------
62
- coffee_spectra: pd.DataFrame A pandas DataFrame containing the coffee spectra.
63
- coffee_labels: pd.DataFrame A pandas DataFrame containing the corresponding labels.
64
- """
65
-
66
- coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
67
- coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
68
-
69
- return coffee_spectra, coffee_labels
@@ -1,4 +0,0 @@
1
- numpy
2
- pandas
3
- scipy
4
- scikit-learn
@@ -1,43 +0,0 @@
1
- import pandas as pd
2
-
3
- from chemotools.datasets import load_coffee, load_fermentation_test, load_fermentation_train
4
-
5
-
6
- def test_load_coffee():
7
- # Arrange
8
-
9
- # Act
10
- coffee_spectra, coffee_labels = load_coffee()
11
-
12
- # Assert
13
- assert coffee_spectra.shape == (60, 1841)
14
- assert coffee_labels.shape == (60, 1)
15
- assert isinstance(coffee_spectra, pd.DataFrame)
16
- assert isinstance(coffee_labels, pd.DataFrame)
17
-
18
-
19
- def test_load_fermentation_test():
20
- # Arrange
21
-
22
- # Act
23
- test_spectra, test_hplc = load_fermentation_test()
24
-
25
- # Assert
26
- assert test_spectra.shape == (1629, 1047)
27
- assert test_hplc.shape == (34, 6)
28
- assert isinstance(test_spectra, pd.DataFrame)
29
- assert isinstance(test_hplc, pd.DataFrame)
30
-
31
- def test_load_fermentation_train():
32
- # Arrange
33
-
34
- # Act
35
- train_spectra, train_hplc = load_fermentation_train()
36
-
37
- # Assert
38
- assert train_spectra.shape == (21, 1047)
39
- assert train_hplc.shape == (21, 1)
40
- assert isinstance(train_spectra, pd.DataFrame)
41
- assert isinstance(train_hplc, pd.DataFrame)
42
-
43
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes