chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. chemotools/augmentation/__init__.py +16 -0
  2. chemotools/augmentation/baseline_shift.py +119 -0
  3. chemotools/augmentation/exponential_noise.py +117 -0
  4. chemotools/augmentation/index_shift.py +120 -0
  5. chemotools/augmentation/normal_noise.py +118 -0
  6. chemotools/augmentation/spectrum_scale.py +120 -0
  7. chemotools/augmentation/uniform_noise.py +124 -0
  8. chemotools/baseline/__init__.py +20 -8
  9. chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
  10. chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
  11. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
  12. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
  13. chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
  14. chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
  15. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
  16. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
  17. chemotools/datasets/__init__.py +5 -0
  18. chemotools/datasets/_base.py +122 -0
  19. chemotools/datasets/data/coffee_labels.csv +61 -0
  20. chemotools/datasets/data/coffee_spectra.csv +61 -0
  21. chemotools/datasets/data/fermentation_hplc.csv +35 -0
  22. chemotools/datasets/data/fermentation_spectra.csv +1630 -0
  23. chemotools/datasets/data/train_hplc.csv +22 -0
  24. chemotools/datasets/data/train_spectra.csv +22 -0
  25. chemotools/derivative/__init__.py +4 -2
  26. chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
  27. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
  28. chemotools/feature_selection/__init__.py +4 -0
  29. chemotools/feature_selection/_index_selector.py +113 -0
  30. chemotools/feature_selection/_range_cut.py +111 -0
  31. chemotools/scale/__init__.py +5 -3
  32. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
  33. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
  34. chemotools/scale/_point_scaler.py +115 -0
  35. chemotools/scatter/__init__.py +13 -2
  36. chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
  37. chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
  38. chemotools/scatter/_robust_normal_variate.py +101 -0
  39. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
  40. chemotools/smooth/__init__.py +6 -4
  41. chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
  42. chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
  43. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
  44. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
  45. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
  46. chemotools-0.1.6.dist-info/RECORD +51 -0
  47. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
  48. chemotools/scale/index_scaler.py +0 -97
  49. chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
  50. chemotools/scatter/multiplicative_scatter_correction.py +0 -123
  51. chemotools/utils/check_inputs.py +0 -14
  52. chemotools/variable_selection/__init__.py +0 -1
  53. chemotools/variable_selection/range_cut.py +0 -121
  54. chemotools-0.0.22.dist-info/RECORD +0 -39
  55. chemotools-0.0.22.dist-info/top_level.txt +0 -2
  56. tests/fixtures.py +0 -89
  57. tests/test_functionality.py +0 -397
  58. tests/test_sklearn_compliance.py +0 -192
  59. {tests → chemotools/datasets/data}/__init__.py +0 -0
  60. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
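The file list shows the release restructuring the package layout: implementation modules move to underscore-prefixed private files, variable_selection is replaced by feature_selection, and the chemotools.utils.check_inputs helper is dropped in favour of scikit-learn's own validation. A minimal import sketch, assuming the public classes are re-exported from each subpackage's __init__.py (consistent with the __init__.py changes listed above) and that the RangeCut name carries over from the old variable_selection module:

    # Old layout (0.0.22) -- shown for illustration only
    # from chemotools.variable_selection import RangeCut

    # New layout (0.1.6): module files are private (_range_cut.py, _non_negative.py, ...),
    # so classes are imported from the subpackage itself (assumed re-exports)
    from chemotools.feature_selection import RangeCut
    from chemotools.baseline import NonNegative, PolynomialCorrection, SubtractReference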
chemotools/baseline/{non_negative.py → _non_negative.py}
@@ -1,11 +1,9 @@
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data
 
-from chemotools.utils.check_inputs import check_input
 
-
-class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class NonNegative(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that sets all negative values to zero or to abs.
 
@@ -14,14 +12,6 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     mode : str, optional
         The mode to use for the non-negative values. Can be "zero" or "abs".
 
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -52,14 +42,9 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -80,11 +65,18 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
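A minimal usage sketch of NonNegative after this change, assuming the class is re-exported from chemotools.baseline; the mode values follow the docstring above:

    import numpy as np
    from chemotools.baseline import NonNegative  # assumed public import path

    X = np.array([[0.2, -0.1, 1.5],
                  [-0.3, 0.0, 0.8]])

    # mode="zero" clips negative values to zero; mode="abs" keeps their magnitude
    nn = NonNegative(mode="zero")
    X_nonneg = nn.fit_transform(X)  # [[0.2, 0.0, 1.5], [0.0, 0.0, 0.8]]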
chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py}
@@ -1,12 +1,13 @@
+from typing import Optional
+
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data
 
-from chemotools.utils.check_inputs import check_input
 
-class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class PolynomialCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
-    A transformer that subtracts a polynomial baseline from the input data. The polynomial is 
+    A transformer that subtracts a polynomial baseline from the input data. The polynomial is
     fitted to the points in the spectrum specified by the indices parameter.
 
     Parameters
@@ -18,14 +19,6 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
         The indices of the points in the spectrum to fit the polynomial to. Defaults to None,
         which fits the polynomial to all points in the spectrum (equivalent to detrend).
 
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
    -------
     fit(X, y=None)
@@ -37,7 +30,8 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
     _baseline_correct_spectrum(x)
         Subtract the polynomial baseline from a single spectrum.
     """
-    def __init__(self, order: int = 1, indices: list = None) -> None:
+
+    def __init__(self, order: int = 1, indices: Optional[list] = None) -> None:
        self.order = order
        self.indices = indices
 
@@ -59,22 +53,17 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
        if self.indices is None:
-            self.indices_ = range(0, len(X[0]))
+            self.indices_ = list(range(0, len(X[0])))
        else:
            self.indices_ = self.indices
 
        return self
-
-    def transform(self, X: np.ndarray, y:int=0, copy:bool=True) -> np.ndarray:
+
+    def transform(self, X: np.ndarray, y: int = 0, copy: bool = True) -> np.ndarray:
        """
        Transform the input data by subtracting the polynomial baseline.
 
@@ -95,21 +84,30 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
            The transformed data.
        """
        # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
 
        # Calculate polynomial baseline correction
        for i, x in enumerate(X_):
            X_[i] = self._baseline_correct_spectrum(x)
        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-
+
    def _baseline_correct_spectrum(self, x: np.ndarray) -> np.ndarray:
        """
        Subtract the polynomial baseline from a single spectrum.
@@ -126,5 +124,5 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
        """
        intensity = x[self.indices_]
        poly = np.polyfit(self.indices_, intensity, self.order)
-        baseline = [np.polyval(poly, i) for i in range(0, len(x))]
-        return x - baseline
+        baseline = [np.polyval(poly, i) for i in range(0, len(x))]
+        return x - baseline
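A usage sketch of PolynomialCorrection, again assuming the chemotools.baseline re-export; per _baseline_correct_spectrum above, a polynomial of the given order is fitted to the listed indices and subtracted from every spectrum:

    import numpy as np
    from chemotools.baseline import PolynomialCorrection  # assumed public import path

    x = np.arange(200, dtype=float)
    # synthetic spectrum: a Gaussian peak sitting on a linear drift
    spectrum = np.exp(-0.5 * ((x - 100.0) / 8.0) ** 2) + 0.002 * x + 0.1

    # fit a first-order baseline through peak-free regions at both ends and subtract it
    baseline_points = list(range(0, 20)) + list(range(180, 200))
    pc = PolynomialCorrection(order=1, indices=baseline_points)
    corrected = pc.fit_transform(spectrum.reshape(1, -1))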
chemotools/baseline/{subtract_reference.py → _subtract_reference.py}
@@ -1,11 +1,11 @@
+from typing import Optional
+
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
-
-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data
 
 
-class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class SubtractReference(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that subtracts a reference spectrum from the input data.
 
@@ -15,14 +15,6 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         The reference spectrum to subtract from the input data. If None, the original spectrum
         is returned.
 
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -34,9 +26,10 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     _subtract_reference(x)
         Subtract the reference spectrum from a single spectrum.
     """
+
    def __init__(
        self,
-        reference: np.ndarray = None,
+        reference: Optional[np.ndarray] = None,
    ):
        self.reference = reference
 
@@ -58,20 +51,14 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
        # Set the reference
-
        if self.reference is not None:
            self.reference_ = self.reference.copy()
            return self
-
+
        return self
 
    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -92,15 +79,24 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
            The transformed data.
        """
        # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
 
        if self.reference is None:
            return X_.reshape(-1, 1) if X_.ndim == 1 else X_
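A usage sketch of SubtractReference, assuming the chemotools.baseline re-export; with reference=None the transform returns the input unchanged, as the branch above shows:

    import numpy as np
    from chemotools.baseline import SubtractReference  # assumed public import path

    reference = np.linspace(0.0, 1.0, 10)             # e.g. a background/solvent spectrum
    X = np.vstack([reference + 0.05, reference + 0.10])

    sr = SubtractReference(reference=reference)
    residuals = sr.fit_transform(X)                   # rows are ~0.05 and ~0.10 offsets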
chemotools/datasets/__init__.py
@@ -0,0 +1,5 @@
+from ._base import load_coffee
+from ._base import load_fermentation_train
+from ._base import load_fermentation_test
+
+__all__ = ["load_coffee", "load_fermentation_train", "load_fermentation_test"]
chemotools/datasets/_base.py
@@ -0,0 +1,122 @@
+import os
+
+
+import pandas as pd
+import polars as pl
+
+PACKAGE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
+
+
+def load_fermentation_train(set_output="pandas"):
+    """
+    Loads the training data of the fermentation dataset. This data corresponds to a synthetic dataset measured
+    off-line. This dataset is designed to represent the variability of real fermentation data.
+
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+
+    Returns
+    -------
+    train_spectra: pd.DataFrame A pandas DataFrame containing the synthetic spectra measured to train the model.
+    train_hplc: pd.DataFrame A pandas DataFrame containing the corresponding reference measurements analyzed with HPLC.
+
+    References
+    -------
+    - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
+      Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
+      A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
+    """
+    if set_output == "pandas":
+        train_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
+        train_spectra.columns = train_spectra.columns.astype(float)
+        train_hplc = pd.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+        return train_spectra, train_hplc
+
+    if set_output == "polars":
+        train_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_spectra.csv")
+        train_hplc = pl.read_csv(PACKAGE_DIRECTORY + "/data/train_hplc.csv")
+        return train_spectra, train_hplc
+
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
+
+
+def load_fermentation_test(set_output="pandas"):
+    """
+    Loads the testing data of the fermentation dataset. This data corresponds to real fermentation data measured
+    on-line during a fermentation process.
+
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+
+    Returns
+    -------
+    test_spectra: pd.DataFrame A pandas DataFrame containing the on-line spectra measured to train the model.
+    test_hplc: pd.DataFrame A pandas DataFrame containing the corresponding HPLC measurements.
+
+    References
+    -------
+    - Cabaneros Lopez Pau, Udugama Isuru A., Thomsen Sune Tjalfe, Roslander Christian, Junicke Helena,
+      Mauricio Iglesias Miguel, Gernaey Krist V. Transforming data into information:
+      A parallel hybrid model for real-time state estimation in lignocellulose ethanol fermentations.
+    """
+    if set_output == "pandas":
+        fermentation_spectra = pd.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
+        )
+        fermentation_spectra.columns = fermentation_spectra.columns.astype(float)
+        fermentation_hplc = pd.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
+        )
+        return fermentation_spectra, fermentation_hplc
+
+    if set_output == "polars":
+        fermentation_spectra = pl.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_spectra.csv"
+        )
+        fermentation_hplc = pl.read_csv(
+            PACKAGE_DIRECTORY + "/data/fermentation_hplc.csv"
+        )
+        return fermentation_spectra, fermentation_hplc
+
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
+
+
+def load_coffee(set_output="pandas"):
+    """
+    Loads the coffee dataset. This data corresponds to a coffee spectra from three different origins
+    measured off-line using attenuated total reflectance Fourier transform infrared spectroscopy (ATR-FTIR).
+
+    Arguments
+    -------
+    set_output: str, default='pandas'
+        The output format of the data. It can be 'pandas' or 'polars'. If 'polars', the data is returned as a polars DataFrame.
+
+    Returns
+    -------
+    coffee_spectra: pd.DataFrame A pandas DataFrame containing the coffee spectra.
+    coffee_labels: pd.DataFrame A pandas DataFrame containing the corresponding labels.
+    """
+    if set_output == "pandas":
+        coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
+        coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
+        return coffee_spectra, coffee_labels
+
+    if set_output == "polars":
+        coffee_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
+        coffee_labels = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
+        return coffee_spectra, coffee_labels
+
+    else:
+        raise ValueError(
+            "Invalid value for set_output. Please use 'pandas' or 'polars'."
+        )
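A short loading sketch for the new datasets module; the function names and the set_output switch come directly from the code above:

    from chemotools.datasets import load_coffee, load_fermentation_train, load_fermentation_test

    # pandas DataFrames by default; spectra column names are cast to float wavenumbers
    train_spectra, train_hplc = load_fermentation_train()
    test_spectra, test_hplc = load_fermentation_test()

    # polars output is selected with set_output="polars"
    coffee_spectra, coffee_labels = load_coffee(set_output="polars")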
chemotools/datasets/data/coffee_labels.csv
@@ -0,0 +1,61 @@
+labels
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Ethiopia
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Brasil
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam
+Vietnam