chemotools 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/datasets/__init__.py +2 -0
- chemotools/datasets/_base.py +52 -0
- chemotools/datasets/data/__init__.py +0 -0
- chemotools/datasets/data/fermentation_hplc.csv +35 -0
- chemotools/datasets/data/fermentation_spectra.csv +1630 -0
- chemotools/datasets/data/train_hplc.csv +22 -0
- chemotools/datasets/data/train_spectra.csv +22 -0
- chemotools/derivative/norris_william.py +3 -1
- chemotools/scatter/__init__.py +2 -0
- chemotools/scatter/extended_multiplicative_scatter_correction.py +187 -28
- chemotools/scatter/multiplicative_scatter_correction.py +60 -16
- chemotools/scatter/robust_normal_variate.py +109 -0
- {chemotools-0.0.22.dist-info → chemotools-0.0.24.dist-info}/METADATA +2 -1
- {chemotools-0.0.22.dist-info → chemotools-0.0.24.dist-info}/RECORD +20 -11
- {chemotools-0.0.22.dist-info → chemotools-0.0.24.dist-info}/WHEEL +1 -1
- tests/test_datasets.py +30 -0
- tests/test_functionality.py +174 -5
- tests/test_sklearn_compliance.py +20 -1
- {chemotools-0.0.22.dist-info → chemotools-0.0.24.dist-info}/LICENSE +0 -0
- {chemotools-0.0.22.dist-info → chemotools-0.0.24.dist-info}/top_level.txt +0 -0
@@ -1,33 +1,192 @@
|
|
1
1
|
import numpy as np
|
2
|
-
from sklearn.base import BaseEstimator, TransformerMixin
|
2
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
+
from sklearn.preprocessing import StandardScaler
|
3
4
|
from sklearn.utils.validation import check_is_fitted
|
4
5
|
|
5
6
|
from chemotools.utils.check_inputs import check_input
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
8
|
+
|
9
|
+
class ExtendedMultiplicativeScatterCorrection(
|
10
|
+
OneToOneFeatureMixin, BaseEstimator, TransformerMixin
|
11
|
+
):
|
12
|
+
"""Extended multiplicative scatter correction (EMSC) is a preprocessing technique for
|
13
|
+
removing non linear scatter effects from spectra. It is based on fitting a polynomial
|
14
|
+
regression model to the spectrum using a reference spectrum. The reference spectrum
|
15
|
+
can be the mean or median spectrum of a set of spectra or a selected reference.
|
16
|
+
|
17
|
+
Note that this implementation does not include further extensions of the model using
|
18
|
+
orthogonal subspace models.
|
19
|
+
|
20
|
+
Parameters
|
21
|
+
----------
|
22
|
+
reference : np.ndarray, optional
|
23
|
+
The reference spectrum to use for the correction. If None, the mean
|
24
|
+
spectrum will be used. The default is None.
|
25
|
+
use_mean : bool, optional
|
26
|
+
Whether to use the mean spectrum as the reference. The default is True.
|
27
|
+
use_median : bool, optional
|
28
|
+
Whether to use the median spectrum as the reference. The default is False.
|
29
|
+
order : int, optional
|
30
|
+
The order of the polynomial to fit to the spectrum. The default is 2.
|
31
|
+
weights : np.ndarray, optional
|
32
|
+
The weights to use for the weighted EMSC. If None, the standard EMSC
|
33
|
+
will be used. The default is None.
|
34
|
+
|
35
|
+
|
36
|
+
Attributes
|
37
|
+
----------
|
38
|
+
reference_ : np.ndarray
|
39
|
+
The reference spectrum used for the correction.
|
40
|
+
n_features_in_ : int
|
41
|
+
The number of features in the training data.
|
42
|
+
|
43
|
+
References
|
44
|
+
----------
|
45
|
+
Nils Kristian Afseth, Achim Kohler. Extended multiplicative signal correction
|
46
|
+
in vibrational spectroscopy, a tutorial, doi:10.1016/j.chemolab.2012.03.004
|
47
|
+
|
48
|
+
Valeria Tafintseva et al. Correcting replicate variation in spectroscopic data by machine learning and
|
49
|
+
model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
|
50
|
+
"""
|
51
|
+
|
52
|
+
def __init__(
|
53
|
+
self,
|
54
|
+
reference: np.ndarray = None,
|
55
|
+
use_mean: bool = True,
|
56
|
+
use_median: bool = False,
|
57
|
+
order: int = 2,
|
58
|
+
weights: np.ndarray = None,
|
59
|
+
):
|
60
|
+
self.reference = reference
|
61
|
+
self.use_mean = use_mean
|
62
|
+
self.use_median = use_median
|
63
|
+
self.order = order
|
64
|
+
self.weights = weights
|
65
|
+
|
66
|
+
def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
|
67
|
+
"""
|
68
|
+
Fit the transformer to the input data. If no reference is provided, the
|
69
|
+
mean or median spectrum will be calculated from the input data.
|
70
|
+
|
71
|
+
Parameters
|
72
|
+
----------
|
73
|
+
X : np.ndarray of shape (n_samples, n_features)
|
74
|
+
The input data to fit the transformer to.
|
75
|
+
|
76
|
+
y : None
|
77
|
+
Ignored.
|
78
|
+
|
79
|
+
Returns
|
80
|
+
-------
|
81
|
+
self : ExtendedMultiplicativeScatterCorrection
|
82
|
+
The fitted transformer.
|
83
|
+
"""
|
84
|
+
# Check that X is a 2D array and has only finite values
|
85
|
+
X = check_input(X)
|
86
|
+
|
87
|
+
# Set the number of features
|
88
|
+
self.n_features_in_ = X.shape[1]
|
89
|
+
|
90
|
+
# Set the fitted attribute to True
|
91
|
+
self._is_fitted = True
|
92
|
+
|
93
|
+
# Check that the length of the reference is the same as the number of features
|
94
|
+
if self.reference is not None:
|
95
|
+
if len(self.reference) != self.n_features_in_:
|
96
|
+
raise ValueError(
|
97
|
+
f"Expected {self.n_features_in_} features in reference but got {len(self.reference)}"
|
98
|
+
)
|
99
|
+
|
100
|
+
if self.weights is not None:
|
101
|
+
if len(self.weights) != self.n_features_in_:
|
102
|
+
raise ValueError(
|
103
|
+
f"Expected {self.n_features_in_} features in weights but got {len(self.weights)}"
|
104
|
+
)
|
105
|
+
|
106
|
+
# Set the reference
|
107
|
+
if self.reference is not None:
|
108
|
+
self.reference_ = np.array(self.reference)
|
109
|
+
self.indices_ = self._calculate_indices(self.reference_)
|
110
|
+
self.A_ = self._calculate_A(self.indices_, self.reference_)
|
111
|
+
self.weights_ = np.array(self.weights)
|
112
|
+
return self
|
113
|
+
|
114
|
+
if self.use_median:
|
115
|
+
self.reference_ = np.median(X, axis=0)
|
116
|
+
self.indices_ = self._calculate_indices(X[0])
|
117
|
+
self.A_ = self._calculate_A(self.indices_, self.reference_)
|
118
|
+
self.weights_ = np.array(self.weights)
|
119
|
+
return self
|
120
|
+
|
121
|
+
if self.use_mean:
|
122
|
+
self.reference_ = X.mean(axis=0)
|
123
|
+
self.indices_ = self._calculate_indices(X[0])
|
124
|
+
self.A_ = self._calculate_A(self.indices_, self.reference_)
|
125
|
+
self.weights_ = np.array(self.weights)
|
126
|
+
return self
|
127
|
+
|
128
|
+
raise ValueError("No reference was provided")
|
129
|
+
|
130
|
+
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
131
|
+
"""
|
132
|
+
Transform the input data by applying the extended multiplicative scatter
|
133
|
+
correction.
|
134
|
+
|
135
|
+
Parameters
|
136
|
+
----------
|
137
|
+
X : np.ndarray of shape (n_samples, n_features)
|
138
|
+
The input data to transform.
|
139
|
+
|
140
|
+
y : None
|
141
|
+
Ignored.
|
142
|
+
|
143
|
+
Returns
|
144
|
+
-------
|
145
|
+
X_ : np.ndarray of shape (n_samples, n_features)
|
146
|
+
The transformed data.
|
147
|
+
"""
|
148
|
+
# Check that the estimator is fitted
|
149
|
+
check_is_fitted(self, "_is_fitted")
|
150
|
+
|
151
|
+
# Check that X is a 2D array and has only finite values
|
152
|
+
X = check_input(X)
|
153
|
+
X_ = X.copy()
|
154
|
+
|
155
|
+
# Check that the number of features is the same as the fitted data
|
156
|
+
if X_.shape[1] != self.n_features_in_:
|
157
|
+
raise ValueError(
|
158
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
159
|
+
)
|
160
|
+
|
161
|
+
# Calculate the extended multiplicative scatter correction
|
162
|
+
X_ = X.copy()
|
163
|
+
|
164
|
+
if self.weights is None:
|
165
|
+
for i, x in enumerate(X_):
|
166
|
+
X_[i] = self._calculate_emsc(x)
|
167
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
168
|
+
|
169
|
+
if self.weights is not None:
|
170
|
+
for i, x in enumerate(X_):
|
171
|
+
X_[i] = self._calculate_weighted_emsc(x)
|
172
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
173
|
+
|
174
|
+
def _calculate_weighted_emsc(self, x):
|
175
|
+
reg = np.linalg.lstsq(
|
176
|
+
np.diag(self.weights_) @ self.A_, x * self.weights_, rcond=None
|
177
|
+
)[0]
|
178
|
+
x_ = (x - np.dot(self.A_[:, 0:-1], reg[0:-1])) / reg[-1]
|
179
|
+
return x_
|
180
|
+
|
181
|
+
def _calculate_emsc(self, x):
|
182
|
+
reg = np.linalg.lstsq(self.A_, x, rcond=None)[0]
|
183
|
+
x_ = (x - np.dot(self.A_[:, 0:-1], reg[0:-1])) / reg[-1]
|
184
|
+
return x_
|
185
|
+
|
186
|
+
def _calculate_indices(self, reference):
|
187
|
+
return np.linspace(0, len(reference) - 1, len(reference))
|
188
|
+
|
189
|
+
def _calculate_A(self, indices, reference):
|
190
|
+
return np.vstack(
|
191
|
+
[[np.power(indices, o) for o in range(self.order + 1)], reference]
|
192
|
+
).T
|
@@ -5,7 +5,9 @@ from sklearn.utils.validation import check_is_fitted
|
|
5
5
|
from chemotools.utils.check_inputs import check_input
|
6
6
|
|
7
7
|
|
8
|
-
class MultiplicativeScatterCorrection(
|
8
|
+
class MultiplicativeScatterCorrection(
|
9
|
+
OneToOneFeatureMixin, BaseEstimator, TransformerMixin
|
10
|
+
):
|
9
11
|
"""Multiplicative scatter correction (MSC) is a preprocessing technique for
|
10
12
|
removing scatter effects from spectra. It is based on fitting a linear
|
11
13
|
regression model to the spectrum using a reference spectrum. The reference
|
@@ -13,7 +15,7 @@ class MultiplicativeScatterCorrection(OneToOneFeatureMixin, BaseEstimator, Trans
|
|
13
15
|
|
14
16
|
Parameters
|
15
17
|
----------
|
16
|
-
reference : np.ndarray, optional
|
18
|
+
reference : np.ndarray of shape (n_features,), optional
|
17
19
|
The reference spectrum to use for the correction. If None, the mean
|
18
20
|
spectrum will be used. The default is None.
|
19
21
|
use_mean : bool, optional
|
@@ -32,17 +34,20 @@ class MultiplicativeScatterCorrection(OneToOneFeatureMixin, BaseEstimator, Trans
|
|
32
34
|
------
|
33
35
|
ValueError
|
34
36
|
If no reference is provided.
|
35
|
-
|
37
|
+
|
36
38
|
"""
|
39
|
+
|
37
40
|
def __init__(
|
38
41
|
self,
|
39
42
|
reference: np.ndarray = None,
|
40
43
|
use_mean: bool = True,
|
41
44
|
use_median: bool = False,
|
45
|
+
weights: np.ndarray = None,
|
42
46
|
):
|
43
47
|
self.reference = reference
|
44
48
|
self.use_mean = use_mean
|
45
49
|
self.use_median = use_median
|
50
|
+
self.weights = weights
|
46
51
|
|
47
52
|
def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
|
48
53
|
"""
|
@@ -71,17 +76,36 @@ class MultiplicativeScatterCorrection(OneToOneFeatureMixin, BaseEstimator, Trans
|
|
71
76
|
# Set the fitted attribute to True
|
72
77
|
self._is_fitted = True
|
73
78
|
|
79
|
+
# Check that the length of the reference is the same as the number of features
|
80
|
+
if self.reference is not None:
|
81
|
+
if len(self.reference) != self.n_features_in_:
|
82
|
+
raise ValueError(
|
83
|
+
f"Expected {self.n_features_in_} features in reference but got {len(self.reference)}"
|
84
|
+
)
|
85
|
+
|
86
|
+
if self.weights is not None:
|
87
|
+
if len(self.weights) != self.n_features_in_:
|
88
|
+
raise ValueError(
|
89
|
+
f"Expected {self.n_features_in_} features in weights but got {len(self.weights)}"
|
90
|
+
)
|
91
|
+
|
74
92
|
# Set the reference
|
75
|
-
if self.reference is None
|
76
|
-
self.reference_ =
|
93
|
+
if self.reference is not None:
|
94
|
+
self.reference_ = np.array(self.reference)
|
95
|
+
self.A_ = self._calculate_A(self.reference_)
|
96
|
+
self.weights_ = np.array(self.weights)
|
77
97
|
return self
|
78
98
|
|
79
|
-
if self.
|
99
|
+
if self.use_median:
|
80
100
|
self.reference_ = np.median(X, axis=0)
|
101
|
+
self.A_ = self._calculate_A(self.reference_)
|
102
|
+
self.weights_ = np.array(self.weights)
|
81
103
|
return self
|
82
104
|
|
83
|
-
if self.
|
84
|
-
self.reference_ =
|
105
|
+
if self.use_mean:
|
106
|
+
self.reference_ = X.mean(axis=0)
|
107
|
+
self.A_ = self._calculate_A(self.reference_)
|
108
|
+
self.weights_ = np.array(self.weights)
|
85
109
|
return self
|
86
110
|
|
87
111
|
raise ValueError("No reference was provided")
|
@@ -111,13 +135,33 @@ class MultiplicativeScatterCorrection(OneToOneFeatureMixin, BaseEstimator, Trans
|
|
111
135
|
X = check_input(X)
|
112
136
|
X_ = X.copy()
|
113
137
|
|
138
|
+
# Check that the number of features is the same as the fitted data
|
139
|
+
if X_.shape[1] != self.n_features_in_:
|
140
|
+
raise ValueError(
|
141
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
142
|
+
)
|
143
|
+
|
114
144
|
# Calculate the multiplicative signal correction
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
145
|
+
if self.weights is None:
|
146
|
+
for i, x in enumerate(X_):
|
147
|
+
X_[i] = self._calculate_multiplicative_correction(x)
|
148
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
149
|
+
|
150
|
+
if self.weights is not None:
|
151
|
+
for i, x in enumerate(X_):
|
152
|
+
X_[i] = self._calculate_weighted_multiplicative_correction(x)
|
153
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
154
|
+
|
155
|
+
def _calculate_weighted_multiplicative_correction(self, x) -> np.ndarray:
|
156
|
+
m, c = np.linalg.lstsq(
|
157
|
+
np.diag(self.weights_) @ self.A_, x * self.weights_, rcond=None
|
158
|
+
)[0]
|
123
159
|
return (x - c) / m
|
160
|
+
|
161
|
+
def _calculate_multiplicative_correction(self, x) -> np.ndarray:
|
162
|
+
m, c = np.linalg.lstsq(self.A_, x, rcond=None)[0]
|
163
|
+
return (x - c) / m
|
164
|
+
|
165
|
+
def _calculate_A(self, reference):
|
166
|
+
ones = np.ones(reference.shape[0])
|
167
|
+
return np.vstack([reference, ones]).T
|
@@ -0,0 +1,109 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
+
from sklearn.utils.validation import check_is_fitted
|
4
|
+
|
5
|
+
from chemotools.utils.check_inputs import check_input
|
6
|
+
|
7
|
+
|
8
|
+
class RobustNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
9
|
+
"""
|
10
|
+
A transformer that calculates the robust normal variate of the input data.
|
11
|
+
|
12
|
+
Parameters
|
13
|
+
----------
|
14
|
+
percentile : float, optional
|
15
|
+
The percentile to use for the robust normal variate. The value should be
|
16
|
+
between 0 and 100. The default is 25.
|
17
|
+
|
18
|
+
Attributes
|
19
|
+
----------
|
20
|
+
n_features_in_ : int
|
21
|
+
The number of features in the input data.
|
22
|
+
|
23
|
+
_is_fitted : bool
|
24
|
+
Whether the transformer has been fitted to data.
|
25
|
+
|
26
|
+
Methods
|
27
|
+
-------
|
28
|
+
fit(X, y=None)
|
29
|
+
Fit the transformer to the input data.
|
30
|
+
|
31
|
+
transform(X, y=None)
|
32
|
+
Transform the input data by calculating the robust normal variate.
|
33
|
+
|
34
|
+
References
|
35
|
+
----------
|
36
|
+
Q. Guo, W. Wu, D.L. Massart. The robust normal variate transform for pattern
|
37
|
+
recognition with near-infrared data. doi:10.1016/S0003-2670(98)00737-5
|
38
|
+
"""
|
39
|
+
|
40
|
+
def __init__(self, percentile: float = 25):
|
41
|
+
self.percentile = percentile
|
42
|
+
|
43
|
+
def fit(self, X: np.ndarray, y=None) -> "RobustNormalVariate":
|
44
|
+
"""
|
45
|
+
Fit the transformer to the input data.
|
46
|
+
|
47
|
+
Parameters
|
48
|
+
----------
|
49
|
+
X : np.ndarray of shape (n_samples, n_features)
|
50
|
+
The input data to fit the transformer to.
|
51
|
+
|
52
|
+
y : None
|
53
|
+
Ignored.
|
54
|
+
|
55
|
+
Returns
|
56
|
+
-------
|
57
|
+
self : RobustNormalVariate
|
58
|
+
The fitted transformer.
|
59
|
+
"""
|
60
|
+
# Check that X is a 2D array and has only finite values
|
61
|
+
X = check_input(X)
|
62
|
+
|
63
|
+
# Set the number of features
|
64
|
+
self.n_features_in_ = X.shape[1]
|
65
|
+
|
66
|
+
# Set the fitted attribute to True
|
67
|
+
self._is_fitted = True
|
68
|
+
|
69
|
+
return self
|
70
|
+
|
71
|
+
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
72
|
+
"""
|
73
|
+
Transform the input data by calculating the robust normal variate.
|
74
|
+
|
75
|
+
Parameters
|
76
|
+
----------
|
77
|
+
X : np.ndarray of shape (n_samples, n_features)
|
78
|
+
The input data to transform.
|
79
|
+
|
80
|
+
y : None
|
81
|
+
Ignored.
|
82
|
+
|
83
|
+
Returns
|
84
|
+
-------
|
85
|
+
X_ : np.ndarray of shape (n_samples, n_features)
|
86
|
+
The transformed data.
|
87
|
+
"""
|
88
|
+
# Check that the estimator is fitted
|
89
|
+
check_is_fitted(self, "_is_fitted")
|
90
|
+
|
91
|
+
# Check that X is a 2D array and has only finite values
|
92
|
+
X = check_input(X)
|
93
|
+
X_ = X.copy()
|
94
|
+
|
95
|
+
# Check that the number of features is the same as the fitted data
|
96
|
+
if X_.shape[1] != self.n_features_in_:
|
97
|
+
raise ValueError(
|
98
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
99
|
+
)
|
100
|
+
|
101
|
+
# Calculate the robust normal variate
|
102
|
+
for i, x in enumerate(X_):
|
103
|
+
X_[i] = self._calculate_robust_normal_variate(x)
|
104
|
+
|
105
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
106
|
+
|
107
|
+
def _calculate_robust_normal_variate(self, x) -> np.ndarray:
|
108
|
+
percentile = np.percentile(x, self.percentile)
|
109
|
+
return (x - percentile) / np.std(x[x <= percentile])
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: chemotools
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.24
|
4
4
|
Summary: Package to integrate chemometrics in scikit-learn pipelines
|
5
5
|
Home-page: https://github.com/paucablop/chemotools
|
6
6
|
Author: Pau Cabaneros Lopez
|
@@ -13,6 +13,7 @@ Requires-Python: >=3.9
|
|
13
13
|
Description-Content-Type: text/markdown
|
14
14
|
License-File: LICENSE
|
15
15
|
Requires-Dist: numpy
|
16
|
+
Requires-Dist: pandas
|
16
17
|
Requires-Dist: scipy
|
17
18
|
Requires-Dist: scikit-learn
|
18
19
|
|
@@ -8,16 +8,24 @@ chemotools/baseline/linear_correction.py,sha256=6Sw2n4QTvIDKWRdJpFD48hMvOEwqbctU
|
|
8
8
|
chemotools/baseline/non_negative.py,sha256=17_82l95U9kgoQ3Pdz3-jGv8B51JzqPdHODt6PegWRw,2864
|
9
9
|
chemotools/baseline/polynomial_correction.py,sha256=caP866fwZb7PASyz6oezgg8hdZtFMT0EimK89TGSTSc,4059
|
10
10
|
chemotools/baseline/subtract_reference.py,sha256=Pht87XadXK0URq2fun66OHaUk_cx56AkF84ta3VJy_8,3441
|
11
|
+
chemotools/datasets/__init__.py,sha256=yarhf-7bKB-mbStdWfi9LA_apOusoxY5A9bcwyzj10M,85
|
12
|
+
chemotools/datasets/_base.py,sha256=ArZrVRW5m5yO13iK_EycvV8gheiWKR9hoSZCD_OfS1g,2249
|
13
|
+
chemotools/datasets/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
+
chemotools/datasets/data/fermentation_hplc.csv,sha256=o80j8kQGBgmzmwrqDP9bmMxQoeEgtIu6q3Yng7VUKyk,2090
|
15
|
+
chemotools/datasets/data/fermentation_spectra.csv,sha256=MaaNMQP0lygJgFbEoUX0OUqdA-id8mF5Llvf_vj9tJk,15237508
|
16
|
+
chemotools/datasets/data/train_hplc.csv,sha256=DjtmqiePOWB-F6TsOGFngE1pKyXkb7Xmsi-1CLxsTnE,249
|
17
|
+
chemotools/datasets/data/train_spectra.csv,sha256=iVF19W52NHlbqq8BbLomn8n47kSPT0QxJv7wtQX4yjQ,203244
|
11
18
|
chemotools/derivative/__init__.py,sha256=x2F0IJ-uCbEYFoXFbZl_RTPCbSq82vqGOwlM9R_2Klo,84
|
12
|
-
chemotools/derivative/norris_william.py,sha256=
|
19
|
+
chemotools/derivative/norris_william.py,sha256=JaJ7zlSiC_0tiITu7VWXtgKrmkQP7gLvuFb0_n1j9Dw,5081
|
13
20
|
chemotools/derivative/savitzky_golay.py,sha256=fFzQRVGVXQIUkHp1x9dqfLVPlyStubIhSj9aGfZKuXY,3745
|
14
21
|
chemotools/scale/__init__.py,sha256=qRDhHXhkwXrr0a9ctKVpjp8X8H8Wcu2pelavehv-8ik,115
|
15
22
|
chemotools/scale/index_scaler.py,sha256=GsSVEfhVud-ZSVF7YwJBbix976W4a-1SXtbjUtQdqZ4,2661
|
16
23
|
chemotools/scale/min_max_scaler.py,sha256=zjhPhP5PcLh796VhNxo73INutGkUThe08B6IxMVD3X8,2850
|
17
24
|
chemotools/scale/norm_scaler.py,sha256=qNs-npf5Jqcp8RYqt88_5-zwd-yIo-J1jItgUTFeozs,2699
|
18
|
-
chemotools/scatter/__init__.py,sha256=
|
19
|
-
chemotools/scatter/extended_multiplicative_scatter_correction.py,sha256=
|
20
|
-
chemotools/scatter/multiplicative_scatter_correction.py,sha256=
|
25
|
+
chemotools/scatter/__init__.py,sha256=M0_B4hXVoDc2Qx00QreUfhFqPUTs6LbU4CWaFU17hg4,288
|
26
|
+
chemotools/scatter/extended_multiplicative_scatter_correction.py,sha256=J65hyEFBzKNo_35Ta9MKWO35CjTw-8hDbSr8xd8RIfc,6912
|
27
|
+
chemotools/scatter/multiplicative_scatter_correction.py,sha256=MFemiwS-KWFOtlcXVhLnY4mn6QQ8pttuj6UP0rodXEM,5689
|
28
|
+
chemotools/scatter/robust_normal_variate.py,sha256=joIL-nGUja0nG8YcCuT32ehxmy2xOy3OD0t0yP5vWfM,3233
|
21
29
|
chemotools/scatter/standard_normal_variate.py,sha256=wmK_8ea2CvoLaGebBFKr8zAU7QjGbaKAg04y6iZ4sDc,2681
|
22
30
|
chemotools/smooth/__init__.py,sha256=Kwg3jVnl-W-efTHMR6-6hQsTp-An1lYQ1lZFj6sNMtg,176
|
23
31
|
chemotools/smooth/mean_filter.py,sha256=fcC4EjO57Br3I9SJqWDJRxPxAv2WjjmXTECdBmBYXLI,2953
|
@@ -30,10 +38,11 @@ chemotools/variable_selection/__init__.py,sha256=E5WmqGRkM6XgzmhTolP3Tu9KyCtEDk_
|
|
30
38
|
chemotools/variable_selection/range_cut.py,sha256=Gh6flGp616k8gFBNBzxjfz49lncrFulMWukTOPExDTg,3709
|
31
39
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
40
|
tests/fixtures.py,sha256=Xa-Vd62Kd1fyWg3PLUSP6iIkOK8etrbyOkMJTn3dvX8,1933
|
33
|
-
tests/
|
34
|
-
tests/
|
35
|
-
|
36
|
-
chemotools-0.0.
|
37
|
-
chemotools-0.0.
|
38
|
-
chemotools-0.0.
|
39
|
-
chemotools-0.0.
|
41
|
+
tests/test_datasets.py,sha256=QwqZPLTcXG8f5ZeUJs5bq39v3kVnwSVxPRZ28spobUI,736
|
42
|
+
tests/test_functionality.py,sha256=tlvzKSh1OVbWAmSE4L1bPOVN3I205-D3x7a_x2jxpPg,15462
|
43
|
+
tests/test_sklearn_compliance.py,sha256=93RMkmqk4uhuz_wXIntPKCerxOxkQaAzJQwpDL57EaA,4593
|
44
|
+
chemotools-0.0.24.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
|
45
|
+
chemotools-0.0.24.dist-info/METADATA,sha256=gd8kokfQExRBNZgurl6vCI6O-j3QVviwYBfvisuKRd4,5015
|
46
|
+
chemotools-0.0.24.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
47
|
+
chemotools-0.0.24.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
|
48
|
+
chemotools-0.0.24.dist-info/RECORD,,
|
tests/test_datasets.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
|
3
|
+
from chemotools.datasets import load_fermentation_test, load_fermentation_train
|
4
|
+
|
5
|
+
|
6
|
+
def test_load_fermentation_test():
|
7
|
+
# Arrange
|
8
|
+
|
9
|
+
# Act
|
10
|
+
test_spectra, test_hplc = load_fermentation_test()
|
11
|
+
|
12
|
+
# Assert
|
13
|
+
assert test_spectra.shape == (1629, 1047)
|
14
|
+
assert test_hplc.shape == (34, 6)
|
15
|
+
assert isinstance(test_spectra, pd.DataFrame)
|
16
|
+
assert isinstance(test_hplc, pd.DataFrame)
|
17
|
+
|
18
|
+
def test_load_fermentation_train():
|
19
|
+
# Arrange
|
20
|
+
|
21
|
+
# Act
|
22
|
+
train_spectra, train_hplc = load_fermentation_train()
|
23
|
+
|
24
|
+
# Assert
|
25
|
+
assert train_spectra.shape == (21, 1047)
|
26
|
+
assert train_hplc.shape == (21, 1)
|
27
|
+
assert isinstance(train_spectra, pd.DataFrame)
|
28
|
+
assert isinstance(train_hplc, pd.DataFrame)
|
29
|
+
|
30
|
+
|