chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +16 -0
- chemotools/augmentation/baseline_shift.py +119 -0
- chemotools/augmentation/exponential_noise.py +117 -0
- chemotools/augmentation/index_shift.py +120 -0
- chemotools/augmentation/normal_noise.py +118 -0
- chemotools/augmentation/spectrum_scale.py +120 -0
- chemotools/augmentation/uniform_noise.py +124 -0
- chemotools/baseline/__init__.py +20 -8
- chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
- chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
- chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
- chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
- chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
- chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
- chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
- chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
- chemotools/datasets/__init__.py +5 -0
- chemotools/datasets/_base.py +122 -0
- chemotools/datasets/data/coffee_labels.csv +61 -0
- chemotools/datasets/data/coffee_spectra.csv +61 -0
- chemotools/datasets/data/fermentation_hplc.csv +35 -0
- chemotools/datasets/data/fermentation_spectra.csv +1630 -0
- chemotools/datasets/data/train_hplc.csv +22 -0
- chemotools/datasets/data/train_spectra.csv +22 -0
- chemotools/derivative/__init__.py +4 -2
- chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
- chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
- chemotools/feature_selection/__init__.py +4 -0
- chemotools/feature_selection/_index_selector.py +113 -0
- chemotools/feature_selection/_range_cut.py +111 -0
- chemotools/scale/__init__.py +5 -3
- chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
- chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
- chemotools/scale/_point_scaler.py +115 -0
- chemotools/scatter/__init__.py +13 -2
- chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
- chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
- chemotools/scatter/_robust_normal_variate.py +101 -0
- chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
- chemotools/smooth/__init__.py +6 -4
- chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
- chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
- chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
- chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
- chemotools-0.1.6.dist-info/RECORD +51 -0
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
- chemotools/scale/index_scaler.py +0 -97
- chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
- chemotools/scatter/multiplicative_scatter_correction.py +0 -123
- chemotools/utils/check_inputs.py +0 -14
- chemotools/variable_selection/__init__.py +0 -1
- chemotools/variable_selection/range_cut.py +0 -121
- chemotools-0.0.22.dist-info/RECORD +0 -39
- chemotools-0.0.22.dist-info/top_level.txt +0 -2
- tests/fixtures.py +0 -89
- tests/test_functionality.py +0 -397
- tests/test_sklearn_compliance.py +0 -192
- {tests → chemotools/datasets/data}/__init__.py +0 -0
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
@@ -0,0 +1,183 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class ExtendedMultiplicativeScatterCorrection(
    TransformerMixin, OneToOneFeatureMixin, BaseEstimator
):
    """Extended multiplicative scatter correction (EMSC).

    EMSC is a preprocessing technique for removing non linear scatter effects
    from spectra. Each spectrum is regressed (least squares) on a design matrix
    whose columns are the powers ``0..order`` of the feature index followed by
    the reference spectrum. The fitted polynomial part is subtracted from the
    spectrum and the result is divided by the coefficient of the reference.

    Note that this implementation does not include further extensions of the
    model using orthogonal subspace models.

    Parameters
    ----------
    reference : np.ndarray, optional
        The reference spectrum to use for the correction. When given it takes
        precedence over ``use_median`` and ``use_mean``. If None, the reference
        is computed from the data passed to ``fit``. The default is None.
    use_mean : bool, optional
        Whether to use the mean spectrum as the reference. Only considered when
        ``reference`` is None and ``use_median`` is False. The default is True.
    use_median : bool, optional
        Whether to use the median spectrum as the reference. Takes precedence
        over ``use_mean`` when ``reference`` is None. The default is False.
    order : int, optional
        The order of the polynomial to fit to the spectrum. The default is 2.
    weights : np.ndarray, optional
        Per-feature weights to use for the weighted EMSC. If None, the standard
        (unweighted) EMSC is used. The default is None.

    Attributes
    ----------
    reference_ : np.ndarray
        The reference spectrum used for the correction.
    indices_ : np.ndarray
        The feature indices ``0, 1, ..., n_features - 1`` as floats.
    A_ : np.ndarray of shape (n_features, order + 2)
        The design matrix: polynomial terms of ``indices_`` followed by
        ``reference_`` in the last column.
    weights_ : np.ndarray
        ``np.array(weights)``. NOTE: when ``weights`` is None this is a 0-d
        object array and is not used by ``transform``.

    References
    ----------
    Nils Kristian Afseth, Achim Kohler. Extended multiplicative signal correction
    in vibrational spectroscopy, a tutorial, doi:10.1016/j.chemolab.2012.03.004

    Valeria Tafintseva et al. Correcting replicate variation in spectroscopic data by machine learning and
    model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
    """

    def __init__(
        self,
        reference: Optional[np.ndarray] = None,
        use_mean: bool = True,
        use_median: bool = False,
        order: int = 2,
        weights: Optional[np.ndarray] = None,
    ):
        self.reference = reference
        self.use_mean = use_mean
        self.use_median = use_median
        self.order = order
        self.weights = weights

    def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
        """
        Fit the transformer to the input data. If no reference is provided, the
        median or mean spectrum will be calculated from the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : ExtendedMultiplicativeScatterCorrection
            The fitted transformer.

        Raises
        ------
        ValueError
            If ``reference`` or ``weights`` do not have ``n_features`` entries,
            or if no reference is given and both ``use_mean`` and
            ``use_median`` are False.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Check that the length of the reference is the same as the number of features
        if self.reference is not None and len(self.reference) != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features in reference but got {len(self.reference)}"
            )

        if self.weights is not None and len(self.weights) != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features in weights but got {len(self.weights)}"
            )

        # Resolve the reference: explicit reference > median > mean
        if self.reference is not None:
            reference = np.array(self.reference)
        elif self.use_median:
            reference = np.median(X, axis=0)
        elif self.use_mean:
            reference = X.mean(axis=0)
        else:
            raise ValueError("No reference was provided")

        # The fitted state is derived from the reference in the same way for
        # every source, so it is computed once here.
        self.reference_ = reference
        self.indices_ = self._calculate_indices(self.reference_)
        self.A_ = self._calculate_A(self.indices_, self.reference_)
        self.weights_ = np.array(self.weights)
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by applying the extended multiplicative
        scatter correction.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "n_features_in_")

        # Check that X is a 2D array and has only finite values. A copy is
        # requested so the rows can be overwritten in place below.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Select the per-spectrum correction once instead of on every row
        if self.weights is None:
            correct = self._calculate_emsc
        else:
            correct = self._calculate_weighted_emsc

        for i, x in enumerate(X_):
            X_[i] = correct(x)

        # ensure_2d=True guarantees a 2D array, so no reshaping is required
        return X_

    def _calculate_weighted_emsc(self, x):
        """Correct a single spectrum using a weighted least squares fit."""
        # Scaling the rows of A_ by the weights is equivalent to
        # np.diag(weights_) @ A_ but avoids building a dense
        # (n_features, n_features) matrix for every spectrum.
        weighted_A = self.weights_[:, np.newaxis] * self.A_
        reg = np.linalg.lstsq(weighted_A, x * self.weights_, rcond=None)[0]
        # Remove the polynomial part, then rescale by the reference coefficient
        x_ = (x - np.dot(self.A_[:, 0:-1], reg[0:-1])) / reg[-1]
        return x_

    def _calculate_emsc(self, x):
        """Correct a single spectrum using an ordinary least squares fit."""
        reg = np.linalg.lstsq(self.A_, x, rcond=None)[0]
        # Remove the polynomial part, then rescale by the reference coefficient
        x_ = (x - np.dot(self.A_[:, 0:-1], reg[0:-1])) / reg[-1]
        return x_

    def _calculate_indices(self, reference):
        """Return the float indices 0..len(reference) - 1."""
        return np.linspace(0, len(reference) - 1, len(reference))

    def _calculate_A(self, indices, reference):
        """Build the design matrix: powers 0..order of indices, then reference."""
        return np.vstack(
            [[np.power(indices, o) for o in range(self.order + 1)], reference]
        ).T
|
@@ -0,0 +1,169 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class MultiplicativeScatterCorrection(
    TransformerMixin, OneToOneFeatureMixin, BaseEstimator
):
    """Multiplicative scatter correction (MSC).

    MSC is a preprocessing technique for removing scatter effects from
    spectra. Each spectrum ``x`` is regressed (least squares) on a reference
    spectrum, ``x ~ m * reference + c``, and corrected as ``(x - c) / m``. The
    reference spectrum is usually a mean or median spectrum of a set of
    spectra.

    Parameters
    ----------
    reference : np.ndarray of shape (n_features), optional
        The reference spectrum to use for the correction. When given it takes
        precedence over ``use_median`` and ``use_mean``. If None, the reference
        is computed from the data passed to ``fit``. The default is None.
    use_mean : bool, optional
        Whether to use the mean spectrum as the reference. Only considered when
        ``reference`` is None and ``use_median`` is False. The default is True.
    use_median : bool, optional
        Whether to use the median spectrum as the reference. Takes precedence
        over ``use_mean`` when ``reference`` is None. The default is False.
    weights : np.ndarray of shape (n_features), optional
        Per-feature weights to use for the weighted MSC. If None, the standard
        (unweighted) MSC is used. The default is None.

    Attributes
    ----------
    reference_ : np.ndarray
        The reference spectrum used for the correction.
    A_ : np.ndarray of shape (n_features, 2)
        The design matrix: ``reference_`` in the first column and ones in the
        second column.
    weights_ : np.ndarray
        ``np.array(weights)``. NOTE: when ``weights`` is None this is a 0-d
        object array and is not used by ``transform``.
    n_features_in_ : int
        The number of features in the training data.

    Raises
    ------
    ValueError
        If no reference is provided.

    """

    def __init__(
        self,
        reference: Optional[np.ndarray] = None,
        use_mean: bool = True,
        use_median: bool = False,
        weights: Optional[np.ndarray] = None,
    ):
        self.reference = reference
        self.use_mean = use_mean
        self.use_median = use_median
        self.weights = weights

    def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
        """
        Fit the transformer to the input data. If no reference is provided, the
        median or mean spectrum will be calculated from the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : MultiplicativeScatterCorrection
            The fitted transformer.

        Raises
        ------
        ValueError
            If ``reference`` or ``weights`` do not have ``n_features`` entries,
            or if no reference is given and both ``use_mean`` and
            ``use_median`` are False.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Check that the length of the reference is the same as the number of features
        if self.reference is not None and len(self.reference) != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features in reference but got {len(self.reference)}"
            )

        if self.weights is not None and len(self.weights) != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features in weights but got {len(self.weights)}"
            )

        # Resolve the reference: explicit reference > median > mean
        if self.reference is not None:
            reference = np.array(self.reference)
        elif self.use_median:
            reference = np.median(X, axis=0)
        elif self.use_mean:
            reference = X.mean(axis=0)
        else:
            raise ValueError("No reference was provided")

        # The fitted state is derived from the reference in the same way for
        # every source, so it is computed once here.
        self.reference_ = reference
        self.A_ = self._calculate_A(self.reference_)
        self.weights_ = np.array(self.weights)
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by applying the multiplicative scatter
        correction.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "n_features_in_")

        # Check that X is a 2D array and has only finite values. With
        # reset=False, validate_data also rejects inputs whose number of
        # features differs from the fitted data, so no separate check is needed.
        # A copy is requested so the rows can be overwritten in place below.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Select the per-spectrum correction once instead of on every row
        if self.weights is None:
            correct = self._calculate_multiplicative_correction
        else:
            correct = self._calculate_weighted_multiplicative_correction

        # Calculate the multiplicative signal correction
        for i, x in enumerate(X_):
            X_[i] = correct(x)

        # ensure_2d=True guarantees a 2D array, so no reshaping is required
        return X_

    def _calculate_weighted_multiplicative_correction(self, x) -> np.ndarray:
        """Correct a single spectrum using a weighted least squares fit."""
        # Scaling the rows of A_ by the weights is equivalent to
        # np.diag(weights_) @ A_ but avoids building a dense
        # (n_features, n_features) matrix for every spectrum.
        weighted_A = self.weights_[:, np.newaxis] * self.A_
        m, c = np.linalg.lstsq(weighted_A, x * self.weights_, rcond=None)[0]
        return (x - c) / m

    def _calculate_multiplicative_correction(self, x) -> np.ndarray:
        """Correct a single spectrum using an ordinary least squares fit."""
        m, c = np.linalg.lstsq(self.A_, x, rcond=None)[0]
        return (x - c) / m

    def _calculate_A(self, reference):
        """Build the design matrix with columns [reference, ones]."""
        ones = np.ones(reference.shape[0])
        return np.vstack([reference, ones]).T
|
@@ -0,0 +1,101 @@
|
|
1
|
+
import numpy as np
|
2
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
|
+
|
5
|
+
|
6
|
+
class RobustNormalVariate(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    A transformer that calculates the robust normal variate of the input data.

    Each spectrum is centred on its ``percentile``-th percentile and divided by
    the standard deviation of the values that are less than or equal to that
    percentile.

    Parameters
    ----------
    percentile : float, optional
        The percentile to use for the robust normal variate. The value should be
        between 0 and 100. The default is 25.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by calculating the robust normal variate.

    References
    ----------
    Q. Guo, W. Wu, D.L. Massart. The robust normal variate transform for pattern
    recognition with near-infrared data. doi:10.1016/S0003-2670(98)00737-5
    """

    def __init__(self, percentile: float = 25):
        self.percentile = percentile

    def fit(self, X: np.ndarray, y=None) -> "RobustNormalVariate":
        """
        Fit the transformer to the input data.

        Nothing is estimated from the data; fitting only validates X and
        records the number of features.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : RobustNormalVariate
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by calculating the robust normal variate.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "n_features_in_")

        # Check that X is a 2D array and has only finite values. With
        # reset=False, validate_data also rejects inputs whose number of
        # features differs from the fitted data, so no separate check is needed.
        # A copy is requested so the rows can be overwritten in place below.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Calculate the robust normal variate of every spectrum
        for i, x in enumerate(X_):
            X_[i] = self._calculate_robust_normal_variate(x)

        # ensure_2d=True guarantees a 2D array, so no reshaping is required
        return X_

    def _calculate_robust_normal_variate(self, x) -> np.ndarray:
        """Centre x on its percentile and scale by the std of the lower part."""
        percentile = np.percentile(x, self.percentile)
        # NOTE: no guard is applied for the case where the values at or below
        # the percentile have zero standard deviation.
        return (x - percentile) / np.std(x[x <= percentile])
|
@@ -1,22 +1,12 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class StandardNormalVariate(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that calculates the standard normal variate of the input data.
|
11
9
|
|
12
|
-
Attributes
|
13
|
-
----------
|
14
|
-
n_features_in_ : int
|
15
|
-
The number of features in the input data.
|
16
|
-
|
17
|
-
_is_fitted : bool
|
18
|
-
Whether the transformer has been fitted to data.
|
19
|
-
|
20
10
|
Methods
|
21
11
|
-------
|
22
12
|
fit(X, y=None)
|
@@ -25,10 +15,11 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
25
15
|
transform(X, y=0, copy=True)
|
26
16
|
Transform the input data by calculating the standard normal variate.
|
27
17
|
"""
|
18
|
+
|
28
19
|
def fit(self, X: np.ndarray, y=None) -> "StandardNormalVariate":
|
29
20
|
"""
|
30
21
|
Fit the transformer to the input data.
|
31
|
-
|
22
|
+
|
32
23
|
Parameters
|
33
24
|
----------
|
34
25
|
X : np.ndarray of shape (n_samples, n_features)
|
@@ -43,14 +34,9 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
43
34
|
The fitted transformer.
|
44
35
|
"""
|
45
36
|
# Check that X is a 2D array and has only finite values
|
46
|
-
X =
|
47
|
-
|
48
|
-
|
49
|
-
self.n_features_in_ = X.shape[1]
|
50
|
-
|
51
|
-
# Set the fitted attribute to True
|
52
|
-
self._is_fitted = True
|
53
|
-
|
37
|
+
X = validate_data(
|
38
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
39
|
+
)
|
54
40
|
return self
|
55
41
|
|
56
42
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -71,15 +57,24 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
71
57
|
The transformed data.
|
72
58
|
"""
|
73
59
|
# Check that the estimator is fitted
|
74
|
-
check_is_fitted(self, "
|
60
|
+
check_is_fitted(self, "n_features_in_")
|
75
61
|
|
76
62
|
# Check that X is a 2D array and has only finite values
|
77
|
-
|
78
|
-
|
63
|
+
X_ = validate_data(
|
64
|
+
self,
|
65
|
+
X,
|
66
|
+
y="no_validation",
|
67
|
+
ensure_2d=True,
|
68
|
+
copy=True,
|
69
|
+
reset=False,
|
70
|
+
dtype=np.float64,
|
71
|
+
)
|
79
72
|
|
80
73
|
# Check that the number of features is the same as the fitted data
|
81
74
|
if X_.shape[1] != self.n_features_in_:
|
82
|
-
raise ValueError(
|
75
|
+
raise ValueError(
|
76
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
77
|
+
)
|
83
78
|
|
84
79
|
# Calculate the standard normal variate
|
85
80
|
for i, x in enumerate(X_):
|
@@ -88,4 +83,4 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
88
83
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
89
84
|
|
90
85
|
def _calculate_standard_normal_variate(self, x) -> np.ndarray:
|
91
|
-
return (x - x.mean()) / x.std()
|
86
|
+
return (x - x.mean()) / x.std()
|
chemotools/smooth/__init__.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
from .
|
2
|
-
from .
|
3
|
-
from .
|
4
|
-
from .
|
1
|
+
from ._mean_filter import MeanFilter
|
2
|
+
from ._median_filter import MedianFilter
|
3
|
+
from ._savitzky_golay_filter import SavitzkyGolayFilter
|
4
|
+
from ._whittaker_smooth import WhittakerSmooth
|
5
|
+
|
6
|
+
__all__ = ["MeanFilter", "MedianFilter", "SavitzkyGolayFilter", "WhittakerSmooth"]
|
@@ -1,12 +1,10 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from scipy.ndimage import uniform_filter1d
|
3
3
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
5
5
|
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
7
6
|
|
8
|
-
|
9
|
-
class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
7
|
+
class MeanFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
10
8
|
"""
|
11
9
|
A transformer that calculates the mean filter of the input data.
|
12
10
|
|
@@ -14,19 +12,11 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
14
12
|
----------
|
15
13
|
window_size : int, optional
|
16
14
|
The size of the window to use for the mean filter. Must be odd. Default is 3.
|
17
|
-
|
15
|
+
|
18
16
|
mode : str, optional
|
19
17
|
The mode to use for the mean filter. Can be "nearest", "constant", "reflect",
|
20
18
|
"wrap", "mirror" or "interp". Default is "nearest".
|
21
19
|
|
22
|
-
Attributes
|
23
|
-
----------
|
24
|
-
n_features_in_ : int
|
25
|
-
The number of features in the input data.
|
26
|
-
|
27
|
-
_is_fitted : bool
|
28
|
-
Whether the transformer has been fitted to data.
|
29
|
-
|
30
20
|
Methods
|
31
21
|
-------
|
32
22
|
fit(X, y=None)
|
@@ -35,7 +25,8 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
35
25
|
transform(X, y=0, copy=True)
|
36
26
|
Transform the input data by calculating the mean filter.
|
37
27
|
"""
|
38
|
-
|
28
|
+
|
29
|
+
def __init__(self, window_size: int = 3, mode="nearest") -> None:
|
39
30
|
self.window_size = window_size
|
40
31
|
self.mode = mode
|
41
32
|
|
@@ -57,14 +48,9 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
57
48
|
The fitted transformer.
|
58
49
|
"""
|
59
50
|
# Check that X is a 2D array and has only finite values
|
60
|
-
X =
|
61
|
-
|
62
|
-
|
63
|
-
self.n_features_in_ = X.shape[1]
|
64
|
-
|
65
|
-
# Set the fitted attribute to True
|
66
|
-
self._is_fitted = True
|
67
|
-
|
51
|
+
X = validate_data(
|
52
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
53
|
+
)
|
68
54
|
return self
|
69
55
|
|
70
56
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -85,11 +71,18 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
85
71
|
The transformed data.
|
86
72
|
"""
|
87
73
|
# Check that the estimator is fitted
|
88
|
-
check_is_fitted(self, "
|
74
|
+
check_is_fitted(self, "n_features_in_")
|
89
75
|
|
90
76
|
# Check that X is a 2D array and has only finite values
|
91
|
-
|
92
|
-
|
77
|
+
X_ = validate_data(
|
78
|
+
self,
|
79
|
+
X,
|
80
|
+
y="no_validation",
|
81
|
+
ensure_2d=True,
|
82
|
+
copy=True,
|
83
|
+
reset=False,
|
84
|
+
dtype=np.float64,
|
85
|
+
)
|
93
86
|
|
94
87
|
if X_.shape[1] != self.n_features_in_:
|
95
88
|
raise ValueError(
|