chemotools 0.0.27__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +16 -0
- chemotools/augmentation/baseline_shift.py +119 -0
- chemotools/augmentation/exponential_noise.py +117 -0
- chemotools/augmentation/index_shift.py +120 -0
- chemotools/augmentation/normal_noise.py +118 -0
- chemotools/augmentation/spectrum_scale.py +120 -0
- chemotools/augmentation/uniform_noise.py +124 -0
- chemotools/baseline/__init__.py +20 -8
- chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
- chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
- chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +22 -30
- chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
- chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
- chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
- chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
- chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
- chemotools/datasets/__init__.py +3 -0
- chemotools/datasets/_base.py +85 -15
- chemotools/datasets/data/coffee_labels.csv +61 -0
- chemotools/datasets/data/coffee_spectra.csv +61 -0
- chemotools/derivative/__init__.py +4 -2
- chemotools/derivative/{norris_william.py → _norris_william.py} +17 -24
- chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
- chemotools/feature_selection/__init__.py +4 -0
- chemotools/{variable_selection/select_features.py → feature_selection/_index_selector.py} +32 -56
- chemotools/{variable_selection/range_cut.py → feature_selection/_range_cut.py} +25 -50
- chemotools/scale/__init__.py +5 -3
- chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +20 -27
- chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
- chemotools/scale/{point_scaler.py → _point_scaler.py} +27 -32
- chemotools/scatter/__init__.py +13 -4
- chemotools/scatter/{extended_multiplicative_scatter_correction.py → _extended_multiplicative_scatter_correction.py} +19 -28
- chemotools/scatter/{multiplicative_scatter_correction.py → _multiplicative_scatter_correction.py} +19 -17
- chemotools/scatter/{robust_normal_variate.py → _robust_normal_variate.py} +15 -23
- chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
- chemotools/smooth/__init__.py +6 -4
- chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
- chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
- chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
- chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
- {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -16
- chemotools-0.1.6.dist-info/RECORD +51 -0
- {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
- chemotools/utils/check_inputs.py +0 -14
- chemotools/variable_selection/__init__.py +0 -2
- chemotools-0.0.27.dist-info/RECORD +0 -49
- chemotools-0.0.27.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/fixtures.py +0 -89
- tests/test_datasets.py +0 -30
- tests/test_functionality.py +0 -616
- tests/test_sklearn_compliance.py +0 -220
- {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
@@ -0,0 +1,124 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class UniformNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
|
+
"""
|
10
|
+
Add uniform noise to the input data.
|
11
|
+
|
12
|
+
Parameters
|
13
|
+
----------
|
14
|
+
min : float, default=0.0
|
15
|
+
The lower bound of the uniform distribution.
|
16
|
+
|
17
|
+
max : float, default=0.0
|
18
|
+
The upper bound of the uniform distribution.
|
19
|
+
|
20
|
+
random_state : int, default=None
|
21
|
+
The random state to use for the random number generator.
|
22
|
+
|
23
|
+
Attributes
|
24
|
+
----------
|
25
|
+
n_features_in_ : int
|
26
|
+
The number of features in the input data.
|
27
|
+
|
28
|
+
_is_fitted : bool
|
29
|
+
Whether the transformer has been fitted to data.
|
30
|
+
|
31
|
+
Methods
|
32
|
+
-------
|
33
|
+
fit(X, y=None)
|
34
|
+
Fit the transformer to the input data.
|
35
|
+
|
36
|
+
transform(X, y=0, copy=True)
|
37
|
+
Transform the input data by adding random noise.
|
38
|
+
"""
|
39
|
+
|
40
|
+
def __init__(
|
41
|
+
self, min: float = 0.0, max: float = 0.0, random_state: Optional[int] = None
|
42
|
+
):
|
43
|
+
self.min = min
|
44
|
+
self.max = max
|
45
|
+
self.random_state = random_state
|
46
|
+
|
47
|
+
def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
|
48
|
+
"""
|
49
|
+
Fit the transformer to the input data.
|
50
|
+
|
51
|
+
Parameters
|
52
|
+
----------
|
53
|
+
X : np.ndarray of shape (n_samples, n_features)
|
54
|
+
The input data to fit the transformer to.
|
55
|
+
|
56
|
+
y : None
|
57
|
+
Ignored.
|
58
|
+
|
59
|
+
Returns
|
60
|
+
-------
|
61
|
+
self : UniformNoise
|
62
|
+
The fitted transformer.
|
63
|
+
"""
|
64
|
+
# Check that X is a 2D array and has only finite values
|
65
|
+
X = validate_data(
|
66
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
67
|
+
)
|
68
|
+
|
69
|
+
# Set the number of features
|
70
|
+
self.n_features_in_ = X.shape[1]
|
71
|
+
|
72
|
+
# Set the fitted attribute to True
|
73
|
+
self._is_fitted = True
|
74
|
+
|
75
|
+
# Instantiate the random number generator
|
76
|
+
self._rng = np.random.default_rng(self.random_state)
|
77
|
+
|
78
|
+
return self
|
79
|
+
|
80
|
+
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
81
|
+
"""
|
82
|
+
Transform the input data by adding random uniform noise.
|
83
|
+
|
84
|
+
Parameters
|
85
|
+
----------
|
86
|
+
X : np.ndarray of shape (n_samples, n_features)
|
87
|
+
The input data to transform.
|
88
|
+
|
89
|
+
y : None
|
90
|
+
Ignored.
|
91
|
+
|
92
|
+
Returns
|
93
|
+
-------
|
94
|
+
X_ : np.ndarray of shape (n_samples, n_features)
|
95
|
+
The transformed data.
|
96
|
+
"""
|
97
|
+
# Check that the estimator is fitted
|
98
|
+
check_is_fitted(self, "_is_fitted")
|
99
|
+
|
100
|
+
# Check that X is a 2D array and has only finite values
|
101
|
+
X_ = validate_data(
|
102
|
+
self,
|
103
|
+
X,
|
104
|
+
y="no_validation",
|
105
|
+
ensure_2d=True,
|
106
|
+
copy=True,
|
107
|
+
reset=False,
|
108
|
+
dtype=np.float64,
|
109
|
+
)
|
110
|
+
|
111
|
+
# Check that the number of features is the same as the fitted data
|
112
|
+
if X_.shape[1] != self.n_features_in_:
|
113
|
+
raise ValueError(
|
114
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
115
|
+
)
|
116
|
+
|
117
|
+
# Calculate the standard uniform variate
|
118
|
+
for i, x in enumerate(X_):
|
119
|
+
X_[i] = self._add_random_noise(x)
|
120
|
+
|
121
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
122
|
+
|
123
|
+
def _add_random_noise(self, x) -> np.ndarray:
|
124
|
+
return x + self._rng.uniform(self.min, self.max, size=x.shape)
|
chemotools/baseline/__init__.py
CHANGED
@@ -1,8 +1,20 @@
|
|
1
|
-
from .
|
2
|
-
from .
|
3
|
-
from .
|
4
|
-
from .
|
5
|
-
from .
|
6
|
-
from .
|
7
|
-
from .
|
8
|
-
from .
|
1
|
+
from ._air_pls import AirPls
|
2
|
+
from ._ar_pls import ArPls
|
3
|
+
from ._constant_baseline_correction import ConstantBaselineCorrection
|
4
|
+
from ._cubic_spline_correction import CubicSplineCorrection
|
5
|
+
from ._linear_correction import LinearCorrection
|
6
|
+
from ._non_negative import NonNegative
|
7
|
+
from ._polynomial_correction import PolynomialCorrection
|
8
|
+
from ._subtract_reference import SubtractReference
|
9
|
+
|
10
|
+
|
11
|
+
__all__ = [
|
12
|
+
"AirPls",
|
13
|
+
"ArPls",
|
14
|
+
"ConstantBaselineCorrection",
|
15
|
+
"CubicSplineCorrection",
|
16
|
+
"LinearCorrection",
|
17
|
+
"NonNegative",
|
18
|
+
"PolynomialCorrection",
|
19
|
+
"SubtractReference",
|
20
|
+
]
|
@@ -3,14 +3,12 @@ import numpy as np
|
|
3
3
|
from scipy.sparse import csc_matrix, eye, diags
|
4
4
|
from scipy.sparse.linalg import spsolve
|
5
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
6
|
-
from sklearn.utils.validation import check_is_fitted
|
7
|
-
|
8
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
9
7
|
|
10
8
|
logger = logging.getLogger(__name__)
|
11
9
|
|
12
10
|
|
13
|
-
class AirPls(OneToOneFeatureMixin, BaseEstimator
|
11
|
+
class AirPls(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
14
12
|
"""
|
15
13
|
This class implements the AirPLS (Adaptive Iteratively Reweighted Penalized Least Squares) algorithm for baseline
|
16
14
|
correction of spectra data. AirPLS is a common approach for removing the baseline from spectra, which can be useful
|
@@ -30,14 +28,6 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
30
28
|
The number of iterations used to calculate the baseline. Increasing the number of iterations can improve the
|
31
29
|
accuracy of the baseline correction, but also increases the computation time.
|
32
30
|
|
33
|
-
Attributes
|
34
|
-
----------
|
35
|
-
n_features_in_ : int
|
36
|
-
The number of features in the input data.
|
37
|
-
|
38
|
-
_is_fitted : bool
|
39
|
-
A flag indicating whether the estimator has been fitted to data.
|
40
|
-
|
41
31
|
Methods
|
42
32
|
-------
|
43
33
|
fit(X, y=None)
|
@@ -48,7 +38,7 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
48
38
|
|
49
39
|
_calculate_whittaker_smooth(x, w)
|
50
40
|
Calculate the Whittaker smooth of a given input vector x, with weights w.
|
51
|
-
|
41
|
+
|
52
42
|
_calculate_air_pls(x)
|
53
43
|
Calculate the AirPLS baseline of a given input vector x.
|
54
44
|
|
@@ -84,14 +74,11 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
84
74
|
self : AirPls
|
85
75
|
Returns the instance itself.
|
86
76
|
"""
|
87
|
-
# Check that X is a 2D array and has only finite values
|
88
|
-
X = check_input(X)
|
89
|
-
|
90
|
-
# Set the number of features
|
91
|
-
self.n_features_in_ = X.shape[1]
|
92
77
|
|
93
|
-
#
|
94
|
-
|
78
|
+
# Check that X is a 2D array and has only finite values
|
79
|
+
X = validate_data(
|
80
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
81
|
+
)
|
95
82
|
|
96
83
|
return self
|
97
84
|
|
@@ -113,17 +100,18 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
113
100
|
"""
|
114
101
|
|
115
102
|
# Check that the estimator is fitted
|
116
|
-
check_is_fitted(self, "
|
103
|
+
check_is_fitted(self, "n_features_in_")
|
117
104
|
|
118
105
|
# Check that X is a 2D array and has only finite values
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
106
|
+
X_ = validate_data(
|
107
|
+
self,
|
108
|
+
X,
|
109
|
+
y="no_validation",
|
110
|
+
ensure_2d=True,
|
111
|
+
copy=True,
|
112
|
+
reset=False,
|
113
|
+
dtype=np.float64,
|
114
|
+
)
|
127
115
|
|
128
116
|
# Calculate the air pls smooth
|
129
117
|
for i, x in enumerate(X_):
|
@@ -132,14 +120,14 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
132
120
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
133
121
|
|
134
122
|
def _calculate_whittaker_smooth(self, x, w):
|
135
|
-
X = np.
|
123
|
+
X = np.array(x)
|
136
124
|
m = X.size
|
137
125
|
E = eye(m, format="csc")
|
138
126
|
for i in range(self.polynomial_order):
|
139
127
|
E = E[1:] - E[:-1]
|
140
128
|
W = diags(w, 0, shape=(m, m))
|
141
|
-
A = csc_matrix(W + (self.lam * E.T
|
142
|
-
B = csc_matrix(W
|
129
|
+
A = csc_matrix(W + (self.lam * E.T @ E))
|
130
|
+
B = csc_matrix(W @ X.T).toarray().ravel()
|
143
131
|
background = spsolve(A, B)
|
144
132
|
return np.array(background)
|
145
133
|
|
@@ -5,14 +5,12 @@ from scipy.sparse import spdiags, csc_matrix
|
|
5
5
|
from scipy.sparse.linalg import splu
|
6
6
|
|
7
7
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
8
|
-
from sklearn.utils.validation import check_is_fitted
|
9
|
-
|
10
|
-
from chemotools.utils.check_inputs import check_input
|
8
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
11
9
|
|
12
10
|
logger = logging.getLogger(__name__)
|
13
11
|
|
14
12
|
|
15
|
-
class ArPls(OneToOneFeatureMixin, BaseEstimator
|
13
|
+
class ArPls(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
16
14
|
"""
|
17
15
|
This class implements the Assymmetrically Reweighted Penalized Least Squares (ArPls) is a baseline
|
18
16
|
correction method for spectroscopy data. It uses an iterative process
|
@@ -29,13 +27,6 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
29
27
|
nr_iterations : int, optional (default=100)
|
30
28
|
The maximum number of iterations for the weight updating scheme.
|
31
29
|
|
32
|
-
Attributes
|
33
|
-
----------
|
34
|
-
n_features_in_ : int
|
35
|
-
The number of input features.
|
36
|
-
|
37
|
-
_is_fitted : bool
|
38
|
-
Whether the estimator has been fitted.
|
39
30
|
|
40
31
|
Methods
|
41
32
|
-------
|
@@ -53,8 +44,8 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
53
44
|
|
54
45
|
References
|
55
46
|
----------
|
56
|
-
- Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo
|
57
|
-
Baseline correction using asymmetrically reweighted penalized
|
47
|
+
- Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo
|
48
|
+
Baseline correction using asymmetrically reweighted penalized
|
58
49
|
least squares smoothing
|
59
50
|
"""
|
60
51
|
|
@@ -86,13 +77,9 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
86
77
|
"""
|
87
78
|
|
88
79
|
# Check that X is a 2D array and has only finite values
|
89
|
-
X =
|
90
|
-
|
91
|
-
|
92
|
-
self.n_features_in_ = X.shape[1]
|
93
|
-
|
94
|
-
# Set the fitted attribute to True
|
95
|
-
self._is_fitted = True
|
80
|
+
X = validate_data(
|
81
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
82
|
+
)
|
96
83
|
|
97
84
|
return self
|
98
85
|
|
@@ -114,17 +101,17 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
114
101
|
"""
|
115
102
|
|
116
103
|
# Check that the estimator is fitted
|
117
|
-
check_is_fitted(self, "
|
104
|
+
check_is_fitted(self, "n_features_in_")
|
118
105
|
|
119
106
|
# Check that X is a 2D array and has only finite values
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
107
|
+
X_ = validate_data(
|
108
|
+
self,
|
109
|
+
X,
|
110
|
+
y="no_validation",
|
111
|
+
ensure_2d=True,
|
112
|
+
copy=True,
|
113
|
+
reset=False,
|
114
|
+
)
|
128
115
|
|
129
116
|
# Calculate the ar pls baseline
|
130
117
|
for i, x in enumerate(X_):
|
@@ -133,9 +120,9 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
133
120
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
134
121
|
|
135
122
|
def _calculate_diff(self, N):
|
136
|
-
|
123
|
+
identity_matrix = sp.eye(N, format="csc")
|
137
124
|
D2 = sp.diags([1, -2, 1], [0, 1, 2], shape=(N - 2, N), format="csc")
|
138
|
-
return D2.dot(
|
125
|
+
return D2.dot(identity_matrix).T
|
139
126
|
|
140
127
|
def _calculate_ar_pls(self, x):
|
141
128
|
N = len(x)
|
@@ -1,11 +1,11 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
6
|
|
7
7
|
|
8
|
-
class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator
|
8
|
+
class ConstantBaselineCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
9
|
"""
|
10
10
|
A transformer that corrects a baseline by subtracting a constant value.
|
11
11
|
The constant value is taken by the mean of the features between the start
|
@@ -30,12 +30,6 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
30
30
|
end_index_ : int
|
31
31
|
The index of the end of the range. It is 1 if the wavenumbers are not provided.
|
32
32
|
|
33
|
-
n_features_in_ : int
|
34
|
-
The number of features in the input data.
|
35
|
-
|
36
|
-
_is_fitted : bool
|
37
|
-
Whether the transformer has been fitted to data.
|
38
|
-
|
39
33
|
Methods
|
40
34
|
-------
|
41
35
|
fit(X, y=None)
|
@@ -46,7 +40,10 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
46
40
|
"""
|
47
41
|
|
48
42
|
def __init__(
|
49
|
-
self,
|
43
|
+
self,
|
44
|
+
start: int = 0,
|
45
|
+
end: int = 1,
|
46
|
+
wavenumbers: Optional[np.ndarray] = None,
|
50
47
|
) -> None:
|
51
48
|
self.start = start
|
52
49
|
self.end = end
|
@@ -70,13 +67,9 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
70
67
|
The fitted transformer.
|
71
68
|
"""
|
72
69
|
# Check that X is a 2D array and has only finite values
|
73
|
-
X =
|
74
|
-
|
75
|
-
|
76
|
-
self.n_features_in_ = X.shape[1]
|
77
|
-
|
78
|
-
# Set the fitted attribute to True
|
79
|
-
self._is_fitted = True
|
70
|
+
X = validate_data(
|
71
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
72
|
+
)
|
80
73
|
|
81
74
|
# Set the start and end indices
|
82
75
|
if self.wavenumbers is None:
|
@@ -109,17 +102,18 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
109
102
|
The transformed input data.
|
110
103
|
"""
|
111
104
|
# Check that the estimator is fitted
|
112
|
-
check_is_fitted(self, "
|
105
|
+
check_is_fitted(self, "n_features_in_")
|
113
106
|
|
114
107
|
# Check that X is a 2D array and has only finite values
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
108
|
+
X_ = validate_data(
|
109
|
+
self,
|
110
|
+
X,
|
111
|
+
y="no_validation",
|
112
|
+
ensure_2d=True,
|
113
|
+
copy=True,
|
114
|
+
reset=False,
|
115
|
+
dtype=np.float64,
|
116
|
+
)
|
123
117
|
|
124
118
|
# Base line correct the spectra
|
125
119
|
for i, x in enumerate(X_):
|
@@ -128,7 +122,5 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
128
122
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
129
123
|
|
130
124
|
def _find_index(self, target: float) -> int:
|
131
|
-
if self.wavenumbers is None:
|
132
|
-
return target
|
133
125
|
wavenumbers = np.array(self.wavenumbers)
|
134
|
-
return np.argmin(np.abs(wavenumbers - target))
|
126
|
+
return np.argmin(np.abs(wavenumbers - target)).astype(int)
|
@@ -1,13 +1,14 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from scipy.interpolate import CubicSpline
|
3
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
5
7
|
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
7
8
|
|
8
|
-
class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator
|
9
|
+
class CubicSplineCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
10
|
"""
|
10
|
-
A transformer that corrects a baseline by subtracting a cubic spline through the
|
11
|
+
A transformer that corrects a baseline by subtracting a cubic spline through the
|
11
12
|
points defined by the indices.
|
12
13
|
|
13
14
|
Parameters
|
@@ -32,7 +33,8 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
32
33
|
Transform the input data by subtracting the constant baseline value.
|
33
34
|
|
34
35
|
"""
|
35
|
-
|
36
|
+
|
37
|
+
def __init__(self, indices: Optional[list] = None) -> None:
|
36
38
|
self.indices = indices
|
37
39
|
|
38
40
|
def fit(self, X: np.ndarray, y=None) -> "CubicSplineCorrection":
|
@@ -53,13 +55,9 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
53
55
|
The fitted transformer.
|
54
56
|
"""
|
55
57
|
# Check that X is a 2D array and has only finite values
|
56
|
-
X =
|
57
|
-
|
58
|
-
|
59
|
-
self.n_features_in_ = X.shape[1]
|
60
|
-
|
61
|
-
# Set the fitted attribute to True
|
62
|
-
self._is_fitted = True
|
58
|
+
X = validate_data(
|
59
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
60
|
+
)
|
63
61
|
|
64
62
|
if self.indices is None:
|
65
63
|
self.indices_ = [0, len(X[0]) - 1]
|
@@ -89,15 +87,24 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
89
87
|
The transformed data.
|
90
88
|
"""
|
91
89
|
# Check that the estimator is fitted
|
92
|
-
check_is_fitted(self, "
|
90
|
+
check_is_fitted(self, "indices_")
|
93
91
|
|
94
92
|
# Check that X is a 2D array and has only finite values
|
95
|
-
|
96
|
-
|
93
|
+
X_ = validate_data(
|
94
|
+
self,
|
95
|
+
X,
|
96
|
+
y="no_validation",
|
97
|
+
ensure_2d=True,
|
98
|
+
copy=True,
|
99
|
+
reset=False,
|
100
|
+
dtype=np.float64,
|
101
|
+
)
|
97
102
|
|
98
103
|
# Check that the number of features is the same as the fitted data
|
99
104
|
if X_.shape[1] != self.n_features_in_:
|
100
|
-
raise ValueError(
|
105
|
+
raise ValueError(
|
106
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
107
|
+
)
|
101
108
|
|
102
109
|
# Calculate spline baseline correction
|
103
110
|
for i, x in enumerate(X_):
|
@@ -106,7 +113,7 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
106
113
|
|
107
114
|
def _spline_baseline_correct(self, x: np.ndarray) -> np.ndarray:
|
108
115
|
indices = self.indices_
|
109
|
-
intensity = x[indices]
|
116
|
+
intensity = x[indices]
|
110
117
|
spl = CubicSpline(indices, intensity)
|
111
|
-
baseline = spl(range(len(x)))
|
112
|
-
return x - baseline
|
118
|
+
baseline = spl(range(len(x)))
|
119
|
+
return x - baseline
|
@@ -1,26 +1,13 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class LinearCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that corrects a baseline by subtracting a linear baseline through the
|
11
9
|
initial and final points of the spectrum.
|
12
10
|
|
13
|
-
Parameters
|
14
|
-
----------
|
15
|
-
|
16
|
-
Attributes
|
17
|
-
----------
|
18
|
-
n_features_in_ : int
|
19
|
-
The number of features in the input data.
|
20
|
-
|
21
|
-
_is_fitted : bool
|
22
|
-
Whether the transformer has been fitted to data.
|
23
|
-
|
24
11
|
Methods
|
25
12
|
-------
|
26
13
|
fit(X, y=None)
|
@@ -31,7 +18,6 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
31
18
|
"""
|
32
19
|
|
33
20
|
def _drift_correct_spectrum(self, x: np.ndarray) -> np.ndarray:
|
34
|
-
|
35
21
|
# Can take any array and returns with a linear baseline correction
|
36
22
|
# Find the x values at the edges of the spectrum
|
37
23
|
y1: float = x[0]
|
@@ -68,13 +54,9 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
68
54
|
The fitted transformer.
|
69
55
|
"""
|
70
56
|
# Check that X is a 2D array and has only finite values
|
71
|
-
X =
|
72
|
-
|
73
|
-
|
74
|
-
self.n_features_in_ = X.shape[1]
|
75
|
-
|
76
|
-
# Set the fitted attribute to True
|
77
|
-
self._is_fitted = True
|
57
|
+
X = validate_data(
|
58
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
59
|
+
)
|
78
60
|
|
79
61
|
return self
|
80
62
|
|
@@ -99,17 +81,26 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
99
81
|
The transformed data.
|
100
82
|
"""
|
101
83
|
# Check that the estimator is fitted
|
102
|
-
check_is_fitted(self, "
|
84
|
+
check_is_fitted(self, "n_features_in_")
|
103
85
|
|
104
86
|
# Check that X is a 2D array and has only finite values
|
105
|
-
|
106
|
-
|
87
|
+
X_ = validate_data(
|
88
|
+
self,
|
89
|
+
X,
|
90
|
+
y="no_validation",
|
91
|
+
ensure_2d=True,
|
92
|
+
copy=True,
|
93
|
+
reset=False,
|
94
|
+
dtype=np.float64,
|
95
|
+
)
|
107
96
|
|
108
97
|
# Check that the number of features is the same as the fitted data
|
109
98
|
if X_.shape[1] != self.n_features_in_:
|
110
|
-
raise ValueError(
|
99
|
+
raise ValueError(
|
100
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
101
|
+
)
|
111
102
|
|
112
103
|
# Calculate non-negative values
|
113
104
|
for i, x in enumerate(X_):
|
114
105
|
X_[i, :] = self._drift_correct_spectrum(x)
|
115
|
-
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
106
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
@@ -1,11 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class NonNegative(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that sets all negative values to zero or to abs.
|
11
9
|
|
@@ -14,14 +12,6 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
14
12
|
mode : str, optional
|
15
13
|
The mode to use for the non-negative values. Can be "zero" or "abs".
|
16
14
|
|
17
|
-
Attributes
|
18
|
-
----------
|
19
|
-
n_features_in_ : int
|
20
|
-
The number of features in the input data.
|
21
|
-
|
22
|
-
_is_fitted : bool
|
23
|
-
Whether the transformer has been fitted to data.
|
24
|
-
|
25
15
|
Methods
|
26
16
|
-------
|
27
17
|
fit(X, y=None)
|
@@ -52,14 +42,9 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
52
42
|
The fitted transformer.
|
53
43
|
"""
|
54
44
|
# Check that X is a 2D array and has only finite values
|
55
|
-
X =
|
56
|
-
|
57
|
-
|
58
|
-
self.n_features_in_ = X.shape[1]
|
59
|
-
|
60
|
-
# Set the fitted attribute to True
|
61
|
-
self._is_fitted = True
|
62
|
-
|
45
|
+
X = validate_data(
|
46
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
47
|
+
)
|
63
48
|
return self
|
64
49
|
|
65
50
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -80,11 +65,18 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
80
65
|
The transformed data.
|
81
66
|
"""
|
82
67
|
# Check that the estimator is fitted
|
83
|
-
check_is_fitted(self, "
|
68
|
+
check_is_fitted(self, "n_features_in_")
|
84
69
|
|
85
70
|
# Check that X is a 2D array and has only finite values
|
86
|
-
|
87
|
-
|
71
|
+
X_ = validate_data(
|
72
|
+
self,
|
73
|
+
X,
|
74
|
+
y="no_validation",
|
75
|
+
ensure_2d=True,
|
76
|
+
copy=True,
|
77
|
+
reset=False,
|
78
|
+
dtype=np.float64,
|
79
|
+
)
|
88
80
|
|
89
81
|
# Check that the number of features is the same as the fitted data
|
90
82
|
if X_.shape[1] != self.n_features_in_:
|