chemotools 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +10 -0
- chemotools/augmentation/baseline_shift.py +23 -15
- chemotools/augmentation/exponential_noise.py +24 -15
- chemotools/augmentation/index_shift.py +104 -16
- chemotools/augmentation/normal_noise.py +24 -14
- chemotools/augmentation/spectrum_scale.py +24 -15
- chemotools/augmentation/uniform_noise.py +26 -14
- chemotools/baseline/__init__.py +13 -1
- chemotools/baseline/_air_pls.py +16 -14
- chemotools/baseline/_ar_pls.py +17 -17
- chemotools/baseline/_constant_baseline_correction.py +19 -16
- chemotools/baseline/_cubic_spline_correction.py +17 -8
- chemotools/baseline/_linear_correction.py +18 -10
- chemotools/baseline/_non_negative.py +14 -8
- chemotools/baseline/_polynomial_correction.py +19 -11
- chemotools/baseline/_subtract_reference.py +17 -9
- chemotools/datasets/__init__.py +2 -0
- chemotools/datasets/_base.py +3 -3
- chemotools/derivative/__init__.py +3 -1
- chemotools/derivative/_norris_william.py +14 -8
- chemotools/derivative/_savitzky_golay.py +25 -21
- chemotools/feature_selection/__init__.py +2 -0
- chemotools/feature_selection/_index_selector.py +18 -17
- chemotools/feature_selection/_range_cut.py +9 -7
- chemotools/scale/__init__.py +2 -0
- chemotools/scale/_min_max_scaler.py +14 -8
- chemotools/scale/_norm_scaler.py +14 -8
- chemotools/scale/_point_scaler.py +18 -10
- chemotools/scatter/__init__.py +11 -2
- chemotools/scatter/_extended_multiplicative_scatter_correction.py +33 -29
- chemotools/scatter/_multiplicative_scatter_correction.py +33 -18
- chemotools/scatter/_robust_normal_variate.py +14 -8
- chemotools/scatter/_standard_normal_variate.py +14 -8
- chemotools/smooth/__init__.py +3 -1
- chemotools/smooth/_mean_filter.py +14 -8
- chemotools/smooth/_median_filter.py +31 -9
- chemotools/smooth/_savitzky_golay_filter.py +20 -9
- chemotools/smooth/_whittaker_smooth.py +20 -11
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/METADATA +18 -17
- chemotools-0.1.7.dist-info/RECORD +51 -0
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/WHEEL +1 -2
- chemotools/utils/check_inputs.py +0 -14
- chemotools-0.1.5.dist-info/RECORD +0 -58
- chemotools-0.1.5.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/fixtures.py +0 -89
- tests/test_datasets.py +0 -111
- tests/test_functionality.py +0 -777
- tests/test_sklearn_compliance.py +0 -277
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/LICENSE +0 -0
chemotools/baseline/_air_pls.py
CHANGED
@@ -3,14 +3,12 @@ import numpy as np
|
|
3
3
|
from scipy.sparse import csc_matrix, eye, diags
|
4
4
|
from scipy.sparse.linalg import spsolve
|
5
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
6
|
-
from sklearn.utils.validation import check_is_fitted
|
7
|
-
|
8
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
9
7
|
|
10
8
|
logger = logging.getLogger(__name__)
|
11
9
|
|
12
10
|
|
13
|
-
class AirPls(OneToOneFeatureMixin, BaseEstimator
|
11
|
+
class AirPls(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
14
12
|
"""
|
15
13
|
This class implements the AirPLS (Adaptive Iteratively Reweighted Penalized Least Squares) algorithm for baseline
|
16
14
|
correction of spectra data. AirPLS is a common approach for removing the baseline from spectra, which can be useful
|
@@ -40,7 +38,7 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
40
38
|
|
41
39
|
_calculate_whittaker_smooth(x, w)
|
42
40
|
Calculate the Whittaker smooth of a given input vector x, with weights w.
|
43
|
-
|
41
|
+
|
44
42
|
_calculate_air_pls(x)
|
45
43
|
Calculate the AirPLS baseline of a given input vector x.
|
46
44
|
|
@@ -76,8 +74,11 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
76
74
|
self : AirPls
|
77
75
|
Returns the instance itself.
|
78
76
|
"""
|
77
|
+
|
79
78
|
# Check that X is a 2D array and has only finite values
|
80
|
-
X =
|
79
|
+
X = validate_data(
|
80
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
81
|
+
)
|
81
82
|
|
82
83
|
return self
|
83
84
|
|
@@ -102,14 +103,15 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
102
103
|
check_is_fitted(self, "n_features_in_")
|
103
104
|
|
104
105
|
# Check that X is a 2D array and has only finite values
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
106
|
+
X_ = validate_data(
|
107
|
+
self,
|
108
|
+
X,
|
109
|
+
y="no_validation",
|
110
|
+
ensure_2d=True,
|
111
|
+
copy=True,
|
112
|
+
reset=False,
|
113
|
+
dtype=np.float64,
|
114
|
+
)
|
113
115
|
|
114
116
|
# Calculate the air pls smooth
|
115
117
|
for i, x in enumerate(X_):
|
chemotools/baseline/_ar_pls.py
CHANGED
@@ -5,14 +5,12 @@ from scipy.sparse import spdiags, csc_matrix
|
|
5
5
|
from scipy.sparse.linalg import splu
|
6
6
|
|
7
7
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
8
|
-
from sklearn.utils.validation import check_is_fitted
|
9
|
-
|
10
|
-
from chemotools.utils.check_inputs import check_input
|
8
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
11
9
|
|
12
10
|
logger = logging.getLogger(__name__)
|
13
11
|
|
14
12
|
|
15
|
-
class ArPls(OneToOneFeatureMixin, BaseEstimator
|
13
|
+
class ArPls(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
16
14
|
"""
|
17
15
|
This class implements the Asymmetrically Reweighted Penalized Least Squares (ArPls) algorithm, a baseline
|
18
16
|
correction method for spectroscopy data. It uses an iterative process
|
@@ -46,8 +44,8 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
46
44
|
|
47
45
|
References
|
48
46
|
----------
|
49
|
-
- Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo
|
50
|
-
Baseline correction using asymmetrically reweighted penalized
|
47
|
+
- Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo
|
48
|
+
Baseline correction using asymmetrically reweighted penalized
|
51
49
|
least squares smoothing
|
52
50
|
"""
|
53
51
|
|
@@ -79,7 +77,9 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
79
77
|
"""
|
80
78
|
|
81
79
|
# Check that X is a 2D array and has only finite values
|
82
|
-
X =
|
80
|
+
X = validate_data(
|
81
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
82
|
+
)
|
83
83
|
|
84
84
|
return self
|
85
85
|
|
@@ -104,14 +104,14 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
104
104
|
check_is_fitted(self, "n_features_in_")
|
105
105
|
|
106
106
|
# Check that X is a 2D array and has only finite values
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
107
|
+
X_ = validate_data(
|
108
|
+
self,
|
109
|
+
X,
|
110
|
+
y="no_validation",
|
111
|
+
ensure_2d=True,
|
112
|
+
copy=True,
|
113
|
+
reset=False,
|
114
|
+
)
|
115
115
|
|
116
116
|
# Calculate the ar pls baseline
|
117
117
|
for i, x in enumerate(X_):
|
@@ -120,9 +120,9 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
120
120
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
121
121
|
|
122
122
|
def _calculate_diff(self, N):
|
123
|
-
|
123
|
+
identity_matrix = sp.eye(N, format="csc")
|
124
124
|
D2 = sp.diags([1, -2, 1], [0, 1, 2], shape=(N - 2, N), format="csc")
|
125
|
-
return D2.dot(
|
125
|
+
return D2.dot(identity_matrix).T
|
126
126
|
|
127
127
|
def _calculate_ar_pls(self, x):
|
128
128
|
N = len(x)
|
@@ -1,11 +1,11 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
6
|
|
7
7
|
|
8
|
-
class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator
|
8
|
+
class ConstantBaselineCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
9
|
"""
|
10
10
|
A transformer that corrects a baseline by subtracting a constant value.
|
11
11
|
The constant value is taken by the mean of the features between the start
|
@@ -43,7 +43,7 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
43
43
|
self,
|
44
44
|
start: int = 0,
|
45
45
|
end: int = 1,
|
46
|
-
wavenumbers: np.ndarray = None,
|
46
|
+
wavenumbers: Optional[np.ndarray] = None,
|
47
47
|
) -> None:
|
48
48
|
self.start = start
|
49
49
|
self.end = end
|
@@ -67,7 +67,9 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
67
67
|
The fitted transformer.
|
68
68
|
"""
|
69
69
|
# Check that X is a 2D array and has only finite values
|
70
|
-
X =
|
70
|
+
X = validate_data(
|
71
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
72
|
+
)
|
71
73
|
|
72
74
|
# Set the start and end indices
|
73
75
|
if self.wavenumbers is None:
|
@@ -100,17 +102,18 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
100
102
|
The transformed input data.
|
101
103
|
"""
|
102
104
|
# Check that the estimator is fitted
|
103
|
-
check_is_fitted(self,
|
105
|
+
check_is_fitted(self, "n_features_in_")
|
104
106
|
|
105
107
|
# Check that X is a 2D array and has only finite values
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
108
|
+
X_ = validate_data(
|
109
|
+
self,
|
110
|
+
X,
|
111
|
+
y="no_validation",
|
112
|
+
ensure_2d=True,
|
113
|
+
copy=True,
|
114
|
+
reset=False,
|
115
|
+
dtype=np.float64,
|
116
|
+
)
|
114
117
|
|
115
118
|
# Base line correct the spectra
|
116
119
|
for i, x in enumerate(X_):
|
@@ -120,4 +123,4 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
|
|
120
123
|
|
121
124
|
def _find_index(self, target: float) -> int:
|
122
125
|
wavenumbers = np.array(self.wavenumbers)
|
123
|
-
return np.argmin(np.abs(wavenumbers - target))
|
126
|
+
return np.argmin(np.abs(wavenumbers - target)).astype(int)
|
@@ -1,12 +1,12 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from scipy.interpolate import CubicSpline
|
3
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
5
|
-
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
7
7
|
|
8
8
|
|
9
|
-
class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator
|
9
|
+
class CubicSplineCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
10
10
|
"""
|
11
11
|
A transformer that corrects a baseline by subtracting a cubic spline through the
|
12
12
|
points defined by the indices.
|
@@ -34,7 +34,7 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
34
34
|
|
35
35
|
"""
|
36
36
|
|
37
|
-
def __init__(self, indices: list = None) -> None:
|
37
|
+
def __init__(self, indices: Optional[list] = None) -> None:
|
38
38
|
self.indices = indices
|
39
39
|
|
40
40
|
def fit(self, X: np.ndarray, y=None) -> "CubicSplineCorrection":
|
@@ -55,7 +55,9 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
55
55
|
The fitted transformer.
|
56
56
|
"""
|
57
57
|
# Check that X is a 2D array and has only finite values
|
58
|
-
X =
|
58
|
+
X = validate_data(
|
59
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
60
|
+
)
|
59
61
|
|
60
62
|
if self.indices is None:
|
61
63
|
self.indices_ = [0, len(X[0]) - 1]
|
@@ -88,8 +90,15 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
88
90
|
check_is_fitted(self, "indices_")
|
89
91
|
|
90
92
|
# Check that X is a 2D array and has only finite values
|
91
|
-
|
92
|
-
|
93
|
+
X_ = validate_data(
|
94
|
+
self,
|
95
|
+
X,
|
96
|
+
y="no_validation",
|
97
|
+
ensure_2d=True,
|
98
|
+
copy=True,
|
99
|
+
reset=False,
|
100
|
+
dtype=np.float64,
|
101
|
+
)
|
93
102
|
|
94
103
|
# Check that the number of features is the same as the fitted data
|
95
104
|
if X_.shape[1] != self.n_features_in_:
|
@@ -1,11 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class LinearCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that corrects a baseline by subtracting a linear baseline through the
|
11
9
|
initial and final points of the spectrum.
|
@@ -20,7 +18,6 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
20
18
|
"""
|
21
19
|
|
22
20
|
def _drift_correct_spectrum(self, x: np.ndarray) -> np.ndarray:
|
23
|
-
|
24
21
|
# Can take any array and returns with a linear baseline correction
|
25
22
|
# Find the x values at the edges of the spectrum
|
26
23
|
y1: float = x[0]
|
@@ -57,7 +54,9 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
57
54
|
The fitted transformer.
|
58
55
|
"""
|
59
56
|
# Check that X is a 2D array and has only finite values
|
60
|
-
X =
|
57
|
+
X = validate_data(
|
58
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
59
|
+
)
|
61
60
|
|
62
61
|
return self
|
63
62
|
|
@@ -85,14 +84,23 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
85
84
|
check_is_fitted(self, "n_features_in_")
|
86
85
|
|
87
86
|
# Check that X is a 2D array and has only finite values
|
88
|
-
|
89
|
-
|
87
|
+
X_ = validate_data(
|
88
|
+
self,
|
89
|
+
X,
|
90
|
+
y="no_validation",
|
91
|
+
ensure_2d=True,
|
92
|
+
copy=True,
|
93
|
+
reset=False,
|
94
|
+
dtype=np.float64,
|
95
|
+
)
|
90
96
|
|
91
97
|
# Check that the number of features is the same as the fitted data
|
92
98
|
if X_.shape[1] != self.n_features_in_:
|
93
|
-
raise ValueError(
|
99
|
+
raise ValueError(
|
100
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
101
|
+
)
|
94
102
|
|
95
103
|
# Apply the linear baseline correction to each spectrum
|
96
104
|
for i, x in enumerate(X_):
|
97
105
|
X_[i, :] = self._drift_correct_spectrum(x)
|
98
|
-
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
106
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
@@ -1,11 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class NonNegative(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that sets all negative values to zero or to abs.
|
11
9
|
|
@@ -44,8 +42,9 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
44
42
|
The fitted transformer.
|
45
43
|
"""
|
46
44
|
# Check that X is a 2D array and has only finite values
|
47
|
-
X =
|
48
|
-
|
45
|
+
X = validate_data(
|
46
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
47
|
+
)
|
49
48
|
return self
|
50
49
|
|
51
50
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -69,8 +68,15 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
69
68
|
check_is_fitted(self, "n_features_in_")
|
70
69
|
|
71
70
|
# Check that X is a 2D array and has only finite values
|
72
|
-
|
73
|
-
|
71
|
+
X_ = validate_data(
|
72
|
+
self,
|
73
|
+
X,
|
74
|
+
y="no_validation",
|
75
|
+
ensure_2d=True,
|
76
|
+
copy=True,
|
77
|
+
reset=False,
|
78
|
+
dtype=np.float64,
|
79
|
+
)
|
74
80
|
|
75
81
|
# Check that the number of features is the same as the fitted data
|
76
82
|
if X_.shape[1] != self.n_features_in_:
|
@@ -1,11 +1,11 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
6
|
|
7
7
|
|
8
|
-
class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator
|
8
|
+
class PolynomialCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
9
|
"""
|
10
10
|
A transformer that subtracts a polynomial baseline from the input data. The polynomial is
|
11
11
|
fitted to the points in the spectrum specified by the indices parameter.
|
@@ -31,7 +31,7 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
|
|
31
31
|
Subtract the polynomial baseline from a single spectrum.
|
32
32
|
"""
|
33
33
|
|
34
|
-
def __init__(self, order: int = 1, indices: list = None) -> None:
|
34
|
+
def __init__(self, order: int = 1, indices: Optional[list] = None) -> None:
|
35
35
|
self.order = order
|
36
36
|
self.indices = indices
|
37
37
|
|
@@ -53,10 +53,11 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
|
|
53
53
|
The fitted transformer.
|
54
54
|
"""
|
55
55
|
# Check that X is a 2D array and has only finite values
|
56
|
-
X =
|
57
|
-
|
56
|
+
X = validate_data(
|
57
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
58
|
+
)
|
58
59
|
if self.indices is None:
|
59
|
-
self.indices_ = range(0, len(X[0]))
|
60
|
+
self.indices_ = list(range(0, len(X[0])))
|
60
61
|
else:
|
61
62
|
self.indices_ = self.indices
|
62
63
|
|
@@ -83,11 +84,18 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
|
|
83
84
|
The transformed data.
|
84
85
|
"""
|
85
86
|
# Check that the estimator is fitted
|
86
|
-
check_is_fitted(self, "
|
87
|
+
check_is_fitted(self, "n_features_in_")
|
87
88
|
|
88
89
|
# Check that X is a 2D array and has only finite values
|
89
|
-
|
90
|
-
|
90
|
+
X_ = validate_data(
|
91
|
+
self,
|
92
|
+
X,
|
93
|
+
y="no_validation",
|
94
|
+
ensure_2d=True,
|
95
|
+
copy=True,
|
96
|
+
reset=False,
|
97
|
+
dtype=np.float64,
|
98
|
+
)
|
91
99
|
|
92
100
|
# Check that the number of features is the same as the fitted data
|
93
101
|
if X_.shape[1] != self.n_features_in_:
|
@@ -1,11 +1,11 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
6
|
|
7
7
|
|
8
|
-
class SubtractReference(OneToOneFeatureMixin, BaseEstimator
|
8
|
+
class SubtractReference(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
9
|
"""
|
10
10
|
A transformer that subtracts a reference spectrum from the input data.
|
11
11
|
|
@@ -29,7 +29,7 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
29
29
|
|
30
30
|
def __init__(
|
31
31
|
self,
|
32
|
-
reference: np.ndarray = None,
|
32
|
+
reference: Optional[np.ndarray] = None,
|
33
33
|
):
|
34
34
|
self.reference = reference
|
35
35
|
|
@@ -51,8 +51,9 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
51
51
|
The fitted transformer.
|
52
52
|
"""
|
53
53
|
# Check that X is a 2D array and has only finite values
|
54
|
-
X =
|
55
|
-
|
54
|
+
X = validate_data(
|
55
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
56
|
+
)
|
56
57
|
# Set the reference
|
57
58
|
if self.reference is not None:
|
58
59
|
self.reference_ = self.reference.copy()
|
@@ -81,8 +82,15 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
81
82
|
check_is_fitted(self, "n_features_in_")
|
82
83
|
|
83
84
|
# Check that X is a 2D array and has only finite values
|
84
|
-
|
85
|
-
|
85
|
+
X_ = validate_data(
|
86
|
+
self,
|
87
|
+
X,
|
88
|
+
y="no_validation",
|
89
|
+
ensure_2d=True,
|
90
|
+
copy=True,
|
91
|
+
reset=False,
|
92
|
+
dtype=np.float64,
|
93
|
+
)
|
86
94
|
|
87
95
|
# Check that the number of features is the same as the fitted data
|
88
96
|
if X_.shape[1] != self.n_features_in_:
|
chemotools/datasets/__init__.py
CHANGED
chemotools/datasets/_base.py
CHANGED
@@ -110,13 +110,13 @@ def load_coffee(set_output="pandas"):
|
|
110
110
|
coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
|
111
111
|
coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
|
112
112
|
return coffee_spectra, coffee_labels
|
113
|
-
|
113
|
+
|
114
114
|
if set_output == "polars":
|
115
115
|
coffee_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
|
116
116
|
coffee_labels = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
|
117
117
|
return coffee_spectra, coffee_labels
|
118
|
-
|
118
|
+
|
119
119
|
else:
|
120
120
|
raise ValueError(
|
121
121
|
"Invalid value for set_output. Please use 'pandas' or 'polars'."
|
122
|
-
)
|
122
|
+
)
|
@@ -1,12 +1,10 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from scipy.ndimage import convolve1d
|
3
3
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
5
5
|
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
7
6
|
|
8
|
-
|
9
|
-
class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
7
|
+
class NorrisWilliams(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
10
8
|
"""
|
11
9
|
A transformer that calculates the Norris-Williams derivative of the input data.
|
12
10
|
|
@@ -64,8 +62,9 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
64
62
|
The fitted transformer.
|
65
63
|
"""
|
66
64
|
# Check that X is a 2D array and has only finite values
|
67
|
-
X =
|
68
|
-
|
65
|
+
X = validate_data(
|
66
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
67
|
+
)
|
69
68
|
return self
|
70
69
|
|
71
70
|
def transform(self, X: np.ndarray, y=None):
|
@@ -89,8 +88,15 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
89
88
|
check_is_fitted(self, "n_features_in_")
|
90
89
|
|
91
90
|
# Check that X is a 2D array and has only finite values
|
92
|
-
|
93
|
-
|
91
|
+
X_ = validate_data(
|
92
|
+
self,
|
93
|
+
X,
|
94
|
+
y="no_validation",
|
95
|
+
ensure_2d=True,
|
96
|
+
copy=True,
|
97
|
+
reset=False,
|
98
|
+
dtype=np.float64,
|
99
|
+
)
|
94
100
|
|
95
101
|
if X_.shape[1] != self.n_features_in_:
|
96
102
|
raise ValueError(
|
@@ -1,12 +1,12 @@
|
|
1
|
+
from typing import Literal
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from scipy.signal import savgol_filter
|
3
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
5
|
-
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
7
7
|
|
8
8
|
|
9
|
-
class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator
|
9
|
+
class SavitzkyGolay(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
10
10
|
"""
|
11
11
|
A transformer that calculates the Savitzky-Golay derivative of the input data.
|
12
12
|
|
@@ -41,7 +41,7 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
41
41
|
window_size: int = 3,
|
42
42
|
polynomial_order: int = 1,
|
43
43
|
derivate_order: int = 1,
|
44
|
-
mode:
|
44
|
+
mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
|
45
45
|
) -> None:
|
46
46
|
self.window_size = window_size
|
47
47
|
self.polynomial_order = polynomial_order
|
@@ -66,8 +66,9 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
66
66
|
The fitted transformer.
|
67
67
|
"""
|
68
68
|
# Check that X is a 2D array and has only finite values
|
69
|
-
X =
|
70
|
-
|
69
|
+
X = validate_data(
|
70
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
71
|
+
)
|
71
72
|
return self
|
72
73
|
|
73
74
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -91,26 +92,29 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
91
92
|
check_is_fitted(self, "n_features_in_")
|
92
93
|
|
93
94
|
# Check that X is a 2D array and has only finite values
|
94
|
-
|
95
|
-
|
95
|
+
X_ = validate_data(
|
96
|
+
self,
|
97
|
+
X,
|
98
|
+
y="no_validation",
|
99
|
+
ensure_2d=True,
|
100
|
+
copy=True,
|
101
|
+
reset=False,
|
102
|
+
dtype=np.float64,
|
103
|
+
)
|
96
104
|
|
97
105
|
if X_.shape[1] != self.n_features_in_:
|
98
106
|
raise ValueError(
|
99
107
|
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
100
108
|
)
|
101
|
-
|
102
109
|
# Calculate the Savitzky-Golay derivative of each spectrum
|
103
110
|
for i, x in enumerate(X_):
|
104
|
-
X_[i] =
|
111
|
+
X_[i] = savgol_filter(
|
112
|
+
x,
|
113
|
+
self.window_size,
|
114
|
+
self.polynomial_order,
|
115
|
+
deriv=self.derivate_order,
|
116
|
+
axis=0,
|
117
|
+
mode=self.mode,
|
118
|
+
)
|
105
119
|
|
106
120
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
107
|
-
|
108
|
-
def _calculate_derivative(self, x) -> np.ndarray:
|
109
|
-
return savgol_filter(
|
110
|
-
x,
|
111
|
-
self.window_size,
|
112
|
-
self.polynomial_order,
|
113
|
-
deriv=self.derivate_order,
|
114
|
-
axis=0,
|
115
|
-
mode=self.mode,
|
116
|
-
)
|