chemotools 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +10 -0
- chemotools/augmentation/baseline_shift.py +23 -15
- chemotools/augmentation/exponential_noise.py +24 -15
- chemotools/augmentation/index_shift.py +104 -16
- chemotools/augmentation/normal_noise.py +24 -14
- chemotools/augmentation/spectrum_scale.py +24 -15
- chemotools/augmentation/uniform_noise.py +26 -14
- chemotools/baseline/__init__.py +13 -1
- chemotools/baseline/_air_pls.py +16 -14
- chemotools/baseline/_ar_pls.py +17 -17
- chemotools/baseline/_constant_baseline_correction.py +19 -16
- chemotools/baseline/_cubic_spline_correction.py +17 -8
- chemotools/baseline/_linear_correction.py +18 -10
- chemotools/baseline/_non_negative.py +14 -8
- chemotools/baseline/_polynomial_correction.py +19 -11
- chemotools/baseline/_subtract_reference.py +17 -9
- chemotools/datasets/__init__.py +2 -0
- chemotools/datasets/_base.py +3 -3
- chemotools/derivative/__init__.py +3 -1
- chemotools/derivative/_norris_william.py +14 -8
- chemotools/derivative/_savitzky_golay.py +25 -21
- chemotools/feature_selection/__init__.py +2 -0
- chemotools/feature_selection/_index_selector.py +18 -17
- chemotools/feature_selection/_range_cut.py +9 -7
- chemotools/scale/__init__.py +2 -0
- chemotools/scale/_min_max_scaler.py +14 -8
- chemotools/scale/_norm_scaler.py +14 -8
- chemotools/scale/_point_scaler.py +18 -10
- chemotools/scatter/__init__.py +11 -2
- chemotools/scatter/_extended_multiplicative_scatter_correction.py +33 -29
- chemotools/scatter/_multiplicative_scatter_correction.py +33 -18
- chemotools/scatter/_robust_normal_variate.py +14 -8
- chemotools/scatter/_standard_normal_variate.py +14 -8
- chemotools/smooth/__init__.py +3 -1
- chemotools/smooth/_mean_filter.py +14 -8
- chemotools/smooth/_median_filter.py +31 -9
- chemotools/smooth/_savitzky_golay_filter.py +20 -9
- chemotools/smooth/_whittaker_smooth.py +20 -11
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/METADATA +18 -17
- chemotools-0.1.7.dist-info/RECORD +51 -0
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/WHEEL +1 -2
- chemotools/utils/check_inputs.py +0 -14
- chemotools-0.1.5.dist-info/RECORD +0 -58
- chemotools-0.1.5.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/fixtures.py +0 -89
- tests/test_datasets.py +0 -111
- tests/test_functionality.py +0 -777
- tests/test_sklearn_compliance.py +0 -277
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/LICENSE +0 -0
@@ -1,13 +1,13 @@
|
|
1
|
+
from typing import Optional, Union
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator
|
3
5
|
from sklearn.feature_selection._base import SelectorMixin
|
4
6
|
|
5
|
-
from sklearn.utils.validation import check_is_fitted
|
6
|
-
|
7
|
-
from chemotools.utils.check_inputs import check_input
|
7
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
8
8
|
|
9
9
|
|
10
|
-
class IndexSelector(
|
10
|
+
class IndexSelector(SelectorMixin, BaseEstimator):
|
11
11
|
"""
|
12
12
|
A transformer that Selects the spectral data to a specified array of features. This
|
13
13
|
array can be continuous or discontinuous. The array of features is specified by:
|
@@ -42,8 +42,8 @@ class IndexSelector(BaseEstimator, SelectorMixin):
|
|
42
42
|
|
43
43
|
def __init__(
|
44
44
|
self,
|
45
|
-
features: np.ndarray = None,
|
46
|
-
wavenumbers: np.ndarray = None,
|
45
|
+
features: Optional[np.ndarray] = None,
|
46
|
+
wavenumbers: Optional[np.ndarray] = None,
|
47
47
|
):
|
48
48
|
self.features = features
|
49
49
|
self.wavenumbers = wavenumbers
|
@@ -66,8 +66,9 @@ class IndexSelector(BaseEstimator, SelectorMixin):
|
|
66
66
|
The fitted transformer.
|
67
67
|
"""
|
68
68
|
# validate that X is a 2D array and has only finite values
|
69
|
-
X =
|
70
|
-
|
69
|
+
X = validate_data(
|
70
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
71
|
+
)
|
71
72
|
# Set the fitted attribute to True
|
72
73
|
self._is_fitted = True
|
73
74
|
|
@@ -76,13 +77,13 @@ class IndexSelector(BaseEstimator, SelectorMixin):
|
|
76
77
|
self.features_index_ = self.features
|
77
78
|
return self
|
78
79
|
|
79
|
-
|
80
|
+
elif self.wavenumbers is None:
|
80
81
|
self.features_index_ = self.features
|
81
82
|
return self
|
82
83
|
|
83
|
-
|
84
|
-
|
85
|
-
|
84
|
+
else:
|
85
|
+
self.features_index_ = self._find_indices(self.features)
|
86
|
+
return self
|
86
87
|
|
87
88
|
def _get_support_mask(self):
|
88
89
|
"""
|
@@ -102,11 +103,11 @@ class IndexSelector(BaseEstimator, SelectorMixin):
|
|
102
103
|
|
103
104
|
return mask
|
104
105
|
|
105
|
-
def _find_index(self, target: float) -> int:
|
106
|
+
def _find_index(self, target: Union[float, int]) -> int:
|
106
107
|
if self.wavenumbers is None:
|
107
|
-
return target
|
108
|
+
return int(target)
|
108
109
|
wavenumbers = np.array(self.wavenumbers)
|
109
|
-
return np.argmin(np.abs(wavenumbers - target))
|
110
|
+
return int(np.argmin(np.abs(wavenumbers - target)))
|
110
111
|
|
111
|
-
def _find_indices(self) -> np.ndarray:
|
112
|
-
return np.array([self._find_index(feature) for feature in
|
112
|
+
def _find_indices(self, features: np.ndarray) -> np.ndarray:
|
113
|
+
return np.array([self._find_index(feature) for feature in features])
|
@@ -1,10 +1,12 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator
|
3
5
|
from sklearn.feature_selection._base import SelectorMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
5
7
|
|
6
8
|
|
7
|
-
class RangeCut(
|
9
|
+
class RangeCut(SelectorMixin, BaseEstimator):
|
8
10
|
"""
|
9
11
|
A selector that cuts the input data to a specified range. The range is specified:
|
10
12
|
- by the indices of the start and end of the range,
|
@@ -47,7 +49,7 @@ class RangeCut(BaseEstimator, SelectorMixin):
|
|
47
49
|
self,
|
48
50
|
start: int = 0,
|
49
51
|
end: int = -1,
|
50
|
-
wavenumbers: np.ndarray = None,
|
52
|
+
wavenumbers: Optional[np.ndarray] = None,
|
51
53
|
):
|
52
54
|
self.start = start
|
53
55
|
self.end = end
|
@@ -71,8 +73,9 @@ class RangeCut(BaseEstimator, SelectorMixin):
|
|
71
73
|
The fitted transformer.
|
72
74
|
"""
|
73
75
|
# Check that X is a 2D array and has only finite values
|
74
|
-
X =
|
75
|
-
|
76
|
+
X = validate_data(
|
77
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
78
|
+
)
|
76
79
|
# Set the start and end indices
|
77
80
|
if self.wavenumbers is None:
|
78
81
|
self.start_index_ = self.start
|
@@ -84,7 +87,6 @@ class RangeCut(BaseEstimator, SelectorMixin):
|
|
84
87
|
self.wavenumbers_ = self.wavenumbers[self.start_index_ : self.end_index_]
|
85
88
|
|
86
89
|
return self
|
87
|
-
|
88
90
|
|
89
91
|
def _get_support_mask(self):
|
90
92
|
"""
|
@@ -106,4 +108,4 @@ class RangeCut(BaseEstimator, SelectorMixin):
|
|
106
108
|
|
107
109
|
def _find_index(self, target: float) -> int:
|
108
110
|
wavenumbers = np.array(self.wavenumbers)
|
109
|
-
return np.argmin(np.abs(wavenumbers - target))
|
111
|
+
return int(np.argmin(np.abs(wavenumbers - target)))
|
chemotools/scale/__init__.py
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class MinMaxScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that scales the input data by subtracting the minimum and dividing by
|
11
9
|
the difference between the maximum and the minimum. When the use_min parameter is False,
|
@@ -47,8 +45,9 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
47
45
|
The fitted transformer.
|
48
46
|
"""
|
49
47
|
# Check that X is a 2D array and has only finite values
|
50
|
-
X =
|
51
|
-
|
48
|
+
X = validate_data(
|
49
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
50
|
+
)
|
52
51
|
return self
|
53
52
|
|
54
53
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -72,8 +71,15 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
72
71
|
check_is_fitted(self, "n_features_in_")
|
73
72
|
|
74
73
|
# Check that X is a 2D array and has only finite values
|
75
|
-
|
76
|
-
|
74
|
+
X_ = validate_data(
|
75
|
+
self,
|
76
|
+
X,
|
77
|
+
y="no_validation",
|
78
|
+
ensure_2d=True,
|
79
|
+
copy=True,
|
80
|
+
reset=False,
|
81
|
+
dtype=np.float64,
|
82
|
+
)
|
77
83
|
|
78
84
|
# Check that the number of features is the same as the fitted data
|
79
85
|
if X_.shape[1] != self.n_features_in_:
|
chemotools/scale/_norm_scaler.py
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class NormScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that scales the input data by the L-norm of the spectrum.
|
11
9
|
|
@@ -44,8 +42,9 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
44
42
|
The fitted transformer.
|
45
43
|
"""
|
46
44
|
# Check that X is a 2D array and has only finite values
|
47
|
-
X =
|
48
|
-
|
45
|
+
X = validate_data(
|
46
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
47
|
+
)
|
49
48
|
return self
|
50
49
|
|
51
50
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -69,8 +68,15 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
69
68
|
check_is_fitted(self, "n_features_in_")
|
70
69
|
|
71
70
|
# Check that X is a 2D array and has only finite values
|
72
|
-
|
73
|
-
|
71
|
+
X_ = validate_data(
|
72
|
+
self,
|
73
|
+
X,
|
74
|
+
y="no_validation",
|
75
|
+
ensure_2d=True,
|
76
|
+
copy=True,
|
77
|
+
reset=False,
|
78
|
+
dtype=np.float64,
|
79
|
+
)
|
74
80
|
|
75
81
|
# Check that the number of features is the same as the fitted data
|
76
82
|
if X_.shape[1] != self.n_features_in_:
|
@@ -1,11 +1,11 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
6
|
|
7
7
|
|
8
|
-
class PointScaler(OneToOneFeatureMixin, BaseEstimator
|
8
|
+
class PointScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
9
|
"""
|
10
10
|
A transformer that scales the input data by the intensity value at a given point.
|
11
11
|
The point can be specified by an index or by a wavenumber.
|
@@ -34,7 +34,7 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
34
34
|
Transform the input data by scaling by the value at a given Point.
|
35
35
|
"""
|
36
36
|
|
37
|
-
def __init__(self, point: int = 0, wavenumbers: np.ndarray = None):
|
37
|
+
def __init__(self, point: int = 0, wavenumbers: Optional[np.ndarray] = None):
|
38
38
|
self.point = point
|
39
39
|
self.wavenumbers = wavenumbers
|
40
40
|
|
@@ -56,8 +56,9 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
56
56
|
The fitted transformer.
|
57
57
|
"""
|
58
58
|
# Check that X is a 2D array and has only finite values
|
59
|
-
X =
|
60
|
-
|
59
|
+
X = validate_data(
|
60
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
61
|
+
)
|
61
62
|
# Set the point index
|
62
63
|
if self.wavenumbers is None:
|
63
64
|
self.point_index_ = self.point
|
@@ -87,8 +88,15 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
87
88
|
check_is_fitted(self, "point_index_")
|
88
89
|
|
89
90
|
# Check that X is a 2D array and has only finite values
|
90
|
-
|
91
|
-
|
91
|
+
X_ = validate_data(
|
92
|
+
self,
|
93
|
+
X,
|
94
|
+
y="no_validation",
|
95
|
+
ensure_2d=True,
|
96
|
+
copy=True,
|
97
|
+
reset=False,
|
98
|
+
dtype=np.float64,
|
99
|
+
)
|
92
100
|
|
93
101
|
# Check that the number of features is the same as the fitted data
|
94
102
|
if X_.shape[1] != self.n_features_in_:
|
@@ -104,4 +112,4 @@ class PointScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
104
112
|
|
105
113
|
def _find_index(self, target: float) -> int:
|
106
114
|
wavenumbers = np.array(self.wavenumbers)
|
107
|
-
return np.argmin(np.abs(wavenumbers - target))
|
115
|
+
return int(np.argmin(np.abs(wavenumbers - target)))
|
chemotools/scatter/__init__.py
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
-
from ._extended_multiplicative_scatter_correction import
|
1
|
+
from ._extended_multiplicative_scatter_correction import (
|
2
|
+
ExtendedMultiplicativeScatterCorrection,
|
3
|
+
)
|
2
4
|
from ._multiplicative_scatter_correction import MultiplicativeScatterCorrection
|
3
5
|
from ._robust_normal_variate import RobustNormalVariate
|
4
|
-
from ._standard_normal_variate import StandardNormalVariate
|
6
|
+
from ._standard_normal_variate import StandardNormalVariate
|
7
|
+
|
8
|
+
__all__ = [
|
9
|
+
"ExtendedMultiplicativeScatterCorrection",
|
10
|
+
"MultiplicativeScatterCorrection",
|
11
|
+
"RobustNormalVariate",
|
12
|
+
"StandardNormalVariate",
|
13
|
+
]
|
@@ -1,13 +1,12 @@
|
|
1
|
+
from typing import Literal, Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
5
|
-
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
7
6
|
|
8
7
|
|
9
8
|
class ExtendedMultiplicativeScatterCorrection(
|
10
|
-
OneToOneFeatureMixin, BaseEstimator
|
9
|
+
TransformerMixin, OneToOneFeatureMixin, BaseEstimator
|
11
10
|
):
|
12
11
|
"""Extended multiplicative scatter correction (EMSC) is a preprocessing technique for
|
13
12
|
removing non linear scatter effects from spectra. It is based on fitting a polynomial
|
@@ -47,18 +46,20 @@ class ExtendedMultiplicativeScatterCorrection(
|
|
47
46
|
model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
|
48
47
|
"""
|
49
48
|
|
49
|
+
ALLOWED_METHODS = ["mean", "median"]
|
50
|
+
|
51
|
+
# TODO: Validate method at instantiation. Right now it is checked on fit, because checking it at instantiation breaks scikit-learn's check_estimator().
|
52
|
+
|
50
53
|
def __init__(
|
51
54
|
self,
|
52
|
-
|
53
|
-
use_mean: bool = True,
|
54
|
-
use_median: bool = False,
|
55
|
+
method: Literal["mean", "median"] = "mean",
|
55
56
|
order: int = 2,
|
56
|
-
|
57
|
+
reference: Optional[np.ndarray] = None,
|
58
|
+
weights: Optional[np.ndarray] = None,
|
57
59
|
):
|
58
|
-
self.
|
59
|
-
self.use_mean = use_mean
|
60
|
-
self.use_median = use_median
|
60
|
+
self.method = method
|
61
61
|
self.order = order
|
62
|
+
self.reference = reference
|
62
63
|
self.weights = weights
|
63
64
|
|
64
65
|
def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
|
@@ -80,7 +81,9 @@ class ExtendedMultiplicativeScatterCorrection(
|
|
80
81
|
The fitted transformer.
|
81
82
|
"""
|
82
83
|
# Check that X is a 2D array and has only finite values
|
83
|
-
X =
|
84
|
+
X = validate_data(
|
85
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
86
|
+
)
|
84
87
|
|
85
88
|
# Check that the length of the reference is the same as the number of features
|
86
89
|
if self.reference is not None:
|
@@ -103,21 +106,24 @@ class ExtendedMultiplicativeScatterCorrection(
|
|
103
106
|
self.weights_ = np.array(self.weights)
|
104
107
|
return self
|
105
108
|
|
106
|
-
if self.
|
107
|
-
self.reference_ =
|
109
|
+
if self.method == "mean":
|
110
|
+
self.reference_ = X.mean(axis=0)
|
108
111
|
self.indices_ = self._calculate_indices(X[0])
|
109
112
|
self.A_ = self._calculate_A(self.indices_, self.reference_)
|
110
113
|
self.weights_ = np.array(self.weights)
|
111
114
|
return self
|
112
115
|
|
113
|
-
|
114
|
-
self.reference_ =
|
116
|
+
elif self.method == "median":
|
117
|
+
self.reference_ = np.median(X, axis=0)
|
115
118
|
self.indices_ = self._calculate_indices(X[0])
|
116
119
|
self.A_ = self._calculate_A(self.indices_, self.reference_)
|
117
120
|
self.weights_ = np.array(self.weights)
|
118
121
|
return self
|
119
122
|
|
120
|
-
|
123
|
+
else:
|
124
|
+
raise ValueError(
|
125
|
+
f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
|
126
|
+
)
|
121
127
|
|
122
128
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
123
129
|
"""
|
@@ -141,17 +147,15 @@ class ExtendedMultiplicativeScatterCorrection(
|
|
141
147
|
check_is_fitted(self, "n_features_in_")
|
142
148
|
|
143
149
|
# Check that X is a 2D array and has only finite values
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
# Calculate the extended multiplicative scatter correction
|
154
|
-
X_ = X.copy()
|
150
|
+
X_ = validate_data(
|
151
|
+
self,
|
152
|
+
X,
|
153
|
+
y="no_validation",
|
154
|
+
ensure_2d=True,
|
155
|
+
copy=True,
|
156
|
+
reset=False,
|
157
|
+
dtype=np.float64,
|
158
|
+
)
|
155
159
|
|
156
160
|
if self.weights is None:
|
157
161
|
for i, x in enumerate(X_):
|
@@ -1,12 +1,12 @@
|
|
1
|
+
from typing import Literal, Optional
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
6
|
|
7
7
|
|
8
8
|
class MultiplicativeScatterCorrection(
|
9
|
-
OneToOneFeatureMixin, BaseEstimator
|
9
|
+
TransformerMixin, OneToOneFeatureMixin, BaseEstimator
|
10
10
|
):
|
11
11
|
"""Multiplicative scatter correction (MSC) is a preprocessing technique for
|
12
12
|
removing scatter effects from spectra. It is based on fitting a linear
|
@@ -37,16 +37,18 @@ class MultiplicativeScatterCorrection(
|
|
37
37
|
|
38
38
|
"""
|
39
39
|
|
40
|
+
ALLOWED_METHODS = ["mean", "median"]
|
41
|
+
|
42
|
+
# TODO: Validate method at instantiation. Right now it is checked on fit, because checking it at instantiation breaks scikit-learn's check_estimator().
|
43
|
+
|
40
44
|
def __init__(
|
41
45
|
self,
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
weights: np.ndarray = None,
|
46
|
+
method: Literal["mean", "median"] = "mean",
|
47
|
+
reference: Optional[np.ndarray] = None,
|
48
|
+
weights: Optional[np.ndarray] = None,
|
46
49
|
):
|
50
|
+
self.method = method
|
47
51
|
self.reference = reference
|
48
|
-
self.use_mean = use_mean
|
49
|
-
self.use_median = use_median
|
50
52
|
self.weights = weights
|
51
53
|
|
52
54
|
def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
|
@@ -68,8 +70,9 @@ class MultiplicativeScatterCorrection(
|
|
68
70
|
The fitted transformer.
|
69
71
|
"""
|
70
72
|
# Check that X is a 2D array and has only finite values
|
71
|
-
X =
|
72
|
-
|
73
|
+
X = validate_data(
|
74
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
75
|
+
)
|
73
76
|
# Check that the length of the reference is the same as the number of features
|
74
77
|
if self.reference is not None:
|
75
78
|
if len(self.reference) != self.n_features_in_:
|
@@ -90,18 +93,23 @@ class MultiplicativeScatterCorrection(
|
|
90
93
|
self.weights_ = np.array(self.weights)
|
91
94
|
return self
|
92
95
|
|
93
|
-
if self.
|
94
|
-
self.reference_ =
|
96
|
+
if self.method == "mean":
|
97
|
+
self.reference_ = X.mean(axis=0)
|
95
98
|
self.A_ = self._calculate_A(self.reference_)
|
96
99
|
self.weights_ = np.array(self.weights)
|
97
100
|
return self
|
98
101
|
|
99
|
-
|
100
|
-
self.reference_ =
|
102
|
+
elif self.method == "median":
|
103
|
+
self.reference_ = np.median(X, axis=0)
|
101
104
|
self.A_ = self._calculate_A(self.reference_)
|
102
105
|
self.weights_ = np.array(self.weights)
|
103
106
|
return self
|
104
107
|
|
108
|
+
else:
|
109
|
+
raise ValueError(
|
110
|
+
f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
|
111
|
+
)
|
112
|
+
|
105
113
|
raise ValueError("No reference was provided")
|
106
114
|
|
107
115
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -126,8 +134,15 @@ class MultiplicativeScatterCorrection(
|
|
126
134
|
check_is_fitted(self, "n_features_in_")
|
127
135
|
|
128
136
|
# Check that X is a 2D array and has only finite values
|
129
|
-
|
130
|
-
|
137
|
+
X_ = validate_data(
|
138
|
+
self,
|
139
|
+
X,
|
140
|
+
y="no_validation",
|
141
|
+
ensure_2d=True,
|
142
|
+
copy=True,
|
143
|
+
reset=False,
|
144
|
+
dtype=np.float64,
|
145
|
+
)
|
131
146
|
|
132
147
|
# Check that the number of features is the same as the fitted data
|
133
148
|
if X_.shape[1] != self.n_features_in_:
|
@@ -1,11 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class RobustNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class RobustNormalVariate(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that calculates the robust normal variate of the input data.
|
11
9
|
|
@@ -50,8 +48,9 @@ class RobustNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
|
|
50
48
|
The fitted transformer.
|
51
49
|
"""
|
52
50
|
# Check that X is a 2D array and has only finite values
|
53
|
-
X =
|
54
|
-
|
51
|
+
X = validate_data(
|
52
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
53
|
+
)
|
55
54
|
return self
|
56
55
|
|
57
56
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -75,8 +74,15 @@ class RobustNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
|
|
75
74
|
check_is_fitted(self, "n_features_in_")
|
76
75
|
|
77
76
|
# Check that X is a 2D array and has only finite values
|
78
|
-
|
79
|
-
|
77
|
+
X_ = validate_data(
|
78
|
+
self,
|
79
|
+
X,
|
80
|
+
y="no_validation",
|
81
|
+
ensure_2d=True,
|
82
|
+
copy=True,
|
83
|
+
reset=False,
|
84
|
+
dtype=np.float64,
|
85
|
+
)
|
80
86
|
|
81
87
|
# Check that the number of features is the same as the fitted data
|
82
88
|
if X_.shape[1] != self.n_features_in_:
|
@@ -1,11 +1,9 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class StandardNormalVariate(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that calculates the standard normal variate of the input data.
|
11
9
|
|
@@ -36,8 +34,9 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
36
34
|
The fitted transformer.
|
37
35
|
"""
|
38
36
|
# Check that X is a 2D array and has only finite values
|
39
|
-
X =
|
40
|
-
|
37
|
+
X = validate_data(
|
38
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
39
|
+
)
|
41
40
|
return self
|
42
41
|
|
43
42
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -61,8 +60,15 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
|
|
61
60
|
check_is_fitted(self, "n_features_in_")
|
62
61
|
|
63
62
|
# Check that X is a 2D array and has only finite values
|
64
|
-
|
65
|
-
|
63
|
+
X_ = validate_data(
|
64
|
+
self,
|
65
|
+
X,
|
66
|
+
y="no_validation",
|
67
|
+
ensure_2d=True,
|
68
|
+
copy=True,
|
69
|
+
reset=False,
|
70
|
+
dtype=np.float64,
|
71
|
+
)
|
66
72
|
|
67
73
|
# Check that the number of features is the same as the fitted data
|
68
74
|
if X_.shape[1] != self.n_features_in_:
|
chemotools/smooth/__init__.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
from ._mean_filter import MeanFilter
|
2
2
|
from ._median_filter import MedianFilter
|
3
3
|
from ._savitzky_golay_filter import SavitzkyGolayFilter
|
4
|
-
from ._whittaker_smooth import WhittakerSmooth
|
4
|
+
from ._whittaker_smooth import WhittakerSmooth
|
5
|
+
|
6
|
+
__all__ = ["MeanFilter", "MedianFilter", "SavitzkyGolayFilter", "WhittakerSmooth"]
|
@@ -1,12 +1,10 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from scipy.ndimage import uniform_filter1d
|
3
3
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
5
5
|
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
7
6
|
|
8
|
-
|
9
|
-
class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
7
|
+
class MeanFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
10
8
|
"""
|
11
9
|
A transformer that calculates the mean filter of the input data.
|
12
10
|
|
@@ -50,8 +48,9 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
50
48
|
The fitted transformer.
|
51
49
|
"""
|
52
50
|
# Check that X is a 2D array and has only finite values
|
53
|
-
X =
|
54
|
-
|
51
|
+
X = validate_data(
|
52
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
53
|
+
)
|
55
54
|
return self
|
56
55
|
|
57
56
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -75,8 +74,15 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
75
74
|
check_is_fitted(self, "n_features_in_")
|
76
75
|
|
77
76
|
# Check that X is a 2D array and has only finite values
|
78
|
-
|
79
|
-
|
77
|
+
X_ = validate_data(
|
78
|
+
self,
|
79
|
+
X,
|
80
|
+
y="no_validation",
|
81
|
+
ensure_2d=True,
|
82
|
+
copy=True,
|
83
|
+
reset=False,
|
84
|
+
dtype=np.float64,
|
85
|
+
)
|
80
86
|
|
81
87
|
if X_.shape[1] != self.n_features_in_:
|
82
88
|
raise ValueError(
|