chemotools 0.0.27__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- chemotools/augmentation/__init__.py +16 -0
- chemotools/augmentation/baseline_shift.py +119 -0
- chemotools/augmentation/exponential_noise.py +117 -0
- chemotools/augmentation/index_shift.py +120 -0
- chemotools/augmentation/normal_noise.py +118 -0
- chemotools/augmentation/spectrum_scale.py +120 -0
- chemotools/augmentation/uniform_noise.py +124 -0
- chemotools/baseline/__init__.py +20 -8
- chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
- chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
- chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +22 -30
- chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
- chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
- chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
- chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
- chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
- chemotools/datasets/__init__.py +3 -0
- chemotools/datasets/_base.py +85 -15
- chemotools/datasets/data/coffee_labels.csv +61 -0
- chemotools/datasets/data/coffee_spectra.csv +61 -0
- chemotools/derivative/__init__.py +4 -2
- chemotools/derivative/{norris_william.py → _norris_william.py} +17 -24
- chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
- chemotools/feature_selection/__init__.py +4 -0
- chemotools/{variable_selection/select_features.py → feature_selection/_index_selector.py} +32 -56
- chemotools/{variable_selection/range_cut.py → feature_selection/_range_cut.py} +25 -50
- chemotools/scale/__init__.py +5 -3
- chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +20 -27
- chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
- chemotools/scale/{point_scaler.py → _point_scaler.py} +27 -32
- chemotools/scatter/__init__.py +13 -4
- chemotools/scatter/{extended_multiplicative_scatter_correction.py → _extended_multiplicative_scatter_correction.py} +19 -28
- chemotools/scatter/{multiplicative_scatter_correction.py → _multiplicative_scatter_correction.py} +19 -17
- chemotools/scatter/{robust_normal_variate.py → _robust_normal_variate.py} +15 -23
- chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
- chemotools/smooth/__init__.py +6 -4
- chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
- chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
- chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
- chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
- {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -16
- chemotools-0.1.6.dist-info/RECORD +51 -0
- {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
- chemotools/utils/check_inputs.py +0 -14
- chemotools/variable_selection/__init__.py +0 -2
- chemotools-0.0.27.dist-info/RECORD +0 -49
- chemotools-0.0.27.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/fixtures.py +0 -89
- tests/test_datasets.py +0 -30
- tests/test_functionality.py +0 -616
- tests/test_sklearn_compliance.py +0 -220
- {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
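The listing above renames `chemotools/variable_selection` to `chemotools/feature_selection` and moves every implementation module behind a leading underscore, with the public classes re-exported from the package `__init__` files. A minimal sketch of the import paths this implies (class names are taken from the diffs below; anything beyond them is an assumption):

```python
# Import paths implied by the renames above (chemotools 0.1.6); illustration only.
from chemotools.feature_selection import IndexSelector, RangeCut    # was chemotools.variable_selection
from chemotools.scale import MinMaxScaler, NormScaler, PointScaler  # private modules re-exported via __init__
from chemotools.derivative import NorrisWilliams, SavitzkyGolay     # norris_william.py -> _norris_william.py, etc.
```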
chemotools/derivative/{norris_william.py → _norris_william.py}
CHANGED
@@ -1,12 +1,10 @@
 import numpy as np
 from scipy.ndimage import convolve1d
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data

-from chemotools.utils.check_inputs import check_input

-
-class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class NorrisWilliams(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that calculates the Norris-Williams derivative of the input data.

@@ -22,17 +20,9 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         The order of the derivative to calculate. Can be 1 or 2. Default is 1.

     mode : str, optional
-        The mode to use for the derivative calculation. Can be "nearest", "constant",
+        The mode to use for the derivative calculation. Can be "nearest", "constant",
         "reflect", "wrap", "mirror" or "interp". Default is "nearest".

-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -41,6 +31,7 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     transform(X, y=0, copy=True)
         Transform the input data by calculating the Norris-Williams derivative.
     """
+
     def __init__(
         self,
         window_size: int = 5,
@@ -71,14 +62,9 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self

     def transform(self, X: np.ndarray, y=None):
@@ -99,11 +85,18 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")

         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )

         if X_.shape[1] != self.n_features_in_:
             raise ValueError(
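The hunks above drop the hand-rolled `check_input`/`_is_fitted` bookkeeping in favour of scikit-learn's `validate_data` and `check_is_fitted(self, "n_features_in_")`. A minimal usage sketch of the updated transformer (the spectra array is made up; only the `window_size` parameter shown in the diff is set):

```python
import numpy as np
from chemotools.derivative import NorrisWilliams

spectra = np.random.default_rng(0).random((10, 100))  # 10 dummy spectra, 100 points each
nw = NorrisWilliams(window_size=5)                    # remaining parameters keep their defaults
derivative = nw.fit(spectra).transform(spectra)       # fit() runs validate_data; transform() checks n_features_in_
```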
chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py}
CHANGED
@@ -1,12 +1,12 @@
+from typing import Literal
+
 import numpy as np
 from scipy.signal import savgol_filter
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
-
-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data


-class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class SavitzkyGolay(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that calculates the Savitzky-Golay derivative of the input data.

@@ -27,14 +27,6 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         The mode to use for the derivative calculation. Can be "nearest", "constant",
         "reflect", "wrap", "mirror" or "interp". Default is "nearest".

-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -49,7 +41,7 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         window_size: int = 3,
         polynomial_order: int = 1,
         derivate_order: int = 1,
-        mode:
+        mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
     ) -> None:
         self.window_size = window_size
         self.polynomial_order = polynomial_order
@@ -74,14 +66,9 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self

     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -102,29 +89,32 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")

         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )

         if X_.shape[1] != self.n_features_in_:
             raise ValueError(
                 f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
             )
-
         # Calculate the standard normal variate
         for i, x in enumerate(X_):
-            X_[i] = self._calculate_derivative(x)
+            X_[i] = savgol_filter(
+                x,
+                self.window_size,
+                self.polynomial_order,
+                deriv=self.derivate_order,
+                axis=0,
+                mode=self.mode,
+            )

         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-
-    def _calculate_derivative(self, x) -> np.ndarray:
-        return savgol_filter(
-            x,
-            self.window_size,
-            self.polynomial_order,
-            deriv=self.derivate_order,
-            axis=0,
-            mode=self.mode,
-        )
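With the private `_calculate_derivative` helper removed, the derivative is now computed inline with `scipy.signal.savgol_filter`, and `mode` is typed as a `Literal`. A usage sketch with dummy data and illustrative parameter values:

```python
import numpy as np
from chemotools.derivative import SavitzkyGolay

spectra = np.random.default_rng(1).random((5, 200))
sg = SavitzkyGolay(window_size=7, polynomial_order=2, derivate_order=1, mode="nearest")
smoothed_derivative = sg.fit_transform(spectra)  # each row is passed through savgol_filter
```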
chemotools/{variable_selection/select_features.py → feature_selection/_index_selector.py}
CHANGED
@@ -1,11 +1,13 @@
+from typing import Optional, Union
+
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.base import BaseEstimator
+from sklearn.feature_selection._base import SelectorMixin

-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data


-class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class IndexSelector(SelectorMixin, BaseEstimator):
     """
     A transformer that Selects the spectral data to a specified array of features. This
     array can be continuous or discontinuous. The array of features is specified by:
@@ -29,12 +31,6 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     features_index_ : int
         The index of the features to select.

-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -46,13 +42,13 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):

     def __init__(
         self,
-        features: np.ndarray = None,
-        wavenumbers: np.ndarray = None,
+        features: Optional[np.ndarray] = None,
+        wavenumbers: Optional[np.ndarray] = None,
     ):
         self.features = features
         self.wavenumbers = wavenumbers

-    def fit(self, X: np.ndarray, y=None) -> "SelectFeatures":
+    def fit(self, X: np.ndarray, y=None) -> "IndexSelector":
         """
         Fit the transformer to the input data.

@@ -66,15 +62,13 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):

         Returns
         -------
-        self : SelectFeatures
+        self : IndexSelector
             The fitted transformer.
         """
-        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-
-        self.n_features_in_ = X.shape[1]
-
+        # validate that X is a 2D array and has only finite values
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         # Set the fitted attribute to True
         self._is_fitted = True

@@ -83,55 +77,37 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             self.features_index_ = self.features
             return self

-
+        elif self.wavenumbers is None:
             self.features_index_ = self.features
             return self

-
-
-
+        else:
+            self.features_index_ = self._find_indices(self.features)
+            return self

-    def
+    def _get_support_mask(self):
         """
-
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            The input data to transform.
-
-        y : None
-            Ignored.
+        Get the boolean mask indicating which features are selected.

         Returns
         -------
-
-            The
+        mask : ndarray of shape (n_features_in_,)
+            The mask indicating the selected features.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
-
-        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        check_is_fitted(self)

-        #
-
-
-                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
-            )
-
-        # Select the features
-        if self.features is None:
-            return X_
+        # Create the mask
+        mask = np.zeros(self.n_features_in_, dtype=bool)
+        mask[self.features_index_] = True

-        return
+        return mask

-    def _find_index(self, target: float) -> int:
+    def _find_index(self, target: Union[float, int]) -> int:
         if self.wavenumbers is None:
-            return target
+            return int(target)
         wavenumbers = np.array(self.wavenumbers)
-        return np.argmin(np.abs(wavenumbers - target))
+        return int(np.argmin(np.abs(wavenumbers - target)))

-    def _find_indices(self) -> np.ndarray:
-        return np.array([self._find_index(feature) for feature in self.features])
+    def _find_indices(self, features: np.ndarray) -> np.ndarray:
+        return np.array([self._find_index(feature) for feature in features])
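`SelectFeatures` becomes `IndexSelector` and switches from a `TransformerMixin`-based transformer to a `SelectorMixin`-based selector: it now only implements `_get_support_mask`, while `transform` and `get_support` are inherited from scikit-learn. A usage sketch with made-up wavenumbers, assuming the package-level re-export added in `feature_selection/__init__.py`:

```python
import numpy as np
from chemotools.feature_selection import IndexSelector

wavenumbers = np.linspace(400, 4000, 100)               # made-up spectral axis
spectra = np.random.default_rng(2).random((8, 100))

selector = IndexSelector(features=np.array([450.0, 1650.0, 2900.0]), wavenumbers=wavenumbers)
reduced = selector.fit_transform(spectra)               # transform() comes from SelectorMixin
mask = selector.get_support()                           # boolean mask built by _get_support_mask()
print(reduced.shape, mask.sum())                        # (8, 3) and 3
```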
chemotools/{variable_selection/range_cut.py → feature_selection/_range_cut.py}
CHANGED
@@ -1,13 +1,14 @@
-
-from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from typing import Optional

-
+import numpy as np
+from sklearn.base import BaseEstimator
+from sklearn.feature_selection._base import SelectorMixin
+from sklearn.utils.validation import check_is_fitted, validate_data


-class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class RangeCut(SelectorMixin, BaseEstimator):
     """
-    A
+    A selector that cuts the input data to a specified range. The range is specified:
     - by the indices of the start and end of the range,
     - by the wavenumbers of the start and end of the range. In this case, the wavenumbers
       must be provided to the transformer when it is initialised. If the wavenumbers
@@ -35,26 +36,20 @@ class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     end_index_ : int
         The index of the end of the range. It is -1 if the wavenumbers are not provided.

-
-        The
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
+    wavenuumbers_ : array-like
+        The cut wavenumbers of the input data.

     Methods
     -------
     fit(X, y=None)
         Fit the transformer to the input data.
-
-    transform(X, y=0, copy=True)
-        Transform the input data by cutting it to the specified range.
     """

     def __init__(
         self,
         start: int = 0,
         end: int = -1,
-        wavenumbers: np.ndarray = None,
+        wavenumbers: Optional[np.ndarray] = None,
     ):
         self.start = start
         self.end = end
@@ -78,59 +73,39 @@ class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         # Set the start and end indices
         if self.wavenumbers is None:
             self.start_index_ = self.start
             self.end_index_ = self.end
+            self.wavenumbers_ = None
         else:
             self.start_index_ = self._find_index(self.start)
             self.end_index_ = self._find_index(self.end)
+            self.wavenumbers_ = self.wavenumbers[self.start_index_ : self.end_index_]

         return self

-    def
+    def _get_support_mask(self):
         """
-
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            The input data to transform.
-
-        y : None
-            Ignored.
+        Get the boolean mask indicating which features are selected.

         Returns
         -------
-
-            The
+        mask : np.ndarray of shape (n_features,)
+            The boolean mask indicating which features are selected.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
-
-        # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        check_is_fitted(self, ["start_index_", "end_index_"])

-        #
-
-
-                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
-            )
+        # Create the mask
+        mask = np.zeros(self.n_features_in_, dtype=bool)
+        mask[self.start_index_ : self.end_index_] = True

-
-        return X_[:, self.start_index_ : self.end_index_]
+        return mask

     def _find_index(self, target: float) -> int:
-        if self.wavenumbers is None:
-            return target
         wavenumbers = np.array(self.wavenumbers)
-        return np.argmin(np.abs(wavenumbers - target))
+        return int(np.argmin(np.abs(wavenumbers - target)))
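`RangeCut` follows the same pattern: it becomes a `SelectorMixin`-based selector, keeps the cut wavenumber axis in the new `wavenumbers_` attribute, and the old slicing `transform` is replaced by a boolean support mask. A sketch with made-up values:

```python
import numpy as np
from chemotools.feature_selection import RangeCut

wavenumbers = np.linspace(950, 1550, 300)    # made-up axis
spectra = np.random.default_rng(3).random((4, 300))

rc = RangeCut(start=1000, end=1300, wavenumbers=wavenumbers)
cut = rc.fit_transform(spectra)              # keeps the columns between the two nearest wavenumbers
print(cut.shape)                             # roughly (4, 150); rc.wavenumbers_ holds the cut axis
```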
chemotools/scale/__init__.py
CHANGED
@@ -1,3 +1,5 @@
-from .min_max_scaler import MinMaxScaler
-from .norm_scaler import NormScaler
-from .point_scaler import PointScaler
+from ._min_max_scaler import MinMaxScaler
+from ._norm_scaler import NormScaler
+from ._point_scaler import PointScaler
+
+__all__ = ["MinMaxScaler", "NormScaler", "PointScaler"]
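Because the scaler modules are now private, user code should import from the package rather than from the modules; the new `__all__` makes the public surface explicit. For example:

```python
from chemotools.scale import MinMaxScaler, NormScaler, PointScaler

# The classes still live in the underscored modules, which are treated as internal;
# given the rename above this is expected to print chemotools.scale._min_max_scaler.
print(MinMaxScaler.__module__)
```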
chemotools/scale/{min_max_scaler.py → _min_max_scaler.py}
CHANGED
@@ -1,30 +1,20 @@
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data

-from chemotools.utils.check_inputs import check_input

-
-class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class MinMaxScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that scales the input data by subtracting the minimum and dividing by
-    the difference between the maximum and the minimum. When the use_min parameter is False,
+    the difference between the maximum and the minimum. When the use_min parameter is False,
     the data is scaled by the maximum.

     Parameters
     ----------
     use_min : bool, default=True
-        The normalization to use. If True, the data is subtracted by the minimum and
+        The normalization to use. If True, the data is subtracted by the minimum and
         scaled by the maximum. If False, the data is scaled by the maximum.

-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -55,14 +45,9 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self

     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -83,11 +68,18 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")

         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )

         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
@@ -97,8 +89,9 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):

         # Normalize the data by the maximum value
         if self.use_min:
-            X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (
-                X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True)
+            X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (
+                np.max(X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True)
+            )

         else:
             X_ = X_ / np.max(X_, axis=1, keepdims=True)
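The scaling itself is unchanged apart from the reformatted expression: with `use_min=True` each row is mapped to `(x - min) / (max - min)`, otherwise it is divided by its row maximum. A small worked sketch:

```python
import numpy as np
from chemotools.scale import MinMaxScaler

X = np.array([[1.0, 2.0, 5.0],
              [10.0, 10.0, 20.0]])

scaled = MinMaxScaler(use_min=True).fit_transform(X)
# row-wise (x - min) / (max - min):
# [[0.0, 0.25, 1.0],
#  [0.0, 0.0, 1.0]]
```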
chemotools/scale/{norm_scaler.py → _norm_scaler.py}
CHANGED
@@ -1,26 +1,16 @@
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data

-from chemotools.utils.check_inputs import check_input

-
-class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class NormScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that scales the input data by the L-norm of the spectrum.

     Parameters
     ----------
     l_norm : int, optional
-        The L-norm to use. Default is 2.
-
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
+        The L-norm to use. Default is 2.

     Methods
     -------
@@ -30,13 +20,14 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     transform(X, y=0, copy=True)
         Transform the input data by scaling by the L-norm.
     """
+
     def __init__(self, l_norm: int = 2):
         self.l_norm = l_norm

     def fit(self, X: np.ndarray, y=None) -> "NormScaler":
         """
         Fit the transformer to the input data.
-
+
         Parameters
         ----------
         X : np.ndarray of shape (n_samples, n_features)
@@ -51,14 +42,9 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self

     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -79,11 +65,18 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")

         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )

         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
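`NormScaler` gets the same validation treatment. Assuming, as its docstring says, that each spectrum is divided by its L-norm, a small sketch with `l_norm=1`:

```python
import numpy as np
from chemotools.scale import NormScaler

X = np.array([[1.0, 3.0],
              [2.0, 2.0]])

normalized = NormScaler(l_norm=1).fit_transform(X)
# each row divided by its L1 norm: [[0.25, 0.75], [0.5, 0.5]]
```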