chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +16 -0
- chemotools/augmentation/baseline_shift.py +119 -0
- chemotools/augmentation/exponential_noise.py +117 -0
- chemotools/augmentation/index_shift.py +120 -0
- chemotools/augmentation/normal_noise.py +118 -0
- chemotools/augmentation/spectrum_scale.py +120 -0
- chemotools/augmentation/uniform_noise.py +124 -0
- chemotools/baseline/__init__.py +20 -8
- chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
- chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
- chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
- chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
- chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
- chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
- chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
- chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
- chemotools/datasets/__init__.py +5 -0
- chemotools/datasets/_base.py +122 -0
- chemotools/datasets/data/coffee_labels.csv +61 -0
- chemotools/datasets/data/coffee_spectra.csv +61 -0
- chemotools/datasets/data/fermentation_hplc.csv +35 -0
- chemotools/datasets/data/fermentation_spectra.csv +1630 -0
- chemotools/datasets/data/train_hplc.csv +22 -0
- chemotools/datasets/data/train_spectra.csv +22 -0
- chemotools/derivative/__init__.py +4 -2
- chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
- chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
- chemotools/feature_selection/__init__.py +4 -0
- chemotools/feature_selection/_index_selector.py +113 -0
- chemotools/feature_selection/_range_cut.py +111 -0
- chemotools/scale/__init__.py +5 -3
- chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
- chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
- chemotools/scale/_point_scaler.py +115 -0
- chemotools/scatter/__init__.py +13 -2
- chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
- chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
- chemotools/scatter/_robust_normal_variate.py +101 -0
- chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
- chemotools/smooth/__init__.py +6 -4
- chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
- chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
- chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
- chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
- chemotools-0.1.6.dist-info/RECORD +51 -0
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
- chemotools/scale/index_scaler.py +0 -97
- chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
- chemotools/scatter/multiplicative_scatter_correction.py +0 -123
- chemotools/utils/check_inputs.py +0 -14
- chemotools/variable_selection/__init__.py +0 -1
- chemotools/variable_selection/range_cut.py +0 -121
- chemotools-0.0.22.dist-info/RECORD +0 -39
- chemotools-0.0.22.dist-info/top_level.txt +0 -2
- tests/fixtures.py +0 -89
- tests/test_functionality.py +0 -397
- tests/test_sklearn_compliance.py +0 -192
- {tests → chemotools/datasets/data}/__init__.py +0 -0
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
@@ -1,12 +1,12 @@
|
|
1
|
+
from typing import Literal
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from scipy.signal import savgol_filter
|
3
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
5
|
-
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
7
7
|
|
8
8
|
|
9
|
-
class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator
|
9
|
+
class SavitzkyGolay(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
10
10
|
"""
|
11
11
|
A transformer that calculates the Savitzky-Golay derivative of the input data.
|
12
12
|
|
@@ -27,14 +27,6 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
27
27
|
The mode to use for the derivative calculation. Can be "nearest", "constant",
|
28
28
|
"wrap", "mirror" or "interp". Default is "nearest".
|
29
29
|
|
30
|
-
Attributes
|
31
|
-
----------
|
32
|
-
n_features_in_ : int
|
33
|
-
The number of features in the input data.
|
34
|
-
|
35
|
-
_is_fitted : bool
|
36
|
-
Whether the transformer has been fitted to data.
|
37
|
-
|
38
30
|
Methods
|
39
31
|
-------
|
40
32
|
fit(X, y=None)
|
@@ -49,7 +41,7 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
49
41
|
window_size: int = 3,
|
50
42
|
polynomial_order: int = 1,
|
51
43
|
derivate_order: int = 1,
|
52
|
-
mode:
|
44
|
+
mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
|
53
45
|
) -> None:
|
54
46
|
self.window_size = window_size
|
55
47
|
self.polynomial_order = polynomial_order
|
@@ -74,14 +66,9 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
74
66
|
The fitted transformer.
|
75
67
|
"""
|
76
68
|
# Check that X is a 2D array and has only finite values
|
77
|
-
X =
|
78
|
-
|
79
|
-
|
80
|
-
self.n_features_in_ = X.shape[1]
|
81
|
-
|
82
|
-
# Set the fitted attribute to True
|
83
|
-
self._is_fitted = True
|
84
|
-
|
69
|
+
X = validate_data(
|
70
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
71
|
+
)
|
85
72
|
return self
|
86
73
|
|
87
74
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -102,29 +89,32 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
102
89
|
The transformed data.
|
103
90
|
"""
|
104
91
|
# Check that the estimator is fitted
|
105
|
-
check_is_fitted(self, "
|
92
|
+
check_is_fitted(self, "n_features_in_")
|
106
93
|
|
107
94
|
# Check that X is a 2D array and has only finite values
|
108
|
-
|
109
|
-
|
95
|
+
X_ = validate_data(
|
96
|
+
self,
|
97
|
+
X,
|
98
|
+
y="no_validation",
|
99
|
+
ensure_2d=True,
|
100
|
+
copy=True,
|
101
|
+
reset=False,
|
102
|
+
dtype=np.float64,
|
103
|
+
)
|
110
104
|
|
111
105
|
if X_.shape[1] != self.n_features_in_:
|
112
106
|
raise ValueError(
|
113
107
|
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
114
108
|
)
|
115
|
-
|
116
109
|
# Calculate the Savitzky-Golay derivative of each spectrum
|
117
110
|
for i, x in enumerate(X_):
|
118
|
-
X_[i] =
|
111
|
+
X_[i] = savgol_filter(
|
112
|
+
x,
|
113
|
+
self.window_size,
|
114
|
+
self.polynomial_order,
|
115
|
+
deriv=self.derivate_order,
|
116
|
+
axis=0,
|
117
|
+
mode=self.mode,
|
118
|
+
)
|
119
119
|
|
120
120
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
121
|
-
|
122
|
-
def _calculate_derivative(self, x) -> np.ndarray:
|
123
|
-
return savgol_filter(
|
124
|
-
x,
|
125
|
-
self.window_size,
|
126
|
-
self.polynomial_order,
|
127
|
-
deriv=self.derivate_order,
|
128
|
-
axis=0,
|
129
|
-
mode=self.mode,
|
130
|
-
)
|
@@ -0,0 +1,113 @@
|
|
1
|
+
from typing import Optional, Union
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator
|
5
|
+
from sklearn.feature_selection._base import SelectorMixin
|
6
|
+
|
7
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
8
|
+
|
9
|
+
|
10
|
+
class IndexSelector(SelectorMixin, BaseEstimator):
|
11
|
+
"""
|
12
|
+
A transformer that selects a specified array of features from the spectral data. This
|
13
|
+
array can be continuous or discontinuous. The array of features is specified by:
|
14
|
+
- by the indices of the wavenumbers to select,
|
15
|
+
- by the wavenumbers to select, the wavenumbers must be provided to the transformer
|
16
|
+
when it is initialised. If the wavenumbers are not provided, the indices will be
|
17
|
+
used instead. The wavenumbers must be provided in ascending order.
|
18
|
+
|
19
|
+
Parameters
|
20
|
+
----------
|
21
|
+
features : array-like, optional
|
22
|
+
The index of the features to select. Default is None.
|
23
|
+
|
24
|
+
wavenumbers : array-like, optional
|
25
|
+
The wavenumbers of the input data. If not provided, the indices will be used
|
26
|
+
instead. Default is None. If provided, the wavenumbers must be provided in
|
27
|
+
ascending order.
|
28
|
+
|
29
|
+
Attributes
|
30
|
+
----------
|
31
|
+
features_index_ : array-like of int or None
|
32
|
+
The index of the features to select.
|
33
|
+
|
34
|
+
Methods
|
35
|
+
-------
|
36
|
+
fit(X, y=None)
|
37
|
+
Fit the transformer to the input data.
|
38
|
+
|
39
|
+
transform(X, y=0, copy=True)
|
40
|
+
Transform the input data by selecting the specified features.
|
41
|
+
"""
|
42
|
+
|
43
|
+
def __init__(
|
44
|
+
self,
|
45
|
+
features: Optional[np.ndarray] = None,
|
46
|
+
wavenumbers: Optional[np.ndarray] = None,
|
47
|
+
):
|
48
|
+
self.features = features
|
49
|
+
self.wavenumbers = wavenumbers
|
50
|
+
|
51
|
+
def fit(self, X: np.ndarray, y=None) -> "IndexSelector":
|
52
|
+
"""
|
53
|
+
Fit the transformer to the input data.
|
54
|
+
|
55
|
+
Parameters
|
56
|
+
----------
|
57
|
+
X : array-like of shape (n_samples, n_features)
|
58
|
+
The input data to fit the transformer to.
|
59
|
+
|
60
|
+
y : None
|
61
|
+
Ignored.
|
62
|
+
|
63
|
+
Returns
|
64
|
+
-------
|
65
|
+
self : IndexSelector
|
66
|
+
The fitted transformer.
|
67
|
+
"""
|
68
|
+
# validate that X is a 2D array and has only finite values
|
69
|
+
X = validate_data(
|
70
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
71
|
+
)
|
72
|
+
# Set the fitted attribute to True
|
73
|
+
self._is_fitted = True
|
74
|
+
|
75
|
+
# Set the indices of the features to select
|
76
|
+
if self.features is None:
|
77
|
+
self.features_index_ = self.features
|
78
|
+
return self
|
79
|
+
|
80
|
+
elif self.wavenumbers is None:
|
81
|
+
self.features_index_ = self.features
|
82
|
+
return self
|
83
|
+
|
84
|
+
else:
|
85
|
+
self.features_index_ = self._find_indices(self.features)
|
86
|
+
return self
|
87
|
+
|
88
|
+
def _get_support_mask(self):
|
89
|
+
"""
|
90
|
+
Get the boolean mask indicating which features are selected.
|
91
|
+
|
92
|
+
Returns
|
93
|
+
-------
|
94
|
+
mask : ndarray of shape (n_features_in_,)
|
95
|
+
The mask indicating the selected features.
|
96
|
+
"""
|
97
|
+
# Check that the estimator is fitted
|
98
|
+
check_is_fitted(self)
|
99
|
+
|
100
|
+
# Create the mask
|
101
|
+
mask = np.zeros(self.n_features_in_, dtype=bool)
|
102
|
+
mask[self.features_index_] = True
|
103
|
+
|
104
|
+
return mask
|
105
|
+
|
106
|
+
def _find_index(self, target: Union[float, int]) -> int:
|
107
|
+
if self.wavenumbers is None:
|
108
|
+
return int(target)
|
109
|
+
wavenumbers = np.array(self.wavenumbers)
|
110
|
+
return int(np.argmin(np.abs(wavenumbers - target)))
|
111
|
+
|
112
|
+
def _find_indices(self, features: np.ndarray) -> np.ndarray:
|
113
|
+
return np.array([self._find_index(feature) for feature in features])
|
@@ -0,0 +1,111 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator
|
5
|
+
from sklearn.feature_selection._base import SelectorMixin
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
7
|
+
|
8
|
+
|
9
|
+
class RangeCut(SelectorMixin, BaseEstimator):
|
10
|
+
"""
|
11
|
+
A selector that cuts the input data to a specified range. The range is specified:
|
12
|
+
- by the indices of the start and end of the range,
|
13
|
+
- by the wavenumbers of the start and end of the range. In this case, the wavenumbers
|
14
|
+
must be provided to the transformer when it is initialised. If the wavenumbers
|
15
|
+
are not provided, the indices will be used instead. The wavenumbers must be
|
16
|
+
provided in ascending order.
|
17
|
+
|
18
|
+
Parameters
|
19
|
+
----------
|
20
|
+
start : int, optional
|
21
|
+
The index or wavenumber of the start of the range. Default is 0.
|
22
|
+
|
23
|
+
end : int, optional
|
24
|
+
The index or wavenumber of the end of the range. Default is -1.
|
25
|
+
|
26
|
+
wavenumbers : array-like, optional
|
27
|
+
The wavenumbers of the input data. If not provided, the indices will be used
|
28
|
+
instead. Default is None. If provided, the wavenumbers must be provided in
|
29
|
+
ascending order.
|
30
|
+
|
31
|
+
Attributes
|
32
|
+
----------
|
33
|
+
start_index_ : int
|
34
|
+
The index of the start of the range. It equals `start` if the wavenumbers are not provided.
|
35
|
+
|
36
|
+
end_index_ : int
|
37
|
+
The index of the end of the range. It equals `end` if the wavenumbers are not provided.
|
38
|
+
|
39
|
+
wavenumbers_ : array-like
|
40
|
+
The cut wavenumbers of the input data.
|
41
|
+
|
42
|
+
Methods
|
43
|
+
-------
|
44
|
+
fit(X, y=None)
|
45
|
+
Fit the transformer to the input data.
|
46
|
+
"""
|
47
|
+
|
48
|
+
def __init__(
|
49
|
+
self,
|
50
|
+
start: int = 0,
|
51
|
+
end: int = -1,
|
52
|
+
wavenumbers: Optional[np.ndarray] = None,
|
53
|
+
):
|
54
|
+
self.start = start
|
55
|
+
self.end = end
|
56
|
+
self.wavenumbers = wavenumbers
|
57
|
+
|
58
|
+
def fit(self, X: np.ndarray, y=None) -> "RangeCut":
|
59
|
+
"""
|
60
|
+
Fit the transformer to the input data.
|
61
|
+
|
62
|
+
Parameters
|
63
|
+
----------
|
64
|
+
X : array-like of shape (n_samples, n_features)
|
65
|
+
The input data to fit the transformer to.
|
66
|
+
|
67
|
+
y : None
|
68
|
+
Ignored.
|
69
|
+
|
70
|
+
Returns
|
71
|
+
-------
|
72
|
+
self : RangeCut
|
73
|
+
The fitted transformer.
|
74
|
+
"""
|
75
|
+
# Check that X is a 2D array and has only finite values
|
76
|
+
X = validate_data(
|
77
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
78
|
+
)
|
79
|
+
# Set the start and end indices
|
80
|
+
if self.wavenumbers is None:
|
81
|
+
self.start_index_ = self.start
|
82
|
+
self.end_index_ = self.end
|
83
|
+
self.wavenumbers_ = None
|
84
|
+
else:
|
85
|
+
self.start_index_ = self._find_index(self.start)
|
86
|
+
self.end_index_ = self._find_index(self.end)
|
87
|
+
self.wavenumbers_ = self.wavenumbers[self.start_index_ : self.end_index_]
|
88
|
+
|
89
|
+
return self
|
90
|
+
|
91
|
+
def _get_support_mask(self):
|
92
|
+
"""
|
93
|
+
Get the boolean mask indicating which features are selected.
|
94
|
+
|
95
|
+
Returns
|
96
|
+
-------
|
97
|
+
mask : np.ndarray of shape (n_features,)
|
98
|
+
The boolean mask indicating which features are selected.
|
99
|
+
"""
|
100
|
+
# Check that the estimator is fitted
|
101
|
+
check_is_fitted(self, ["start_index_", "end_index_"])
|
102
|
+
|
103
|
+
# Create the mask
|
104
|
+
mask = np.zeros(self.n_features_in_, dtype=bool)
|
105
|
+
mask[self.start_index_ : self.end_index_] = True
|
106
|
+
|
107
|
+
return mask
|
108
|
+
|
109
|
+
def _find_index(self, target: float) -> int:
|
110
|
+
wavenumbers = np.array(self.wavenumbers)
|
111
|
+
return int(np.argmin(np.abs(wavenumbers - target)))
|
chemotools/scale/__init__.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
-
from .
|
2
|
-
from .
|
3
|
-
from .
|
1
|
+
from ._min_max_scaler import MinMaxScaler
|
2
|
+
from ._norm_scaler import NormScaler
|
3
|
+
from ._point_scaler import PointScaler
|
4
|
+
|
5
|
+
__all__ = ["MinMaxScaler", "NormScaler", "PointScaler"]
|
@@ -1,27 +1,19 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class MinMaxScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
|
-
A transformer that scales the input data by the
|
11
|
-
|
8
|
+
A transformer that scales the input data by subtracting the minimum and dividing by
|
9
|
+
the difference between the maximum and the minimum. When the use_min parameter is False,
|
10
|
+
the data is scaled by the maximum.
|
12
11
|
|
13
12
|
Parameters
|
14
13
|
----------
|
15
|
-
|
16
|
-
The normalization to use.
|
17
|
-
|
18
|
-
Attributes
|
19
|
-
----------
|
20
|
-
n_features_in_ : int
|
21
|
-
The number of features in the input data.
|
22
|
-
|
23
|
-
_is_fitted : bool
|
24
|
-
Whether the transformer has been fitted to data.
|
14
|
+
use_min : bool, default=True
|
15
|
+
The normalization to use. If True, the minimum is subtracted from the data and the
|
16
|
+
result is divided by (maximum - minimum). If False, the data is scaled by the maximum.
|
25
17
|
|
26
18
|
Methods
|
27
19
|
-------
|
@@ -31,9 +23,9 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
31
23
|
transform(X, y=0, copy=True)
|
32
24
|
Transform the input data by scaling by the maximum value.
|
33
25
|
"""
|
34
|
-
def __init__(self, norm: str = 'max'):
|
35
|
-
self.norm = norm
|
36
26
|
|
27
|
+
def __init__(self, use_min: bool = True):
|
28
|
+
self.use_min = use_min
|
37
29
|
|
38
30
|
def fit(self, X: np.ndarray, y=None) -> "MinMaxScaler":
|
39
31
|
"""
|
@@ -53,19 +45,14 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
53
45
|
The fitted transformer.
|
54
46
|
"""
|
55
47
|
# Check that X is a 2D array and has only finite values
|
56
|
-
X =
|
57
|
-
|
58
|
-
|
59
|
-
self.n_features_in_ = X.shape[1]
|
60
|
-
|
61
|
-
# Set the fitted attribute to True
|
62
|
-
self._is_fitted = True
|
63
|
-
|
48
|
+
X = validate_data(
|
49
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
50
|
+
)
|
64
51
|
return self
|
65
52
|
|
66
53
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
67
54
|
"""
|
68
|
-
Transform the input data by scaling
|
55
|
+
Transform the input data by scaling it.
|
69
56
|
|
70
57
|
Parameters
|
71
58
|
----------
|
@@ -81,22 +68,32 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
81
68
|
The transformed data.
|
82
69
|
"""
|
83
70
|
# Check that the estimator is fitted
|
84
|
-
check_is_fitted(self, "
|
71
|
+
check_is_fitted(self, "n_features_in_")
|
85
72
|
|
86
73
|
# Check that X is a 2D array and has only finite values
|
87
|
-
|
88
|
-
|
74
|
+
X_ = validate_data(
|
75
|
+
self,
|
76
|
+
X,
|
77
|
+
y="no_validation",
|
78
|
+
ensure_2d=True,
|
79
|
+
copy=True,
|
80
|
+
reset=False,
|
81
|
+
dtype=np.float64,
|
82
|
+
)
|
89
83
|
|
90
84
|
# Check that the number of features is the same as the fitted data
|
91
85
|
if X_.shape[1] != self.n_features_in_:
|
92
|
-
raise ValueError(
|
86
|
+
raise ValueError(
|
87
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
88
|
+
)
|
93
89
|
|
94
90
|
# Normalize the data by the maximum value
|
95
|
-
|
96
|
-
|
97
|
-
X_
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
91
|
+
if self.use_min:
|
92
|
+
X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (
|
93
|
+
np.max(X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True)
|
94
|
+
)
|
95
|
+
|
96
|
+
else:
|
97
|
+
X_ = X_ / np.max(X_, axis=1, keepdims=True)
|
98
|
+
|
99
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
@@ -1,26 +1,16 @@
|
|
1
1
|
import numpy as np
|
2
2
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
3
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
4
4
|
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
5
|
|
7
|
-
|
8
|
-
class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
6
|
+
class NormScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
7
|
"""
|
10
8
|
A transformer that scales the input data by the L-norm of the spectrum.
|
11
9
|
|
12
10
|
Parameters
|
13
11
|
----------
|
14
12
|
l_norm : int, optional
|
15
|
-
The L-norm to use. Default is 2.
|
16
|
-
|
17
|
-
Attributes
|
18
|
-
----------
|
19
|
-
n_features_in_ : int
|
20
|
-
The number of features in the input data.
|
21
|
-
|
22
|
-
_is_fitted : bool
|
23
|
-
Whether the transformer has been fitted to data.
|
13
|
+
The L-norm to use. Default is 2.
|
24
14
|
|
25
15
|
Methods
|
26
16
|
-------
|
@@ -30,13 +20,14 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
30
20
|
transform(X, y=0, copy=True)
|
31
21
|
Transform the input data by scaling by the L-norm.
|
32
22
|
"""
|
23
|
+
|
33
24
|
def __init__(self, l_norm: int = 2):
|
34
25
|
self.l_norm = l_norm
|
35
26
|
|
36
27
|
def fit(self, X: np.ndarray, y=None) -> "NormScaler":
|
37
28
|
"""
|
38
29
|
Fit the transformer to the input data.
|
39
|
-
|
30
|
+
|
40
31
|
Parameters
|
41
32
|
----------
|
42
33
|
X : np.ndarray of shape (n_samples, n_features)
|
@@ -51,14 +42,9 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
51
42
|
The fitted transformer.
|
52
43
|
"""
|
53
44
|
# Check that X is a 2D array and has only finite values
|
54
|
-
X =
|
55
|
-
|
56
|
-
|
57
|
-
self.n_features_in_ = X.shape[1]
|
58
|
-
|
59
|
-
# Set the fitted attribute to True
|
60
|
-
self._is_fitted = True
|
61
|
-
|
45
|
+
X = validate_data(
|
46
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
47
|
+
)
|
62
48
|
return self
|
63
49
|
|
64
50
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -79,11 +65,18 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
79
65
|
The transformed data.
|
80
66
|
"""
|
81
67
|
# Check that the estimator is fitted
|
82
|
-
check_is_fitted(self, "
|
68
|
+
check_is_fitted(self, "n_features_in_")
|
83
69
|
|
84
70
|
# Check that X is a 2D array and has only finite values
|
85
|
-
|
86
|
-
|
71
|
+
X_ = validate_data(
|
72
|
+
self,
|
73
|
+
X,
|
74
|
+
y="no_validation",
|
75
|
+
ensure_2d=True,
|
76
|
+
copy=True,
|
77
|
+
reset=False,
|
78
|
+
dtype=np.float64,
|
79
|
+
)
|
87
80
|
|
88
81
|
# Check that the number of features is the same as the fitted data
|
89
82
|
if X_.shape[1] != self.n_features_in_:
|
@@ -0,0 +1,115 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class PointScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
|
+
"""
|
10
|
+
A transformer that scales the input data by the intensity value at a given point.
|
11
|
+
The point can be specified by an index or by a wavenumber.
|
12
|
+
|
13
|
+
Parameters
|
14
|
+
----------
|
15
|
+
point : int, default=0
|
16
|
+
The point to scale the data by. It can be an index or a wavenumber.
|
17
|
+
|
18
|
+
wavenumbers : array-like, optional
|
19
|
+
The wavenumbers of the input data. If not provided, the indices will be used
|
20
|
+
instead. Default is None. If provided, the wavenumbers must be provided in
|
21
|
+
ascending order.
|
22
|
+
|
23
|
+
Attributes
|
24
|
+
----------
|
25
|
+
point_index_ : int
|
26
|
+
The index of the point to scale the data by. It equals `point` if the wavenumbers are not provided.
|
27
|
+
|
28
|
+
Methods
|
29
|
+
-------
|
30
|
+
fit(X, y=None)
|
31
|
+
Fit the transformer to the input data.
|
32
|
+
|
33
|
+
transform(X, y=0, copy=True)
|
34
|
+
Transform the input data by scaling by the value at a given Point.
|
35
|
+
"""
|
36
|
+
|
37
|
+
def __init__(self, point: int = 0, wavenumbers: Optional[np.ndarray] = None):
|
38
|
+
self.point = point
|
39
|
+
self.wavenumbers = wavenumbers
|
40
|
+
|
41
|
+
def fit(self, X: np.ndarray, y=None) -> "PointScaler":
|
42
|
+
"""
|
43
|
+
Fit the transformer to the input data.
|
44
|
+
|
45
|
+
Parameters
|
46
|
+
----------
|
47
|
+
X : np.ndarray of shape (n_samples, n_features)
|
48
|
+
The input data to fit the transformer to.
|
49
|
+
|
50
|
+
y : None
|
51
|
+
Ignored.
|
52
|
+
|
53
|
+
Returns
|
54
|
+
-------
|
55
|
+
self : PointScaler
|
56
|
+
The fitted transformer.
|
57
|
+
"""
|
58
|
+
# Check that X is a 2D array and has only finite values
|
59
|
+
X = validate_data(
|
60
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
61
|
+
)
|
62
|
+
# Set the point index
|
63
|
+
if self.wavenumbers is None:
|
64
|
+
self.point_index_ = self.point
|
65
|
+
else:
|
66
|
+
self.point_index_ = self._find_index(self.point)
|
67
|
+
|
68
|
+
return self
|
69
|
+
|
70
|
+
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
71
|
+
"""
|
72
|
+
Transform the input data by scaling by the value at a given Point.
|
73
|
+
|
74
|
+
Parameters
|
75
|
+
----------
|
76
|
+
X : np.ndarray of shape (n_samples, n_features)
|
77
|
+
The input data to transform.
|
78
|
+
|
79
|
+
y : None
|
80
|
+
Ignored.
|
81
|
+
|
82
|
+
Returns
|
83
|
+
-------
|
84
|
+
X_ : np.ndarray of shape (n_samples, n_features)
|
85
|
+
The transformed data.
|
86
|
+
"""
|
87
|
+
# Check that the estimator is fitted
|
88
|
+
check_is_fitted(self, "point_index_")
|
89
|
+
|
90
|
+
# Check that X is a 2D array and has only finite values
|
91
|
+
X_ = validate_data(
|
92
|
+
self,
|
93
|
+
X,
|
94
|
+
y="no_validation",
|
95
|
+
ensure_2d=True,
|
96
|
+
copy=True,
|
97
|
+
reset=False,
|
98
|
+
dtype=np.float64,
|
99
|
+
)
|
100
|
+
|
101
|
+
# Check that the number of features is the same as the fitted data
|
102
|
+
if X_.shape[1] != self.n_features_in_:
|
103
|
+
raise ValueError(
|
104
|
+
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
105
|
+
)
|
106
|
+
|
107
|
+
# Scale the data by Point
|
108
|
+
for i, x in enumerate(X_):
|
109
|
+
X_[i] = x / x[self.point_index_]
|
110
|
+
|
111
|
+
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
112
|
+
|
113
|
+
def _find_index(self, target: float) -> int:
|
114
|
+
wavenumbers = np.array(self.wavenumbers)
|
115
|
+
return int(np.argmin(np.abs(wavenumbers - target)))
|
chemotools/scatter/__init__.py
CHANGED
@@ -1,2 +1,13 @@
|
|
1
|
-
from .
|
2
|
-
|
1
|
+
from ._extended_multiplicative_scatter_correction import (
|
2
|
+
ExtendedMultiplicativeScatterCorrection,
|
3
|
+
)
|
4
|
+
from ._multiplicative_scatter_correction import MultiplicativeScatterCorrection
|
5
|
+
from ._robust_normal_variate import RobustNormalVariate
|
6
|
+
from ._standard_normal_variate import StandardNormalVariate
|
7
|
+
|
8
|
+
__all__ = [
|
9
|
+
"ExtendedMultiplicativeScatterCorrection",
|
10
|
+
"MultiplicativeScatterCorrection",
|
11
|
+
"RobustNormalVariate",
|
12
|
+
"StandardNormalVariate",
|
13
|
+
]
|