chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +16 -0
- chemotools/augmentation/baseline_shift.py +119 -0
- chemotools/augmentation/exponential_noise.py +117 -0
- chemotools/augmentation/index_shift.py +120 -0
- chemotools/augmentation/normal_noise.py +118 -0
- chemotools/augmentation/spectrum_scale.py +120 -0
- chemotools/augmentation/uniform_noise.py +124 -0
- chemotools/baseline/__init__.py +20 -8
- chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
- chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
- chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
- chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
- chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
- chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
- chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
- chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
- chemotools/datasets/__init__.py +5 -0
- chemotools/datasets/_base.py +122 -0
- chemotools/datasets/data/coffee_labels.csv +61 -0
- chemotools/datasets/data/coffee_spectra.csv +61 -0
- chemotools/datasets/data/fermentation_hplc.csv +35 -0
- chemotools/datasets/data/fermentation_spectra.csv +1630 -0
- chemotools/datasets/data/train_hplc.csv +22 -0
- chemotools/datasets/data/train_spectra.csv +22 -0
- chemotools/derivative/__init__.py +4 -2
- chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
- chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
- chemotools/feature_selection/__init__.py +4 -0
- chemotools/feature_selection/_index_selector.py +113 -0
- chemotools/feature_selection/_range_cut.py +111 -0
- chemotools/scale/__init__.py +5 -3
- chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
- chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
- chemotools/scale/_point_scaler.py +115 -0
- chemotools/scatter/__init__.py +13 -2
- chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
- chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
- chemotools/scatter/_robust_normal_variate.py +101 -0
- chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
- chemotools/smooth/__init__.py +6 -4
- chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
- chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
- chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
- chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
- chemotools-0.1.6.dist-info/RECORD +51 -0
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
- chemotools/scale/index_scaler.py +0 -97
- chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
- chemotools/scatter/multiplicative_scatter_correction.py +0 -123
- chemotools/utils/check_inputs.py +0 -14
- chemotools/variable_selection/__init__.py +0 -1
- chemotools/variable_selection/range_cut.py +0 -121
- chemotools-0.0.22.dist-info/RECORD +0 -39
- chemotools-0.0.22.dist-info/top_level.txt +0 -2
- tests/fixtures.py +0 -89
- tests/test_functionality.py +0 -397
- tests/test_sklearn_compliance.py +0 -192
- {tests → chemotools/datasets/data}/__init__.py +0 -0
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
from .baseline_shift import BaselineShift
|
2
|
+
from .exponential_noise import ExponentialNoise
|
3
|
+
from .normal_noise import NormalNoise
|
4
|
+
from .index_shift import IndexShift
|
5
|
+
from .spectrum_scale import SpectrumScale
|
6
|
+
from .uniform_noise import UniformNoise
|
7
|
+
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"BaselineShift",
|
11
|
+
"ExponentialNoise",
|
12
|
+
"NormalNoise",
|
13
|
+
"IndexShift",
|
14
|
+
"SpectrumScale",
|
15
|
+
"UniformNoise",
|
16
|
+
]
|
@@ -0,0 +1,119 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class BaselineShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add a random constant baseline offset to every spectrum.

    Each sample (row) receives its own offset, drawn from a one-sided uniform
    distribution over the interval [0, scale).

    Parameters
    ----------
    scale : float, default=0.0
        Upper bound of the uniform distribution the offset is drawn from.
        With the default of 0.0 the offset is always 0.

    random_state : int, default=None
        Seed passed to ``np.random.default_rng`` when the transformer is fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data seen during ``fit``.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Validate the input data and create the random number generator.

    transform(X, y=None)
        Return a copy of the input data with a baseline added to each spectrum.

    Notes
    -----
    The random number generator is created in ``fit`` and reused by every
    ``transform`` call, so consecutive calls continue the same random stream
    and draw different offsets.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "BaselineShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : BaselineShift
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Remember the number of features so transform can reject other widths
        self.n_features_in_ = X.shape[1]

        # Marker attribute looked up by check_is_fitted in transform
        self._is_fitted = True

        # A fresh generator per fit makes results reproducible for a given seed
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding a baseline to each spectrum.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            A float64 copy of the input data with one random offset added
            per row.

        Raises
        ------
        ValueError
            If the number of features differs from the one seen during ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Check that X is a 2D array and has only finite values; copy=True
        # keeps the caller's array untouched
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Shift every spectrum by its own random offset
        for i, x in enumerate(X_):
            X_[i] = self._add_baseline(x)

        # X_ is always 2D here (ensure_2d=True), so it is returned as is
        return X_

    def _add_baseline(self, x) -> np.ndarray:
        """Return ``x`` shifted by a single offset drawn from [0, scale)."""
        adding_factor = self._rng.uniform(low=0, high=self.scale)
        return np.add(x, adding_factor)
|
@@ -0,0 +1,117 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class ExponentialNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add exponential noise to the input data.

    An independent noise value is drawn from an exponential distribution for
    every element of the input and added to it.

    Parameters
    ----------
    scale : float, default=0.0
        The scale of the exponential distribution the noise is drawn from.

    random_state : int, default=None
        Seed passed to ``np.random.default_rng`` when the transformer is fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data seen during ``fit``.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Validate the input data and create the random number generator.

    transform(X, y=None)
        Return a copy of the input data with random exponential noise added.

    Notes
    -----
    The random number generator is created in ``fit`` and reused by every
    ``transform`` call, so consecutive calls continue the same random stream
    and draw different noise.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "ExponentialNoise":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : ExponentialNoise
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Remember the number of features so transform can reject other widths
        self.n_features_in_ = X.shape[1]

        # Marker attribute looked up by check_is_fitted in transform
        self._is_fitted = True

        # A fresh generator per fit makes results reproducible for a given seed
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding random exponential noise.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            A float64 copy of the input data with noise added to every element.

        Raises
        ------
        ValueError
            If the number of features differs from the one seen during ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Check that X is a 2D array and has only finite values; copy=True
        # keeps the caller's array untouched
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Add exponential noise to the spectra, one row at a time
        for i, x in enumerate(X_):
            X_[i] = self._add_random_noise(x)

        # X_ is always 2D here (ensure_2d=True), so it is returned as is
        return X_

    def _add_random_noise(self, x) -> np.ndarray:
        """Return ``x`` plus one exponential noise value per element."""
        return x + self._rng.exponential(self.scale, size=x.shape)
|
@@ -0,0 +1,120 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Shift each spectrum by a random number of indices.

    The shift of every sample (row) is an integer drawn from a discrete uniform
    distribution between -shift and +shift, both bounds included. The shift is
    circular: values pushed past one end of the spectrum re-enter at the
    other end.

    Parameters
    ----------
    shift : int, default=0
        Maximum absolute number of indices a spectrum can be shifted by.

    random_state : int, default=None
        Seed passed to ``np.random.default_rng`` when the transformer is fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data seen during ``fit``.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Validate the input data and create the random number generator.

    transform(X, y=None)
        Return a copy of the input data with each spectrum shifted.

    Notes
    -----
    The random number generator is created in ``fit`` and reused by every
    ``transform`` call, so consecutive calls continue the same random stream
    and draw different shifts.
    """

    def __init__(self, shift: int = 0, random_state: Optional[int] = None):
        self.shift = shift
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : IndexShift
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Remember the number of features so transform can reject other widths
        self.n_features_in_ = X.shape[1]

        # Marker attribute looked up by check_is_fitted in transform
        self._is_fitted = True

        # A fresh generator per fit makes results reproducible for a given seed
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            A float64 copy of the input data with every row circularly shifted
            by its own random amount.

        Raises
        ------
        ValueError
            If the number of features differs from the one seen during ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Check that X is a 2D array and has only finite values; copy=True
        # keeps the caller's array untouched
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Shift every spectrum by its own random number of indices
        for i, x in enumerate(X_):
            X_[i] = self._shift_spectrum(x)

        # X_ is always 2D here (ensure_2d=True), so it is returned as is
        return X_

    def _shift_spectrum(self, x) -> np.ndarray:
        """Return ``x`` rolled by a random integer in [-shift, shift]."""
        # endpoint=True makes the upper bound +shift inclusive
        shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
        return np.roll(x, shift_amount)
|
@@ -0,0 +1,118 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add normal noise to the input data.

    An independent noise value is drawn from a zero-mean normal distribution
    for every element of the input and added to it.

    Parameters
    ----------
    scale : float, default=0.0
        The standard deviation of the normal distribution the noise is drawn
        from.

    random_state : int, default=None
        Seed passed to ``np.random.default_rng`` when the transformer is fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data seen during ``fit``.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Validate the input data and create the random number generator.

    transform(X, y=None)
        Return a copy of the input data with random normal noise added.

    Notes
    -----
    The random number generator is created in ``fit`` and reused by every
    ``transform`` call, so consecutive calls continue the same random stream
    and draw different noise.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "NormalNoise":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : NormalNoise
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Remember the number of features so transform can reject other widths
        self.n_features_in_ = X.shape[1]

        # Marker attribute looked up by check_is_fitted in transform
        self._is_fitted = True

        # A fresh generator per fit makes results reproducible for a given seed
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding random normal noise.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            A float64 copy of the input data with noise added to every element.

        Raises
        ------
        ValueError
            If the number of features differs from the one seen during ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Check that X is a 2D array and has only finite values; copy=True
        # keeps the caller's array untouched
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Add normal noise to the spectra, one row at a time
        for i, x in enumerate(X_):
            X_[i] = self._add_random_noise(x)

        # X_ is always 2D here (ensure_2d=True), so it is returned as is
        return X_

    def _add_random_noise(self, x) -> np.ndarray:
        """Return ``x`` plus one zero-mean normal noise value per element."""
        return x + self._rng.normal(0, self.scale, size=x.shape)
|
@@ -0,0 +1,120 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class SpectrumScale(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Scale each spectrum by a random factor close to 1.0.

    Every sample (row) is multiplied by its own factor, drawn from a uniform
    distribution over the interval [1 - scale, 1 + scale).

    Parameters
    ----------
    scale : float, default=0.0
        Half-width of the uniform distribution the scaling factor is drawn
        from. With the default of 0.0 the factor is always 1.0.

    random_state : int, default=None
        Seed passed to ``np.random.default_rng`` when the transformer is fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data seen during ``fit``.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Validate the input data and create the random number generator.

    transform(X, y=None)
        Return a copy of the input data with each spectrum scaled.

    Notes
    -----
    The random number generator is created in ``fit`` and reused by every
    ``transform`` call, so consecutive calls continue the same random stream
    and draw different factors.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "SpectrumScale":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : SpectrumScale
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Remember the number of features so transform can reject other widths
        self.n_features_in_ = X.shape[1]

        # Marker attribute looked up by check_is_fitted in transform
        self._is_fitted = True

        # A fresh generator per fit makes results reproducible for a given seed
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by scaling the spectrum.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            A float64 copy of the input data with every row multiplied by its
            own random factor.

        Raises
        ------
        ValueError
            If the number of features differs from the one seen during ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Check that X is a 2D array and has only finite values; copy=True
        # keeps the caller's array untouched
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Scale every spectrum by its own random factor
        for i, x in enumerate(X_):
            X_[i] = self._scale_spectrum(x)

        # X_ is always 2D here (ensure_2d=True), so it is returned as is
        return X_

    def _scale_spectrum(self, x) -> np.ndarray:
        """Return ``x`` multiplied by one factor from [1 - scale, 1 + scale)."""
        scaling_factor = self._rng.uniform(low=1 - self.scale, high=1 + self.scale)
        return np.multiply(x, scaling_factor)
|