chemotools 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +5 -9
- chemotools/augmentation/{normal_noise.py → _add_noise.py} +26 -30
- chemotools/augmentation/{index_shift.py → _index_shift.py} +82 -3
- chemotools/scatter/_extended_multiplicative_scatter_correction.py +17 -12
- chemotools/scatter/_multiplicative_scatter_correction.py +16 -9
- {chemotools-0.1.6.dist-info → chemotools-0.1.8.dist-info}/METADATA +1 -1
- {chemotools-0.1.6.dist-info → chemotools-0.1.8.dist-info}/RECORD +11 -13
- chemotools/augmentation/exponential_noise.py +0 -117
- chemotools/augmentation/uniform_noise.py +0 -124
- /chemotools/augmentation/{baseline_shift.py → _baseline_shift.py} +0 -0
- /chemotools/augmentation/{spectrum_scale.py → _spectrum_scale.py} +0 -0
- {chemotools-0.1.6.dist-info → chemotools-0.1.8.dist-info}/LICENSE +0 -0
- {chemotools-0.1.6.dist-info → chemotools-0.1.8.dist-info}/WHEEL +0 -0
@@ -1,16 +1,12 @@
|
|
1
|
-
from .
|
2
|
-
from .
|
3
|
-
from .
|
4
|
-
from .
|
5
|
-
from .spectrum_scale import SpectrumScale
|
6
|
-
from .uniform_noise import UniformNoise
|
1
|
+
from ._add_noise import AddNoise
|
2
|
+
from ._baseline_shift import BaselineShift
|
3
|
+
from ._index_shift import IndexShift
|
4
|
+
from ._spectrum_scale import SpectrumScale
|
7
5
|
|
8
6
|
|
9
7
|
__all__ = [
|
8
|
+
"AddNoise",
|
10
9
|
"BaselineShift",
|
11
|
-
"ExponentialNoise",
|
12
|
-
"NormalNoise",
|
13
10
|
"IndexShift",
|
14
11
|
"SpectrumScale",
|
15
|
-
"UniformNoise",
|
16
12
|
]
|
@@ -1,44 +1,26 @@
|
|
1
|
-
from typing import Optional
|
1
|
+
from typing import Literal, Optional
|
2
2
|
|
3
3
|
import numpy as np
|
4
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
5
|
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
6
|
|
7
7
|
|
8
|
-
class
|
8
|
+
class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
9
|
"""
|
10
10
|
Add normal noise to the input data.
|
11
|
-
|
12
|
-
Parameters
|
13
|
-
----------
|
14
|
-
scale : float, default=0.0
|
15
|
-
The scale of the noise to add to the input data.
|
16
|
-
|
17
|
-
random_state : int, default=None
|
18
|
-
The random state to use for the random number generator.
|
19
|
-
|
20
|
-
Attributes
|
21
|
-
----------
|
22
|
-
n_features_in_ : int
|
23
|
-
The number of features in the input data.
|
24
|
-
|
25
|
-
_is_fitted : bool
|
26
|
-
Whether the transformer has been fitted to data.
|
27
|
-
|
28
|
-
Methods
|
29
|
-
-------
|
30
|
-
fit(X, y=None)
|
31
|
-
Fit the transformer to the input data.
|
32
|
-
|
33
|
-
transform(X, y=0, copy=True)
|
34
|
-
Transform the input data by adding random noise.
|
35
11
|
"""
|
36
12
|
|
37
|
-
def __init__(
|
13
|
+
def __init__(
|
14
|
+
self,
|
15
|
+
noise_distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian",
|
16
|
+
scale: float = 0.0,
|
17
|
+
random_state: Optional[int] = None,
|
18
|
+
):
|
19
|
+
self.noise_distribution = noise_distribution
|
38
20
|
self.scale = scale
|
39
21
|
self.random_state = random_state
|
40
22
|
|
41
|
-
def fit(self, X: np.ndarray, y=None) -> "
|
23
|
+
def fit(self, X: np.ndarray, y=None) -> "AddNoise":
|
42
24
|
"""
|
43
25
|
Fit the transformer to the input data.
|
44
26
|
|
@@ -110,9 +92,23 @@ class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
|
110
92
|
|
111
93
|
# Add noise drawn from the selected distribution to each sample
|
112
94
|
for i, x in enumerate(X_):
|
113
|
-
|
95
|
+
match self.noise_distribution:
|
96
|
+
case "gaussian":
|
97
|
+
X_[i] = self._add_gaussian_noise(x)
|
98
|
+
case "poisson":
|
99
|
+
X_[i] = self._add_poisson_noise(x)
|
100
|
+
case "exponential":
|
101
|
+
X_[i] = self._add_exponential_noise(x)
|
102
|
+
case _:
|
103
|
+
raise ValueError("Invalid noise distribution")
|
114
104
|
|
115
105
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
116
106
|
|
117
|
-
def
|
107
|
+
def _add_gaussian_noise(self, x) -> np.ndarray:
|
118
108
|
return x + self._rng.normal(0, self.scale, size=x.shape)
|
109
|
+
|
110
|
+
def _add_poisson_noise(self, x) -> np.ndarray:
|
111
|
+
return self._rng.poisson(x, size=x.shape) * self.scale
|
112
|
+
|
113
|
+
def _add_exponential_noise(self, x) -> np.ndarray:
|
114
|
+
return x + self._rng.exponential(self.scale, size=x.shape)
|
@@ -1,6 +1,7 @@
|
|
1
|
-
from typing import Optional
|
1
|
+
from typing import Literal, Optional
|
2
2
|
|
3
3
|
import numpy as np
|
4
|
+
from numpy.polynomial import polynomial as poly
|
4
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
6
|
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
7
|
|
@@ -35,8 +36,14 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
|
35
36
|
Transform the input data by shifting the spectrum.
|
36
37
|
"""
|
37
38
|
|
38
|
-
def __init__(
|
39
|
+
def __init__(
|
40
|
+
self,
|
41
|
+
shift: int = 0,
|
42
|
+
fill_method: Literal["constant", "linear", "quadratic"] = "constant",
|
43
|
+
random_state: Optional[int] = None,
|
44
|
+
):
|
39
45
|
self.shift = shift
|
46
|
+
self.fill_method = fill_method
|
40
47
|
self.random_state = random_state
|
41
48
|
|
42
49
|
def fit(self, X: np.ndarray, y=None) -> "IndexShift":
|
@@ -111,10 +118,82 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
|
111
118
|
|
112
119
|
# Shift each sample by a randomly drawn number of indices
|
113
120
|
for i, x in enumerate(X_):
|
114
|
-
X_[i] = self.
|
121
|
+
X_[i] = self._shift_vector(x)
|
115
122
|
|
116
123
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
117
124
|
|
118
125
|
def _shift_spectrum(self, x) -> np.ndarray:
|
119
126
|
shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
|
120
127
|
return np.roll(x, shift_amount)
|
128
|
+
|
129
|
+
def _shift_vector(
|
130
|
+
self,
|
131
|
+
x: np.ndarray,
|
132
|
+
) -> np.ndarray:
|
133
|
+
"""
|
134
|
+
Shift vector with option to fill missing values.
|
135
|
+
|
136
|
+
Args:
|
137
|
+
x: Input numpy array
|
138
|
+
shift: Number of positions to shift
|
139
|
+
fill_method: Method to fill missing values
|
140
|
+
'constant': fill with first/last value
|
141
|
+
'linear': fill using linear regression
|
142
|
+
'quadratic': fill using quadratic regression
|
143
|
+
|
144
|
+
Returns:
|
145
|
+
Shifted numpy array
|
146
|
+
"""
|
147
|
+
shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
|
148
|
+
|
149
|
+
result = np.roll(x, shift)
|
150
|
+
|
151
|
+
if self.fill_method == "constant":
|
152
|
+
if shift > 0:
|
153
|
+
result[:shift] = x[0]
|
154
|
+
elif shift < 0:
|
155
|
+
result[shift:] = x[-1]
|
156
|
+
|
157
|
+
elif self.fill_method == "linear":
|
158
|
+
if shift > 0:
|
159
|
+
x_ = np.arange(5)
|
160
|
+
coeffs = poly.polyfit(x_, x[:5], 1)
|
161
|
+
|
162
|
+
extrapolate_x = np.arange(-shift, 0)
|
163
|
+
extrapolated_values = poly.polyval(extrapolate_x, coeffs)
|
164
|
+
|
165
|
+
result[:shift] = extrapolated_values
|
166
|
+
|
167
|
+
elif shift < 0:
|
168
|
+
x_ = np.arange(5)
|
169
|
+
coeffs = poly.polyfit(x_, x[-5:], 1)
|
170
|
+
|
171
|
+
extrapolate_x = np.arange(len(x_), len(x_) - shift)
|
172
|
+
extrapolated_values = poly.polyval(extrapolate_x, coeffs)
|
173
|
+
|
174
|
+
result[shift:] = extrapolated_values
|
175
|
+
|
176
|
+
elif self.fill_method == "quadratic":
|
177
|
+
if shift > 0:
|
178
|
+
# Use first 5 values for quadratic regression
|
179
|
+
x_ = np.arange(5)
|
180
|
+
coeffs = poly.polyfit(x_, x[:5], 2)
|
181
|
+
|
182
|
+
# Extrapolate to fill shifted region
|
183
|
+
extrapolate_x = np.arange(-shift, 0)
|
184
|
+
extrapolated_values = poly.polyval(extrapolate_x, coeffs)
|
185
|
+
|
186
|
+
result[:shift] = extrapolated_values
|
187
|
+
|
188
|
+
elif shift < 0:
|
189
|
+
# Use last 5 values for quadratic regression
|
190
|
+
x_ = np.arange(5)
|
191
|
+
coeffs = poly.polyfit(x_, x[-5:], 2)
|
192
|
+
|
193
|
+
# Extrapolate to fill shifted region
|
194
|
+
extrapolate_x = np.arange(len(x_), len(x_) - shift)
|
195
|
+
extrapolated_values = poly.polyval(extrapolate_x, coeffs)
|
196
|
+
|
197
|
+
result[shift:] = extrapolated_values
|
198
|
+
|
199
|
+
return result
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Optional
|
1
|
+
from typing import Literal, Optional
|
2
2
|
|
3
3
|
import numpy as np
|
4
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
@@ -46,18 +46,20 @@ class ExtendedMultiplicativeScatterCorrection(
|
|
46
46
|
model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
|
47
47
|
"""
|
48
48
|
|
49
|
+
ALLOWED_METHODS = ["mean", "median"]
|
50
|
+
|
51
|
+
# TODO: Check that method is valid at instantiation. Right now it is checked in fit because validating in __init__ breaks scikit-learn's check_estimator()
|
52
|
+
|
49
53
|
def __init__(
|
50
54
|
self,
|
51
|
-
|
52
|
-
use_mean: bool = True,
|
53
|
-
use_median: bool = False,
|
55
|
+
method: Literal["mean", "median"] = "mean",
|
54
56
|
order: int = 2,
|
57
|
+
reference: Optional[np.ndarray] = None,
|
55
58
|
weights: Optional[np.ndarray] = None,
|
56
59
|
):
|
57
|
-
self.
|
58
|
-
self.use_mean = use_mean
|
59
|
-
self.use_median = use_median
|
60
|
+
self.method = method
|
60
61
|
self.order = order
|
62
|
+
self.reference = reference
|
61
63
|
self.weights = weights
|
62
64
|
|
63
65
|
def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
|
@@ -104,21 +106,24 @@ class ExtendedMultiplicativeScatterCorrection(
|
|
104
106
|
self.weights_ = np.array(self.weights)
|
105
107
|
return self
|
106
108
|
|
107
|
-
if self.
|
108
|
-
self.reference_ =
|
109
|
+
if self.method == "mean":
|
110
|
+
self.reference_ = X.mean(axis=0)
|
109
111
|
self.indices_ = self._calculate_indices(X[0])
|
110
112
|
self.A_ = self._calculate_A(self.indices_, self.reference_)
|
111
113
|
self.weights_ = np.array(self.weights)
|
112
114
|
return self
|
113
115
|
|
114
|
-
|
115
|
-
self.reference_ =
|
116
|
+
elif self.method == "median":
|
117
|
+
self.reference_ = np.median(X, axis=0)
|
116
118
|
self.indices_ = self._calculate_indices(X[0])
|
117
119
|
self.A_ = self._calculate_A(self.indices_, self.reference_)
|
118
120
|
self.weights_ = np.array(self.weights)
|
119
121
|
return self
|
120
122
|
|
121
|
-
|
123
|
+
else:
|
124
|
+
raise ValueError(
|
125
|
+
f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
|
126
|
+
)
|
122
127
|
|
123
128
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
124
129
|
"""
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Optional
|
1
|
+
from typing import Literal, Optional
|
2
2
|
|
3
3
|
import numpy as np
|
4
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
@@ -37,16 +37,18 @@ class MultiplicativeScatterCorrection(
|
|
37
37
|
|
38
38
|
"""
|
39
39
|
|
40
|
+
ALLOWED_METHODS = ["mean", "median"]
|
41
|
+
|
42
|
+
# TODO: Check that method is valid at instantiation. Right now it is checked in fit because validating in __init__ breaks scikit-learn's check_estimator()
|
43
|
+
|
40
44
|
def __init__(
|
41
45
|
self,
|
46
|
+
method: Literal["mean", "median"] = "mean",
|
42
47
|
reference: Optional[np.ndarray] = None,
|
43
|
-
use_mean: bool = True,
|
44
|
-
use_median: bool = False,
|
45
48
|
weights: Optional[np.ndarray] = None,
|
46
49
|
):
|
50
|
+
self.method = method
|
47
51
|
self.reference = reference
|
48
|
-
self.use_mean = use_mean
|
49
|
-
self.use_median = use_median
|
50
52
|
self.weights = weights
|
51
53
|
|
52
54
|
def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
|
@@ -91,18 +93,23 @@ class MultiplicativeScatterCorrection(
|
|
91
93
|
self.weights_ = np.array(self.weights)
|
92
94
|
return self
|
93
95
|
|
94
|
-
if self.
|
95
|
-
self.reference_ =
|
96
|
+
if self.method == "mean":
|
97
|
+
self.reference_ = X.mean(axis=0)
|
96
98
|
self.A_ = self._calculate_A(self.reference_)
|
97
99
|
self.weights_ = np.array(self.weights)
|
98
100
|
return self
|
99
101
|
|
100
|
-
|
101
|
-
self.reference_ =
|
102
|
+
elif self.method == "median":
|
103
|
+
self.reference_ = np.median(X, axis=0)
|
102
104
|
self.A_ = self._calculate_A(self.reference_)
|
103
105
|
self.weights_ = np.array(self.weights)
|
104
106
|
return self
|
105
107
|
|
108
|
+
else:
|
109
|
+
raise ValueError(
|
110
|
+
f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
|
111
|
+
)
|
112
|
+
|
106
113
|
raise ValueError("No reference was provided")
|
107
114
|
|
108
115
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -1,11 +1,9 @@
|
|
1
1
|
chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
chemotools/augmentation/__init__.py,sha256=
|
3
|
-
chemotools/augmentation/
|
4
|
-
chemotools/augmentation/
|
5
|
-
chemotools/augmentation/
|
6
|
-
chemotools/augmentation/
|
7
|
-
chemotools/augmentation/spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
|
8
|
-
chemotools/augmentation/uniform_noise.py,sha256=8a-AYzEDIkLckL6FK2i8mr_jXnQGcFaKXh_roGCICaQ,3456
|
2
|
+
chemotools/augmentation/__init__.py,sha256=_DiyO7M0xztix8Ea_esxe0xjEYHTneJVJZ52bu5WFpg,248
|
3
|
+
chemotools/augmentation/_add_noise.py,sha256=4SQFiU9Snl0Dz5EfvRjimpndlNGdXxW2ya3YplHL2fg,3502
|
4
|
+
chemotools/augmentation/_baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
|
5
|
+
chemotools/augmentation/_index_shift.py,sha256=w1maDHGLAKSiGAQ8c9yYHofs_PJnxeN0nB1RU-pINcE,6042
|
6
|
+
chemotools/augmentation/_spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
|
9
7
|
chemotools/baseline/__init__.py,sha256=VzoblGg8Hx_FkTc_n7a-ZjGvtKP8JE_NwJKWenGFQkM,584
|
10
8
|
chemotools/baseline/_air_pls.py,sha256=eotXuIEsus7Z-c17oLx8UbiwOHM7DzQJ6rruHnwCGPQ,5067
|
11
9
|
chemotools/baseline/_ar_pls.py,sha256=Cl0tN0DGQA8JpnbIge4cBqT7aGQ7yltppYEDI6tWqiM,4385
|
@@ -35,8 +33,8 @@ chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZH
|
|
35
33
|
chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
|
36
34
|
chemotools/scale/_point_scaler.py,sha256=je-vomAk7g3Q7yxmisQK4-3ndKEKI2wDwLrUiNuwzzA,3505
|
37
35
|
chemotools/scatter/__init__.py,sha256=ftyC_MGurzxpWMie8WlFDGh5ylalK2K3aCSN4qUzQAw,459
|
38
|
-
chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=
|
39
|
-
chemotools/scatter/_multiplicative_scatter_correction.py,sha256=
|
36
|
+
chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=2OitT0QBYepvigmfmfpGWOLjq9y3iycOdTt-WhqLNhs,6801
|
37
|
+
chemotools/scatter/_multiplicative_scatter_correction.py,sha256=XKa19Vk7F6-JxWPMIt7qmxdySdbliAVJwsKwPhY02O0,6097
|
40
38
|
chemotools/scatter/_robust_normal_variate.py,sha256=nPfcvjHEpwkcSCjdvD86WN9q2wVMCeZ2Z8wMzcBpM3Y,3110
|
41
39
|
chemotools/scatter/_standard_normal_variate.py,sha256=22mJzbbZoXQY-_hHAhGO0vzfYwr3oMqaR6xPjJryHtk,2582
|
42
40
|
chemotools/smooth/__init__.py,sha256=G8JvAoBK9d18-k6XgukqN6dbJP-dsEgeDdbKbZdCIkA,265
|
@@ -45,7 +43,7 @@ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYN
|
|
45
43
|
chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
|
46
44
|
chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
|
47
45
|
chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
48
|
-
chemotools-0.1.
|
49
|
-
chemotools-0.1.
|
50
|
-
chemotools-0.1.
|
51
|
-
chemotools-0.1.
|
46
|
+
chemotools-0.1.8.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
|
47
|
+
chemotools-0.1.8.dist-info/METADATA,sha256=gK71zOTZyaFxCqjxXGGKfQi4TvN43AXhBIaWdMWVJh4,5239
|
48
|
+
chemotools-0.1.8.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
49
|
+
chemotools-0.1.8.dist-info/RECORD,,
|
@@ -1,117 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
import numpy as np
|
4
|
-
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
-
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
-
|
7
|
-
|
8
|
-
class ExponentialNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
|
-
"""
|
10
|
-
Add exponential noise to the input data.
|
11
|
-
|
12
|
-
Parameters
|
13
|
-
----------
|
14
|
-
scale: float, default=0.0
|
15
|
-
The scale of the noise to add to the input data.
|
16
|
-
|
17
|
-
random_state : int, default=None
|
18
|
-
The random state to use for the random number generator.
|
19
|
-
|
20
|
-
Attributes
|
21
|
-
----------
|
22
|
-
n_features_in_ : int
|
23
|
-
The number of features in the input data.
|
24
|
-
|
25
|
-
_is_fitted : bool
|
26
|
-
Whether the transformer has been fitted to data.
|
27
|
-
|
28
|
-
Methods
|
29
|
-
-------
|
30
|
-
fit(X, y=None)
|
31
|
-
Fit the transformer to the input data.
|
32
|
-
|
33
|
-
transform(X, y=0, copy=True)
|
34
|
-
Transform the input data by adding random noise.
|
35
|
-
"""
|
36
|
-
|
37
|
-
def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
|
38
|
-
self.scale = scale
|
39
|
-
self.random_state = random_state
|
40
|
-
|
41
|
-
def fit(self, X: np.ndarray, y=None) -> "ExponentialNoise":
|
42
|
-
"""
|
43
|
-
Fit the transformer to the input data.
|
44
|
-
|
45
|
-
Parameters
|
46
|
-
----------
|
47
|
-
X : np.ndarray of shape (n_samples, n_features)
|
48
|
-
The input data to fit the transformer to.
|
49
|
-
|
50
|
-
y : None
|
51
|
-
Ignored.
|
52
|
-
|
53
|
-
Returns
|
54
|
-
-------
|
55
|
-
self : ExponentialNoise
|
56
|
-
The fitted transformer.
|
57
|
-
"""
|
58
|
-
# Check that X is a 2D array and has only finite values
|
59
|
-
X = validate_data(
|
60
|
-
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
61
|
-
)
|
62
|
-
# Set the number of features
|
63
|
-
self.n_features_in_ = X.shape[1]
|
64
|
-
|
65
|
-
# Set the fitted attribute to True
|
66
|
-
self._is_fitted = True
|
67
|
-
|
68
|
-
# Instantiate the random number generator
|
69
|
-
self._rng = np.random.default_rng(self.random_state)
|
70
|
-
|
71
|
-
return self
|
72
|
-
|
73
|
-
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
74
|
-
"""
|
75
|
-
Transform the input data by adding random exponential noise.
|
76
|
-
|
77
|
-
Parameters
|
78
|
-
----------
|
79
|
-
X : np.ndarray of shape (n_samples, n_features)
|
80
|
-
The input data to transform.
|
81
|
-
|
82
|
-
y : None
|
83
|
-
Ignored.
|
84
|
-
|
85
|
-
Returns
|
86
|
-
-------
|
87
|
-
X_ : np.ndarray of shape (n_samples, n_features)
|
88
|
-
The transformed data.
|
89
|
-
"""
|
90
|
-
# Check that the estimator is fitted
|
91
|
-
check_is_fitted(self, "_is_fitted")
|
92
|
-
|
93
|
-
# Check that X is a 2D array and has only finite values
|
94
|
-
X_ = validate_data(
|
95
|
-
self,
|
96
|
-
X,
|
97
|
-
y="no_validation",
|
98
|
-
ensure_2d=True,
|
99
|
-
copy=True,
|
100
|
-
reset=False,
|
101
|
-
dtype=np.float64,
|
102
|
-
)
|
103
|
-
|
104
|
-
# Check that the number of features is the same as the fitted data
|
105
|
-
if X_.shape[1] != self.n_features_in_:
|
106
|
-
raise ValueError(
|
107
|
-
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
108
|
-
)
|
109
|
-
|
110
|
-
# Calculate the standard exponential variate
|
111
|
-
for i, x in enumerate(X_):
|
112
|
-
X_[i] = self._add_random_noise(x)
|
113
|
-
|
114
|
-
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
115
|
-
|
116
|
-
def _add_random_noise(self, x) -> np.ndarray:
|
117
|
-
return x + self._rng.exponential(self.scale, size=x.shape)
|
@@ -1,124 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
import numpy as np
|
4
|
-
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
-
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
-
|
7
|
-
|
8
|
-
class UniformNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
9
|
-
"""
|
10
|
-
Add uniform noise to the input data.
|
11
|
-
|
12
|
-
Parameters
|
13
|
-
----------
|
14
|
-
min : float, default=0.0
|
15
|
-
The lower bound of the uniform distribution.
|
16
|
-
|
17
|
-
max : float, default=0.0
|
18
|
-
The upper bound of the uniform distribution.
|
19
|
-
|
20
|
-
random_state : int, default=None
|
21
|
-
The random state to use for the random number generator.
|
22
|
-
|
23
|
-
Attributes
|
24
|
-
----------
|
25
|
-
n_features_in_ : int
|
26
|
-
The number of features in the input data.
|
27
|
-
|
28
|
-
_is_fitted : bool
|
29
|
-
Whether the transformer has been fitted to data.
|
30
|
-
|
31
|
-
Methods
|
32
|
-
-------
|
33
|
-
fit(X, y=None)
|
34
|
-
Fit the transformer to the input data.
|
35
|
-
|
36
|
-
transform(X, y=0, copy=True)
|
37
|
-
Transform the input data by adding random noise.
|
38
|
-
"""
|
39
|
-
|
40
|
-
def __init__(
|
41
|
-
self, min: float = 0.0, max: float = 0.0, random_state: Optional[int] = None
|
42
|
-
):
|
43
|
-
self.min = min
|
44
|
-
self.max = max
|
45
|
-
self.random_state = random_state
|
46
|
-
|
47
|
-
def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
|
48
|
-
"""
|
49
|
-
Fit the transformer to the input data.
|
50
|
-
|
51
|
-
Parameters
|
52
|
-
----------
|
53
|
-
X : np.ndarray of shape (n_samples, n_features)
|
54
|
-
The input data to fit the transformer to.
|
55
|
-
|
56
|
-
y : None
|
57
|
-
Ignored.
|
58
|
-
|
59
|
-
Returns
|
60
|
-
-------
|
61
|
-
self : UniformNoise
|
62
|
-
The fitted transformer.
|
63
|
-
"""
|
64
|
-
# Check that X is a 2D array and has only finite values
|
65
|
-
X = validate_data(
|
66
|
-
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
67
|
-
)
|
68
|
-
|
69
|
-
# Set the number of features
|
70
|
-
self.n_features_in_ = X.shape[1]
|
71
|
-
|
72
|
-
# Set the fitted attribute to True
|
73
|
-
self._is_fitted = True
|
74
|
-
|
75
|
-
# Instantiate the random number generator
|
76
|
-
self._rng = np.random.default_rng(self.random_state)
|
77
|
-
|
78
|
-
return self
|
79
|
-
|
80
|
-
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
81
|
-
"""
|
82
|
-
Transform the input data by adding random uniform noise.
|
83
|
-
|
84
|
-
Parameters
|
85
|
-
----------
|
86
|
-
X : np.ndarray of shape (n_samples, n_features)
|
87
|
-
The input data to transform.
|
88
|
-
|
89
|
-
y : None
|
90
|
-
Ignored.
|
91
|
-
|
92
|
-
Returns
|
93
|
-
-------
|
94
|
-
X_ : np.ndarray of shape (n_samples, n_features)
|
95
|
-
The transformed data.
|
96
|
-
"""
|
97
|
-
# Check that the estimator is fitted
|
98
|
-
check_is_fitted(self, "_is_fitted")
|
99
|
-
|
100
|
-
# Check that X is a 2D array and has only finite values
|
101
|
-
X_ = validate_data(
|
102
|
-
self,
|
103
|
-
X,
|
104
|
-
y="no_validation",
|
105
|
-
ensure_2d=True,
|
106
|
-
copy=True,
|
107
|
-
reset=False,
|
108
|
-
dtype=np.float64,
|
109
|
-
)
|
110
|
-
|
111
|
-
# Check that the number of features is the same as the fitted data
|
112
|
-
if X_.shape[1] != self.n_features_in_:
|
113
|
-
raise ValueError(
|
114
|
-
f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
|
115
|
-
)
|
116
|
-
|
117
|
-
# Calculate the standard uniform variate
|
118
|
-
for i, x in enumerate(X_):
|
119
|
-
X_[i] = self._add_random_noise(x)
|
120
|
-
|
121
|
-
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
122
|
-
|
123
|
-
def _add_random_noise(self, x) -> np.ndarray:
|
124
|
-
return x + self._rng.uniform(self.min, self.max, size=x.shape)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|