chemotools 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,12 @@
1
- from .baseline_shift import BaselineShift
2
- from .exponential_noise import ExponentialNoise
3
- from .normal_noise import NormalNoise
4
- from .index_shift import IndexShift
5
- from .spectrum_scale import SpectrumScale
6
- from .uniform_noise import UniformNoise
1
+ from ._add_noise import AddNoise
2
+ from ._baseline_shift import BaselineShift
3
+ from ._index_shift import IndexShift
4
+ from ._spectrum_scale import SpectrumScale
7
5
 
8
6
 
9
7
  __all__ = [
8
+ "AddNoise",
10
9
  "BaselineShift",
11
- "ExponentialNoise",
12
- "NormalNoise",
13
10
  "IndexShift",
14
11
  "SpectrumScale",
15
- "UniformNoise",
16
12
  ]
@@ -1,44 +1,26 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
5
  from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
8
+ class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  Add noise drawn from a gaussian, poisson or exponential distribution to the input data.
11
-
12
- Parameters
13
- ----------
14
- scale : float, default=0.0
15
- The scale of the noise to add to the input data.
16
-
17
- random_state : int, default=None
18
- The random state to use for the random number generator.
19
-
20
- Attributes
21
- ----------
22
- n_features_in_ : int
23
- The number of features in the input data.
24
-
25
- _is_fitted : bool
26
- Whether the transformer has been fitted to data.
27
-
28
- Methods
29
- -------
30
- fit(X, y=None)
31
- Fit the transformer to the input data.
32
-
33
- transform(X, y=0, copy=True)
34
- Transform the input data by adding random noise.
35
11
  """
36
12
 
37
- def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
13
+ def __init__(
14
+ self,
15
+ noise_distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian",
16
+ scale: float = 0.0,
17
+ random_state: Optional[int] = None,
18
+ ):
19
+ self.noise_distribution = noise_distribution
38
20
  self.scale = scale
39
21
  self.random_state = random_state
40
22
 
41
- def fit(self, X: np.ndarray, y=None) -> "NormalNoise":
23
+ def fit(self, X: np.ndarray, y=None) -> "AddNoise":
42
24
  """
43
25
  Fit the transformer to the input data.
44
26
 
@@ -110,9 +92,23 @@ class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
110
92
 
111
93
  # Add noise to each row of X_ according to the selected noise distribution
112
94
  for i, x in enumerate(X_):
113
- X_[i] = self._add_random_noise(x)
95
+ match self.noise_distribution:
96
+ case "gaussian":
97
+ X_[i] = self._add_gaussian_noise(x)
98
+ case "poisson":
99
+ X_[i] = self._add_poisson_noise(x)
100
+ case "exponential":
101
+ X_[i] = self._add_exponential_noise(x)
102
+ case _:
103
+ raise ValueError("Invalid noise distribution")
114
104
 
115
105
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
116
106
 
117
- def _add_random_noise(self, x) -> np.ndarray:
107
+ def _add_gaussian_noise(self, x) -> np.ndarray:
118
108
  return x + self._rng.normal(0, self.scale, size=x.shape)
109
+
110
+ def _add_poisson_noise(self, x) -> np.ndarray:
111
+ return self._rng.poisson(x, size=x.shape) * self.scale
112
+
113
+ def _add_exponential_noise(self, x) -> np.ndarray:
114
+ return x + self._rng.exponential(self.scale, size=x.shape)
@@ -1,6 +1,7 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
+ from numpy.polynomial import polynomial as poly
4
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
6
  from sklearn.utils.validation import check_is_fitted, validate_data
6
7
 
@@ -35,8 +36,14 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
35
36
  Transform the input data by shifting the spectrum.
36
37
  """
37
38
 
38
- def __init__(self, shift: int = 0, random_state: Optional[int] = None):
39
+ def __init__(
40
+ self,
41
+ shift: int = 0,
42
+ fill_method: Literal["constant", "linear", "quadratic"] = "constant",
43
+ random_state: Optional[int] = None,
44
+ ):
39
45
  self.shift = shift
46
+ self.fill_method = fill_method
40
47
  self.random_state = random_state
41
48
 
42
49
  def fit(self, X: np.ndarray, y=None) -> "IndexShift":
@@ -111,10 +118,82 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
111
118
 
112
119
  # Shift each row of X_ by a randomly drawn number of positions
113
120
  for i, x in enumerate(X_):
114
- X_[i] = self._shift_spectrum(x)
121
+ X_[i] = self._shift_vector(x)
115
122
 
116
123
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
117
124
 
118
125
  def _shift_spectrum(self, x) -> np.ndarray:
119
126
  shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
120
127
  return np.roll(x, shift_amount)
128
+
129
+ def _shift_vector(
130
+ self,
131
+ x: np.ndarray,
132
+ ) -> np.ndarray:
133
+ """
134
+ Shift vector with option to fill missing values.
135
+
136
+ Args:
137
+ x: Input numpy array
138
+ self.shift: Maximum number of positions to shift; the actual shift is drawn at random from [-shift, shift]
139
+ self.fill_method: Method to fill the positions vacated by the shift
140
+ 'constant': fill with first/last value
141
+ 'linear': fill using linear regression
142
+ 'quadratic': fill using quadratic regression
143
+
144
+ Returns:
145
+ Shifted numpy array
146
+ """
147
+ shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
148
+
149
+ result = np.roll(x, shift)
150
+
151
+ if self.fill_method == "constant":
152
+ if shift > 0:
153
+ result[:shift] = x[0]
154
+ elif shift < 0:
155
+ result[shift:] = x[-1]
156
+
157
+ elif self.fill_method == "linear":
158
+ if shift > 0:
159
+ x_ = np.arange(5)
160
+ coeffs = poly.polyfit(x_, x[:5], 1)
161
+
162
+ extrapolate_x = np.arange(-shift, 0)
163
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
164
+
165
+ result[:shift] = extrapolated_values
166
+
167
+ elif shift < 0:
168
+ x_ = np.arange(5)
169
+ coeffs = poly.polyfit(x_, x[-5:], 1)
170
+
171
+ extrapolate_x = np.arange(len(x_), len(x_) - shift)
172
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
173
+
174
+ result[shift:] = extrapolated_values
175
+
176
+ elif self.fill_method == "quadratic":
177
+ if shift > 0:
178
+ # Use first 5 values for quadratic regression
179
+ x_ = np.arange(5)
180
+ coeffs = poly.polyfit(x_, x[:5], 2)
181
+
182
+ # Extrapolate to fill shifted region
183
+ extrapolate_x = np.arange(-shift, 0)
184
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
185
+
186
+ result[:shift] = extrapolated_values
187
+
188
+ elif shift < 0:
189
+ # Use last 5 values for quadratic regression
190
+ x_ = np.arange(5)
191
+ coeffs = poly.polyfit(x_, x[-5:], 2)
192
+
193
+ # Extrapolate to fill shifted region
194
+ extrapolate_x = np.arange(len(x_), len(x_) - shift)
195
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
196
+
197
+ result[shift:] = extrapolated_values
198
+
199
+ return result
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -46,18 +46,20 @@ class ExtendedMultiplicativeScatterCorrection(
46
46
  model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
47
47
  """
48
48
 
49
+ ALLOWED_METHODS = ["mean", "median"]
50
+
51
+ # TODO: Validate that method is one of ALLOWED_METHODS at instantiation. It is currently checked in fit() because checking at instantiation breaks scikit-learn's check_estimator().
52
+
49
53
  def __init__(
50
54
  self,
51
- reference: Optional[np.ndarray] = None,
52
- use_mean: bool = True,
53
- use_median: bool = False,
55
+ method: Literal["mean", "median"] = "mean",
54
56
  order: int = 2,
57
+ reference: Optional[np.ndarray] = None,
55
58
  weights: Optional[np.ndarray] = None,
56
59
  ):
57
- self.reference = reference
58
- self.use_mean = use_mean
59
- self.use_median = use_median
60
+ self.method = method
60
61
  self.order = order
62
+ self.reference = reference
61
63
  self.weights = weights
62
64
 
63
65
  def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
@@ -104,21 +106,24 @@ class ExtendedMultiplicativeScatterCorrection(
104
106
  self.weights_ = np.array(self.weights)
105
107
  return self
106
108
 
107
- if self.use_median:
108
- self.reference_ = np.median(X, axis=0)
109
+ if self.method == "mean":
110
+ self.reference_ = X.mean(axis=0)
109
111
  self.indices_ = self._calculate_indices(X[0])
110
112
  self.A_ = self._calculate_A(self.indices_, self.reference_)
111
113
  self.weights_ = np.array(self.weights)
112
114
  return self
113
115
 
114
- if self.use_mean:
115
- self.reference_ = X.mean(axis=0)
116
+ elif self.method == "median":
117
+ self.reference_ = np.median(X, axis=0)
116
118
  self.indices_ = self._calculate_indices(X[0])
117
119
  self.A_ = self._calculate_A(self.indices_, self.reference_)
118
120
  self.weights_ = np.array(self.weights)
119
121
  return self
120
122
 
121
- raise ValueError("No reference was provided")
123
+ else:
124
+ raise ValueError(
125
+ f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
126
+ )
122
127
 
123
128
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
124
129
  """
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -37,16 +37,18 @@ class MultiplicativeScatterCorrection(
37
37
 
38
38
  """
39
39
 
40
+ ALLOWED_METHODS = ["mean", "median"]
41
+
42
+ # TODO: Validate that method is one of ALLOWED_METHODS at instantiation. It is currently checked in fit() because checking at instantiation breaks scikit-learn's check_estimator().
43
+
40
44
  def __init__(
41
45
  self,
46
+ method: Literal["mean", "median"] = "mean",
42
47
  reference: Optional[np.ndarray] = None,
43
- use_mean: bool = True,
44
- use_median: bool = False,
45
48
  weights: Optional[np.ndarray] = None,
46
49
  ):
50
+ self.method = method
47
51
  self.reference = reference
48
- self.use_mean = use_mean
49
- self.use_median = use_median
50
52
  self.weights = weights
51
53
 
52
54
  def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
@@ -91,18 +93,23 @@ class MultiplicativeScatterCorrection(
91
93
  self.weights_ = np.array(self.weights)
92
94
  return self
93
95
 
94
- if self.use_median:
95
- self.reference_ = np.median(X, axis=0)
96
+ if self.method == "mean":
97
+ self.reference_ = X.mean(axis=0)
96
98
  self.A_ = self._calculate_A(self.reference_)
97
99
  self.weights_ = np.array(self.weights)
98
100
  return self
99
101
 
100
- if self.use_mean:
101
- self.reference_ = X.mean(axis=0)
102
+ elif self.method == "median":
103
+ self.reference_ = np.median(X, axis=0)
102
104
  self.A_ = self._calculate_A(self.reference_)
103
105
  self.weights_ = np.array(self.weights)
104
106
  return self
105
107
 
108
+ else:
109
+ raise ValueError(
110
+ f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
111
+ )
112
+
106
113
  raise ValueError("No reference was provided")
107
114
 
108
115
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: chemotools
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
5
  License: MIT
6
6
  Author: Pau Cabaneros
@@ -1,11 +1,9 @@
1
1
  chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- chemotools/augmentation/__init__.py,sha256=xIUoWov_aluoW5L3zpVAahyPdkWA5erApW-duzdE_9A,385
3
- chemotools/augmentation/baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
4
- chemotools/augmentation/exponential_noise.py,sha256=fhZ4zQGGqmW-OiSu388th6IhgXrFj1xOguqKYAgj8Y4,3348
5
- chemotools/augmentation/index_shift.py,sha256=DWVfnxCUgm2NNQfASTpqNoMkfhlW1WZT8EoWVsSSF4c,3459
6
- chemotools/augmentation/normal_noise.py,sha256=-se2Xv1pAWt9HY7H5yC4XlxRArPKZWGeTy2MdyN4lBE,3318
7
- chemotools/augmentation/spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
8
- chemotools/augmentation/uniform_noise.py,sha256=8a-AYzEDIkLckL6FK2i8mr_jXnQGcFaKXh_roGCICaQ,3456
2
+ chemotools/augmentation/__init__.py,sha256=_DiyO7M0xztix8Ea_esxe0xjEYHTneJVJZ52bu5WFpg,248
3
+ chemotools/augmentation/_add_noise.py,sha256=4SQFiU9Snl0Dz5EfvRjimpndlNGdXxW2ya3YplHL2fg,3502
4
+ chemotools/augmentation/_baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
5
+ chemotools/augmentation/_index_shift.py,sha256=w1maDHGLAKSiGAQ8c9yYHofs_PJnxeN0nB1RU-pINcE,6042
6
+ chemotools/augmentation/_spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
9
7
  chemotools/baseline/__init__.py,sha256=VzoblGg8Hx_FkTc_n7a-ZjGvtKP8JE_NwJKWenGFQkM,584
10
8
  chemotools/baseline/_air_pls.py,sha256=eotXuIEsus7Z-c17oLx8UbiwOHM7DzQJ6rruHnwCGPQ,5067
11
9
  chemotools/baseline/_ar_pls.py,sha256=Cl0tN0DGQA8JpnbIge4cBqT7aGQ7yltppYEDI6tWqiM,4385
@@ -35,8 +33,8 @@ chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZH
35
33
  chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
36
34
  chemotools/scale/_point_scaler.py,sha256=je-vomAk7g3Q7yxmisQK4-3ndKEKI2wDwLrUiNuwzzA,3505
37
35
  chemotools/scatter/__init__.py,sha256=ftyC_MGurzxpWMie8WlFDGh5ylalK2K3aCSN4qUzQAw,459
38
- chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=7OpOcvWX1hlMUR18tC29pkSiADLZViDrTh-wro738E4,6560
39
- chemotools/scatter/_multiplicative_scatter_correction.py,sha256=nPMPYKHl6-U--GAuQdZL8KVNPlr3V52teUAoJ0iRs3g,5801
36
+ chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=2OitT0QBYepvigmfmfpGWOLjq9y3iycOdTt-WhqLNhs,6801
37
+ chemotools/scatter/_multiplicative_scatter_correction.py,sha256=XKa19Vk7F6-JxWPMIt7qmxdySdbliAVJwsKwPhY02O0,6097
40
38
  chemotools/scatter/_robust_normal_variate.py,sha256=nPfcvjHEpwkcSCjdvD86WN9q2wVMCeZ2Z8wMzcBpM3Y,3110
41
39
  chemotools/scatter/_standard_normal_variate.py,sha256=22mJzbbZoXQY-_hHAhGO0vzfYwr3oMqaR6xPjJryHtk,2582
42
40
  chemotools/smooth/__init__.py,sha256=G8JvAoBK9d18-k6XgukqN6dbJP-dsEgeDdbKbZdCIkA,265
@@ -45,7 +43,7 @@ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYN
45
43
  chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
46
44
  chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
47
45
  chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- chemotools-0.1.6.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
49
- chemotools-0.1.6.dist-info/METADATA,sha256=79TZ--QC_SOHj3ou6bDaRYsJsQoFS0sx2Rfe2BUOrG4,5239
50
- chemotools-0.1.6.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
51
- chemotools-0.1.6.dist-info/RECORD,,
46
+ chemotools-0.1.8.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
47
+ chemotools-0.1.8.dist-info/METADATA,sha256=gK71zOTZyaFxCqjxXGGKfQi4TvN43AXhBIaWdMWVJh4,5239
48
+ chemotools-0.1.8.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
49
+ chemotools-0.1.8.dist-info/RECORD,,
@@ -1,117 +0,0 @@
1
- from typing import Optional
2
-
3
- import numpy as np
4
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
- from sklearn.utils.validation import check_is_fitted, validate_data
6
-
7
-
8
- class ExponentialNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
- """
10
- Add exponential noise to the input data.
11
-
12
- Parameters
13
- ----------
14
- scale: float, default=0.0
15
- The scale of the noise to add to the input data.
16
-
17
- random_state : int, default=None
18
- The random state to use for the random number generator.
19
-
20
- Attributes
21
- ----------
22
- n_features_in_ : int
23
- The number of features in the input data.
24
-
25
- _is_fitted : bool
26
- Whether the transformer has been fitted to data.
27
-
28
- Methods
29
- -------
30
- fit(X, y=None)
31
- Fit the transformer to the input data.
32
-
33
- transform(X, y=0, copy=True)
34
- Transform the input data by adding random noise.
35
- """
36
-
37
- def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
38
- self.scale = scale
39
- self.random_state = random_state
40
-
41
- def fit(self, X: np.ndarray, y=None) -> "ExponentialNoise":
42
- """
43
- Fit the transformer to the input data.
44
-
45
- Parameters
46
- ----------
47
- X : np.ndarray of shape (n_samples, n_features)
48
- The input data to fit the transformer to.
49
-
50
- y : None
51
- Ignored.
52
-
53
- Returns
54
- -------
55
- self : ExponentialNoise
56
- The fitted transformer.
57
- """
58
- # Check that X is a 2D array and has only finite values
59
- X = validate_data(
60
- self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
61
- )
62
- # Set the number of features
63
- self.n_features_in_ = X.shape[1]
64
-
65
- # Set the fitted attribute to True
66
- self._is_fitted = True
67
-
68
- # Instantiate the random number generator
69
- self._rng = np.random.default_rng(self.random_state)
70
-
71
- return self
72
-
73
- def transform(self, X: np.ndarray, y=None) -> np.ndarray:
74
- """
75
- Transform the input data by adding random exponential noise.
76
-
77
- Parameters
78
- ----------
79
- X : np.ndarray of shape (n_samples, n_features)
80
- The input data to transform.
81
-
82
- y : None
83
- Ignored.
84
-
85
- Returns
86
- -------
87
- X_ : np.ndarray of shape (n_samples, n_features)
88
- The transformed data.
89
- """
90
- # Check that the estimator is fitted
91
- check_is_fitted(self, "_is_fitted")
92
-
93
- # Check that X is a 2D array and has only finite values
94
- X_ = validate_data(
95
- self,
96
- X,
97
- y="no_validation",
98
- ensure_2d=True,
99
- copy=True,
100
- reset=False,
101
- dtype=np.float64,
102
- )
103
-
104
- # Check that the number of features is the same as the fitted data
105
- if X_.shape[1] != self.n_features_in_:
106
- raise ValueError(
107
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
108
- )
109
-
110
- # Calculate the standard exponential variate
111
- for i, x in enumerate(X_):
112
- X_[i] = self._add_random_noise(x)
113
-
114
- return X_.reshape(-1, 1) if X_.ndim == 1 else X_
115
-
116
- def _add_random_noise(self, x) -> np.ndarray:
117
- return x + self._rng.exponential(self.scale, size=x.shape)
@@ -1,124 +0,0 @@
1
- from typing import Optional
2
-
3
- import numpy as np
4
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
- from sklearn.utils.validation import check_is_fitted, validate_data
6
-
7
-
8
- class UniformNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
- """
10
- Add uniform noise to the input data.
11
-
12
- Parameters
13
- ----------
14
- min : float, default=0.0
15
- The lower bound of the uniform distribution.
16
-
17
- max : float, default=0.0
18
- The upper bound of the uniform distribution.
19
-
20
- random_state : int, default=None
21
- The random state to use for the random number generator.
22
-
23
- Attributes
24
- ----------
25
- n_features_in_ : int
26
- The number of features in the input data.
27
-
28
- _is_fitted : bool
29
- Whether the transformer has been fitted to data.
30
-
31
- Methods
32
- -------
33
- fit(X, y=None)
34
- Fit the transformer to the input data.
35
-
36
- transform(X, y=0, copy=True)
37
- Transform the input data by adding random noise.
38
- """
39
-
40
- def __init__(
41
- self, min: float = 0.0, max: float = 0.0, random_state: Optional[int] = None
42
- ):
43
- self.min = min
44
- self.max = max
45
- self.random_state = random_state
46
-
47
- def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
48
- """
49
- Fit the transformer to the input data.
50
-
51
- Parameters
52
- ----------
53
- X : np.ndarray of shape (n_samples, n_features)
54
- The input data to fit the transformer to.
55
-
56
- y : None
57
- Ignored.
58
-
59
- Returns
60
- -------
61
- self : UniformNoise
62
- The fitted transformer.
63
- """
64
- # Check that X is a 2D array and has only finite values
65
- X = validate_data(
66
- self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
67
- )
68
-
69
- # Set the number of features
70
- self.n_features_in_ = X.shape[1]
71
-
72
- # Set the fitted attribute to True
73
- self._is_fitted = True
74
-
75
- # Instantiate the random number generator
76
- self._rng = np.random.default_rng(self.random_state)
77
-
78
- return self
79
-
80
- def transform(self, X: np.ndarray, y=None) -> np.ndarray:
81
- """
82
- Transform the input data by adding random uniform noise.
83
-
84
- Parameters
85
- ----------
86
- X : np.ndarray of shape (n_samples, n_features)
87
- The input data to transform.
88
-
89
- y : None
90
- Ignored.
91
-
92
- Returns
93
- -------
94
- X_ : np.ndarray of shape (n_samples, n_features)
95
- The transformed data.
96
- """
97
- # Check that the estimator is fitted
98
- check_is_fitted(self, "_is_fitted")
99
-
100
- # Check that X is a 2D array and has only finite values
101
- X_ = validate_data(
102
- self,
103
- X,
104
- y="no_validation",
105
- ensure_2d=True,
106
- copy=True,
107
- reset=False,
108
- dtype=np.float64,
109
- )
110
-
111
- # Check that the number of features is the same as the fitted data
112
- if X_.shape[1] != self.n_features_in_:
113
- raise ValueError(
114
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
115
- )
116
-
117
- # Calculate the standard uniform variate
118
- for i, x in enumerate(X_):
119
- X_[i] = self._add_random_noise(x)
120
-
121
- return X_.reshape(-1, 1) if X_.ndim == 1 else X_
122
-
123
- def _add_random_noise(self, x) -> np.ndarray:
124
- return x + self._rng.uniform(self.min, self.max, size=x.shape)