chemotools 0.1.8__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chemotools-0.1.8 → chemotools-0.1.9}/PKG-INFO +1 -1
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/augmentation/__init__.py +2 -0
- chemotools-0.1.9/chemotools/augmentation/_add_noise.py +135 -0
- chemotools-0.1.9/chemotools/augmentation/_fractional_shift.py +203 -0
- chemotools-0.1.9/chemotools/augmentation/_index_shift.py +214 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/pyproject.toml +1 -1
- chemotools-0.1.8/chemotools/augmentation/_add_noise.py +0 -114
- chemotools-0.1.8/chemotools/augmentation/_index_shift.py +0 -199
- {chemotools-0.1.8 → chemotools-0.1.9}/LICENSE +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/README.md +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/augmentation/_baseline_shift.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/augmentation/_spectrum_scale.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/_air_pls.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/_ar_pls.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/_constant_baseline_correction.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/_cubic_spline_correction.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/_linear_correction.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/_non_negative.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/_polynomial_correction.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/baseline/_subtract_reference.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/_base.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/data/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/data/coffee_labels.csv +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/data/coffee_spectra.csv +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/data/fermentation_hplc.csv +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/data/fermentation_spectra.csv +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/data/train_hplc.csv +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/datasets/data/train_spectra.csv +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/derivative/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/derivative/_norris_william.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/derivative/_savitzky_golay.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/feature_selection/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/feature_selection/_index_selector.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/feature_selection/_range_cut.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scale/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scale/_min_max_scaler.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scale/_norm_scaler.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scale/_point_scaler.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scatter/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scatter/_extended_multiplicative_scatter_correction.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scatter/_multiplicative_scatter_correction.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scatter/_robust_normal_variate.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scatter/_standard_normal_variate.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/smooth/__init__.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/smooth/_mean_filter.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/smooth/_median_filter.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/smooth/_savitzky_golay_filter.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/smooth/_whittaker_smooth.py +0 -0
- {chemotools-0.1.8 → chemotools-0.1.9}/chemotools/utils/__init__.py +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
from ._add_noise import AddNoise
|
2
2
|
from ._baseline_shift import BaselineShift
|
3
|
+
from ._fractional_shift import FractionalShift
|
3
4
|
from ._index_shift import IndexShift
|
4
5
|
from ._spectrum_scale import SpectrumScale
|
5
6
|
|
@@ -7,6 +8,7 @@ from ._spectrum_scale import SpectrumScale
|
|
7
8
|
__all__ = [
|
8
9
|
"AddNoise",
|
9
10
|
"BaselineShift",
|
11
|
+
"FractionalShift",
|
10
12
|
"IndexShift",
|
11
13
|
"SpectrumScale",
|
12
14
|
]
|
@@ -0,0 +1,135 @@
|
|
1
|
+
from typing import Literal, Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
+
|
7
|
+
|
8
|
+
class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """Augment data by adding randomly sampled noise to every element.

    The noise is drawn from the selected probability distribution and added
    element-wise to the input. Gaussian, Poisson and exponential noise are
    available.

    Parameters
    ----------
    distribution : {'gaussian', 'poisson', 'exponential'}, default='gaussian'
        Name of the distribution the noise is sampled from.
    scale : float, default=0.0
        Its meaning depends on ``distribution``:
        - gaussian: standard deviation of the zero-mean noise
        - poisson: factor multiplied with the Poisson samples (the Poisson
          rate is the input value itself)
        - exponential: scale parameter (1/λ)
    random_state : int, optional
        Seed handed to ``numpy.random.default_rng`` when ``fit`` is called.

    Attributes
    ----------
    n_features_in_ : int
        Number of features seen during ``fit``.
    """

    def __init__(
        self,
        distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian",
        scale: float = 0.0,
        random_state: Optional[int] = None,
    ):
        self.distribution = distribution
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "AddNoise":
        """Validate the training data and create the random generator.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : None
            Ignored. Present for API consistency.

        Returns
        -------
        self : AddNoise
            The fitted transformer.

        Raises
        ------
        ValueError
            If X does not pass validation as a 2D float array.
        """
        # Only the side effects of validation are needed here (with
        # reset=True the input feature count is recorded on the estimator);
        # the validated array itself is not used afterwards.
        validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            reset=True,
            dtype=np.float64,
        )

        # A fresh generator per fit makes repeated fits reproducible.
        self._rng = np.random.default_rng(self.random_state)
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Return a noisy copy of the input data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to which noise is added.
        y : None
            Ignored. Present for API consistency.

        Returns
        -------
        X_noisy : ndarray of shape (n_samples, n_features)
            The input with noise added.

        Raises
        ------
        ValueError
            If X does not match the number of features seen in ``fit`` or
            if ``distribution`` is not one of the supported names.
        """
        check_is_fitted(self, "n_features_in_")

        # copy=True keeps the caller's array untouched.
        validated = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Map each supported distribution name to its sampler.
        samplers = {
            "gaussian": self._add_gaussian_noise,
            "poisson": self._add_poisson_noise,
            "exponential": self._add_exponential_noise,
        }
        if self.distribution not in samplers:
            raise ValueError(
                f"Invalid noise distribution: {self.distribution}. "
                "Expected one of: gaussian, poisson, exponential"
            )

        add_noise = samplers[self.distribution]
        return add_noise(validated)

    def _add_gaussian_noise(self, X: np.ndarray) -> np.ndarray:
        """Add zero-mean Gaussian noise with standard deviation ``scale``."""
        noise = self._rng.normal(0, self.scale, size=X.shape)
        return X + noise

    def _add_poisson_noise(self, X: np.ndarray) -> np.ndarray:
        """Add Poisson samples, whose rate is X itself, multiplied by ``scale``."""
        counts = self._rng.poisson(X, size=X.shape)
        return X + counts * self.scale

    def _add_exponential_noise(self, X: np.ndarray) -> np.ndarray:
        """Add exponentially distributed noise with scale ``scale``."""
        noise = self._rng.exponential(self.scale, size=X.shape)
        return X + noise
|
@@ -0,0 +1,203 @@
|
|
1
|
+
from typing import Literal, Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from scipy.interpolate import CubicSpline
|
5
|
+
from scipy import stats
|
6
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
7
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
8
|
+
|
9
|
+
|
10
|
+
class FractionalShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Shift the spectrum by a fractional amount, allowing shifts below one index.

    Each spectrum is resampled with a cubic spline at positions offset by a
    random shift. Positions that fall outside the original index range
    ``[0, n_features - 1]`` are filled according to ``padding_mode``.

    Parameters
    ----------
    shift : float, default=0.0
        Maximum amount by which the data is randomly shifted.
        The actual shift is a random float between -shift and shift.

    padding_mode : {'zeros', 'constant', 'extend', 'mirror', 'linear'}, default='linear'
        Specifies how to fill positions outside the original range:
        - 'zeros': Pads with zeros.
        - 'constant': Pads with a constant value defined by `pad_value`.
        - 'extend': Extends using edge values.
        - 'mirror': Mirrors the signal.
        - 'linear': Uses linear regression on up to 5 edge points to
          extrapolate values.

        Circular shifting ('wrap') is not implemented by this transformer.

    pad_value : float, default=0.0
        The value used for padding when `padding_mode='constant'`.

    random_state : int, optional, default=None
        The random seed for reproducibility.

    Attributes
    ----------
    n_features_in_ : int
        The number of features seen during ``fit``.
    """

    # Padding modes handled by ``_shift_signal``.
    _PADDING_MODES = ("zeros", "constant", "extend", "mirror", "linear")

    # Number of edge points used by the 'linear' regression.
    _N_FIT_POINTS = 5

    def __init__(
        self,
        shift: float = 0.0,
        padding_mode: Literal[
            "zeros", "constant", "extend", "mirror", "linear"
        ] = "linear",
        pad_value: float = 0.0,
        random_state: Optional[int] = None,
    ):
        self.shift = shift
        self.padding_mode = padding_mode
        self.pad_value = pad_value
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "FractionalShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : FractionalShift
            The fitted transformer.
        """
        # Validation with reset=True records the input feature count.
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # A fresh generator per fit makes repeated fits reproducible.
        self._rng = np.random.default_rng(self.random_state)
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        Every row receives its own independently drawn shift.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data with the applied shifts.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not one of the supported modes.
        """
        check_is_fitted(self, "n_features_in_")

        # copy=True so the rows can be overwritten without touching the input.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        for i, x in enumerate(X_):
            X_[i] = self._shift_signal(x)

        # validate_data(ensure_2d=True) already guarantees a 2D array.
        return X_

    def _shift_signal(self, x: np.ndarray) -> np.ndarray:
        """
        Shifts a signal by a fractional amount using cubic spline interpolation.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            The input signal to shift.

        Returns
        -------
        shifted_signal : np.ndarray of shape (n_features,)
            The shifted signal.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not one of the supported modes.
        """
        # Validate up front so an unknown mode fails even when the drawn
        # shift needs no padding at all.
        if self.padding_mode not in self._PADDING_MODES:
            raise ValueError(f"Unknown padding mode: {self.padding_mode}")

        shift = self._rng.uniform(-self.shift, self.shift)
        n = len(x)
        indices = np.arange(n)
        shifted_indices = indices + shift

        # Create cubic spline interpolator and resample at the new positions
        spline = CubicSpline(indices, x, bc_type="not-a-knot")
        shifted_signal = spline(shifted_indices)

        # A single shift moves every position the same way, so only one side
        # can leave the valid range [0, n - 1]. The SAME mask is used both to
        # count and to assign the padded samples; positions exactly on the
        # boundary are valid interpolation points and are left untouched.
        pad_left = shift < 0
        if pad_left:
            pad_mask = shifted_indices < 0
        else:
            pad_mask = shifted_indices > n - 1
        pad_length = int(pad_mask.sum())

        if pad_length == 0:
            return shifted_signal

        if self.padding_mode == "zeros":
            shifted_signal[pad_mask] = 0

        elif self.padding_mode == "constant":
            shifted_signal[pad_mask] = self.pad_value

        elif self.padding_mode == "mirror":
            if pad_left:
                pad_values = x[pad_length - 1 :: -1]
            else:
                pad_values = x[:-1][::-1]
            shifted_signal[pad_mask] = pad_values[:pad_length]

        elif self.padding_mode == "extend":
            shifted_signal[pad_mask] = x[0] if pad_left else x[-1]

        else:  # "linear"
            # Fit a straight line through the edge points next to the padded
            # side (all points when the signal is shorter than the window).
            n_fit = min(self._N_FIT_POINTS, n)
            x_coords = np.arange(n_fit)
            if pad_left:
                points = x[:n_fit]
                new_x = np.arange(-pad_length, 0)
            else:
                points = x[-n_fit:]
                new_x = np.arange(n_fit, n_fit + pad_length)

            slope, intercept, _, _, _ = stats.linregress(x_coords, points)
            shifted_signal[pad_mask] = slope * new_x + intercept

        return shifted_signal
|
@@ -0,0 +1,214 @@
|
|
1
|
+
from typing import Literal, Optional
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from scipy.signal import convolve
|
5
|
+
from scipy import stats
|
6
|
+
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
7
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
8
|
+
|
9
|
+
|
10
|
+
class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Shift the spectrum a given number of indices between -shift and +shift drawn
    from a discrete uniform distribution.

    Parameters
    ----------
    shift : int, default=0
        Maximum number of indices by which the data is randomly shifted.
        The actual shift is a random integer between -shift and shift (inclusive).

    padding_mode : {'zeros', 'constant', 'wrap', 'extend', 'mirror', 'linear'}, default='linear'
        Specifies how to handle padding when shifting the data:
        - 'zeros': Pads with zeros.
        - 'constant': Pads with a constant value defined by `pad_value`.
        - 'wrap': Circular shift (wraps around).
        - 'extend': Extends using edge values.
        - 'mirror': Mirrors the signal.
        - 'linear': Uses linear regression to extrapolate values.

    pad_value : float, default=0.0
        The value used for padding when `padding_mode='constant'`.

    random_state : int, optional, default=None
        The random seed for reproducibility.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _rng : numpy.random.Generator
        Random number generator instance used for shifting.
    """

    # Padding modes handled by ``_shift_signal``.
    _PADDING_MODES = ("zeros", "constant", "wrap", "extend", "mirror", "linear")

    def __init__(
        self,
        shift: int = 0,
        padding_mode: Literal[
            "zeros", "constant", "wrap", "extend", "mirror", "linear"
        ] = "linear",
        pad_value: float = 0.0,
        random_state: Optional[int] = None,
    ):
        self.shift = shift
        self.padding_mode = padding_mode
        self.pad_value = pad_value
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : IndexShift
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Instantiate the random number generator
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        Every row receives its own independently drawn shift.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data with the applied shifts.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not one of the supported modes.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "n_features_in_")

        # Check that X is a 2D array and has only finite values
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Shift every spectrum (row) independently
        for i, x in enumerate(X_):
            X_[i] = self._shift_signal(x)

        # validate_data(ensure_2d=True) already guarantees a 2D array.
        return X_

    def _shift_signal(self, x: np.ndarray):
        """
        Shifts a discrete signal by a random whole number of indices.

        A positive shift moves the signal towards higher indices and leaves
        a gap at the start; a negative shift leaves the gap at the end. The
        gap is filled according to ``padding_mode``.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            The input signal to shift.

        Returns
        -------
        result : np.ndarray of shape (n_features,)
            The shifted signal.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not one of the supported modes.
        """
        # Validate up front so an unknown mode fails even for a zero shift.
        if self.padding_mode not in self._PADDING_MODES:
            raise ValueError(f"Unknown padding mode: {self.padding_mode}")

        shift = int(self._rng.integers(-self.shift, self.shift, endpoint=True))

        if self.padding_mode == "wrap":
            return np.roll(x, shift)

        n = len(x)
        # The gap can never be longer than the signal itself.
        pad_length = min(abs(shift), n)
        pad_left = shift > 0

        # A zero shift needs no padding. Returning here avoids empty-slice
        # assignments and a one-point regression further down.
        if pad_length == 0:
            return x.copy()

        # Copy the surviving samples to their new position; the gap stays
        # zero, which is already the final result for 'zeros'.
        result = np.zeros(n, dtype=x.dtype)
        if pad_left:
            result[pad_length:] = x[: n - pad_length]
            gap = slice(0, pad_length)
        else:
            result[: n - pad_length] = x[pad_length:]
            gap = slice(n - pad_length, n)

        if self.padding_mode == "zeros":
            return result

        if self.padding_mode == "constant":
            # Fill only the gap; genuine zeros in the data are kept.
            result[gap] = self.pad_value
            return result

        if self.padding_mode == "mirror":
            if pad_left:
                pad_values = x[pad_length - 1 :: -1]
            else:
                pad_values = x[:-1][::-1]
            result[gap] = pad_values[:pad_length]
            return result

        if self.padding_mode == "extend":
            result[gap] = x[0] if pad_left else x[-1]
            return result

        # "linear": regress on the pad_length + 1 samples next to the gap
        # and extrapolate the fitted line into the gap.
        if pad_left:
            points = x[: pad_length + 1]
            new_x = np.arange(-pad_length, 0)
        else:
            points = x[-pad_length - 1 :]
            new_x = np.arange(len(points), len(points) + pad_length)

        x_coords = np.arange(len(points))
        slope, intercept, _, _, _ = stats.linregress(x_coords, points)
        result[gap] = slope * new_x + intercept
        return result
|
@@ -1,114 +0,0 @@
|
|
1
|
-
from typing import Literal, Optional
|
2
|
-
|
3
|
-
import numpy as np
|
4
|
-
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
-
from sklearn.utils.validation import check_is_fitted, validate_data
|
6
|
-
|
7
|
-
|
8
|
-
class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add random noise to the input data.

    Parameters
    ----------
    noise_distribution : {'gaussian', 'poisson', 'exponential'}, default='gaussian'
        The probability distribution the noise is sampled from.

    scale : float, default=0.0
        Scale of the noise: the standard deviation for gaussian noise, a
        factor multiplied with the Poisson samples for poisson noise, and
        the scale parameter for exponential noise.

    random_state : int, optional, default=None
        The random seed for reproducibility.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.
    """

    def __init__(
        self,
        noise_distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian",
        scale: float = 0.0,
        random_state: Optional[int] = None,
    ):
        self.noise_distribution = noise_distribution
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "AddNoise":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : AddNoise
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Instantiate the random number generator
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding random noise.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.

        Raises
        ------
        ValueError
            If the number of features differs from the fitted data or if
            ``noise_distribution`` is not a supported name.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Check that X is a 2D array and has only finite values. With
        # reset=False this also rejects a feature count that differs from
        # the fitted data, so no separate check is needed.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Resolve the noise function once instead of once per row.
        if self.noise_distribution == "gaussian":
            add_noise = self._add_gaussian_noise
        elif self.noise_distribution == "poisson":
            add_noise = self._add_poisson_noise
        elif self.noise_distribution == "exponential":
            add_noise = self._add_exponential_noise
        else:
            raise ValueError("Invalid noise distribution")

        # Add noise to every spectrum (row)
        for i, x in enumerate(X_):
            X_[i] = add_noise(x)

        # validate_data(ensure_2d=True) already guarantees a 2D array.
        return X_

    def _add_gaussian_noise(self, x) -> np.ndarray:
        """Add zero-mean Gaussian noise with standard deviation ``scale``."""
        return x + self._rng.normal(0, self.scale, size=x.shape)

    def _add_poisson_noise(self, x) -> np.ndarray:
        """Add Poisson samples, whose rate is x itself, multiplied by ``scale``."""
        # The noise is added to the signal, like the other distributions,
        # instead of replacing it.
        return x + self._rng.poisson(x, size=x.shape) * self.scale

    def _add_exponential_noise(self, x) -> np.ndarray:
        """Add exponentially distributed noise with scale ``scale``."""
        return x + self._rng.exponential(self.scale, size=x.shape)
|
@@ -1,199 +0,0 @@
|
|
1
|
-
from typing import Literal, Optional
|
2
|
-
|
3
|
-
import numpy as np
|
4
|
-
from numpy.polynomial import polynomial as poly
|
5
|
-
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
6
|
-
from sklearn.utils.validation import check_is_fitted, validate_data
|
7
|
-
|
8
|
-
|
9
|
-
class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Shift the spectrum a given number of indices between -shift and +shift drawn
    from a discrete uniform distribution.

    Parameters
    ----------
    shift : int, default=0
        Shifts the data by a random integer between -shift and shift.

    fill_method : {'constant', 'linear', 'quadratic'}, default='constant'
        How the samples vacated by the shift are filled:
        - 'constant': fill with the first/last value.
        - 'linear': fill using a degree-1 polynomial fit on the edge points.
        - 'quadratic': fill using a degree-2 polynomial fit on the edge points.

        Any other value leaves the circularly shifted signal unchanged.

    random_state : int, default=None
        The random state to use for the random number generator.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by shifting the spectrum.
    """

    # Maximum number of edge points used by the polynomial fills.
    _N_FIT_POINTS = 5

    def __init__(
        self,
        shift: int = 0,
        fill_method: Literal["constant", "linear", "quadratic"] = "constant",
        random_state: Optional[int] = None,
    ):
        self.shift = shift
        self.fill_method = fill_method
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : IndexShift
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Instantiate the random number generator
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        Every row receives its own independently drawn shift.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Check that X is a 2D array and has only finite values. With
        # reset=False this also rejects a feature count that differs from
        # the fitted data, so no separate check is needed.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Shift every spectrum (row) independently
        for i, x in enumerate(X_):
            X_[i] = self._shift_vector(x)

        # validate_data(ensure_2d=True) already guarantees a 2D array.
        return X_

    def _shift_spectrum(self, x) -> np.ndarray:
        """Circularly shift x by a random number of indices (no filling).

        Not called by ``transform``, which uses ``_shift_vector`` instead.
        """
        shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
        return np.roll(x, shift_amount)

    def _shift_vector(
        self,
        x: np.ndarray,
    ) -> np.ndarray:
        """
        Shift a vector by a random number of indices and fill the vacated
        samples according to ``self.fill_method``.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            The input vector to shift.

        Returns
        -------
        result : np.ndarray of shape (n_features,)
            The shifted vector.
        """
        shift = self._rng.integers(-self.shift, self.shift, endpoint=True)

        # Start from a circular shift, then overwrite the wrapped-around part.
        result = np.roll(x, shift)

        if shift == 0:
            return result

        if self.fill_method == "constant":
            if shift > 0:
                result[:shift] = x[0]
            else:
                result[shift:] = x[-1]

        elif self.fill_method in ("linear", "quadratic"):
            degree = 1 if self.fill_method == "linear" else 2

            # Fit on up to five edge points; spectra shorter than that use
            # every available point instead of failing on a length mismatch.
            n_fit = min(self._N_FIT_POINTS, len(x))
            x_ = np.arange(n_fit)

            if shift > 0:
                coeffs = poly.polyfit(x_, x[:n_fit], degree)

                # Extrapolate to the indices just before the first point
                extrapolate_x = np.arange(-shift, 0)
                result[:shift] = poly.polyval(extrapolate_x, coeffs)
            else:
                coeffs = poly.polyfit(x_, x[-n_fit:], degree)

                # Extrapolate to the indices just after the last point
                extrapolate_x = np.arange(n_fit, n_fit - shift)
                result[shift:] = poly.polyval(extrapolate_x, coeffs)

        return result
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{chemotools-0.1.8 → chemotools-0.1.9}/chemotools/scatter/_multiplicative_scatter_correction.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|