chemotools 0.1.7__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. {chemotools-0.1.7 → chemotools-0.1.9}/PKG-INFO +1 -1
  2. chemotools-0.1.9/chemotools/augmentation/__init__.py +14 -0
  3. chemotools-0.1.9/chemotools/augmentation/_add_noise.py +135 -0
  4. chemotools-0.1.9/chemotools/augmentation/_fractional_shift.py +203 -0
  5. chemotools-0.1.9/chemotools/augmentation/_index_shift.py +214 -0
  6. {chemotools-0.1.7 → chemotools-0.1.9}/pyproject.toml +1 -1
  7. chemotools-0.1.7/chemotools/augmentation/__init__.py +0 -16
  8. chemotools-0.1.7/chemotools/augmentation/exponential_noise.py +0 -117
  9. chemotools-0.1.7/chemotools/augmentation/index_shift.py +0 -199
  10. chemotools-0.1.7/chemotools/augmentation/normal_noise.py +0 -118
  11. chemotools-0.1.7/chemotools/augmentation/uniform_noise.py +0 -124
  12. {chemotools-0.1.7 → chemotools-0.1.9}/LICENSE +0 -0
  13. {chemotools-0.1.7 → chemotools-0.1.9}/README.md +0 -0
  14. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/__init__.py +0 -0
  15. /chemotools-0.1.7/chemotools/augmentation/baseline_shift.py → /chemotools-0.1.9/chemotools/augmentation/_baseline_shift.py +0 -0
  16. /chemotools-0.1.7/chemotools/augmentation/spectrum_scale.py → /chemotools-0.1.9/chemotools/augmentation/_spectrum_scale.py +0 -0
  17. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/__init__.py +0 -0
  18. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/_air_pls.py +0 -0
  19. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/_ar_pls.py +0 -0
  20. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/_constant_baseline_correction.py +0 -0
  21. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/_cubic_spline_correction.py +0 -0
  22. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/_linear_correction.py +0 -0
  23. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/_non_negative.py +0 -0
  24. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/_polynomial_correction.py +0 -0
  25. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/baseline/_subtract_reference.py +0 -0
  26. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/__init__.py +0 -0
  27. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/_base.py +0 -0
  28. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/data/__init__.py +0 -0
  29. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/data/coffee_labels.csv +0 -0
  30. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/data/coffee_spectra.csv +0 -0
  31. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/data/fermentation_hplc.csv +0 -0
  32. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/data/fermentation_spectra.csv +0 -0
  33. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/data/train_hplc.csv +0 -0
  34. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/datasets/data/train_spectra.csv +0 -0
  35. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/derivative/__init__.py +0 -0
  36. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/derivative/_norris_william.py +0 -0
  37. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/derivative/_savitzky_golay.py +0 -0
  38. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/feature_selection/__init__.py +0 -0
  39. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/feature_selection/_index_selector.py +0 -0
  40. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/feature_selection/_range_cut.py +0 -0
  41. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scale/__init__.py +0 -0
  42. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scale/_min_max_scaler.py +0 -0
  43. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scale/_norm_scaler.py +0 -0
  44. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scale/_point_scaler.py +0 -0
  45. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scatter/__init__.py +0 -0
  46. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scatter/_extended_multiplicative_scatter_correction.py +0 -0
  47. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scatter/_multiplicative_scatter_correction.py +0 -0
  48. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scatter/_robust_normal_variate.py +0 -0
  49. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/scatter/_standard_normal_variate.py +0 -0
  50. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/smooth/__init__.py +0 -0
  51. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/smooth/_mean_filter.py +0 -0
  52. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/smooth/_median_filter.py +0 -0
  53. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/smooth/_savitzky_golay_filter.py +0 -0
  54. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/smooth/_whittaker_smooth.py +0 -0
  55. {chemotools-0.1.7 → chemotools-0.1.9}/chemotools/utils/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: chemotools
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
5
  License: MIT
6
6
  Author: Pau Cabaneros
@@ -0,0 +1,14 @@
1
+ from ._add_noise import AddNoise
2
+ from ._baseline_shift import BaselineShift
3
+ from ._fractional_shift import FractionalShift
4
+ from ._index_shift import IndexShift
5
+ from ._spectrum_scale import SpectrumScale
6
+
7
+
8
+ __all__ = [
9
+ "AddNoise",
10
+ "BaselineShift",
11
+ "FractionalShift",
12
+ "IndexShift",
13
+ "SpectrumScale",
14
+ ]
@@ -0,0 +1,135 @@
1
+ from typing import Literal, Optional
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
+
7
+
8
class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """Augment data by adding random noise drawn from a chosen distribution.

    The noise is sampled independently for every element of the input and
    added to a copy of the data. Gaussian, Poisson and exponential noise are
    supported.

    Parameters
    ----------
    distribution : {'gaussian', 'poisson', 'exponential'}, default='gaussian'
        Name of the distribution the noise is sampled from.
    scale : float, default=0.0
        Magnitude of the noise. Its meaning depends on ``distribution``:
        - gaussian: standard deviation of the zero-mean normal noise
        - poisson: factor multiplying the Poisson samples (the samples are
          drawn using the data values themselves as the rate)
        - exponential: scale parameter (1/λ) of the exponential noise
    random_state : int, optional
        Seed used to create the random number generator in ``fit``.

    Attributes
    ----------
    n_features_in_ : int
        Number of features seen during ``fit``.
    """

    def __init__(
        self,
        distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian",
        scale: float = 0.0,
        random_state: Optional[int] = None,
    ):
        self.distribution = distribution
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "AddNoise":
        """Validate the training data and seed the random number generator.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : None
            Ignored. Present for API consistency.

        Returns
        -------
        self : AddNoise
            Fitted transformer.

        Raises
        ------
        ValueError
            If X does not pass validation (e.g. it is not a 2D array).
        """
        # Validation records n_features_in_ on the estimator (reset=True).
        X = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            reset=True,
            dtype=np.float64,
        )

        # A fresh generator per fit keeps results reproducible for a given seed.
        self._rng = np.random.default_rng(self.random_state)
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Return a noisy copy of the input data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data to transform.
        y : None
            Ignored. Present for API consistency.

        Returns
        -------
        X_noisy : ndarray of shape (n_samples, n_features)
            The input data with noise added.

        Raises
        ------
        ValueError
            If X does not pass validation against the fitted data, or if
            ``distribution`` is not one of the supported names.
        """
        check_is_fitted(self, "n_features_in_")

        # copy=True so the caller's array is never modified.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Map each supported distribution name to the method that applies it.
        samplers = {
            "gaussian": self._add_gaussian_noise,
            "poisson": self._add_poisson_noise,
            "exponential": self._add_exponential_noise,
        }
        if self.distribution not in samplers:
            raise ValueError(
                f"Invalid noise distribution: {self.distribution}. "
                "Expected one of: gaussian, poisson, exponential"
            )

        apply_noise = samplers[self.distribution]
        return apply_noise(X_)

    def _add_gaussian_noise(self, X: np.ndarray) -> np.ndarray:
        """Add zero-mean Gaussian noise with standard deviation ``scale``."""
        noise = self._rng.normal(0, self.scale, size=X.shape)
        return X + noise

    def _add_poisson_noise(self, X: np.ndarray) -> np.ndarray:
        """Add Poisson samples (rate taken from ``X``) multiplied by ``scale``."""
        counts = self._rng.poisson(X, size=X.shape)
        return X + counts * self.scale

    def _add_exponential_noise(self, X: np.ndarray) -> np.ndarray:
        """Add exponential noise with scale parameter ``scale``."""
        noise = self._rng.exponential(self.scale, size=X.shape)
        return X + noise
@@ -0,0 +1,203 @@
1
+ from typing import Literal, Optional
2
+
3
+ import numpy as np
4
+ from scipy.interpolate import CubicSpline
5
+ from scipy import stats
6
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
7
+ from sklearn.utils.validation import check_is_fitted, validate_data
8
+
9
+
10
class FractionalShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Shift the spectrum by a fractional amount, allowing shifts below one index.

    Every sample is resampled with a cubic spline at the positions
    ``index + s``, where ``s`` is drawn uniformly between ``-shift`` and
    ``shift``. Positions that fall outside the sampled range
    ``[0, n_features - 1]`` are filled according to ``padding_mode``.

    Parameters
    ----------
    shift : float, default=0.0
        Maximum amount by which the data is randomly shifted.
        The actual shift is a random float between -shift and shift.

    padding_mode : {'zeros', 'constant', 'extend', 'mirror', 'linear'}, default='linear'
        Specifies how to fill the positions that fall outside the signal:
        - 'zeros': Pads with zeros.
        - 'constant': Pads with a constant value defined by `pad_value`.
        - 'extend': Extends using edge values.
        - 'mirror': Mirrors the signal.
        - 'linear': Uses linear regression on (up to) 5 edge points to
          extrapolate values.

    pad_value : float, default=0.0
        The value used for padding when `padding_mode='constant'`.

    random_state : int, optional, default=None
        The random seed for reproducibility.

    Attributes
    ----------
    n_features_in_ : int
        The number of features seen during ``fit``.
    """

    # Padding modes implemented by ``_shift_signal``.
    _PADDING_MODES = ("zeros", "constant", "extend", "mirror", "linear")

    def __init__(
        self,
        shift: float = 0.0,
        padding_mode: Literal[
            "zeros", "constant", "extend", "mirror", "linear"
        ] = "linear",
        pad_value: float = 0.0,
        random_state: Optional[int] = None,
    ):
        self.shift = shift
        self.padding_mode = padding_mode
        self.pad_value = pad_value
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "FractionalShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : FractionalShift
            The fitted transformer.
        """
        # Validation records n_features_in_ on the estimator (reset=True).
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Instantiate the random number generator
        self._rng = np.random.default_rng(self.random_state)
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        A separate random shift is drawn for every sample (row).

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data with the applied shifts.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not a supported mode.
        """
        check_is_fitted(self, "n_features_in_")

        # copy=True so the caller's array is never modified.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        for i, x in enumerate(X_):
            X_[i] = self._shift_signal(x)

        # ensure_2d=True guarantees a 2D array, so no reshape is required.
        return X_

    def _shift_signal(self, x: np.ndarray) -> np.ndarray:
        """
        Shifts a signal by a fractional amount using cubic spline interpolation.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            The input signal to shift.

        Returns
        -------
        shifted_signal : np.ndarray of shape (n_features,)
            The shifted signal.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not a supported mode.
        """
        shift = self._rng.uniform(-self.shift, self.shift)
        n = len(x)
        indices = np.arange(n)
        shifted_indices = indices + shift

        # Resample the signal at the shifted positions.
        spline = CubicSpline(indices, x, bc_type="not-a-knot")
        shifted_signal = spline(shifted_indices)

        if self.padding_mode not in self._PADDING_MODES:
            raise ValueError(f"Unknown padding mode: {self.padding_mode}")

        # Only positions strictly outside [0, n - 1] need padding. A position
        # equal to n - 1 is still inside the signal, so it must not be padded
        # (otherwise a zero shift would overwrite the last feature).
        pad_left = shift < 0
        if pad_left:
            outside = shifted_indices < 0
        else:
            outside = shifted_indices > n - 1
        pad_length = int(np.count_nonzero(outside))

        if pad_length == 0:
            return shifted_signal

        # The same mask is used to count and to assign, so the number of
        # padding values always matches the number of padded positions.
        if self.padding_mode == "zeros":
            shifted_signal[outside] = 0

        elif self.padding_mode == "constant":
            shifted_signal[outside] = self.pad_value

        elif self.padding_mode == "extend":
            shifted_signal[outside] = x[0] if pad_left else x[-1]

        elif self.padding_mode == "mirror":
            if pad_left:
                shifted_signal[outside] = x[pad_length - 1 :: -1]
            else:
                shifted_signal[outside] = x[:-1][::-1][:pad_length]

        else:  # "linear"
            shifted_signal[outside] = self._linear_extrapolation(
                x, pad_length, pad_left
            )

        return shifted_signal

    @staticmethod
    def _linear_extrapolation(
        x: np.ndarray, pad_length: int, pad_left: bool
    ) -> np.ndarray:
        """Extrapolate ``pad_length`` values from a line fitted to the edge.

        The line is fitted to the first (``pad_left=True``) or last
        (``pad_left=False``) 5 points of ``x`` (all points if there are fewer
        than 5) and evaluated at the integer positions just outside that edge.
        """
        points = x[:5] if pad_left else x[-5:]
        x_coords = np.arange(len(points))
        fit = stats.linregress(x_coords, points)

        if pad_left:
            new_x = np.arange(-pad_length, 0)
        else:
            new_x = np.arange(len(points), len(points) + pad_length)

        return fit.slope * new_x + fit.intercept
@@ -0,0 +1,214 @@
1
+ from typing import Literal, Optional
2
+
3
+ import numpy as np
4
+ from scipy.signal import convolve
5
+ from scipy import stats
6
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
7
+ from sklearn.utils.validation import check_is_fitted, validate_data
8
+
9
+
10
class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Shift the spectrum a given number of indices between -shift and +shift drawn
    from a discrete uniform distribution.

    Parameters
    ----------
    shift : int, default=0
        Maximum number of indices by which the data is randomly shifted.
        The actual shift is a random integer between -shift and shift (inclusive).

    padding_mode : {'zeros', 'constant', 'wrap', 'extend', 'mirror', 'linear'}, default='linear'
        Specifies how to handle padding when shifting the data:
        - 'zeros': Pads with zeros.
        - 'constant': Pads with a constant value defined by `pad_value`.
        - 'wrap': Circular shift (wraps around).
        - 'extend': Extends using edge values.
        - 'mirror': Mirrors the signal.
        - 'linear': Uses linear regression to extrapolate values.

    pad_value : float, default=0.0
        The value used for padding when `padding_mode='constant'`.

    random_state : int, optional, default=None
        The random seed for reproducibility.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _rng : numpy.random.Generator
        Random number generator instance used for shifting.
    """

    # Padding modes that fill the vacated edge (everything except 'wrap').
    _FILL_MODES = ("zeros", "constant", "extend", "mirror", "linear")

    def __init__(
        self,
        shift: int = 0,
        padding_mode: Literal[
            "zeros", "constant", "wrap", "extend", "mirror", "linear"
        ] = "linear",
        pad_value: float = 0.0,
        random_state: Optional[int] = None,
    ):
        self.shift = shift
        self.padding_mode = padding_mode
        self.pad_value = pad_value
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : IndexShift
            The fitted transformer.
        """
        # Validation records n_features_in_ on the estimator (reset=True).
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Instantiate the random number generator
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        A separate random shift is drawn for every sample (row).

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data with the applied shifts.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not a supported mode.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "n_features_in_")

        # copy=True so the caller's array is never modified.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Shift every sample independently
        for i, x in enumerate(X_):
            X_[i] = self._shift_signal(x)

        # ensure_2d=True guarantees a 2D array, so no reshape is required.
        return X_

    def _shift_signal(self, x: np.ndarray) -> np.ndarray:
        """
        Shifts a discrete signal by a random whole number of indices.

        A positive shift moves the signal towards higher indices and leaves
        the first ``shift`` positions to be padded; a negative shift moves it
        towards lower indices and leaves the last ``-shift`` positions to be
        padded.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            The input signal to shift.

        Returns
        -------
        result : np.ndarray of shape (n_features,)
            The shifted signal.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not a supported mode.
        """
        shift = int(self._rng.integers(-self.shift, self.shift, endpoint=True))

        if self.padding_mode == "wrap":
            return np.roll(x, shift)

        if self.padding_mode not in self._FILL_MODES:
            raise ValueError(f"Unknown padding mode: {self.padding_mode}")

        # Nothing moves and nothing needs padding for a zero shift. Returning
        # early avoids empty-slice assignments and single-point regressions.
        if shift == 0:
            return x.copy()

        n = len(x)
        pad_left = shift > 0
        # A shift of n or more moves the whole signal out of view.
        pad_length = min(abs(shift), n)

        # Move the samples with plain slicing: exact, and the vacated edge
        # is left at zero.
        result = np.zeros_like(x)
        if pad_left:
            result[pad_length:] = x[: n - pad_length]
            edge = slice(0, pad_length)
        else:
            result[: n - pad_length] = x[pad_length:]
            edge = slice(n - pad_length, n)

        if self.padding_mode == "zeros":
            return result

        if self.padding_mode == "constant":
            # Only the vacated edge is filled; genuine zeros in the data
            # are left untouched.
            result[edge] = self.pad_value

        elif self.padding_mode == "extend":
            result[edge] = x[0] if pad_left else x[-1]

        elif self.padding_mode == "mirror":
            if pad_left:
                result[edge] = x[pad_length - 1 :: -1]
            else:
                result[edge] = x[:-1][::-1][:pad_length]

        else:  # "linear"
            # Fit a line to the pad_length + 1 points next to the padded edge
            # and evaluate it at the positions just outside that edge.
            if pad_left:
                points = x[: pad_length + 1]
                new_x = np.arange(-pad_length, 0)
            else:
                points = x[-pad_length - 1 :]
                new_x = np.arange(len(points), len(points) + pad_length)
            x_coords = np.arange(len(points))
            fit = stats.linregress(x_coords, points)
            result[edge] = fit.slope * new_x + fit.intercept

        return result
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "chemotools"
3
- version = "0.1.7"
3
+ version = "0.1.9"
4
4
  description = "chemotools: A Python Package that Integrates Chemometrics and scikit-learn"
5
5
  authors = ["Pau Cabaneros"]
6
6
  license = "MIT License"
@@ -1,16 +0,0 @@
1
- from .baseline_shift import BaselineShift
2
- from .exponential_noise import ExponentialNoise
3
- from .normal_noise import NormalNoise
4
- from .index_shift import IndexShift
5
- from .spectrum_scale import SpectrumScale
6
- from .uniform_noise import UniformNoise
7
-
8
-
9
- __all__ = [
10
- "BaselineShift",
11
- "ExponentialNoise",
12
- "NormalNoise",
13
- "IndexShift",
14
- "SpectrumScale",
15
- "UniformNoise",
16
- ]
@@ -1,117 +0,0 @@
1
- from typing import Optional
2
-
3
- import numpy as np
4
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
- from sklearn.utils.validation import check_is_fitted, validate_data
6
-
7
-
8
class ExponentialNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Augment data by adding exponentially distributed noise.

    Parameters
    ----------
    scale : float, default=0.0
        Scale parameter of the exponential distribution the noise is drawn
        from.

    random_state : int, default=None
        Seed used to create the random number generator in ``fit``.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by adding random noise.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "ExponentialNoise":
        """
        Validate the training data and seed the random number generator.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : ExponentialNoise
            The fitted transformer.
        """
        X = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            reset=True,
            dtype=np.float64,
        )

        # Generator used by every later call to transform
        self._rng = np.random.default_rng(self.random_state)

        # Remember the training width and flag the estimator as fitted
        self.n_features_in_ = X.shape[1]
        self._is_fitted = True

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Return a copy of the input data with exponential noise added.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.

        Raises
        ------
        ValueError
            If the number of features differs from the fitted data.
        """
        check_is_fitted(self, "_is_fitted")

        # copy=True so the caller's array is never modified
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        features_match = X_.shape[1] == self.n_features_in_
        if not features_match:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Noise is drawn sample by sample (one row at a time)
        for row in range(X_.shape[0]):
            X_[row] = self._add_random_noise(X_[row])

        if X_.ndim == 1:
            return X_.reshape(-1, 1)
        return X_

    def _add_random_noise(self, x) -> np.ndarray:
        """Return ``x`` plus exponential noise of the same shape."""
        noise = self._rng.exponential(self.scale, size=x.shape)
        return x + noise
@@ -1,199 +0,0 @@
1
- from typing import Literal, Optional
2
-
3
- import numpy as np
4
- from numpy.polynomial import polynomial as poly
5
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
6
- from sklearn.utils.validation import check_is_fitted, validate_data
7
-
8
-
9
class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Shift the spectrum a given number of indices between -shift and +shift drawn
    from a discrete uniform distribution.

    Parameters
    ----------
    shift : int, default=0
        Shifts the data by a random integer between -shift and shift
        (inclusive).

    fill_method : {'constant', 'linear', 'quadratic'}, default='constant'
        How the positions vacated by the shift are filled:
        - 'constant': fill with the first/last value of the signal.
        - 'linear': fill by extrapolating a line fitted to (up to) 5 edge
          points.
        - 'quadratic': fill by extrapolating a quadratic fitted to (up to) 5
          edge points.
        Any other value leaves the circularly shifted (wrapped) values in
        place.

    random_state : int, default=None
        The random state to use for the random number generator.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by shifting the spectrum.
    """

    def __init__(
        self,
        shift: int = 0,
        fill_method: Literal["constant", "linear", "quadratic"] = "constant",
        random_state: Optional[int] = None,
    ):
        self.shift = shift
        self.fill_method = fill_method
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : IndexShift
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Instantiate the random number generator
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        A separate random shift is drawn for every sample (row).

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.

        Raises
        ------
        ValueError
            If the number of features differs from the fitted data.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # copy=True so the caller's array is never modified
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Shift every sample independently
        for i, x in enumerate(X_):
            X_[i] = self._shift_vector(x)

        return X_.reshape(-1, 1) if X_.ndim == 1 else X_

    def _shift_spectrum(self, x) -> np.ndarray:
        """Circularly shift ``x`` by a random number of indices (wraps around)."""
        shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
        return np.roll(x, shift_amount)

    def _shift_vector(
        self,
        x: np.ndarray,
    ) -> np.ndarray:
        """
        Shift a vector by a random number of indices and fill the vacated edge.

        A positive shift moves the values towards higher indices and fills the
        first ``shift`` positions; a negative shift moves them towards lower
        indices and fills the last ``-shift`` positions. The fill follows
        ``self.fill_method``.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            The input vector to shift.

        Returns
        -------
        result : np.ndarray of shape (n_features,)
            The shifted vector.
        """
        shift = self._rng.integers(-self.shift, self.shift, endpoint=True)

        # np.roll wraps around; the wrapped values are overwritten below.
        result = np.roll(x, shift)

        if shift == 0:
            return result

        if self.fill_method == "constant":
            if shift > 0:
                result[:shift] = x[0]
            else:
                result[shift:] = x[-1]

        elif self.fill_method in ("linear", "quadratic"):
            degree = 1 if self.fill_method == "linear" else 2

            # Fit on up to 5 edge points. Taking the coordinates from the
            # actual number of points keeps the fit valid for signals with
            # fewer than 5 features.
            if shift > 0:
                points = x[:5]
                coeffs = poly.polyfit(np.arange(len(points)), points, degree)

                # Positions just before the first fitted point
                extrapolate_x = np.arange(-shift, 0)
                result[:shift] = poly.polyval(extrapolate_x, coeffs)
            else:
                points = x[-5:]
                coeffs = poly.polyfit(np.arange(len(points)), points, degree)

                # Positions just after the last fitted point
                extrapolate_x = np.arange(len(points), len(points) - shift)
                result[shift:] = poly.polyval(extrapolate_x, coeffs)

        return result
@@ -1,118 +0,0 @@
1
- from typing import Optional
2
-
3
- import numpy as np
4
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
- from sklearn.utils.validation import check_is_fitted, validate_data
6
-
7
-
8
class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add zero-mean normal (Gaussian) noise to the input data.

    Parameters
    ----------
    scale : float, default=0.0
        Standard deviation of the normal distribution the noise is drawn
        from.

    random_state : int, default=None
        Seed handed to ``np.random.default_rng`` when the transformer is
        fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by adding random normal noise.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "NormalNoise":
        """
        Fit the transformer to the input data.

        Fitting validates ``X``, records its number of features and
        (re)creates the random number generator from ``random_state``.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : NormalNoise
            The fitted transformer.
        """
        # Validate X as a 2D float64 array
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Remember the number of features seen during fit
        self.n_features_in_ = X.shape[1]

        # Marker attribute that transform() looks for via check_is_fitted
        self._is_fitted = True

        # The generator is created here rather than in __init__, so every
        # call to fit() starts again from ``random_state``
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding random normal noise.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.

        Raises
        ------
        ValueError
            If the number of features differs from the one seen in ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Validate X as a 2D float64 array; copy=True keeps the caller's
        # array untouched
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Draw the noise for the whole matrix in one call instead of looping
        # over rows in Python. X_ is always 2D here (ensure_2d=True), so no
        # reshape is needed.
        # NOTE(review): a single draw is expected to consume the generator in
        # the same row-major order as the former per-row loop -- confirm that
        # seeded outputs are unchanged.
        return self._add_random_noise(X_)

    def _add_random_noise(self, x) -> np.ndarray:
        """Return ``x`` plus normal noise of the same shape (mean 0, std ``scale``)."""
        return x + self._rng.normal(0, self.scale, size=x.shape)
@@ -1,124 +0,0 @@
1
- from typing import Optional
2
-
3
- import numpy as np
4
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
- from sklearn.utils.validation import check_is_fitted, validate_data
6
-
7
-
8
class UniformNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add uniform noise to the input data.

    Parameters
    ----------
    min : float, default=0.0
        The lower bound of the uniform distribution.

    max : float, default=0.0
        The upper bound of the uniform distribution.

    random_state : int, default=None
        Seed handed to ``np.random.default_rng`` when the transformer is
        fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by adding random uniform noise.
    """

    def __init__(
        self, min: float = 0.0, max: float = 0.0, random_state: Optional[int] = None
    ):
        # The parameter names shadow the builtins but are part of the public
        # constructor interface, so they are kept as they are
        self.min = min
        self.max = max
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
        """
        Fit the transformer to the input data.

        Fitting validates ``X``, records its number of features and
        (re)creates the random number generator from ``random_state``.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : UniformNoise
            The fitted transformer.
        """
        # Validate X as a 2D float64 array
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Remember the number of features seen during fit
        self.n_features_in_ = X.shape[1]

        # Marker attribute that transform() looks for via check_is_fitted
        self._is_fitted = True

        # The generator is created here rather than in __init__, so every
        # call to fit() starts again from ``random_state``
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding random uniform noise.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data.

        Raises
        ------
        ValueError
            If the number of features differs from the one seen in ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Validate X as a 2D float64 array; copy=True keeps the caller's
        # array untouched
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Draw the noise for the whole matrix in one call instead of looping
        # over rows in Python. X_ is always 2D here (ensure_2d=True), so no
        # reshape is needed.
        # NOTE(review): a single draw is expected to consume the generator in
        # the same row-major order as the former per-row loop -- confirm that
        # seeded outputs are unchanged.
        return self._add_random_noise(X_)

    def _add_random_noise(self, x) -> np.ndarray:
        """Return ``x`` plus uniform noise of the same shape drawn between ``min`` and ``max``."""
        return x + self._rng.uniform(self.min, self.max, size=x.shape)
File without changes
File without changes