chemotools 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  from ._add_noise import AddNoise
2
2
  from ._baseline_shift import BaselineShift
3
+ from ._fractional_shift import FractionalShift
4
+ from ._gaussian_broadening import GaussianBroadening
3
5
  from ._index_shift import IndexShift
4
6
  from ._spectrum_scale import SpectrumScale
5
7
 
@@ -7,6 +9,8 @@ from ._spectrum_scale import SpectrumScale
7
9
  __all__ = [
8
10
  "AddNoise",
9
11
  "BaselineShift",
12
+ "FractionalShift",
13
+ "GaussianBroadening",
10
14
  "IndexShift",
11
15
  "SpectrumScale",
12
16
  ]
@@ -6,72 +6,95 @@ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
8
  class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
- """
10
- Add normal noise to the input data.
9
+ """Add noise to input data from various probability distributions.
10
+
11
+ This transformer adds random noise from specified probability distributions
12
+ to the input data. Supported distributions include Gaussian, Poisson, and
13
+ exponential.
14
+
15
+ Parameters
16
+ ----------
17
+ distribution : {'gaussian', 'poisson', 'exponential'}, default='gaussian'
18
+ The probability distribution to sample noise from.
19
+ scale : float, default=0.0
20
+ Scale parameter for the noise distribution:
21
+ - For gaussian: standard deviation
22
+ - For poisson: multiplication factor for sampled values
23
+ - For exponential: scale parameter (1/λ)
24
+ random_state : int, optional
25
+ Random seed for reproducibility.
26
+
27
+ Attributes
28
+ ----------
29
+ n_features_in_ : int
30
+ Number of features in the training data.
11
31
  """
12
32
 
13
33
  def __init__(
14
34
  self,
15
- noise_distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian",
35
+ distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian",
16
36
  scale: float = 0.0,
17
37
  random_state: Optional[int] = None,
18
38
  ):
19
- self.noise_distribution = noise_distribution
39
+ self.distribution = distribution
20
40
  self.scale = scale
21
41
  self.random_state = random_state
22
42
 
23
43
  def fit(self, X: np.ndarray, y=None) -> "AddNoise":
24
- """
25
- Fit the transformer to the input data.
44
+ """Fit the transformer to the input data.
26
45
 
27
46
  Parameters
28
47
  ----------
29
- X : np.ndarray of shape (n_samples, n_features)
30
- The input data to fit the transformer to.
31
-
48
+ X : array-like of shape (n_samples, n_features)
49
+ Training data.
32
50
  y : None
33
- Ignored.
51
+ Ignored. Present for API consistency.
34
52
 
35
53
  Returns
36
54
  -------
37
- self : NormalNoise
38
- The fitted transformer.
55
+ self : AddNoise
56
+ Fitted transformer.
57
+
58
+ Raises
59
+ ------
60
+ ValueError
61
+ If X is not a 2D array or contains non-finite values.
39
62
  """
63
+
40
64
  # Check that X is a 2D array and has only finite values
41
65
  X = validate_data(
42
66
  self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
43
67
  )
44
68
 
45
- # Set the number of features
46
- self.n_features_in_ = X.shape[1]
47
-
48
- # Set the fitted attribute to True
49
- self._is_fitted = True
50
-
51
69
  # Instantiate the random number generator
52
70
  self._rng = np.random.default_rng(self.random_state)
53
71
 
54
72
  return self
55
73
 
56
74
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
57
- """
58
- Transform the input data by adding random normal noise.
75
+ """Transform the input data by adding random noise.
59
76
 
60
77
  Parameters
61
78
  ----------
62
- X : np.ndarray of shape (n_samples, n_features)
63
- The input data to transform.
64
-
79
+ X : array-like of shape (n_samples, n_features)
80
+ Input data to transform.
65
81
  y : None
66
- Ignored.
82
+ Ignored. Present for API consistency.
67
83
 
68
84
  Returns
69
85
  -------
70
- X_ : np.ndarray of shape (n_samples, n_features)
71
- The transformed data.
86
+ X_noisy : ndarray of shape (n_samples, n_features)
87
+ Transformed data with added noise.
88
+
89
+ Raises
90
+ ------
91
+ ValueError
92
+ If X has different number of features than the training data,
93
+ or if an invalid noise distribution is specified.
72
94
  """
95
+
73
96
  # Check that the estimator is fitted
74
- check_is_fitted(self, "_is_fitted")
97
+ check_is_fitted(self, "n_features_in_")
75
98
 
76
99
  # Check that X is a 2D array and has only finite values
77
100
  X_ = validate_data(
@@ -84,31 +107,29 @@ class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
84
107
  dtype=np.float64,
85
108
  )
86
109
 
87
- # Check that the number of features is the same as the fitted data
88
- if X_.shape[1] != self.n_features_in_:
110
+ # Select the noise function based on the selected distribution
111
+ noise_func = {
112
+ "gaussian": self._add_gaussian_noise,
113
+ "poisson": self._add_poisson_noise,
114
+ "exponential": self._add_exponential_noise,
115
+ }.get(self.distribution)
116
+
117
+ if noise_func is None:
89
118
  raise ValueError(
90
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
119
+ f"Invalid noise distribution: {self.distribution}. "
120
+ "Expected one of: gaussian, poisson, exponential"
91
121
  )
92
122
 
93
- # Calculate the standard normal variate
94
- for i, x in enumerate(X_):
95
- match self.noise_distribution:
96
- case "gaussian":
97
- X_[i] = self._add_gaussian_noise(x)
98
- case "poisson":
99
- X_[i] = self._add_poisson_noise(x)
100
- case "exponential":
101
- X_[i] = self._add_exponential_noise(x)
102
- case _:
103
- raise ValueError("Invalid noise distribution")
104
-
105
- return X_.reshape(-1, 1) if X_.ndim == 1 else X_
123
+ return noise_func(X_)
106
124
 
107
- def _add_gaussian_noise(self, x) -> np.ndarray:
108
- return x + self._rng.normal(0, self.scale, size=x.shape)
125
+ def _add_gaussian_noise(self, X: np.ndarray) -> np.ndarray:
126
+ """Add Gaussian noise to the input array."""
127
+ return X + self._rng.normal(0, self.scale, size=X.shape)
109
128
 
110
- def _add_poisson_noise(self, x) -> np.ndarray:
111
- return self._rng.poisson(x, size=x.shape) * self.scale
129
+ def _add_poisson_noise(self, X: np.ndarray) -> np.ndarray:
130
+ """Add Poisson noise to the input array."""
131
+ return X + self._rng.poisson(X, size=X.shape) * self.scale
112
132
 
113
- def _add_exponential_noise(self, x) -> np.ndarray:
114
- return x + self._rng.exponential(self.scale, size=x.shape)
133
+ def _add_exponential_noise(self, X: np.ndarray) -> np.ndarray:
134
+ """Add exponential noise to the input array."""
135
+ return X + self._rng.exponential(self.scale, size=X.shape)
@@ -0,0 +1,203 @@
1
+ from typing import Literal, Optional
2
+
3
+ import numpy as np
4
+ from scipy.interpolate import CubicSpline
5
+ from scipy import stats
6
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
7
+ from sklearn.utils.validation import check_is_fitted, validate_data
8
+
9
+
10
class FractionalShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """Shift each spectrum by a random fractional number of indices.

    Every sample is resampled with a cubic spline at positions offset by a
    shift drawn uniformly from ``[-shift, shift]``, which allows shifts smaller
    than one index. Resampled positions that fall outside the original index
    range ``[0, n_features - 1]`` are filled according to ``padding_mode``.

    Parameters
    ----------
    shift : float, default=0.0
        Maximum amount by which the data is randomly shifted.
        The actual shift is a random float between -shift and shift.

    padding_mode : {'zeros', 'constant', 'extend', 'mirror', 'linear'}, default='linear'
        Specifies how positions outside the original index range are filled:
        - 'zeros': Pads with zeros.
        - 'constant': Pads with a constant value defined by `pad_value`.
        - 'extend': Extends using the edge value.
        - 'mirror': Mirrors the signal values next to the padded edge.
        - 'linear': Extrapolates a least-squares line fitted to the (up to)
          5 samples nearest the padded edge.
        Circular ('wrap') padding is not implemented; any other value raises
        a ``ValueError`` during ``transform``.

    pad_value : float, default=0.0
        The value used for padding when `padding_mode='constant'`.

    random_state : int, optional, default=None
        The random seed for reproducibility.
    """

    def __init__(
        self,
        shift: float = 0.0,
        padding_mode: Literal[
            "zeros", "constant", "extend", "mirror", "linear"
        ] = "linear",
        pad_value: float = 0.0,
        random_state: Optional[int] = None,
    ):
        self.shift = shift
        self.padding_mode = padding_mode
        self.pad_value = pad_value
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "FractionalShift":
        """
        Fit the transformer to the input data.

        Validates ``X`` and creates the random number generator from
        ``random_state``; nothing is learned from the data itself.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : FractionalShift
            The fitted transformer.
        """
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )
        self._rng = np.random.default_rng(self.random_state)
        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        A new random shift is drawn independently for every sample.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data with the applied shifts.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not one of the supported modes.
        """
        check_is_fitted(self, "n_features_in_")
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # X_ is a validated copy, so rows can be overwritten in place.
        for i, x in enumerate(X_):
            X_[i] = self._shift_signal(x)

        return X_

    def _shift_signal(self, x: np.ndarray) -> np.ndarray:
        """
        Shifts a signal by a fractional amount using cubic spline interpolation.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            The input signal to shift.

        Returns
        -------
        shifted_signal : np.ndarray of shape (n_features,)
            The shifted signal.

        Raises
        ------
        ValueError
            If ``padding_mode`` is not one of the supported modes.
        """
        shift = self._rng.uniform(-self.shift, self.shift)
        n = len(x)
        indices = np.arange(n)
        shifted_indices = indices + shift

        # Resample the signal at the shifted positions.
        spline = CubicSpline(indices, x, bc_type="not-a-knot")
        shifted_signal = spline(shifted_indices)

        # Only positions strictly outside [0, n - 1] need padding; n - 1 itself
        # is a valid sample position. A single mask is used both to size the
        # fill values and to assign them, so the two can never disagree.
        pad_left = shift < 0
        if pad_left:
            pad_mask = shifted_indices < 0
        else:
            pad_mask = shifted_indices > n - 1
        pad_length = int(np.count_nonzero(pad_mask))

        if self.padding_mode == "zeros":
            shifted_signal[pad_mask] = 0

        elif self.padding_mode == "constant":
            shifted_signal[pad_mask] = self.pad_value

        elif self.padding_mode == "mirror":
            if pad_left:
                # x[pad_length - 1], ..., x[0]
                shifted_signal[pad_mask] = x[:pad_length][::-1]
            else:
                # x[-2], x[-3], ... (the edge sample itself is not repeated)
                shifted_signal[pad_mask] = x[:-1][::-1][:pad_length]

        elif self.padding_mode == "extend":
            shifted_signal[pad_mask] = x[0] if pad_left else x[-1]

        elif self.padding_mode == "linear":
            if pad_length:
                shifted_signal[pad_mask] = self._linear_extrapolation(
                    x, pad_length, pad_left
                )

        else:
            raise ValueError(f"Unknown padding mode: {self.padding_mode}")

        return shifted_signal

    def _linear_extrapolation(
        self, x: np.ndarray, pad_length: int, pad_left: bool
    ) -> np.ndarray:
        """
        Extrapolate ``pad_length`` values from a line fitted to the edge samples.

        The line is a least-squares fit through the (up to) 5 samples nearest
        the padded edge and is evaluated at integer offsets beyond that edge.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            The original, unshifted signal.

        pad_length : int
            Number of values to generate.

        pad_left : bool
            True to extrapolate before the first sample, False to extrapolate
            after the last sample.

        Returns
        -------
        extrapolated : np.ndarray of shape (pad_length,)
            The extrapolated values, ordered by increasing position.
        """
        # Slicing already returns every sample when fewer than 5 are available.
        points = x[:5] if pad_left else x[-5:]
        x_coords = np.arange(len(points))

        slope, intercept, _, _, _ = stats.linregress(x_coords, points)

        if pad_left:
            new_x = np.arange(-pad_length, 0)
        else:
            new_x = np.arange(len(points), len(points) + pad_length)
        return slope * new_x + intercept
@@ -0,0 +1,136 @@
1
+ from typing import Literal, Optional
2
+ import numpy as np
3
+ from scipy.ndimage import gaussian_filter1d
4
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
+
7
+
8
class GaussianBroadening(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Transform spectral data by broadening peaks using Gaussian convolution.

    This transformer applies Gaussian smoothing to broaden peaks in spectral data.
    For each signal, a random sigma is chosen between 0 and the specified sigma value.

    Parameters
    ----------
    sigma : float, default=1.0
        Maximum standard deviation for the Gaussian kernel.
        The actual sigma used will be randomly chosen between 0 and this value.
        A value of 0 leaves the data unchanged.

    mode : {'reflect', 'constant', 'nearest', 'mirror', 'wrap'}, default='reflect'
        The mode parameter determines how the input array is extended when
        the filter overlaps a border. Default is 'reflect'.

    pad_value : float, default=0.0
        Value to fill past edges of input if mode is 'constant'.

    random_state : int, optional, default=None
        Random state for reproducible sigma selection.

    truncate : float, default=4.0
        Truncate the filter at this many standard deviations.
        Larger values increase computation time but improve accuracy.
    """

    def __init__(
        self,
        sigma: float = 1.0,
        mode: Literal["reflect", "constant", "nearest", "mirror", "wrap"] = "reflect",
        pad_value: float = 0.0,
        random_state: Optional[int] = None,
        truncate: float = 4.0,
    ):
        self.sigma = sigma
        self.mode = mode
        self.pad_value = pad_value
        self.random_state = random_state
        self.truncate = truncate

    def fit(self, X: np.ndarray, y=None) -> "GaussianBroadening":
        """
        Fit the transformer to the data (in this case, only validates input).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data to validate.

        y : None
            Ignored.

        Returns
        -------
        self : GaussianBroadening
            The fitted transformer.

        Raises
        ------
        ValueError
            If ``sigma`` is not a number or is negative.
        """
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Validate sigma parameter; NumPy scalars (e.g. np.float32, np.int64)
        # are accepted alongside built-in numbers.
        if not isinstance(self.sigma, (int, float, np.integer, np.floating)):
            raise ValueError("sigma must be a number")
        if self.sigma < 0:
            raise ValueError("sigma must be non-negative")

        # Initialize random number generator
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Apply Gaussian broadening to the input data.

        A new random sigma is drawn independently for every sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_transformed : ndarray of shape (n_samples, n_features)
            The transformed data with broadened peaks.
        """
        check_is_fitted(self, "n_features_in_")
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # X_ is a validated copy, so rows can be overwritten in place.
        for i, x in enumerate(X_):
            X_[i] = self._broaden_signal(x)

        return X_

    def _broaden_signal(self, x: np.ndarray) -> np.ndarray:
        """
        Apply Gaussian broadening to a single signal.

        Parameters
        ----------
        x : ndarray of shape (n_features,)
            The input signal to broaden.

        Returns
        -------
        broadened_signal : ndarray of shape (n_features,)
            The broadened signal.
        """
        # Randomly choose sigma between 0 and max sigma
        sigma = self._rng.uniform(0, self.sigma)

        # A vanishing kernel width means no broadening. Return the signal
        # unchanged instead of handing a zero sigma to the filter, whose kernel
        # construction divides by sigma squared.
        if sigma <= 1e-15:
            return x.copy()

        # Apply Gaussian filter
        return gaussian_filter1d(
            x, sigma=sigma, mode=self.mode, cval=self.pad_value, truncate=self.truncate
        )