chemotools 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
+ from numpy.polynomial import polynomial as poly
4
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
6
  from sklearn.utils.validation import check_is_fitted, validate_data
6
7
 
@@ -35,8 +36,14 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
35
36
  Transform the input data by shifting the spectrum.
36
37
  """
37
38
 
38
- def __init__(self, shift: int = 0, random_state: Optional[int] = None):
39
+ def __init__(
40
+ self,
41
+ shift: int = 0,
42
+ fill_method: Literal["constant", "linear", "quadratic"] = "constant",
43
+ random_state: Optional[int] = None,
44
+ ):
39
45
  self.shift = shift
46
+ self.fill_method = fill_method
40
47
  self.random_state = random_state
41
48
 
42
49
  def fit(self, X: np.ndarray, y=None) -> "IndexShift":
@@ -111,10 +118,82 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
111
118
 
112
119
  # Shift each spectrum by a randomly drawn number of indices
113
120
  for i, x in enumerate(X_):
114
- X_[i] = self._shift_spectrum(x)
121
+ X_[i] = self._shift_vector(x)
115
122
 
116
123
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
117
124
 
118
125
  def _shift_spectrum(self, x) -> np.ndarray:
119
126
  shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
120
127
  return np.roll(x, shift_amount)
128
+
129
+ def _shift_vector(
130
+ self,
131
+ x: np.ndarray,
132
+ ) -> np.ndarray:
133
+ """
134
+ Shift vector with option to fill missing values.
135
+
136
+ Args:
137
+ x: Input numpy array
138
+ shift: Number of positions to shift (drawn at random from [-self.shift, self.shift], not passed as an argument)
139
+ fill_method: Method to fill missing values
140
+ 'constant': fill with first/last value
141
+ 'linear': fill using linear regression
142
+ 'quadratic': fill using quadratic regression
143
+
144
+ Returns:
145
+ Shifted numpy array
146
+ """
147
+ shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
148
+
149
+ result = np.roll(x, shift)
150
+
151
+ if self.fill_method == "constant":
152
+ if shift > 0:
153
+ result[:shift] = x[0]
154
+ elif shift < 0:
155
+ result[shift:] = x[-1]
156
+
157
+ elif self.fill_method == "linear":
158
+ if shift > 0:
159
+ x_ = np.arange(5)
160
+ coeffs = poly.polyfit(x_, x[:5], 1)
161
+
162
+ extrapolate_x = np.arange(-shift, 0)
163
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
164
+
165
+ result[:shift] = extrapolated_values
166
+
167
+ elif shift < 0:
168
+ x_ = np.arange(5)
169
+ coeffs = poly.polyfit(x_, x[-5:], 1)
170
+
171
+ extrapolate_x = np.arange(len(x_), len(x_) - shift)
172
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
173
+
174
+ result[shift:] = extrapolated_values
175
+
176
+ elif self.fill_method == "quadratic":
177
+ if shift > 0:
178
+ # Use first 5 values for quadratic regression
179
+ x_ = np.arange(5)
180
+ coeffs = poly.polyfit(x_, x[:5], 2)
181
+
182
+ # Extrapolate to fill shifted region
183
+ extrapolate_x = np.arange(-shift, 0)
184
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
185
+
186
+ result[:shift] = extrapolated_values
187
+
188
+ elif shift < 0:
189
+ # Use last 5 values for quadratic regression
190
+ x_ = np.arange(5)
191
+ coeffs = poly.polyfit(x_, x[-5:], 2)
192
+
193
+ # Extrapolate to fill shifted region
194
+ extrapolate_x = np.arange(len(x_), len(x_) - shift)
195
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
196
+
197
+ result[shift:] = extrapolated_values
198
+
199
+ return result
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -46,18 +46,20 @@ class ExtendedMultiplicativeScatterCorrection(
46
46
  model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
47
47
  """
48
48
 
49
+ ALLOWED_METHODS = ["mean", "median"]
50
+
51
+ # TODO: Check that method is valid at instantiation. Right now it is checked in fit because checking at instantiation breaks the scikit-learn check_estimator()
52
+
49
53
  def __init__(
50
54
  self,
51
- reference: Optional[np.ndarray] = None,
52
- use_mean: bool = True,
53
- use_median: bool = False,
55
+ method: Literal["mean", "median"] = "mean",
54
56
  order: int = 2,
57
+ reference: Optional[np.ndarray] = None,
55
58
  weights: Optional[np.ndarray] = None,
56
59
  ):
57
- self.reference = reference
58
- self.use_mean = use_mean
59
- self.use_median = use_median
60
+ self.method = method
60
61
  self.order = order
62
+ self.reference = reference
61
63
  self.weights = weights
62
64
 
63
65
  def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
@@ -104,21 +106,24 @@ class ExtendedMultiplicativeScatterCorrection(
104
106
  self.weights_ = np.array(self.weights)
105
107
  return self
106
108
 
107
- if self.use_median:
108
- self.reference_ = np.median(X, axis=0)
109
+ if self.method == "mean":
110
+ self.reference_ = X.mean(axis=0)
109
111
  self.indices_ = self._calculate_indices(X[0])
110
112
  self.A_ = self._calculate_A(self.indices_, self.reference_)
111
113
  self.weights_ = np.array(self.weights)
112
114
  return self
113
115
 
114
- if self.use_mean:
115
- self.reference_ = X.mean(axis=0)
116
+ elif self.method == "median":
117
+ self.reference_ = np.median(X, axis=0)
116
118
  self.indices_ = self._calculate_indices(X[0])
117
119
  self.A_ = self._calculate_A(self.indices_, self.reference_)
118
120
  self.weights_ = np.array(self.weights)
119
121
  return self
120
122
 
121
- raise ValueError("No reference was provided")
123
+ else:
124
+ raise ValueError(
125
+ f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
126
+ )
122
127
 
123
128
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
124
129
  """
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -37,16 +37,18 @@ class MultiplicativeScatterCorrection(
37
37
 
38
38
  """
39
39
 
40
+ ALLOWED_METHODS = ["mean", "median"]
41
+
42
+ # TODO: Check that method is valid at instantiation. Right now it is checked in fit because checking at instantiation breaks the scikit-learn check_estimator()
43
+
40
44
  def __init__(
41
45
  self,
46
+ method: Literal["mean", "median"] = "mean",
42
47
  reference: Optional[np.ndarray] = None,
43
- use_mean: bool = True,
44
- use_median: bool = False,
45
48
  weights: Optional[np.ndarray] = None,
46
49
  ):
50
+ self.method = method
47
51
  self.reference = reference
48
- self.use_mean = use_mean
49
- self.use_median = use_median
50
52
  self.weights = weights
51
53
 
52
54
  def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
@@ -91,18 +93,23 @@ class MultiplicativeScatterCorrection(
91
93
  self.weights_ = np.array(self.weights)
92
94
  return self
93
95
 
94
- if self.use_median:
95
- self.reference_ = np.median(X, axis=0)
96
+ if self.method == "mean":
97
+ self.reference_ = X.mean(axis=0)
96
98
  self.A_ = self._calculate_A(self.reference_)
97
99
  self.weights_ = np.array(self.weights)
98
100
  return self
99
101
 
100
- if self.use_mean:
101
- self.reference_ = X.mean(axis=0)
102
+ elif self.method == "median":
103
+ self.reference_ = np.median(X, axis=0)
102
104
  self.A_ = self._calculate_A(self.reference_)
103
105
  self.weights_ = np.array(self.weights)
104
106
  return self
105
107
 
108
+ else:
109
+ raise ValueError(
110
+ f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
111
+ )
112
+
106
113
  raise ValueError("No reference was provided")
107
114
 
108
115
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: chemotools
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
5
  License: MIT
6
6
  Author: Pau Cabaneros
@@ -2,7 +2,7 @@ chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  chemotools/augmentation/__init__.py,sha256=xIUoWov_aluoW5L3zpVAahyPdkWA5erApW-duzdE_9A,385
3
3
  chemotools/augmentation/baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
4
4
  chemotools/augmentation/exponential_noise.py,sha256=fhZ4zQGGqmW-OiSu388th6IhgXrFj1xOguqKYAgj8Y4,3348
5
- chemotools/augmentation/index_shift.py,sha256=DWVfnxCUgm2NNQfASTpqNoMkfhlW1WZT8EoWVsSSF4c,3459
5
+ chemotools/augmentation/index_shift.py,sha256=w1maDHGLAKSiGAQ8c9yYHofs_PJnxeN0nB1RU-pINcE,6042
6
6
  chemotools/augmentation/normal_noise.py,sha256=-se2Xv1pAWt9HY7H5yC4XlxRArPKZWGeTy2MdyN4lBE,3318
7
7
  chemotools/augmentation/spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
8
8
  chemotools/augmentation/uniform_noise.py,sha256=8a-AYzEDIkLckL6FK2i8mr_jXnQGcFaKXh_roGCICaQ,3456
@@ -35,8 +35,8 @@ chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZH
35
35
  chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
36
36
  chemotools/scale/_point_scaler.py,sha256=je-vomAk7g3Q7yxmisQK4-3ndKEKI2wDwLrUiNuwzzA,3505
37
37
  chemotools/scatter/__init__.py,sha256=ftyC_MGurzxpWMie8WlFDGh5ylalK2K3aCSN4qUzQAw,459
38
- chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=7OpOcvWX1hlMUR18tC29pkSiADLZViDrTh-wro738E4,6560
39
- chemotools/scatter/_multiplicative_scatter_correction.py,sha256=nPMPYKHl6-U--GAuQdZL8KVNPlr3V52teUAoJ0iRs3g,5801
38
+ chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=2OitT0QBYepvigmfmfpGWOLjq9y3iycOdTt-WhqLNhs,6801
39
+ chemotools/scatter/_multiplicative_scatter_correction.py,sha256=XKa19Vk7F6-JxWPMIt7qmxdySdbliAVJwsKwPhY02O0,6097
40
40
  chemotools/scatter/_robust_normal_variate.py,sha256=nPfcvjHEpwkcSCjdvD86WN9q2wVMCeZ2Z8wMzcBpM3Y,3110
41
41
  chemotools/scatter/_standard_normal_variate.py,sha256=22mJzbbZoXQY-_hHAhGO0vzfYwr3oMqaR6xPjJryHtk,2582
42
42
  chemotools/smooth/__init__.py,sha256=G8JvAoBK9d18-k6XgukqN6dbJP-dsEgeDdbKbZdCIkA,265
@@ -45,7 +45,7 @@ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYN
45
45
  chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
46
46
  chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
47
47
  chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- chemotools-0.1.6.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
49
- chemotools-0.1.6.dist-info/METADATA,sha256=79TZ--QC_SOHj3ou6bDaRYsJsQoFS0sx2Rfe2BUOrG4,5239
50
- chemotools-0.1.6.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
51
- chemotools-0.1.6.dist-info/RECORD,,
48
+ chemotools-0.1.7.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
49
+ chemotools-0.1.7.dist-info/METADATA,sha256=cLjx8z4fGKdTHdIyDZ1VGd6WgXX-WlXtw9OwqaYm0Fo,5239
50
+ chemotools-0.1.7.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
51
+ chemotools-0.1.7.dist-info/RECORD,,