chemotools 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. {chemotools-0.1.6 → chemotools-0.1.7}/PKG-INFO +1 -1
  2. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/index_shift.py +82 -3
  3. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_extended_multiplicative_scatter_correction.py +17 -12
  4. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_multiplicative_scatter_correction.py +16 -9
  5. {chemotools-0.1.6 → chemotools-0.1.7}/pyproject.toml +1 -1
  6. {chemotools-0.1.6 → chemotools-0.1.7}/LICENSE +0 -0
  7. {chemotools-0.1.6 → chemotools-0.1.7}/README.md +0 -0
  8. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/__init__.py +0 -0
  9. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/__init__.py +0 -0
  10. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/baseline_shift.py +0 -0
  11. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/exponential_noise.py +0 -0
  12. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/normal_noise.py +0 -0
  13. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/spectrum_scale.py +0 -0
  14. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/uniform_noise.py +0 -0
  15. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/__init__.py +0 -0
  16. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_air_pls.py +0 -0
  17. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_ar_pls.py +0 -0
  18. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_constant_baseline_correction.py +0 -0
  19. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_cubic_spline_correction.py +0 -0
  20. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_linear_correction.py +0 -0
  21. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_non_negative.py +0 -0
  22. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_polynomial_correction.py +0 -0
  23. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_subtract_reference.py +0 -0
  24. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/__init__.py +0 -0
  25. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/_base.py +0 -0
  26. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/__init__.py +0 -0
  27. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/coffee_labels.csv +0 -0
  28. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/coffee_spectra.csv +0 -0
  29. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/fermentation_hplc.csv +0 -0
  30. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/fermentation_spectra.csv +0 -0
  31. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/train_hplc.csv +0 -0
  32. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/train_spectra.csv +0 -0
  33. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/__init__.py +0 -0
  34. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/_norris_william.py +0 -0
  35. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/_savitzky_golay.py +0 -0
  36. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/__init__.py +0 -0
  37. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/_index_selector.py +0 -0
  38. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/_range_cut.py +0 -0
  39. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/__init__.py +0 -0
  40. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_min_max_scaler.py +0 -0
  41. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_norm_scaler.py +0 -0
  42. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_point_scaler.py +0 -0
  43. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/__init__.py +0 -0
  44. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_robust_normal_variate.py +0 -0
  45. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_standard_normal_variate.py +0 -0
  46. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/__init__.py +0 -0
  47. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_mean_filter.py +0 -0
  48. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_median_filter.py +0 -0
  49. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_savitzky_golay_filter.py +0 -0
  50. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_whittaker_smooth.py +0 -0
  51. {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/utils/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: chemotools
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
5
  License: MIT
6
6
  Author: Pau Cabaneros
@@ -1,6 +1,7 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
+ from numpy.polynomial import polynomial as poly
4
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
6
  from sklearn.utils.validation import check_is_fitted, validate_data
6
7
 
@@ -35,8 +36,14 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
35
36
  Transform the input data by shifting the spectrum.
36
37
  """
37
38
 
38
- def __init__(self, shift: int = 0, random_state: Optional[int] = None):
39
+ def __init__(
40
+ self,
41
+ shift: int = 0,
42
+ fill_method: Literal["constant", "linear", "quadratic"] = "constant",
43
+ random_state: Optional[int] = None,
44
+ ):
39
45
  self.shift = shift
46
+ self.fill_method = fill_method
40
47
  self.random_state = random_state
41
48
 
42
49
  def fit(self, X: np.ndarray, y=None) -> "IndexShift":
@@ -111,10 +118,82 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
111
118
 
112
119
  # Shift each spectrum by a randomly drawn number of indices
113
120
  for i, x in enumerate(X_):
114
- X_[i] = self._shift_spectrum(x)
121
+ X_[i] = self._shift_vector(x)
115
122
 
116
123
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
117
124
 
118
125
  def _shift_spectrum(self, x) -> np.ndarray:
119
126
  shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
120
127
  return np.roll(x, shift_amount)
128
+
129
+ def _shift_vector(
130
+ self,
131
+ x: np.ndarray,
132
+ ) -> np.ndarray:
133
+ """
134
+ Shift vector with option to fill missing values.
135
+
136
+ Args:
137
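+ x: Input numpy array
138
+ shift: Number of positions to shift (not passed in; drawn at random from [-self.shift, self.shift])
139
+ fill_method: Method to fill missing values (not passed in; read from self.fill_method)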
140
+ 'constant': fill with first/last value
141
+ 'linear': fill using linear regression
142
+ 'quadratic': fill using quadratic regression
143
+
144
+ Returns:
145
+ Shifted numpy array
146
+ """
147
+ shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
148
+
149
+ result = np.roll(x, shift)
150
+
151
+ if self.fill_method == "constant":
152
+ if shift > 0:
153
+ result[:shift] = x[0]
154
+ elif shift < 0:
155
+ result[shift:] = x[-1]
156
+
157
+ elif self.fill_method == "linear":
158
+ if shift > 0:
159
+ x_ = np.arange(5)
160
+ coeffs = poly.polyfit(x_, x[:5], 1)
161
+
162
+ extrapolate_x = np.arange(-shift, 0)
163
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
164
+
165
+ result[:shift] = extrapolated_values
166
+
167
+ elif shift < 0:
168
+ x_ = np.arange(5)
169
+ coeffs = poly.polyfit(x_, x[-5:], 1)
170
+
171
+ extrapolate_x = np.arange(len(x_), len(x_) - shift)
172
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
173
+
174
+ result[shift:] = extrapolated_values
175
+
176
+ elif self.fill_method == "quadratic":
177
+ if shift > 0:
178
+ # Use first 5 values for quadratic regression
179
+ x_ = np.arange(5)
180
+ coeffs = poly.polyfit(x_, x[:5], 2)
181
+
182
+ # Extrapolate to fill shifted region
183
+ extrapolate_x = np.arange(-shift, 0)
184
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
185
+
186
+ result[:shift] = extrapolated_values
187
+
188
+ elif shift < 0:
189
+ # Use last 5 values for quadratic regression
190
+ x_ = np.arange(5)
191
+ coeffs = poly.polyfit(x_, x[-5:], 2)
192
+
193
+ # Extrapolate to fill shifted region
194
+ extrapolate_x = np.arange(len(x_), len(x_) - shift)
195
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
196
+
197
+ result[shift:] = extrapolated_values
198
+
199
+ return result
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -46,18 +46,20 @@ class ExtendedMultiplicativeScatterCorrection(
46
46
  model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
47
47
  """
48
48
 
49
+ ALLOWED_METHODS = ["mean", "median"]
50
+
51
+ # TODO: Check that method is valid at instantiation. Right now it is checked in fit, because checking it at instantiation breaks scikit-learn's check_estimator()
52
+
49
53
  def __init__(
50
54
  self,
51
- reference: Optional[np.ndarray] = None,
52
- use_mean: bool = True,
53
- use_median: bool = False,
55
+ method: Literal["mean", "median"] = "mean",
54
56
  order: int = 2,
57
+ reference: Optional[np.ndarray] = None,
55
58
  weights: Optional[np.ndarray] = None,
56
59
  ):
57
- self.reference = reference
58
- self.use_mean = use_mean
59
- self.use_median = use_median
60
+ self.method = method
60
61
  self.order = order
62
+ self.reference = reference
61
63
  self.weights = weights
62
64
 
63
65
  def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
@@ -104,21 +106,24 @@ class ExtendedMultiplicativeScatterCorrection(
104
106
  self.weights_ = np.array(self.weights)
105
107
  return self
106
108
 
107
- if self.use_median:
108
- self.reference_ = np.median(X, axis=0)
109
+ if self.method == "mean":
110
+ self.reference_ = X.mean(axis=0)
109
111
  self.indices_ = self._calculate_indices(X[0])
110
112
  self.A_ = self._calculate_A(self.indices_, self.reference_)
111
113
  self.weights_ = np.array(self.weights)
112
114
  return self
113
115
 
114
- if self.use_mean:
115
- self.reference_ = X.mean(axis=0)
116
+ elif self.method == "median":
117
+ self.reference_ = np.median(X, axis=0)
116
118
  self.indices_ = self._calculate_indices(X[0])
117
119
  self.A_ = self._calculate_A(self.indices_, self.reference_)
118
120
  self.weights_ = np.array(self.weights)
119
121
  return self
120
122
 
121
- raise ValueError("No reference was provided")
123
+ else:
124
+ raise ValueError(
125
+ f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
126
+ )
122
127
 
123
128
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
124
129
  """
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Literal, Optional
2
2
 
3
3
  import numpy as np
4
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -37,16 +37,18 @@ class MultiplicativeScatterCorrection(
37
37
 
38
38
  """
39
39
 
40
+ ALLOWED_METHODS = ["mean", "median"]
41
+
42
+ # TODO: Check that method is valid at instantiation. Right now it is checked in fit, because checking it at instantiation breaks scikit-learn's check_estimator()
43
+
40
44
  def __init__(
41
45
  self,
46
+ method: Literal["mean", "median"] = "mean",
42
47
  reference: Optional[np.ndarray] = None,
43
- use_mean: bool = True,
44
- use_median: bool = False,
45
48
  weights: Optional[np.ndarray] = None,
46
49
  ):
50
+ self.method = method
47
51
  self.reference = reference
48
- self.use_mean = use_mean
49
- self.use_median = use_median
50
52
  self.weights = weights
51
53
 
52
54
  def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
@@ -91,18 +93,23 @@ class MultiplicativeScatterCorrection(
91
93
  self.weights_ = np.array(self.weights)
92
94
  return self
93
95
 
94
- if self.use_median:
95
- self.reference_ = np.median(X, axis=0)
96
+ if self.method == "mean":
97
+ self.reference_ = X.mean(axis=0)
96
98
  self.A_ = self._calculate_A(self.reference_)
97
99
  self.weights_ = np.array(self.weights)
98
100
  return self
99
101
 
100
- if self.use_mean:
101
- self.reference_ = X.mean(axis=0)
102
+ elif self.method == "median":
103
+ self.reference_ = np.median(X, axis=0)
102
104
  self.A_ = self._calculate_A(self.reference_)
103
105
  self.weights_ = np.array(self.weights)
104
106
  return self
105
107
 
108
+ else:
109
+ raise ValueError(
110
+ f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
111
+ )
112
+
106
113
  raise ValueError("No reference was provided")
107
114
 
108
115
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "chemotools"
3
- version = "0.1.6"
3
+ version = "0.1.7"
4
4
  description = "chemotools: A Python Package that Integrates Chemometrics and scikit-learn"
5
5
  authors = ["Pau Cabaneros"]
6
6
  license = "MIT License"
File without changes
File without changes