chemotools 0.1.6__tar.gz → 0.1.7__tar.gz
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- {chemotools-0.1.6 → chemotools-0.1.7}/PKG-INFO +1 -1
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/index_shift.py +82 -3
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_extended_multiplicative_scatter_correction.py +17 -12
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_multiplicative_scatter_correction.py +16 -9
- {chemotools-0.1.6 → chemotools-0.1.7}/pyproject.toml +1 -1
- {chemotools-0.1.6 → chemotools-0.1.7}/LICENSE +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/README.md +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/baseline_shift.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/exponential_noise.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/normal_noise.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/spectrum_scale.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/uniform_noise.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_air_pls.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_ar_pls.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_constant_baseline_correction.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_cubic_spline_correction.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_linear_correction.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_non_negative.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_polynomial_correction.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/baseline/_subtract_reference.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/_base.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/coffee_labels.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/coffee_spectra.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/fermentation_hplc.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/fermentation_spectra.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/train_hplc.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/datasets/data/train_spectra.csv +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/_norris_william.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/derivative/_savitzky_golay.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/_index_selector.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/feature_selection/_range_cut.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_min_max_scaler.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_norm_scaler.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scale/_point_scaler.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_robust_normal_variate.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_standard_normal_variate.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/__init__.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_mean_filter.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_median_filter.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_savitzky_golay_filter.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/smooth/_whittaker_smooth.py +0 -0
- {chemotools-0.1.6 → chemotools-0.1.7}/chemotools/utils/__init__.py +0 -0
{chemotools-0.1.6 → chemotools-0.1.7}/chemotools/augmentation/index_shift.py RENAMED

@@ -1,6 +1,7 @@
-from typing import Optional
+from typing import Literal, Optional
 
 import numpy as np
+from numpy.polynomial import polynomial as poly
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
 from sklearn.utils.validation import check_is_fitted, validate_data
 
@@ -35,8 +36,14 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     Transform the input data by shifting the spectrum.
     """
 
-    def __init__(
+    def __init__(
+        self,
+        shift: int = 0,
+        fill_method: Literal["constant", "linear", "quadratic"] = "constant",
+        random_state: Optional[int] = None,
+    ):
         self.shift = shift
+        self.fill_method = fill_method
         self.random_state = random_state
 
     def fit(self, X: np.ndarray, y=None) -> "IndexShift":
@@ -111,10 +118,82 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
 
         # Calculate the standard normal variate
         for i, x in enumerate(X_):
-            X_[i] = self.
+            X_[i] = self._shift_vector(x)
 
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
 
     def _shift_spectrum(self, x) -> np.ndarray:
         shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
         return np.roll(x, shift_amount)
+
+    def _shift_vector(
+        self,
+        x: np.ndarray,
+    ) -> np.ndarray:
+        """
+        Shift vector with option to fill missing values.
+
+        Args:
+            arr: Input numpy array
+            shift: Number of positions to shift
+            fill_method: Method to fill missing values
+                'constant': fill with first/last value
+                'linear': fill using linear regression
+                'quadratic': fill using quadratic regression
+
+        Returns:
+            Shifted numpy array
+        """
+        shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
+
+        result = np.roll(x, shift)
+
+        if self.fill_method == "constant":
+            if shift > 0:
+                result[:shift] = x[0]
+            elif shift < 0:
+                result[shift:] = x[-1]
+
+        elif self.fill_method == "linear":
+            if shift > 0:
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[:5], 1)
+
+                extrapolate_x = np.arange(-shift, 0)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+
+                result[:shift] = extrapolated_values
+
+            elif shift < 0:
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[-5:], 1)
+
+                extrapolate_x = np.arange(len(x_), len(x_) - shift)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+
+                result[shift:] = extrapolated_values
+
+        elif self.fill_method == "quadratic":
+            if shift > 0:
+                # Use first 3 values for quadratic regression
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[:5], 2)
+
+                # Extrapolate to fill shifted region
+                extrapolate_x = np.arange(-shift, 0)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+
+                result[:shift] = extrapolated_values
+
+            elif shift < 0:
+                # Use last 3 values for quadratic regression
+                x_ = np.arange(5)
+                coeffs = poly.polyfit(x_, x[-5:], 2)
+
+                # Extrapolate to fill shifted region
+                extrapolate_x = np.arange(len(x_), len(x_) - shift)
+                extrapolated_values = poly.polyval(extrapolate_x, coeffs)
+
+                result[shift:] = extrapolated_values
+
+        return result
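In practice, the new `fill_method` option controls how `IndexShift` fills the edge that `np.roll` would otherwise wrap around circularly: repeat the edge value ("constant") or extrapolate it from a small polynomial fit ("linear", "quadratic"). A minimal standalone sketch of the linear variant, using only NumPy (the function name `shift_with_linear_fill` is illustrative, not part of the package API):

```python
import numpy as np
from numpy.polynomial import polynomial as poly


def shift_with_linear_fill(x: np.ndarray, shift: int) -> np.ndarray:
    """Roll a 1-D spectrum and fill the vacated edge by linear extrapolation."""
    result = np.roll(x, shift)
    if shift > 0:
        # Fit a line to the first 5 points and extrapolate backwards.
        coeffs = poly.polyfit(np.arange(5), x[:5], 1)
        result[:shift] = poly.polyval(np.arange(-shift, 0), coeffs)
    elif shift < 0:
        # Fit a line to the last 5 points and extrapolate forwards.
        coeffs = poly.polyfit(np.arange(5), x[-5:], 1)
        result[shift:] = poly.polyval(np.arange(5, 5 - shift), coeffs)
    return result


spectrum = np.linspace(0.0, 1.0, 20) ** 2
print(shift_with_linear_fill(spectrum, 3))
```

With the estimator itself, the equivalent behaviour would be requested as something like `IndexShift(shift=3, fill_method="linear", random_state=42)` followed by `fit_transform`, per the signature added above.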
{chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_extended_multiplicative_scatter_correction.py RENAMED

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Literal, Optional
 
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -46,18 +46,20 @@ class ExtendedMultiplicativeScatterCorrection(
     model-based pre-processing, doi:10.1016/j.chemolab.2021.104350
     """
 
+    ALLOWED_METHODS = ["mean", "median"]
+
+    # TODO: Check method is valid in instantiation. Right now it is check on fit because it breaks the scikitlearn check_estimator()
+
     def __init__(
         self,
-
-        use_mean: bool = True,
-        use_median: bool = False,
+        method: Literal["mean", "median"] = "mean",
         order: int = 2,
+        reference: Optional[np.ndarray] = None,
         weights: Optional[np.ndarray] = None,
     ):
-        self.
-        self.use_mean = use_mean
-        self.use_median = use_median
+        self.method = method
         self.order = order
+        self.reference = reference
         self.weights = weights
 
     def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
@@ -104,21 +106,24 @@ class ExtendedMultiplicativeScatterCorrection(
         self.weights_ = np.array(self.weights)
         return self
 
-        if self.
-            self.reference_ =
+        if self.method == "mean":
+            self.reference_ = X.mean(axis=0)
             self.indices_ = self._calculate_indices(X[0])
             self.A_ = self._calculate_A(self.indices_, self.reference_)
             self.weights_ = np.array(self.weights)
             return self
 
-
-            self.reference_ =
+        elif self.method == "median":
+            self.reference_ = np.median(X, axis=0)
             self.indices_ = self._calculate_indices(X[0])
             self.A_ = self._calculate_A(self.indices_, self.reference_)
             self.weights_ = np.array(self.weights)
             return self
 
-
+        else:
+            raise ValueError(
+                f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
+            )
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
         """
{chemotools-0.1.6 → chemotools-0.1.7}/chemotools/scatter/_multiplicative_scatter_correction.py RENAMED

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Literal, Optional
 
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
@@ -37,16 +37,18 @@ class MultiplicativeScatterCorrection(
 
     """
 
+    ALLOWED_METHODS = ["mean", "median"]
+
+    # TODO: Check method is valid in instantiation. Right now it is check on fit because it breaks the scikitlearn check_estimator()
+
     def __init__(
         self,
+        method: Literal["mean", "median"] = "mean",
         reference: Optional[np.ndarray] = None,
-        use_mean: bool = True,
-        use_median: bool = False,
         weights: Optional[np.ndarray] = None,
     ):
+        self.method = method
         self.reference = reference
-        self.use_mean = use_mean
-        self.use_median = use_median
         self.weights = weights
 
     def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
@@ -91,18 +93,23 @@ class MultiplicativeScatterCorrection(
         self.weights_ = np.array(self.weights)
         return self
 
-        if self.
-            self.reference_ =
+        if self.method == "mean":
+            self.reference_ = X.mean(axis=0)
             self.A_ = self._calculate_A(self.reference_)
             self.weights_ = np.array(self.weights)
             return self
 
-
-            self.reference_ =
+        elif self.method == "median":
+            self.reference_ = np.median(X, axis=0)
             self.A_ = self._calculate_A(self.reference_)
             self.weights_ = np.array(self.weights)
             return self
 
+        else:
+            raise ValueError(
+                f"Invalid method: {self.method}. Must be one of {self.ALLOWED_METHODS}"
+            )
+
         raise ValueError("No reference was provided")
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
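`MultiplicativeScatterCorrection` follows the same pattern: when no explicit `reference` is supplied, `method` decides whether the internal reference spectrum is the column-wise mean or median of the training spectra, and any other value raises a `ValueError`. A plain NumPy sketch of just that choice (illustrative only; `build_reference` is not a package function):

```python
import numpy as np


def build_reference(X: np.ndarray, method: str = "mean") -> np.ndarray:
    """Column-wise reference spectrum, mirroring the fit() branches above."""
    allowed = ["mean", "median"]
    if method == "mean":
        return X.mean(axis=0)
    elif method == "median":
        return np.median(X, axis=0)
    raise ValueError(f"Invalid method: {method}. Must be one of {allowed}")


X = np.random.default_rng(1).random((8, 50))   # 8 spectra, 50 wavelengths
reference = build_reference(X, method="median")
print(reference.shape)  # (50,)
```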