chemotools 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +10 -0
- chemotools/augmentation/baseline_shift.py +23 -15
- chemotools/augmentation/exponential_noise.py +24 -15
- chemotools/augmentation/index_shift.py +104 -16
- chemotools/augmentation/normal_noise.py +24 -14
- chemotools/augmentation/spectrum_scale.py +24 -15
- chemotools/augmentation/uniform_noise.py +26 -14
- chemotools/baseline/__init__.py +13 -1
- chemotools/baseline/_air_pls.py +16 -14
- chemotools/baseline/_ar_pls.py +17 -17
- chemotools/baseline/_constant_baseline_correction.py +19 -16
- chemotools/baseline/_cubic_spline_correction.py +17 -8
- chemotools/baseline/_linear_correction.py +18 -10
- chemotools/baseline/_non_negative.py +14 -8
- chemotools/baseline/_polynomial_correction.py +19 -11
- chemotools/baseline/_subtract_reference.py +17 -9
- chemotools/datasets/__init__.py +2 -0
- chemotools/datasets/_base.py +3 -3
- chemotools/derivative/__init__.py +3 -1
- chemotools/derivative/_norris_william.py +14 -8
- chemotools/derivative/_savitzky_golay.py +25 -21
- chemotools/feature_selection/__init__.py +2 -0
- chemotools/feature_selection/_index_selector.py +18 -17
- chemotools/feature_selection/_range_cut.py +9 -7
- chemotools/scale/__init__.py +2 -0
- chemotools/scale/_min_max_scaler.py +14 -8
- chemotools/scale/_norm_scaler.py +14 -8
- chemotools/scale/_point_scaler.py +18 -10
- chemotools/scatter/__init__.py +11 -2
- chemotools/scatter/_extended_multiplicative_scatter_correction.py +33 -29
- chemotools/scatter/_multiplicative_scatter_correction.py +33 -18
- chemotools/scatter/_robust_normal_variate.py +14 -8
- chemotools/scatter/_standard_normal_variate.py +14 -8
- chemotools/smooth/__init__.py +3 -1
- chemotools/smooth/_mean_filter.py +14 -8
- chemotools/smooth/_median_filter.py +31 -9
- chemotools/smooth/_savitzky_golay_filter.py +20 -9
- chemotools/smooth/_whittaker_smooth.py +20 -11
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/METADATA +18 -17
- chemotools-0.1.7.dist-info/RECORD +51 -0
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/WHEEL +1 -2
- chemotools/utils/check_inputs.py +0 -14
- chemotools-0.1.5.dist-info/RECORD +0 -58
- chemotools-0.1.5.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/fixtures.py +0 -89
- tests/test_datasets.py +0 -111
- tests/test_functionality.py +0 -777
- tests/test_sklearn_compliance.py +0 -277
- {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/LICENSE +0 -0
tests/test_functionality.py
DELETED
@@ -1,777 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
import pandas as pd
|
3
|
-
import polars as pl
|
4
|
-
import pytest
|
5
|
-
|
6
|
-
from chemotools.augmentation import (
|
7
|
-
BaselineShift,
|
8
|
-
ExponentialNoise,
|
9
|
-
IndexShift,
|
10
|
-
NormalNoise,
|
11
|
-
SpectrumScale,
|
12
|
-
UniformNoise,
|
13
|
-
)
|
14
|
-
|
15
|
-
from chemotools.baseline import (
|
16
|
-
AirPls,
|
17
|
-
ArPls,
|
18
|
-
ConstantBaselineCorrection,
|
19
|
-
LinearCorrection,
|
20
|
-
NonNegative,
|
21
|
-
SubtractReference,
|
22
|
-
)
|
23
|
-
from chemotools.derivative import NorrisWilliams, SavitzkyGolay
|
24
|
-
from chemotools.scale import MinMaxScaler, NormScaler, PointScaler
|
25
|
-
from chemotools.scatter import (
|
26
|
-
ExtendedMultiplicativeScatterCorrection,
|
27
|
-
MultiplicativeScatterCorrection,
|
28
|
-
RobustNormalVariate,
|
29
|
-
StandardNormalVariate,
|
30
|
-
)
|
31
|
-
from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth
|
32
|
-
from chemotools.feature_selection import IndexSelector, RangeCut
|
33
|
-
from tests.fixtures import (
|
34
|
-
spectrum,
|
35
|
-
spectrum_arpls,
|
36
|
-
reference_airpls,
|
37
|
-
reference_arpls,
|
38
|
-
reference_msc_mean,
|
39
|
-
reference_msc_median,
|
40
|
-
reference_sg_15_2,
|
41
|
-
reference_snv,
|
42
|
-
reference_whitakker,
|
43
|
-
)
|
44
|
-
|
45
|
-
|
46
|
-
def test_air_pls(spectrum, reference_airpls):
    """AirPls built without arguments must reproduce the stored reference row."""
    corrected = AirPls().fit_transform(spectrum)

    matches_reference = np.allclose(corrected[0], reference_airpls[0], atol=1e-7)
    assert matches_reference
|
55
|
-
|
56
|
-
|
57
|
-
def test_ar_pls(spectrum_arpls, reference_arpls):
    """ArPls(1e2, 0.0001) output must equal the input minus the stored reference."""
    # Expected result: raw fixture data minus the stored reference array.
    expected = np.array(spectrum_arpls) - np.array(reference_arpls)
    transformer = ArPls(1e2, 0.0001)

    corrected = transformer.fit_transform(spectrum_arpls)

    assert np.allclose(corrected[0], expected[0], atol=1e-4)
|
67
|
-
|
68
|
-
|
69
|
-
def test_baseline_shift():
    """BaselineShift must add one constant, seeded offset to a flat spectrum."""
    flat = np.ones((1, 100))
    shifter = BaselineShift(scale=1, random_state=42)

    shifted = shifter.fit_transform(flat)

    assert flat.shape == shifted.shape
    assert np.mean(shifted[0]) > np.mean(flat[0])
    # A pure offset leaves the spread of a constant signal at zero.
    assert np.isclose(np.std(shifted[0]), 0.0, atol=1e-8)
    offset = np.mean(shifted[0]) - np.mean(flat[0])
    assert np.isclose(offset, 0.77395605, atol=1e-8)
|
82
|
-
|
83
|
-
|
84
|
-
def test_constant_baseline_correction():
    """ConstantBaselineCorrection(start=7, end=8) must yield the expected shifted row."""
    raw = np.array([[1, 1, 1, 1, 1, 1, 1, 2, 2, 1]])
    corrector = ConstantBaselineCorrection(start=7, end=8)

    corrected = corrector.fit_transform(raw)

    target = np.array([-1, -1, -1, -1, -1, -1, -1, 0, 0, -1])
    assert np.allclose(corrected[0], target, atol=1e-8)
|
95
|
-
|
96
|
-
|
97
|
-
def test_constant_baseline_correction_with_wavenumbers():
    """Same check as the index-based test, with the region given as wavenumbers 9..10."""
    raw = np.array([[1, 1, 1, 1, 1, 1, 1, 2, 2, 1]])
    axis = np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    corrector = ConstantBaselineCorrection(start=9, end=10, wavenumbers=axis)

    corrected = corrector.fit_transform(raw)

    target = np.array([-1, -1, -1, -1, -1, -1, -1, 0, 0, -1])
    assert np.allclose(corrected[0], target, atol=1e-8)
|
111
|
-
|
112
|
-
|
113
|
-
def test_exponential_noise():
    """ExponentialNoise(scale=0.1) must raise the mean of a flat spectrum by about 0.1."""
    flat = np.ones((1, 10000))
    noiser = ExponentialNoise(scale=0.1, random_state=42)

    noisy = noiser.fit_transform(flat)

    assert flat.shape == noisy.shape
    mean_increase = np.mean(noisy[0]) - 1
    assert np.allclose(mean_increase, 0.1, atol=1e-2)
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
def test_extended_baseline_correction():
    """EMSC with a flat reference must map a linear ramp onto that reference."""
    ramp = np.array([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]])
    flat_reference = np.array([1.0] * 10)
    corrector = ExtendedMultiplicativeScatterCorrection(reference=flat_reference)

    corrected = corrector.fit_transform(ramp)

    assert np.allclose(corrected[0], flat_reference, atol=1e-8)
|
140
|
-
|
141
|
-
|
142
|
-
def test_extended_baseline_correction_with_weights():
    """EMSC with unit weights and a flat reference must return the reference."""
    ramp = np.array([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]])
    flat_reference = np.array([1.0] * 10)
    unit_weights = np.array([1.0] * 10)
    corrector = ExtendedMultiplicativeScatterCorrection(
        reference=flat_reference, weights=unit_weights
    )

    corrected = corrector.fit_transform(ramp)

    assert np.allclose(corrected[0], flat_reference, atol=1e-8)
|
156
|
-
|
157
|
-
|
158
|
-
def test_extended_baseline_correction_with_no_reference():
    """EMSC with neither mean, median nor reference must raise ValueError."""
    short_ramp = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]])
    corrector = ExtendedMultiplicativeScatterCorrection(
        use_mean=False, use_median=False
    )

    with pytest.raises(ValueError):
        corrector.fit_transform(short_ramp)
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
def test_extended_baseline_correction_with_wrong_reference():
    """A 9-point reference against a 10-point spectrum must raise ValueError."""
    ramp = np.array([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]])
    short_reference = np.array([1.0] * 9)
    corrector = ExtendedMultiplicativeScatterCorrection(reference=short_reference)

    with pytest.raises(ValueError):
        corrector.fit_transform(ramp)
|
184
|
-
|
185
|
-
|
186
|
-
def test_extended_baseline_correction_with_wrong_weights():
    """A 9-point weight vector against a 10-point spectrum must raise ValueError."""
    ramp = np.array([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]])
    short_weights = np.array([1.0] * 9)
    corrector = ExtendedMultiplicativeScatterCorrection(weights=short_weights)

    with pytest.raises(ValueError):
        corrector.fit_transform(ramp)
|
199
|
-
|
200
|
-
|
201
|
-
def test_extended_baseline_correction_with_noreference_no_median_no_mean(spectrum):
    """EMSC with ``use_mean=False`` and no reference must raise ValueError.

    ``spectrum`` is requested as a fixture. The previous version had no
    parameter, so the name resolved to the fixture *function* imported at
    module level, and the test never fed real spectral data to the transformer.
    """
    # Arrange
    emsc = ExtendedMultiplicativeScatterCorrection(use_mean=False)

    # Act & Assert
    with pytest.raises(ValueError):
        emsc.fit_transform(spectrum)
|
208
|
-
|
209
|
-
|
210
|
-
def test_extended_baseline_correction_through_msc(spectrum):
    """EMSC of order 0 should be equivalent to MSC (mean reference)."""
    # Arrange
    msc = MultiplicativeScatterCorrection()
    emsc = ExtendedMultiplicativeScatterCorrection(order=0)

    # Act
    spectrum_msc = msc.fit_transform(spectrum)
    spectrum_emsc = emsc.fit_transform(spectrum)

    # Assert
    # Compare row against row; the earlier row-vs-matrix comparison only
    # passed through NumPy broadcasting.
    assert np.allclose(spectrum_emsc[0], spectrum_msc[0], atol=1e-8)
|
222
|
-
|
223
|
-
|
224
|
-
def test_extended_baseline_correction_through_msc_median(spectrum):
    """EMSC of order 0 should be equivalent to MSC (median reference)."""
    # Arrange
    msc = MultiplicativeScatterCorrection(use_median=True)
    emsc = ExtendedMultiplicativeScatterCorrection(order=0, use_median=True)

    # Act
    spectrum_msc = msc.fit_transform(spectrum)
    spectrum_emsc = emsc.fit_transform(spectrum)

    # Assert
    # Compare row against row; the earlier row-vs-matrix comparison only
    # passed through NumPy broadcasting.
    assert np.allclose(spectrum_emsc[0], spectrum_msc[0], atol=1e-8)
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
def test_index_selector():
    """IndexSelector built without arguments must return the input row unchanged."""
    raw = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
    selector = IndexSelector()

    selected = selector.fit_transform(raw)

    assert np.allclose(selected[0], raw[0], atol=1e-8)
|
249
|
-
|
250
|
-
|
251
|
-
def test_index_selector_with_index():
    """IndexSelector must keep only the columns listed in ``features``."""
    raw = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
    target = np.array([[1, 2, 3, 8, 9, 10]])
    keep = np.array([0, 1, 2, 7, 8, 9])

    selected = IndexSelector(features=keep).fit_transform(raw)

    assert np.allclose(selected[0], target, atol=1e-8)
|
262
|
-
|
263
|
-
|
264
|
-
def test_index_selector_with_wavenumbers():
    """IndexSelector must select columns by wavenumber value when an axis is given."""
    axis = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
    raw = np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]])
    target = np.array([[1.0, 2.0, 3.0, 34.0, 55.0, 89.0]])
    keep = np.array([1, 2, 3, 8, 9, 10])

    selector = IndexSelector(features=keep, wavenumbers=axis)
    selected = selector.fit_transform(raw)

    assert np.allclose(selected[0], target, atol=1e-8)
|
278
|
-
|
279
|
-
|
280
|
-
def test_index_selector_with_wavenumbers_and_dataframe():
    """IndexSelector with pandas output must return a DataFrame with the selected values."""
    # Arrange
    wavenumbers = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
    spectrum = pd.DataFrame(
        np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]])
    )
    expected = np.array([[1.0, 2.0, 3.0, 34.0, 55.0, 89.0]])

    # Act
    select_features = IndexSelector(
        features=np.array([1, 2, 3, 8, 9, 10]), wavenumbers=wavenumbers
    ).set_output(transform='pandas')

    spectrum_corrected = select_features.fit_transform(spectrum)

    # Assert
    # isinstance is the idiomatic type check and also accepts DataFrame subclasses.
    assert isinstance(spectrum_corrected, pd.DataFrame)
    # `expected` was previously computed but never checked.
    assert np.allclose(spectrum_corrected.to_numpy()[0], expected[0], atol=1e-8)
|
295
|
-
|
296
|
-
|
297
|
-
def test_index_shift():
    """With seed 42 and shift=1 the peak at index 5 must end up at index 4."""
    peak_row = np.array([[1, 1, 1, 1, 1, 2, 1, 1, 1, 1]])
    shifter = IndexShift(shift=1, random_state=42)

    shifted = shifter.fit_transform(peak_row)

    assert shifted[0][4] == 2
|
307
|
-
|
308
|
-
|
309
|
-
def test_l1_norm(spectrum):
    """NormScaler(l_norm=1) must divide the row by its L1 norm."""
    order = 1
    scaler = NormScaler(l_norm=order)
    row_norm = np.linalg.norm(spectrum[0], ord=order)

    scaled = scaler.fit_transform(spectrum)

    expected_row = spectrum[0] / row_norm
    assert np.allclose(scaled[0], expected_row, atol=1e-8)
|
320
|
-
|
321
|
-
|
322
|
-
def test_l2_norm(spectrum):
    """NormScaler(l_norm=2) must divide the row by its L2 norm."""
    order = 2
    scaler = NormScaler(l_norm=order)
    row_norm = np.linalg.norm(spectrum[0], ord=order)

    scaled = scaler.fit_transform(spectrum)

    expected_row = spectrum[0] / row_norm
    assert np.allclose(scaled[0], expected_row, atol=1e-8)
|
333
|
-
|
334
|
-
|
335
|
-
def test_linear_correction(spectrum):
    """LinearCorrection should bring both end points of the spectrum to zero.

    The second assertion used to index ``[-1][0]`` (last sample, first point).
    For a single-spectrum fixture that is the same element as ``[0][0]``, so
    the last point was never checked.
    """
    # Arrange
    linear_correction = LinearCorrection()

    # Act
    spectrum_corrected = linear_correction.fit_transform(spectrum)

    # Assert
    assert spectrum_corrected[0][0] == 0
    # NOTE(review): tolerance used for the last point because it presumably
    # goes through floating-point slope arithmetic — confirm against
    # LinearCorrection's implementation.
    assert np.isclose(spectrum_corrected[0][-1], 0, atol=1e-8)
|
345
|
-
|
346
|
-
|
347
|
-
def test_max_norm(spectrum):
    """MinMaxScaler(use_min=False) must divide the row by its maximum."""
    scaler = MinMaxScaler(use_min=False)

    scaled = scaler.fit_transform(spectrum)

    expected_row = spectrum[0] / np.max(spectrum[0])
    assert np.allclose(scaled[0], expected_row, atol=1e-8)
|
358
|
-
|
359
|
-
|
360
|
-
def test_mean_filter():
    """MeanFilter(window_size=2) must produce the expected smoothed row."""
    data = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]])
    smoother = MeanFilter(window_size=2)

    smoothed = smoother.fit_transform(data)

    expected_row = [1, 1.5, 2.5, 3.5, 4.5]
    assert np.allclose(smoothed[0], expected_row, atol=1e-8)
|
370
|
-
|
371
|
-
|
372
|
-
def test_median_filter():
    """MedianFilter(window_size=3) must suppress the outlier at index 2."""
    data = np.array([[1.0, 2.0, 30.0, 4.0, 5.0]])
    median_filter = MedianFilter(window_size=3)

    filtered = median_filter.fit_transform(data)

    expected_row = [1, 2.0, 4.0, 5.0, 5.0]
    assert np.allclose(filtered[0], expected_row, atol=1e-8)
|
382
|
-
|
383
|
-
|
384
|
-
def test_min_norm(spectrum):
    """MinMaxScaler built without arguments must rescale the row to (x - min) / (max - min)."""
    scaler = MinMaxScaler()

    scaled = scaler.fit_transform(spectrum)

    row = spectrum[0]
    shifted = row - np.min(row)
    span = np.max(row) - np.min(row)
    assert np.allclose(scaled[0], shifted / span, atol=1e-8)
|
398
|
-
|
399
|
-
|
400
|
-
def test_multiplicative_scatter_correction_mean(spectrum, reference_msc_mean):
    """MSC built without arguments must reproduce the stored mean-based reference."""
    corrected = MultiplicativeScatterCorrection().fit_transform(spectrum)

    matches_reference = np.allclose(corrected[0], reference_msc_mean[0], atol=1e-8)
    assert matches_reference
|
409
|
-
|
410
|
-
|
411
|
-
def test_multiplicative_scatter_correction_with_reference(spectrum, reference_msc_mean):
    """MSC given the stored mean reference explicitly must return that reference."""
    target = reference_msc_mean[0]
    corrector = MultiplicativeScatterCorrection(reference=target)

    corrected = corrector.fit_transform(spectrum)

    assert np.allclose(corrected[0], reference_msc_mean[0], atol=1e-8)
|
420
|
-
|
421
|
-
|
422
|
-
def test_multiplicative_scatter_correction_median(spectrum, reference_msc_median):
    """MSC with ``use_median=True`` must reproduce the stored median-based reference."""
    corrector = MultiplicativeScatterCorrection(use_median=True)

    corrected = corrector.fit_transform(spectrum)

    matches_reference = np.allclose(corrected[0], reference_msc_median[0], atol=1e-8)
    assert matches_reference
|
431
|
-
|
432
|
-
|
433
|
-
def test_multiplicative_scatter_correction_with_reference_median(
    spectrum, reference_msc_median
):
    """MSC given the stored median reference and ``use_median=True`` must return it."""
    target = reference_msc_median[0]
    corrector = MultiplicativeScatterCorrection(reference=target, use_median=True)

    corrected = corrector.fit_transform(spectrum)

    assert np.allclose(corrected[0], reference_msc_median[0], atol=1e-8)
|
446
|
-
|
447
|
-
|
448
|
-
def test_multiplicative_scatter_correction_with_weights(spectrum, reference_msc_mean):
    """MSC with unit weights must give the same result as the unweighted reference."""
    n_points = len(spectrum[0])
    unit_weights = np.ones(n_points)
    corrector = MultiplicativeScatterCorrection(weights=unit_weights)

    corrected = corrector.fit_transform(spectrum)

    assert np.allclose(corrected[0], reference_msc_mean[0], atol=1e-8)
|
459
|
-
|
460
|
-
|
461
|
-
def test_multiplicative_scatter_correction_with_wrong_weights(
    spectrum, reference_msc_mean
):
    """A 10-element weight vector passed with the fixture spectrum must raise ValueError."""
    bad_weights = np.ones(10)
    corrector = MultiplicativeScatterCorrection(weights=bad_weights)

    with pytest.raises(ValueError):
        corrector.fit_transform(spectrum)
|
471
|
-
|
472
|
-
|
473
|
-
def test_multiplicative_scatter_correction_with_wrong_reference(spectrum):
    """A 10-element reference passed with the fixture spectrum must raise ValueError."""
    bad_reference = np.ones(10)
    corrector = MultiplicativeScatterCorrection(reference=bad_reference)

    with pytest.raises(ValueError):
        corrector.fit_transform(spectrum)
|
481
|
-
|
482
|
-
|
483
|
-
def test_multiplicative_scatter_correction_no_mean_no_median_no_reference(spectrum):
    """MSC with ``use_mean=False`` and no reference must raise ValueError."""
    # Arrange
    # No reference array is built here: the point of the test is that none is supplied.
    msc = MultiplicativeScatterCorrection(use_mean=False)

    # Act & Assert
    with pytest.raises(ValueError):
        msc.fit_transform(spectrum)
|
491
|
-
|
492
|
-
|
493
|
-
def test_non_negative_zeroes():
    """NonNegative(mode="zero") must replace the negative value with 0."""
    raw = np.array([[-1, 0, 1]])
    clipper = NonNegative(mode="zero")

    clipped = clipper.fit_transform(raw)

    expected_row = [0, 0, 1]
    assert np.allclose(clipped[0], expected_row, atol=1e-8)
|
503
|
-
|
504
|
-
|
505
|
-
def test_non_negative_absolute():
    """NonNegative(mode="abs") must replace the negative value with its absolute value."""
    raw = np.array([[-1, 0, 1]])
    rectifier = NonNegative(mode="abs")

    rectified = rectifier.fit_transform(raw)

    expected_row = [1, 0, 1]
    assert np.allclose(rectified[0], expected_row, atol=1e-8)
|
515
|
-
|
516
|
-
|
517
|
-
def test_normal_noise():
    """NormalNoise(scale=0.5) must keep the mean near 1 and give a spread near 0.5."""
    flat = np.ones((1, 10000))
    noiser = NormalNoise(scale=0.5, random_state=42)

    noisy = noiser.fit_transform(flat)

    assert flat.shape == noisy.shape
    mean_drift = np.mean(noisy[0]) - 1
    assert np.allclose(mean_drift, 0, atol=1e-2)
    spread = np.std(noisy[0])
    assert np.allclose(spread, 0.5, atol=1e-2)
|
529
|
-
|
530
|
-
|
531
|
-
def test_norris_williams_filter_1():
    """NorrisWilliams built without arguments must map a constant signal to about zero."""
    nw_filter = NorrisWilliams()
    constant = np.ones((1, 10)).reshape(1, -1)

    derived = nw_filter.fit_transform(constant)

    zeros = np.zeros((1, 10))
    assert np.allclose(derived[0], zeros, atol=1e-2)
|
541
|
-
|
542
|
-
|
543
|
-
def test_norris_williams_filter_2():
    """NorrisWilliams(derivative_order=2) must map a constant signal to about zero."""
    nw_filter = NorrisWilliams(derivative_order=2)
    constant = np.ones((1, 10)).reshape(1, -1)

    derived = nw_filter.fit_transform(constant)

    zeros = np.zeros((1, 10))
    assert np.allclose(derived[0], zeros, atol=1e-2)
|
553
|
-
|
554
|
-
|
555
|
-
def test_norris_williams_wrong_filter():
    """NorrisWilliams(derivative_order=5) must raise ValueError on fit_transform."""
    nw_filter = NorrisWilliams(derivative_order=5)
    constant = np.ones((1, 10)).reshape(1, -1)

    with pytest.raises(ValueError):
        nw_filter.fit_transform(constant)
|
564
|
-
|
565
|
-
|
566
|
-
def test_point_scaler(spectrum):
    """PointScaler(point=0) must divide every value by the first value of the row."""
    scaler = PointScaler(point=0)
    first_value = spectrum[0][0]
    expected_row = [value / first_value for value in spectrum[0]]

    scaled = scaler.fit_transform(spectrum)

    assert np.allclose(scaled[0], expected_row, atol=1e-8)
|
576
|
-
|
577
|
-
|
578
|
-
def test_point_scaler_with_wavenumbers():
    """PointScaler(point=4) with a 1..10 axis must scale by the value at index 3."""
    axis = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
    raw = np.array([[10.0, 12.0, 14.0, 16.0, 14.0, 12.0, 10.0, 12.0, 14.0, 16.0]])

    scaler = PointScaler(point=4, wavenumbers=axis)
    scaled = scaler.fit_transform(raw)

    # Wavenumber 4.0 sits at index 3 of the axis.
    expected_row = raw[0] / raw[0][3]
    assert np.allclose(scaled[0], expected_row, atol=1e-8)
|
589
|
-
|
590
|
-
|
591
|
-
def test_range_cut_by_index(spectrum):
    """RangeCut(start=0, end=10) must return the first ten points of the row."""
    cutter = RangeCut(start=0, end=10)

    cut = cutter.fit_transform(spectrum)

    first_ten = spectrum[0][:10]
    assert np.allclose(cut[0], first_ten, atol=1e-8)
|
600
|
-
|
601
|
-
|
602
|
-
def test_range_cut_by_wavenumber():
    """RangeCut(2.5, 7.9) on a 1..10 axis must return the columns at indices 1..6."""
    axis = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    raw = np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]])
    cutter = RangeCut(start=2.5, end=7.9, wavenumbers=axis)

    cut = cutter.fit_transform(raw)

    expected_row = raw[0][1:7]
    assert np.allclose(cut[0], expected_row, atol=1e-8)
|
613
|
-
|
614
|
-
|
615
|
-
def test_range_cut_by_wavenumber_with_list():
    """RangeCut must accept a plain list as axis and expose the cut axis as ``wavenumbers_``."""
    axis = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    raw = np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]])
    cutter = RangeCut(start=2.5, end=7.9, wavenumbers=axis)

    cut = cutter.fit_transform(raw)

    expected_row = raw[0][1:7]
    assert np.allclose(cut[0], expected_row, atol=1e-8)
    assert cutter.wavenumbers_ == [2, 3, 4, 5, 6, 7]
|
627
|
-
|
628
|
-
|
629
|
-
def test_range_cut_by_wavenumber_with_pandas_dataframe():
    """RangeCut configured for pandas output must return a pandas DataFrame."""
    # Arrange
    wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    spectrum = pd.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
    range_cut = RangeCut(
        start=2.5, end=7.9, wavenumbers=wavenumbers
    ).set_output(transform='pandas')

    # Act
    spectrum_corrected = range_cut.fit_transform(spectrum)

    # Assert
    # isinstance is the idiomatic type check and also accepts DataFrame subclasses.
    assert isinstance(spectrum_corrected, pd.DataFrame)
|
640
|
-
|
641
|
-
|
642
|
-
def test_range_cut_by_wavenumber_with_polars_dataframe():
    """RangeCut configured for polars output must return a polars DataFrame."""
    # Arrange
    wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    spectrum = pl.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
    range_cut = RangeCut(
        start=2.5, end=7.9, wavenumbers=wavenumbers
    ).set_output(transform='polars')

    # Act
    spectrum_corrected = range_cut.fit_transform(spectrum)

    # Assert
    # isinstance is the idiomatic type check and also accepts DataFrame subclasses.
    assert isinstance(spectrum_corrected, pl.DataFrame)
|
653
|
-
|
654
|
-
|
655
|
-
def test_robust_normal_variate():
    """RobustNormalVariate built without arguments must reproduce the hard-coded reference row."""
    raw = np.array([[2, 3.5, 5, 27, 8, 9]])
    expected_row = np.array([-2.5, -0.5, 1.5, 30.833333, 5.5, 6.83333333])
    scaler = RobustNormalVariate()

    scaled = scaler.fit_transform(raw)

    assert np.allclose(scaled[0], expected_row, atol=1e-8)
|
666
|
-
|
667
|
-
|
668
|
-
def test_savizky_golay_filter_1(spectrum, reference_sg_15_2):
    """SavitzkyGolay (window 15, order 2, first derivative) must match the stored reference."""
    settings = dict(window_size=15, polynomial_order=2, derivate_order=1, mode="interp")
    sg_filter = SavitzkyGolay(**settings)

    derived = sg_filter.fit_transform(spectrum)

    assert np.allclose(derived[0], reference_sg_15_2[0], atol=1e-2)
|
679
|
-
|
680
|
-
|
681
|
-
def test_saviszky_golay_filter_2():
    """The first derivative of a constant signal must be about zero everywhere."""
    settings = dict(window_size=3, polynomial_order=2, derivate_order=1, mode="interp")
    sg_filter = SavitzkyGolay(**settings)
    constant = np.ones((1, 10)).reshape(1, -1)

    derived = sg_filter.fit_transform(constant)

    zeros = np.zeros((1, 10))
    assert np.allclose(derived[0], zeros, atol=1e-2)
|
694
|
-
|
695
|
-
|
696
|
-
def test_saviszky_golay_filter_3():
    """The first derivative of a unit-slope ramp must be about one everywhere."""
    settings = dict(window_size=3, polynomial_order=2, derivate_order=1, mode="interp")
    sg_filter = SavitzkyGolay(**settings)
    ramp = np.array([[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]])

    derived = sg_filter.fit_transform(ramp)

    ones = np.ones((1, 10))
    assert np.allclose(derived[0], ones, atol=1e-2)
|
709
|
-
|
710
|
-
|
711
|
-
def test_spectrum_scale(spectrum):
    """SpectrumScale(scale=0.01) must keep the row within 0.01 of the input."""
    scaler = SpectrumScale(scale=0.01, random_state=42)

    scaled = scaler.fit_transform(spectrum)

    stays_close = np.allclose(scaled[0], spectrum[0], atol=0.01)
    assert stays_close
|
720
|
-
|
721
|
-
|
722
|
-
def test_standard_normal_variate(spectrum, reference_snv):
    """StandardNormalVariate built without arguments must reproduce the stored reference row."""
    scaled = StandardNormalVariate().fit_transform(spectrum)

    matches_reference = np.allclose(scaled[0], reference_snv[0], atol=1e-2)
    assert matches_reference
|
731
|
-
|
732
|
-
|
733
|
-
def test_subtract_reference(spectrum):
    """Subtracting a spectrum from itself must give an all-zero row."""
    # Arrange
    baseline = SubtractReference(reference=spectrum)

    # Act
    spectrum_corrected = baseline.fit_transform(spectrum)

    # Assert
    # Size the expected zeros from the corrected row itself. The earlier
    # np.zeros(len(spectrum)) used the number of samples, not the number of
    # points, and only passed through NumPy broadcasting.
    expected = np.zeros_like(spectrum_corrected[0])
    assert np.allclose(spectrum_corrected[0], expected, atol=1e-8)
|
742
|
-
|
743
|
-
|
744
|
-
def test_subtract_reference_without_reference(spectrum):
    """SubtractReference without a reference must leave the spectrum unchanged."""
    # Arrange
    baseline = SubtractReference()

    # Act
    spectrum_corrected = baseline.fit_transform(spectrum)

    # Assert
    # Compare row against row; the earlier row-vs-matrix comparison only
    # passed through NumPy broadcasting.
    assert np.allclose(spectrum_corrected[0], spectrum[0], atol=1e-8)
|
753
|
-
|
754
|
-
|
755
|
-
def test_uniform_noise():
    """UniformNoise(min=-1, max=1) must keep the mean near 1 with spread near sqrt(1/3)."""
    flat = np.ones((1, 10000))
    noiser = UniformNoise(min=-1, max=1, random_state=42)

    noisy = noiser.fit_transform(flat)

    assert flat.shape == noisy.shape
    mean_drift = np.mean(noisy[0]) - 1
    assert np.allclose(mean_drift, 0, atol=1e-2)
    # Standard deviation of a uniform distribution on [-1, 1] is sqrt(1/3).
    spread = np.std(noisy[0])
    assert np.allclose(spread, np.sqrt(1/3), atol=1e-2)
|
767
|
-
|
768
|
-
|
769
|
-
def test_whitakker_smooth(spectrum, reference_whitakker):
    """WhittakerSmooth built without arguments must reproduce the stored reference row."""
    smoothed = WhittakerSmooth().fit_transform(spectrum)

    matches_reference = np.allclose(smoothed[0], reference_whitakker[0], atol=1e-8)
    assert matches_reference
|