chemotools 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. chemotools/{augmenation → augmentation}/baseline_shift.py +1 -1
  2. chemotools/{augmenation → augmentation}/index_shift.py +4 -3
  3. chemotools/baseline/__init__.py +8 -8
  4. chemotools/baseline/{air_pls.py → _air_pls.py} +5 -19
  5. chemotools/baseline/{ar_pls.py → _ar_pls.py} +2 -15
  6. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +6 -15
  7. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +11 -13
  8. chemotools/baseline/{linear_correction.py → _linear_correction.py} +2 -19
  9. chemotools/baseline/{non_negative.py → _non_negative.py} +2 -16
  10. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +13 -23
  11. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +7 -19
  12. chemotools/derivative/__init__.py +2 -2
  13. chemotools/derivative/{norris_william.py → _norris_william.py} +4 -17
  14. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +2 -16
  15. chemotools/feature_selection/__init__.py +2 -0
  16. chemotools/{variable_selection/select_features.py → feature_selection/_index_selector.py} +17 -42
  17. chemotools/{variable_selection/range_cut.py → feature_selection/_range_cut.py} +15 -44
  18. chemotools/scale/__init__.py +3 -3
  19. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +7 -20
  20. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +5 -18
  21. chemotools/scale/{point_scaler.py → _point_scaler.py} +11 -22
  22. chemotools/scatter/__init__.py +4 -4
  23. chemotools/scatter/{extended_multiplicative_scatter_correction.py → _extended_multiplicative_scatter_correction.py} +2 -10
  24. chemotools/scatter/{multiplicative_scatter_correction.py → _multiplicative_scatter_correction.py} +2 -8
  25. chemotools/scatter/{robust_normal_variate.py → _robust_normal_variate.py} +2 -16
  26. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +8 -19
  27. chemotools/smooth/__init__.py +4 -4
  28. chemotools/smooth/{mean_filter.py → _mean_filter.py} +5 -18
  29. chemotools/smooth/{median_filter.py → _median_filter.py} +2 -16
  30. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +3 -16
  31. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +5 -19
  32. {chemotools-0.1.1.dist-info → chemotools-0.1.3.dist-info}/METADATA +1 -1
  33. chemotools-0.1.3.dist-info/RECORD +58 -0
  34. {chemotools-0.1.1.dist-info → chemotools-0.1.3.dist-info}/WHEEL +1 -1
  35. tests/test_functionality.py +88 -56
  36. tests/test_sklearn_compliance.py +26 -25
  37. chemotools/augmenation/spectrum_shift.py +0 -110
  38. chemotools/variable_selection/__init__.py +0 -2
  39. chemotools-0.1.1.dist-info/RECORD +0 -59
  40. /chemotools/{augmenation → augmentation}/__init__.py +0 -0
  41. /chemotools/{augmenation → augmentation}/exponential_noise.py +0 -0
  42. /chemotools/{augmenation → augmentation}/normal_noise.py +0 -0
  43. /chemotools/{augmenation → augmentation}/spectrum_scale.py +0 -0
  44. /chemotools/{augmenation → augmentation}/uniform_noise.py +0 -0
  45. {chemotools-0.1.1.dist-info → chemotools-0.1.3.dist-info}/LICENSE +0 -0
  46. {chemotools-0.1.1.dist-info → chemotools-0.1.3.dist-info}/top_level.txt +0 -0
@@ -19,14 +19,6 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
19
19
  The mode to use for the median filter. Can be "nearest", "constant", "reflect",
20
20
  "wrap", "mirror" or "interp". Default is "nearest".
21
21
 
22
- Attributes
23
- ----------
24
- n_features_in_ : int
25
- The number of features in the input data.
26
-
27
- _is_fitted : bool
28
- Whether the transformer has been fitted to data.
29
-
30
22
  Methods
31
23
  -------
32
24
  fit(X, y=None)
@@ -57,13 +49,7 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
57
49
  The fitted transformer.
58
50
  """
59
51
  # Check that X is a 2D array and has only finite values
60
- X = check_input(X)
61
-
62
- # Set the number of features
63
- self.n_features_in_ = X.shape[1]
64
-
65
- # Set the fitted attribute to True
66
- self._is_fitted = True
52
+ X = self._validate_data(X)
67
53
 
68
54
  return self
69
55
 
@@ -85,7 +71,7 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
85
71
  The transformed data.
86
72
  """
87
73
  # Check that the estimator is fitted
88
- check_is_fitted(self, "_is_fitted")
74
+ check_is_fitted(self, "n_features_in_")
89
75
 
90
76
  # Check that X is a 2D array and has only finite values
91
77
  X = check_input(X)
@@ -24,14 +24,6 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
24
24
  The mode to use for the Savitzky-Golay filter. Can be "nearest", "constant",
25
25
  "reflect", "wrap", "mirror" or "interp". Default is "nearest".
26
26
 
27
- Attributes
28
- ----------
29
- n_features_in_ : int
30
- The number of features in the input data.
31
-
32
- _is_fitted : bool
33
- Whether the transformer has been fitted to data.
34
-
35
27
  Methods
36
28
  -------
37
29
  fit(X, y=None)
@@ -40,6 +32,7 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
40
32
  transform(X, y=0, copy=True)
41
33
  Transform the input data by calculating the Savitzky-Golay filter.
42
34
  """
35
+
43
36
  def __init__(
44
37
  self, window_size: int = 3, polynomial_order: int = 1, mode: str = "nearest"
45
38
  ) -> None:
@@ -65,13 +58,7 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
65
58
  The fitted transformer.
66
59
  """
67
60
  # Check that X is a 2D array and has only finite values
68
- X = check_input(X)
69
-
70
- # Set the number of features
71
- self.n_features_in_ = X.shape[1]
72
-
73
- # Set the fitted attribute to True
74
- self._is_fitted = True
61
+ self._validate_data(X)
75
62
 
76
63
  return self
77
64
 
@@ -93,7 +80,7 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
93
80
  The transformed data.
94
81
  """
95
82
  # Check that the estimator is fitted
96
- check_is_fitted(self, "_is_fitted")
83
+ check_is_fitted(self, "n_features_in_")
97
84
 
98
85
  # Check that X is a 2D array and has only finite values
99
86
  X = check_input(X)
@@ -24,14 +24,6 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
24
24
  differences : int, optional
25
25
  The number of differences to use for the Whittaker smooth. Default is 1.
26
26
 
27
- Attributes
28
- ----------
29
- n_features_in_ : int
30
- The number of features in the input data.
31
-
32
- _is_fitted : bool
33
- Whether the transformer has been fitted to data.
34
-
35
27
  Methods
36
28
  -------
37
29
  fit(X, y=None)
@@ -66,13 +58,7 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
66
58
  The fitted transformer.
67
59
  """
68
60
  # Check that X is a 2D array and has only finite values
69
- X = check_input(X)
70
-
71
- # Set the number of features
72
- self.n_features_in_ = X.shape[1]
73
-
74
- # Set the fitted attribute to True
75
- self._is_fitted = True
61
+ X = self._validate_data(X)
76
62
 
77
63
  return self
78
64
 
@@ -94,7 +80,7 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
94
80
  The transformed data.
95
81
  """
96
82
  # Check that the estimator is fitted
97
- check_is_fitted(self, "_is_fitted")
83
+ check_is_fitted(self, "n_features_in_")
98
84
 
99
85
  # Check that X is a 2D array and has only finite values
100
86
  X = check_input(X)
@@ -113,14 +99,14 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
113
99
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
114
100
 
115
101
  def _calculate_whittaker_smooth(self, x):
116
- X = np.matrix(x)
102
+ X = np.array(x)
117
103
  m = X.size
118
104
  E = eye(m, format="csc")
119
105
  w = np.ones(m)
120
106
  for i in range(self.differences):
121
107
  E = E[1:] - E[:-1]
122
108
  W = diags(w, 0, shape=(m, m))
123
- A = csc_matrix(W + (self.lam * E.T * E))
124
- B = csc_matrix(W * X.T)
109
+ A = csc_matrix(W + (self.lam * E.T @ E))
110
+ B = csc_matrix(W @ X.T).toarray().ravel()
125
111
  background = spsolve(A, B)
126
112
  return np.array(background)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chemotools
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Package to integrate chemometrics in scikit-learn pipelines
5
5
  Home-page: https://github.com/paucablop/chemotools
6
6
  Author: Pau Cabaneros Lopez
@@ -0,0 +1,58 @@
1
+ chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ chemotools/augmentation/__init__.py,sha256=LiYw-QE-cxiYY0ua4SOgL0sC_-uAjkykkcj7gRP8Mic,246
3
+ chemotools/augmentation/baseline_shift.py,sha256=Zs0-3zHWaK26f2qGBRRMxA-q6FPxPG00g-8sHe61UAc,3213
4
+ chemotools/augmentation/exponential_noise.py,sha256=X2HTpL9zoiu0cFq3VsTxS3x_IO_tA_DF2vJyKgh4_UA,3082
5
+ chemotools/augmentation/index_shift.py,sha256=7ujZ_sz4mWEUJMDCHyaLxhTZ5-_K3nQPwtk6y6SLR9Q,3198
6
+ chemotools/augmentation/normal_noise.py,sha256=NmzTuIJKyk6tfDJgmeX9iAzsKlJJk3984tS8nLLG9dg,3051
7
+ chemotools/augmentation/spectrum_scale.py,sha256=WgMw_bCxWbyAYgYBO3q4PbbzcTDyBvVD73kxPfj3cdY,3174
8
+ chemotools/augmentation/uniform_noise.py,sha256=gc0WdREItRiPHjNiZg79n0yK6bfntXkcImrEjkoRdus,3180
9
+ chemotools/baseline/__init__.py,sha256=LFhsmzqv9RYxDS5-vK9jIf3ArNUSZ6yOF4SeUyVF6iA,381
10
+ chemotools/baseline/_air_pls.py,sha256=bYAjemEWZr7oiYJegO0r5gtO16zr0BdJYjmEikA1yBc,5116
11
+ chemotools/baseline/_ar_pls.py,sha256=tZi-89GMIStZUufz9AXVHU6TC1J6fAX4M1rAaIqgSvE,4431
12
+ chemotools/baseline/_constant_baseline_correction.py,sha256=oxxzgCtnSHTEb9QczrxsmcHLtvCoKj6IQrH4M_5yNfw,3898
13
+ chemotools/baseline/_cubic_spline_correction.py,sha256=pHpRdD6oVnn4BRg9CumlPJdAikG076kGjCU8mkMNpgw,3187
14
+ chemotools/baseline/_linear_correction.py,sha256=DJow940emZQdcAKpCrkp7l5wyTYURLkr-hhHU6Pzlgw,3022
15
+ chemotools/baseline/_non_negative.py,sha256=SyiS_-cfnypLXY3gC80oo7doqXUlHAAgmwrkRN4iNX8,2536
16
+ chemotools/baseline/_polynomial_correction.py,sha256=0w9qA_w5dc9IIv5KMmAOZ06hWDuk-uyealsTaZX2qgw,3749
17
+ chemotools/baseline/_subtract_reference.py,sha256=vfre6Z-bgDCwwl3VnpahmGJTBFJVK9HGBrUsjfl2O9o,3135
18
+ chemotools/datasets/__init__.py,sha256=ojqxb-C_eDmizwUqVCJ8BqJxwULD7_hWCyVIA1uRO0c,116
19
+ chemotools/datasets/_base.py,sha256=Z174CaIlpx17Yu8Pg1qZPuHWkS3BYWn7gtOYsoe8zNk,2895
20
+ chemotools/datasets/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ chemotools/datasets/data/coffee_labels.csv,sha256=ZXQWQIf8faLHjdnHfRoXfxMR56kq9Q1BGPZBkQyhGlY,487
22
+ chemotools/datasets/data/coffee_spectra.csv,sha256=VA-sN4u0hC5iALlRxxkj-K87Lz3b3mmUHBJPoDXychI,2206147
23
+ chemotools/datasets/data/fermentation_hplc.csv,sha256=AMmiFQxwaXrH8aN310-3h1YQDiDrT8JNRv1RDvhEvg4,2140
24
+ chemotools/datasets/data/fermentation_spectra.csv,sha256=MaaNMQP0lygJgFbEoUX0OUqdA-id8mF5Llvf_vj9tJk,15237508
25
+ chemotools/datasets/data/train_hplc.csv,sha256=DjtmqiePOWB-F6TsOGFngE1pKyXkb7Xmsi-1CLxsTnE,249
26
+ chemotools/datasets/data/train_spectra.csv,sha256=iVF19W52NHlbqq8BbLomn8n47kSPT0QxJv7wtQX4yjQ,203244
27
+ chemotools/derivative/__init__.py,sha256=a9RAUYDG4C8VNJBbirRCpslKjEcKfRxUtSa39c3gp1s,86
28
+ chemotools/derivative/_norris_william.py,sha256=NKmuo95vNWHQOdcww7APU9Z4s1wWExIRaj9O2Xrx8Bs,4753
29
+ chemotools/derivative/_savitzky_golay.py,sha256=5At4sexJH0RvjkrvVfJvhIfaxXD3vE4Ozq1VClb3qlU,3417
30
+ chemotools/feature_selection/__init__.py,sha256=p47SuyI7jMpV7kiaAsv2hA20smKf5Yo6447LfrNdDhY,76
31
+ chemotools/feature_selection/_index_selector.py,sha256=2z2aAyMUOuP7x1n19RV5JGf6ZcM3mtJZby8tEgBOix4,3379
32
+ chemotools/feature_selection/_range_cut.py,sha256=HI2OoeQYNph9uBICSA1cF2C_u-0UjTf0FDv5093tTnU,3223
33
+ chemotools/scale/__init__.py,sha256=CQPUPx-8pUeHHbN9p5smFro3xtl_UEE0YeXHLVd7Lfk,118
34
+ chemotools/scale/_min_max_scaler.py,sha256=-Wnr7zW-zmW6nR5J5yPdBm1KNuQDa9w27Un7rAr-s8E,2806
35
+ chemotools/scale/_norm_scaler.py,sha256=bjMg1-x2I1xZmmbIgl4vXZZweJV-w3Euta0KGff_2Gk,2363
36
+ chemotools/scale/_point_scaler.py,sha256=u2QELIHF35TReMk3RzXliacNPEAZJmVrjjJy9Rmn1q0,3256
37
+ chemotools/scatter/__init__.py,sha256=-Zs5HBpPL3NaO25n8gh0JZI8f5z88cnt-kVFYT3s3a8,292
38
+ chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=SbTEMOPl3oWrzqIvYeVLrFhJKgPH9Ra32RO7OvzLJ00,6692
39
+ chemotools/scatter/_multiplicative_scatter_correction.py,sha256=ZQaypqJhjmqSqW_f7SB_8qJxaHax1Jmz3hAs5fOves4,5547
40
+ chemotools/scatter/_robust_normal_variate.py,sha256=DXHTVGx7rXRwoi-DDULN1CjA4gKv8dQDQ8giJ9X3oZs,2905
41
+ chemotools/scatter/_standard_normal_variate.py,sha256=Q4Cr8aMp5u9pOSDFKM7NIRU5BSRbY7C2A_kDeNcOl4I,2377
42
+ chemotools/smooth/__init__.py,sha256=x-QksF-Z_TIIRDR1EZMf44G0K1Fn7plofsufyaIwuvw,180
43
+ chemotools/smooth/_mean_filter.py,sha256=D-v_GaNgAWxb2NTESVmAcSi-Nqw045hCvJRKLb5ksuc,2622
44
+ chemotools/smooth/_median_filter.py,sha256=tDp_8JK2n9yVKeznf47vaYs8UTOt3D3p1f6PJpZpqy4,2638
45
+ chemotools/smooth/_savitzky_golay_filter.py,sha256=gNIu7drl-Drb5WK0gBRlLu7AY_JHDIiiEDAEEAZJ8M4,3192
46
+ chemotools/smooth/_whittaker_smooth.py,sha256=w9ZecU3A2SM0cWSGGGmYutE0KGpNgzln7w7ocao3nnU,3353
47
+ chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
+ chemotools/utils/check_inputs.py,sha256=fRAV4HIaGamdj_PNXSNnl7LurXytACNTGO51rhPpMUY,512
49
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
+ tests/fixtures.py,sha256=Xa-Vd62Kd1fyWg3PLUSP6iIkOK8etrbyOkMJTn3dvX8,1933
51
+ tests/test_datasets.py,sha256=_3mMDYC-vUnb5BenMqvuhmkHI2PPIdsyq_nNu2ggH20,1055
52
+ tests/test_functionality.py,sha256=UhOYEShJZJOwROjNMf3UtXl5MrQBeQQbEMEt0ph7yQ0,21182
53
+ tests/test_sklearn_compliance.py,sha256=CRB_0X9HRGj0pOpUCmiSHwJkCsVB-yK_apsyUONmfmw,5856
54
+ chemotools-0.1.3.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
55
+ chemotools-0.1.3.dist-info/METADATA,sha256=K_8Kuy1_hHBEK3p1WSMLfR0NfHuptAzCa5uijUT6RLc,5018
56
+ chemotools-0.1.3.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
57
+ chemotools-0.1.3.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
58
+ chemotools-0.1.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: bdist_wheel (0.41.3)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,7 +1,8 @@
1
1
  import numpy as np
2
+ import pandas as pd
2
3
  import pytest
3
4
 
4
- from chemotools.augmenation import (
5
+ from chemotools.augmentation import (
5
6
  BaselineShift,
6
7
  ExponentialNoise,
7
8
  IndexShift,
@@ -27,7 +28,7 @@ from chemotools.scatter import (
27
28
  StandardNormalVariate,
28
29
  )
29
30
  from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth
30
- from chemotools.variable_selection import RangeCut, SelectFeatures
31
+ from chemotools.feature_selection import IndexSelector, RangeCut
31
32
  from tests.fixtures import (
32
33
  spectrum,
33
34
  spectrum_arpls,
@@ -231,6 +232,77 @@ def test_extended_baseline_correction_through_msc_median(spectrum):
231
232
 
232
233
  # Assert
233
234
  assert np.allclose(spectrum_emsc[0], spectrum_msc, atol=1e-8)
235
+
236
+
237
+
238
+ def test_index_selector():
239
+ # Arrange
240
+ spectrum = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
241
+
242
+ # Act
243
+ select_features = IndexSelector()
244
+ spectrum_corrected = select_features.fit_transform(spectrum)
245
+
246
+ # Assert
247
+ assert np.allclose(spectrum_corrected[0], spectrum[0], atol=1e-8)
248
+
249
+
250
+ def test_index_selector_with_index():
251
+ # Arrange
252
+ spectrum = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
253
+ expected = np.array([[1, 2, 3, 8, 9, 10]])
254
+
255
+ # Act
256
+ select_features = IndexSelector(features=np.array([0, 1, 2, 7, 8, 9]))
257
+ spectrum_corrected = select_features.fit_transform(spectrum)
258
+
259
+ # Assert
260
+ assert np.allclose(spectrum_corrected[0], expected, atol=1e-8)
261
+
262
+
263
+ def test_index_selector_with_wavenumbers():
264
+ # Arrange
265
+ wavenumbers = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
266
+ spectrum = np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]])
267
+ expected = np.array([[1.0, 2.0, 3.0, 34.0, 55.0, 89.0]])
268
+
269
+ # Act
270
+ select_features = IndexSelector(
271
+ features=np.array([1, 2, 3, 8, 9, 10]), wavenumbers=wavenumbers
272
+ )
273
+ spectrum_corrected = select_features.fit_transform(spectrum)
274
+
275
+ # Assert
276
+ assert np.allclose(spectrum_corrected[0], expected, atol=1e-8)
277
+
278
+
279
+ def test_index_selector_with_wavenumbers_and_dataframe():
280
+ # Arrange
281
+ wavenumbers = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
282
+ spectrum = pd.DataFrame(np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]]))
283
+ expected = np.array([[1.0, 2.0, 3.0, 34.0, 55.0, 89.0]])
284
+
285
+ # Act
286
+ select_features = IndexSelector(
287
+ features=np.array([1, 2, 3, 8, 9, 10]), wavenumbers=wavenumbers
288
+ ).set_output(transform='pandas')
289
+
290
+ spectrum_corrected = select_features.fit_transform(spectrum)
291
+
292
+ # Assert
293
+ assert type(spectrum_corrected) == pd.DataFrame
294
+
295
+
296
+ def test_index_shift():
297
+ # Arrange
298
+ spectrum = np.array([[1, 1, 1, 1, 1, 2, 1, 1, 1, 1]])
299
+ spectrum_shift = IndexShift(shift=1, random_state=42)
300
+
301
+ # Act
302
+ spectrum_corrected = spectrum_shift.fit_transform(spectrum)
303
+
304
+ # Assert
305
+ assert spectrum_corrected[0][4] == 2
234
306
 
235
307
 
236
308
  def test_l1_norm(spectrum):
@@ -539,7 +611,7 @@ def test_range_cut_by_wavenumber():
539
611
  assert np.allclose(spectrum_corrected[0], spectrum[0][1:7], atol=1e-8)
540
612
 
541
613
 
542
- def test_range_cut_by_wavenumber_2():
614
+ def test_range_cut_by_wavenumber_with_list():
543
615
  # Arrange
544
616
  wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
545
617
  spectrum = np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]])
@@ -552,6 +624,19 @@ def test_range_cut_by_wavenumber_2():
552
624
  assert np.allclose(spectrum_corrected[0], spectrum[0][1:7], atol=1e-8)
553
625
 
554
626
 
627
+ def test_range_cut_by_wavenumber_with_dataframe():
628
+ # Arrange
629
+ wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
630
+ spectrum = pd.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]]))
631
+ range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output(transform='pandas')
632
+
633
+ # Act
634
+ spectrum_corrected = range_cut.fit_transform(spectrum)
635
+
636
+ # Assert
637
+ assert type(spectrum_corrected) == pd.DataFrame
638
+
639
+
555
640
  def test_robust_normal_variate():
556
641
  # Arrange
557
642
  spectrum = np.array([2, 3.5, 5, 27, 8, 9]).reshape(1, -1)
@@ -608,59 +693,6 @@ def test_saviszky_golay_filter_3():
608
693
  assert np.allclose(spectrum_corrected[0], np.ones((1, 10)), atol=1e-2)
609
694
 
610
695
 
611
- def test_select_features():
612
- # Arrange
613
- spectrum = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
614
-
615
- # Act
616
- select_features = SelectFeatures()
617
- spectrum_corrected = select_features.fit_transform(spectrum)
618
-
619
- # Assert
620
- assert np.allclose(spectrum_corrected[0], spectrum[0], atol=1e-8)
621
-
622
-
623
- def test_select_features_with_index():
624
- # Arrange
625
- spectrum = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
626
- expected = np.array([[1, 2, 3, 8, 9, 10]])
627
-
628
- # Act
629
- select_features = SelectFeatures(features=np.array([0, 1, 2, 7, 8, 9]))
630
- spectrum_corrected = select_features.fit_transform(spectrum)
631
-
632
- # Assert
633
- assert np.allclose(spectrum_corrected[0], expected, atol=1e-8)
634
-
635
-
636
- def test_select_features_with_wavenumbers():
637
- # Arrange
638
- wavenumbers = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
639
- spectrum = np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]])
640
- expected = np.array([[1.0, 2.0, 3.0, 34.0, 55.0, 89.0]])
641
-
642
- # Act
643
- select_features = SelectFeatures(
644
- features=np.array([1, 2, 3, 8, 9, 10]), wavenumbers=wavenumbers
645
- )
646
- spectrum_corrected = select_features.fit_transform(spectrum)
647
-
648
- # Assert
649
- assert np.allclose(spectrum_corrected[0], expected, atol=1e-8)
650
-
651
-
652
- def test_index_shift():
653
- # Arrange
654
- spectrum = np.array([[1, 1, 1, 1, 1, 2, 1, 1, 1, 1]])
655
- spectrum_shift = IndexShift(shift=1, random_state=42)
656
-
657
- # Act
658
- spectrum_corrected = spectrum_shift.fit_transform(spectrum)
659
-
660
- # Assert
661
- assert spectrum_corrected[0][4] == 2
662
-
663
-
664
696
  def test_spectrum_scale(spectrum):
665
697
  # Arrange
666
698
  spectrum_scale = SpectrumScale(scale=0.01, random_state=42)
@@ -1,6 +1,6 @@
1
1
  from sklearn.utils.estimator_checks import check_estimator
2
2
 
3
- from chemotools.augmenation import (
3
+ from chemotools.augmentation import (
4
4
  BaselineShift,
5
5
  ExponentialNoise,
6
6
  NormalNoise,
@@ -33,7 +33,7 @@ from chemotools.smooth import (
33
33
  SavitzkyGolayFilter,
34
34
  WhittakerSmooth,
35
35
  )
36
- from chemotools.variable_selection import RangeCut, SelectFeatures
36
+ from chemotools.feature_selection import RangeCut, IndexSelector
37
37
 
38
38
  from tests.fixtures import spectrum
39
39
 
@@ -94,6 +94,14 @@ def test_compliance_extended_multiplicative_scatter_correction():
94
94
  check_estimator(transformer)
95
95
 
96
96
 
97
+ # IndexSelector
98
+ def test_compliance_index_selector():
99
+ # Arrange
100
+ transformer = IndexSelector()
101
+ # Act & Assert
102
+ check_estimator(transformer)
103
+
104
+
97
105
  # IndexShift
98
106
  def test_compliance_spectrum_shift():
99
107
  # Arrange
@@ -197,6 +205,22 @@ def test_compliance_polynomial_correction():
197
205
  check_estimator(transformer)
198
206
 
199
207
 
208
+ # RangeCut
209
+ def test_compliance_range_cut():
210
+ # Arrange
211
+ transformer = RangeCut()
212
+ # Act & Assert
213
+ check_estimator(transformer)
214
+
215
+
216
+ # RobustNormalVariate
217
+ def test_compliance_robust_normal_variate():
218
+ # Arrange
219
+ transformer = RobustNormalVariate()
220
+ # Act & Assert
221
+ check_estimator(transformer)
222
+
223
+
200
224
  # SavitzkyGolay
201
225
  def test_compliance_savitzky_golay():
202
226
  # Arrange
@@ -213,14 +237,6 @@ def test_compliance_savitzky_golay_filter():
213
237
  check_estimator(transformer)
214
238
 
215
239
 
216
- # SelectFeatures
217
- def test_compliance_select_features():
218
- # Arrange
219
- transformer = SelectFeatures()
220
- # Act & Assert
221
- check_estimator(transformer)
222
-
223
-
224
240
  # SpectrumScale
225
241
  def test_compliance_spectrum_scale():
226
242
  # Arrange
@@ -237,21 +253,6 @@ def test_compliance_standard_normal_variate():
237
253
  check_estimator(transformer)
238
254
 
239
255
 
240
- # RangeCut
241
- def test_compliance_range_cut():
242
- # Arrange
243
- transformer = RangeCut()
244
- # Act & Assert
245
- check_estimator(transformer)
246
-
247
-
248
- # RobustNormalVariate
249
- def test_compliance_robust_normal_variate():
250
- # Arrange
251
- transformer = RobustNormalVariate()
252
- # Act & Assert
253
- check_estimator(transformer)
254
-
255
256
  # SubtractReference
256
257
  def test_compliance_subtract_reference():
257
258
  # Arrange
@@ -1,110 +0,0 @@
1
- import numpy as np
2
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
6
-
7
-
8
- class SpectrumShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
9
- """
10
- Shift the spectrum a given number of indices.
11
-
12
- Parameters
13
- ----------
14
- shift : float, default=1.0
15
- Shifts the data by a random integer between -shift and shift.
16
-
17
- random_state : int, default=None
18
- The random state to use for the random number generator.
19
-
20
- Attributes
21
- ----------
22
- n_features_in_ : int
23
- The number of features in the input data.
24
-
25
- _is_fitted : bool
26
- Whether the transformer has been fitted to data.
27
-
28
- Methods
29
- -------
30
- fit(X, y=None)
31
- Fit the transformer to the input data.
32
-
33
- transform(X, y=0, copy=True)
34
- Transform the input data by shifting the spectrum.
35
- """
36
-
37
-
38
- def __init__(self, shift: int = 0.0, random_state: int = None):
39
- self.shift = shift
40
- self.random_state = random_state
41
-
42
- def fit(self, X: np.ndarray, y=None) -> "SpectrumShift":
43
- """
44
- Fit the transformer to the input data.
45
-
46
- Parameters
47
- ----------
48
- X : np.ndarray of shape (n_samples, n_features)
49
- The input data to fit the transformer to.
50
-
51
- y : None
52
- Ignored.
53
-
54
- Returns
55
- -------
56
- self : PeakShift
57
- The fitted transformer.
58
- """
59
- # Check that X is a 2D array and has only finite values
60
- X = check_input(X)
61
-
62
- # Set the number of features
63
- self.n_features_in_ = X.shape[1]
64
-
65
- # Set the fitted attribute to True
66
- self._is_fitted = True
67
-
68
- # Instantiate the random number generator
69
- self._rng = np.random.default_rng(self.random_state)
70
-
71
- return self
72
-
73
- def transform(self, X: np.ndarray, y=None) -> np.ndarray:
74
- """
75
- Transform the input data by shifting the spectrum.
76
-
77
- Parameters
78
- ----------
79
- X : np.ndarray of shape (n_samples, n_features)
80
- The input data to transform.
81
-
82
- y : None
83
- Ignored.
84
-
85
- Returns
86
- -------
87
- X_ : np.ndarray of shape (n_samples, n_features)
88
- The transformed data.
89
- """
90
- # Check that the estimator is fitted
91
- check_is_fitted(self, "_is_fitted")
92
-
93
- # Check that X is a 2D array and has only finite values
94
- X = check_input(X)
95
- X_ = X.copy()
96
-
97
- # Check that the number of features is the same as the fitted data
98
- if X_.shape[1] != self.n_features_in_:
99
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
100
-
101
- # Calculate the standard normal variate
102
- for i, x in enumerate(X_):
103
- X_[i] = self._shift_spectrum(x)
104
-
105
- return X_.reshape(-1, 1) if X_.ndim == 1 else X_
106
-
107
- def _shift_spectrum(self, x) -> np.ndarray:
108
- shift_amount = self._rng.integers(-self.shift, self.shift+1)
109
- return np.roll(x, shift_amount)
110
-
@@ -1,2 +0,0 @@
1
- from .range_cut import RangeCut
2
- from .select_features import SelectFeatures
@@ -1,59 +0,0 @@
1
- chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- chemotools/augmenation/__init__.py,sha256=LiYw-QE-cxiYY0ua4SOgL0sC_-uAjkykkcj7gRP8Mic,246
3
- chemotools/augmenation/baseline_shift.py,sha256=bazDs0FmowW_0al_xYuTZyUMdGmJ2YBkkFKuhFpkHN4,3215
4
- chemotools/augmenation/exponential_noise.py,sha256=X2HTpL9zoiu0cFq3VsTxS3x_IO_tA_DF2vJyKgh4_UA,3082
5
- chemotools/augmenation/index_shift.py,sha256=H0rQtjSP9SROqnVPh1k7WzwcaGAxPsaINsTVFB1O8eI,3111
6
- chemotools/augmenation/normal_noise.py,sha256=NmzTuIJKyk6tfDJgmeX9iAzsKlJJk3984tS8nLLG9dg,3051
7
- chemotools/augmenation/spectrum_scale.py,sha256=WgMw_bCxWbyAYgYBO3q4PbbzcTDyBvVD73kxPfj3cdY,3174
8
- chemotools/augmenation/spectrum_shift.py,sha256=vaenidCQnQ1WyMv8o-Yr5QdwNPAzAd1zDIKbK-suckU,3116
9
- chemotools/augmenation/uniform_noise.py,sha256=gc0WdREItRiPHjNiZg79n0yK6bfntXkcImrEjkoRdus,3180
10
- chemotools/baseline/__init__.py,sha256=W61mEZU_9-sVGRkP2MJOIhd6e9KsOS1BYjxm1NOMIyM,373
11
- chemotools/baseline/air_pls.py,sha256=qn03l66IrxW7woFbRmRqCmfZHzQ7KKW8A7ogTxTHKo0,5443
12
- chemotools/baseline/ar_pls.py,sha256=OY2cpU2X6KIBR9ag3PAJXo_uQbniIV58zbUJxCxvZWs,4736
13
- chemotools/baseline/constant_baseline_correction.py,sha256=97xpKOBOwT5EhrD5tf32ZfkyZpf0_bL-VtyUFng1hn4,4158
14
- chemotools/baseline/cubic_spline_correction.py,sha256=PCHqR7TAhbdlTZrxgedlk0PU0kRUwQd_jymh0g-ieo8,3311
15
- chemotools/baseline/linear_correction.py,sha256=6Sw2n4QTvIDKWRdJpFD48hMvOEwqbctUAQLF1WwcoXs,3381
16
- chemotools/baseline/non_negative.py,sha256=17_82l95U9kgoQ3Pdz3-jGv8B51JzqPdHODt6PegWRw,2864
17
- chemotools/baseline/polynomial_correction.py,sha256=caP866fwZb7PASyz6oezgg8hdZtFMT0EimK89TGSTSc,4059
18
- chemotools/baseline/subtract_reference.py,sha256=Pht87XadXK0URq2fun66OHaUk_cx56AkF84ta3VJy_8,3441
19
- chemotools/datasets/__init__.py,sha256=ojqxb-C_eDmizwUqVCJ8BqJxwULD7_hWCyVIA1uRO0c,116
20
- chemotools/datasets/_base.py,sha256=Z174CaIlpx17Yu8Pg1qZPuHWkS3BYWn7gtOYsoe8zNk,2895
21
- chemotools/datasets/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- chemotools/datasets/data/coffee_labels.csv,sha256=ZXQWQIf8faLHjdnHfRoXfxMR56kq9Q1BGPZBkQyhGlY,487
23
- chemotools/datasets/data/coffee_spectra.csv,sha256=VA-sN4u0hC5iALlRxxkj-K87Lz3b3mmUHBJPoDXychI,2206147
24
- chemotools/datasets/data/fermentation_hplc.csv,sha256=AMmiFQxwaXrH8aN310-3h1YQDiDrT8JNRv1RDvhEvg4,2140
25
- chemotools/datasets/data/fermentation_spectra.csv,sha256=MaaNMQP0lygJgFbEoUX0OUqdA-id8mF5Llvf_vj9tJk,15237508
26
- chemotools/datasets/data/train_hplc.csv,sha256=DjtmqiePOWB-F6TsOGFngE1pKyXkb7Xmsi-1CLxsTnE,249
27
- chemotools/datasets/data/train_spectra.csv,sha256=iVF19W52NHlbqq8BbLomn8n47kSPT0QxJv7wtQX4yjQ,203244
28
- chemotools/derivative/__init__.py,sha256=x2F0IJ-uCbEYFoXFbZl_RTPCbSq82vqGOwlM9R_2Klo,84
29
- chemotools/derivative/norris_william.py,sha256=JaJ7zlSiC_0tiITu7VWXtgKrmkQP7gLvuFb0_n1j9Dw,5081
30
- chemotools/derivative/savitzky_golay.py,sha256=fFzQRVGVXQIUkHp1x9dqfLVPlyStubIhSj9aGfZKuXY,3745
31
- chemotools/scale/__init__.py,sha256=HuXy_TktvXLTMWoW0pKhVCzMOkRkMRnvWCGiIKvjvZ8,115
32
- chemotools/scale/min_max_scaler.py,sha256=f1bGkODTWGwfnfMfWPimVxIZC3WIikgthQh-zUiaQUU,3123
33
- chemotools/scale/norm_scaler.py,sha256=qNs-npf5Jqcp8RYqt88_5-zwd-yIo-J1jItgUTFeozs,2699
34
- chemotools/scale/point_scaler.py,sha256=LGSmZwuEYLxzVPgH-_aRk9SjOdmyQTxdguqRdBfqCwc,3540
35
- chemotools/scatter/__init__.py,sha256=M0_B4hXVoDc2Qx00QreUfhFqPUTs6LbU4CWaFU17hg4,288
36
- chemotools/scatter/extended_multiplicative_scatter_correction.py,sha256=J65hyEFBzKNo_35Ta9MKWO35CjTw-8hDbSr8xd8RIfc,6912
37
- chemotools/scatter/multiplicative_scatter_correction.py,sha256=MFemiwS-KWFOtlcXVhLnY4mn6QQ8pttuj6UP0rodXEM,5689
38
- chemotools/scatter/robust_normal_variate.py,sha256=joIL-nGUja0nG8YcCuT32ehxmy2xOy3OD0t0yP5vWfM,3233
39
- chemotools/scatter/standard_normal_variate.py,sha256=wmK_8ea2CvoLaGebBFKr8zAU7QjGbaKAg04y6iZ4sDc,2681
40
- chemotools/smooth/__init__.py,sha256=Kwg3jVnl-W-efTHMR6-6hQsTp-An1lYQ1lZFj6sNMtg,176
41
- chemotools/smooth/mean_filter.py,sha256=fcC4EjO57Br3I9SJqWDJRxPxAv2WjjmXTECdBmBYXLI,2953
42
- chemotools/smooth/median_filter.py,sha256=5tR931HIej-yrw1SoV9t09gi55QKbZ3eCTeO-EjNSU8,2966
43
- chemotools/smooth/savitzky_golay_filter.py,sha256=OlkW4-gHsgk7HFf7yeweKkL6aOZpNqMSbUpvKjC66KY,3523
44
- chemotools/smooth/whittaker_smooth.py,sha256=OVEYEstsURgkLbjwRiBWeN_XNs_JOFeD60uyZsVtrHQ,3664
45
- chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- chemotools/utils/check_inputs.py,sha256=fRAV4HIaGamdj_PNXSNnl7LurXytACNTGO51rhPpMUY,512
47
- chemotools/variable_selection/__init__.py,sha256=6gKxCAoGKAOhhTerUyBg_62YKCIr0K4mbDcoDfbMJeA,75
48
- chemotools/variable_selection/range_cut.py,sha256=1uH_nwYXEn_N1NY14n4uXpVvO6VVpM8zHea1cbHyZu4,4141
49
- chemotools/variable_selection/select_features.py,sha256=pcoFmGZLUPjtRytGpnqK8YdVj3Z5hwKGSJ10VxCpg58,4164
50
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- tests/fixtures.py,sha256=Xa-Vd62Kd1fyWg3PLUSP6iIkOK8etrbyOkMJTn3dvX8,1933
52
- tests/test_datasets.py,sha256=_3mMDYC-vUnb5BenMqvuhmkHI2PPIdsyq_nNu2ggH20,1055
53
- tests/test_functionality.py,sha256=b71JDPjrOJkAp1F-2TV-iWfftol46066y_jGxL8LN5I,20119
54
- tests/test_sklearn_compliance.py,sha256=vMHOfayAwtj9GVkbB0rIwhZzFTZBekwqvgY6zy6GTzA,5859
55
- chemotools-0.1.1.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
56
- chemotools-0.1.1.dist-info/METADATA,sha256=h-BS9J_XWlbr10p_EVKtoJsgCtPbGZSgx2d3cpdjGlY,5018
57
- chemotools-0.1.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
58
- chemotools-0.1.1.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
59
- chemotools-0.1.1.dist-info/RECORD,,
File without changes