chemotools 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. chemotools/augmentation/__init__.py +10 -0
  2. chemotools/augmentation/baseline_shift.py +23 -15
  3. chemotools/augmentation/exponential_noise.py +24 -15
  4. chemotools/augmentation/index_shift.py +104 -16
  5. chemotools/augmentation/normal_noise.py +24 -14
  6. chemotools/augmentation/spectrum_scale.py +24 -15
  7. chemotools/augmentation/uniform_noise.py +26 -14
  8. chemotools/baseline/__init__.py +13 -1
  9. chemotools/baseline/_air_pls.py +16 -14
  10. chemotools/baseline/_ar_pls.py +17 -17
  11. chemotools/baseline/_constant_baseline_correction.py +19 -16
  12. chemotools/baseline/_cubic_spline_correction.py +17 -8
  13. chemotools/baseline/_linear_correction.py +18 -10
  14. chemotools/baseline/_non_negative.py +14 -8
  15. chemotools/baseline/_polynomial_correction.py +19 -11
  16. chemotools/baseline/_subtract_reference.py +17 -9
  17. chemotools/datasets/__init__.py +2 -0
  18. chemotools/datasets/_base.py +3 -3
  19. chemotools/derivative/__init__.py +3 -1
  20. chemotools/derivative/_norris_william.py +14 -8
  21. chemotools/derivative/_savitzky_golay.py +25 -21
  22. chemotools/feature_selection/__init__.py +2 -0
  23. chemotools/feature_selection/_index_selector.py +18 -17
  24. chemotools/feature_selection/_range_cut.py +9 -7
  25. chemotools/scale/__init__.py +2 -0
  26. chemotools/scale/_min_max_scaler.py +14 -8
  27. chemotools/scale/_norm_scaler.py +14 -8
  28. chemotools/scale/_point_scaler.py +18 -10
  29. chemotools/scatter/__init__.py +11 -2
  30. chemotools/scatter/_extended_multiplicative_scatter_correction.py +33 -29
  31. chemotools/scatter/_multiplicative_scatter_correction.py +33 -18
  32. chemotools/scatter/_robust_normal_variate.py +14 -8
  33. chemotools/scatter/_standard_normal_variate.py +14 -8
  34. chemotools/smooth/__init__.py +3 -1
  35. chemotools/smooth/_mean_filter.py +14 -8
  36. chemotools/smooth/_median_filter.py +31 -9
  37. chemotools/smooth/_savitzky_golay_filter.py +20 -9
  38. chemotools/smooth/_whittaker_smooth.py +20 -11
  39. {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/METADATA +18 -17
  40. chemotools-0.1.7.dist-info/RECORD +51 -0
  41. {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/WHEEL +1 -2
  42. chemotools/utils/check_inputs.py +0 -14
  43. chemotools-0.1.5.dist-info/RECORD +0 -58
  44. chemotools-0.1.5.dist-info/top_level.txt +0 -2
  45. tests/__init__.py +0 -0
  46. tests/fixtures.py +0 -89
  47. tests/test_datasets.py +0 -111
  48. tests/test_functionality.py +0 -777
  49. tests/test_sklearn_compliance.py +0 -277
  50. {chemotools-0.1.5.dist-info → chemotools-0.1.7.dist-info}/LICENSE +0 -0
@@ -3,14 +3,12 @@ import numpy as np
3
3
  from scipy.sparse import csc_matrix, eye, diags
4
4
  from scipy.sparse.linalg import spsolve
5
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
6
- from sklearn.utils.validation import check_is_fitted
7
-
8
- from chemotools.utils.check_inputs import check_input
6
+ from sklearn.utils.validation import check_is_fitted, validate_data
9
7
 
10
8
  logger = logging.getLogger(__name__)
11
9
 
12
10
 
13
- class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
11
+ class AirPls(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
14
12
  """
15
13
  This class implements the AirPLS (Adaptive Iteratively Reweighted Penalized Least Squares) algorithm for baseline
16
14
  correction of spectra data. AirPLS is a common approach for removing the baseline from spectra, which can be useful
@@ -40,7 +38,7 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
40
38
 
41
39
  _calculate_whittaker_smooth(x, w)
42
40
  Calculate the Whittaker smooth of a given input vector x, with weights w.
43
-
41
+
44
42
  _calculate_air_pls(x)
45
43
  Calculate the AirPLS baseline of a given input vector x.
46
44
 
@@ -76,8 +74,11 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
76
74
  self : AirPls
77
75
  Returns the instance itself.
78
76
  """
77
+
79
78
  # Check that X is a 2D array and has only finite values
80
- X = self._validate_data(X)
79
+ X = validate_data(
80
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
81
+ )
81
82
 
82
83
  return self
83
84
 
@@ -102,14 +103,15 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
102
103
  check_is_fitted(self, "n_features_in_")
103
104
 
104
105
  # Check that X is a 2D array and has only finite values
105
- X = check_input(X)
106
- X_ = X.copy()
107
-
108
- # Check that the number of features is the same as the fitted data
109
- if X_.shape[1] != self.n_features_in_:
110
- raise ValueError(
111
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
112
- )
106
+ X_ = validate_data(
107
+ self,
108
+ X,
109
+ y="no_validation",
110
+ ensure_2d=True,
111
+ copy=True,
112
+ reset=False,
113
+ dtype=np.float64,
114
+ )
113
115
 
114
116
  # Calculate the air pls smooth
115
117
  for i, x in enumerate(X_):
@@ -5,14 +5,12 @@ from scipy.sparse import spdiags, csc_matrix
5
5
  from scipy.sparse.linalg import splu
6
6
 
7
7
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
8
- from sklearn.utils.validation import check_is_fitted
9
-
10
- from chemotools.utils.check_inputs import check_input
8
+ from sklearn.utils.validation import check_is_fitted, validate_data
11
9
 
12
10
  logger = logging.getLogger(__name__)
13
11
 
14
12
 
15
- class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
13
+ class ArPls(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
16
14
  """
17
15
  This class implements the Asymmetrically Reweighted Penalized Least Squares (ArPls) algorithm, a baseline
18
16
  correction method for spectroscopy data. It uses an iterative process
@@ -46,8 +44,8 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
46
44
 
47
45
  References
48
46
  ----------
49
- - Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo
50
- Baseline correction using asymmetrically reweighted penalized
47
+ - Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo
48
+ Baseline correction using asymmetrically reweighted penalized
51
49
  least squares smoothing
52
50
  """
53
51
 
@@ -79,7 +77,9 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
79
77
  """
80
78
 
81
79
  # Check that X is a 2D array and has only finite values
82
- X = self._validate_data(X)
80
+ X = validate_data(
81
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
82
+ )
83
83
 
84
84
  return self
85
85
 
@@ -104,14 +104,14 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
104
104
  check_is_fitted(self, "n_features_in_")
105
105
 
106
106
  # Check that X is a 2D array and has only finite values
107
- X = check_input(X)
108
- X_ = X.copy()
109
-
110
- # Check that the number of features is the same as the fitted data
111
- if X_.shape[1] != self.n_features_in_:
112
- raise ValueError(
113
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
114
- )
107
+ X_ = validate_data(
108
+ self,
109
+ X,
110
+ y="no_validation",
111
+ ensure_2d=True,
112
+ copy=True,
113
+ reset=False,
114
+ )
115
115
 
116
116
  # Calculate the ar pls baseline
117
117
  for i, x in enumerate(X_):
@@ -120,9 +120,9 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
120
120
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
121
121
 
122
122
  def _calculate_diff(self, N):
123
- I = sp.eye(N, format="csc")
123
+ identity_matrix = sp.eye(N, format="csc")
124
124
  D2 = sp.diags([1, -2, 1], [0, 1, 2], shape=(N - 2, N), format="csc")
125
- return D2.dot(I).T
125
+ return D2.dot(identity_matrix).T
126
126
 
127
127
  def _calculate_ar_pls(self, x):
128
128
  N = len(x)
@@ -1,11 +1,11 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
8
+ class ConstantBaselineCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  A transformer that corrects a baseline by subtracting a constant value.
11
11
  The constant value is taken by the mean of the features between the start
@@ -43,7 +43,7 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
43
43
  self,
44
44
  start: int = 0,
45
45
  end: int = 1,
46
- wavenumbers: np.ndarray = None,
46
+ wavenumbers: Optional[np.ndarray] = None,
47
47
  ) -> None:
48
48
  self.start = start
49
49
  self.end = end
@@ -67,7 +67,9 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
67
67
  The fitted transformer.
68
68
  """
69
69
  # Check that X is a 2D array and has only finite values
70
- X = self._validate_data(X)
70
+ X = validate_data(
71
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
72
+ )
71
73
 
72
74
  # Set the start and end indices
73
75
  if self.wavenumbers is None:
@@ -100,17 +102,18 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
100
102
  The transformed input data.
101
103
  """
102
104
  # Check that the estimator is fitted
103
- check_is_fitted(self, ["start_index_", "end_index_"])
105
+ check_is_fitted(self, "n_features_in_")
104
106
 
105
107
  # Check that X is a 2D array and has only finite values
106
- X = check_input(X)
107
- X_ = X.copy()
108
-
109
- # Check that the number of features is the same as the fitted data
110
- if X_.shape[1] != self.n_features_in_:
111
- raise ValueError(
112
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
113
- )
108
+ X_ = validate_data(
109
+ self,
110
+ X,
111
+ y="no_validation",
112
+ ensure_2d=True,
113
+ copy=True,
114
+ reset=False,
115
+ dtype=np.float64,
116
+ )
114
117
 
115
118
  # Base line correct the spectra
116
119
  for i, x in enumerate(X_):
@@ -120,4 +123,4 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
120
123
 
121
124
  def _find_index(self, target: float) -> int:
122
125
  wavenumbers = np.array(self.wavenumbers)
123
- return np.argmin(np.abs(wavenumbers - target))
126
+ return np.argmin(np.abs(wavenumbers - target)).astype(int)
@@ -1,12 +1,12 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from scipy.interpolate import CubicSpline
3
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
4
- from sklearn.utils.validation import check_is_fitted
5
-
6
- from chemotools.utils.check_inputs import check_input
6
+ from sklearn.utils.validation import check_is_fitted, validate_data
7
7
 
8
8
 
9
- class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
9
+ class CubicSplineCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
10
10
  """
11
11
  A transformer that corrects a baseline by subtracting a cubic spline through the
12
12
  points defined by the indices.
@@ -34,7 +34,7 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
34
34
 
35
35
  """
36
36
 
37
- def __init__(self, indices: list = None) -> None:
37
+ def __init__(self, indices: Optional[list] = None) -> None:
38
38
  self.indices = indices
39
39
 
40
40
  def fit(self, X: np.ndarray, y=None) -> "CubicSplineCorrection":
@@ -55,7 +55,9 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
55
55
  The fitted transformer.
56
56
  """
57
57
  # Check that X is a 2D array and has only finite values
58
- X = self._validate_data(X)
58
+ X = validate_data(
59
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
60
+ )
59
61
 
60
62
  if self.indices is None:
61
63
  self.indices_ = [0, len(X[0]) - 1]
@@ -88,8 +90,15 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
88
90
  check_is_fitted(self, "indices_")
89
91
 
90
92
  # Check that X is a 2D array and has only finite values
91
- X = check_input(X)
92
- X_ = X.copy()
93
+ X_ = validate_data(
94
+ self,
95
+ X,
96
+ y="no_validation",
97
+ ensure_2d=True,
98
+ copy=True,
99
+ reset=False,
100
+ dtype=np.float64,
101
+ )
93
102
 
94
103
  # Check that the number of features is the same as the fitted data
95
104
  if X_.shape[1] != self.n_features_in_:
@@ -1,11 +1,9 @@
1
1
  import numpy as np
2
2
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
3
+ from sklearn.utils.validation import check_is_fitted, validate_data
4
4
 
5
- from chemotools.utils.check_inputs import check_input
6
5
 
7
-
8
- class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
6
+ class LinearCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
7
  """
10
8
  A transformer that corrects a baseline by subtracting a linear baseline through the
11
9
  initial and final points of the spectrum.
@@ -20,7 +18,6 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
20
18
  """
21
19
 
22
20
  def _drift_correct_spectrum(self, x: np.ndarray) -> np.ndarray:
23
-
24
21
  # Can take any array and returns with a linear baseline correction
25
22
  # Find the x values at the edges of the spectrum
26
23
  y1: float = x[0]
@@ -57,7 +54,9 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
57
54
  The fitted transformer.
58
55
  """
59
56
  # Check that X is a 2D array and has only finite values
60
- X = self._validate_data(X)
57
+ X = validate_data(
58
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
59
+ )
61
60
 
62
61
  return self
63
62
 
@@ -85,14 +84,23 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
85
84
  check_is_fitted(self, "n_features_in_")
86
85
 
87
86
  # Check that X is a 2D array and has only finite values
88
- X = check_input(X)
89
- X_ = X.copy()
87
+ X_ = validate_data(
88
+ self,
89
+ X,
90
+ y="no_validation",
91
+ ensure_2d=True,
92
+ copy=True,
93
+ reset=False,
94
+ dtype=np.float64,
95
+ )
90
96
 
91
97
  # Check that the number of features is the same as the fitted data
92
98
  if X_.shape[1] != self.n_features_in_:
93
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
99
+ raise ValueError(
100
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
101
+ )
94
102
 
95
103
  # Apply the linear baseline (drift) correction to each spectrum
96
104
  for i, x in enumerate(X_):
97
105
  X_[i, :] = self._drift_correct_spectrum(x)
98
- return X_.reshape(-1, 1) if X_.ndim == 1 else X_
106
+ return X_.reshape(-1, 1) if X_.ndim == 1 else X_
@@ -1,11 +1,9 @@
1
1
  import numpy as np
2
2
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
3
+ from sklearn.utils.validation import check_is_fitted, validate_data
4
4
 
5
- from chemotools.utils.check_inputs import check_input
6
5
 
7
-
8
- class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
6
+ class NonNegative(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
7
  """
10
8
  A transformer that sets all negative values to zero or to abs.
11
9
 
@@ -44,8 +42,9 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
44
42
  The fitted transformer.
45
43
  """
46
44
  # Check that X is a 2D array and has only finite values
47
- X = self._validate_data(X)
48
-
45
+ X = validate_data(
46
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
47
+ )
49
48
  return self
50
49
 
51
50
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -69,8 +68,15 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
69
68
  check_is_fitted(self, "n_features_in_")
70
69
 
71
70
  # Check that X is a 2D array and has only finite values
72
- X = check_input(X)
73
- X_ = X.copy()
71
+ X_ = validate_data(
72
+ self,
73
+ X,
74
+ y="no_validation",
75
+ ensure_2d=True,
76
+ copy=True,
77
+ reset=False,
78
+ dtype=np.float64,
79
+ )
74
80
 
75
81
  # Check that the number of features is the same as the fitted data
76
82
  if X_.shape[1] != self.n_features_in_:
@@ -1,11 +1,11 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
8
+ class PolynomialCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  A transformer that subtracts a polynomial baseline from the input data. The polynomial is
11
11
  fitted to the points in the spectrum specified by the indices parameter.
@@ -31,7 +31,7 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
31
31
  Subtract the polynomial baseline from a single spectrum.
32
32
  """
33
33
 
34
- def __init__(self, order: int = 1, indices: list = None) -> None:
34
+ def __init__(self, order: int = 1, indices: Optional[list] = None) -> None:
35
35
  self.order = order
36
36
  self.indices = indices
37
37
 
@@ -53,10 +53,11 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
53
53
  The fitted transformer.
54
54
  """
55
55
  # Check that X is a 2D array and has only finite values
56
- X = self._validate_data(X)
57
-
56
+ X = validate_data(
57
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
58
+ )
58
59
  if self.indices is None:
59
- self.indices_ = range(0, len(X[0]))
60
+ self.indices_ = list(range(0, len(X[0])))
60
61
  else:
61
62
  self.indices_ = self.indices
62
63
 
@@ -83,11 +84,18 @@ class PolynomialCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin
83
84
  The transformed data.
84
85
  """
85
86
  # Check that the estimator is fitted
86
- check_is_fitted(self, "indices_")
87
+ check_is_fitted(self, "n_features_in_")
87
88
 
88
89
  # Check that X is a 2D array and has only finite values
89
- X = check_input(X)
90
- X_ = X.copy()
90
+ X_ = validate_data(
91
+ self,
92
+ X,
93
+ y="no_validation",
94
+ ensure_2d=True,
95
+ copy=True,
96
+ reset=False,
97
+ dtype=np.float64,
98
+ )
91
99
 
92
100
  # Check that the number of features is the same as the fitted data
93
101
  if X_.shape[1] != self.n_features_in_:
@@ -1,11 +1,11 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
8
+ class SubtractReference(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  A transformer that subtracts a reference spectrum from the input data.
11
11
 
@@ -29,7 +29,7 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
29
29
 
30
30
  def __init__(
31
31
  self,
32
- reference: np.ndarray = None,
32
+ reference: Optional[np.ndarray] = None,
33
33
  ):
34
34
  self.reference = reference
35
35
 
@@ -51,8 +51,9 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
51
51
  The fitted transformer.
52
52
  """
53
53
  # Check that X is a 2D array and has only finite values
54
- X = self._validate_data(X)
55
-
54
+ X = validate_data(
55
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
56
+ )
56
57
  # Set the reference
57
58
  if self.reference is not None:
58
59
  self.reference_ = self.reference.copy()
@@ -81,8 +82,15 @@ class SubtractReference(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
81
82
  check_is_fitted(self, "n_features_in_")
82
83
 
83
84
  # Check that X is a 2D array and has only finite values
84
- X = check_input(X)
85
- X_ = X.copy()
85
+ X_ = validate_data(
86
+ self,
87
+ X,
88
+ y="no_validation",
89
+ ensure_2d=True,
90
+ copy=True,
91
+ reset=False,
92
+ dtype=np.float64,
93
+ )
86
94
 
87
95
  # Check that the number of features is the same as the fitted data
88
96
  if X_.shape[1] != self.n_features_in_:
@@ -1,3 +1,5 @@
1
1
  from ._base import load_coffee
2
2
  from ._base import load_fermentation_train
3
3
  from ._base import load_fermentation_test
4
+
5
+ __all__ = ["load_coffee", "load_fermentation_train", "load_fermentation_test"]
@@ -110,13 +110,13 @@ def load_coffee(set_output="pandas"):
110
110
  coffee_spectra = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
111
111
  coffee_labels = pd.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
112
112
  return coffee_spectra, coffee_labels
113
-
113
+
114
114
  if set_output == "polars":
115
115
  coffee_spectra = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_spectra.csv")
116
116
  coffee_labels = pl.read_csv(PACKAGE_DIRECTORY + "/data/coffee_labels.csv")
117
117
  return coffee_spectra, coffee_labels
118
-
118
+
119
119
  else:
120
120
  raise ValueError(
121
121
  "Invalid value for set_output. Please use 'pandas' or 'polars'."
122
- )
122
+ )
@@ -1,2 +1,4 @@
1
1
  from ._norris_william import NorrisWilliams
2
- from ._savitzky_golay import SavitzkyGolay
2
+ from ._savitzky_golay import SavitzkyGolay
3
+
4
+ __all__ = ["NorrisWilliams", "SavitzkyGolay"]
@@ -1,12 +1,10 @@
1
1
  import numpy as np
2
2
  from scipy.ndimage import convolve1d
3
3
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
4
- from sklearn.utils.validation import check_is_fitted
4
+ from sklearn.utils.validation import check_is_fitted, validate_data
5
5
 
6
- from chemotools.utils.check_inputs import check_input
7
6
 
8
-
9
- class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
7
+ class NorrisWilliams(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
10
8
  """
11
9
  A transformer that calculates the Norris-Williams derivative of the input data.
12
10
 
@@ -64,8 +62,9 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
64
62
  The fitted transformer.
65
63
  """
66
64
  # Check that X is a 2D array and has only finite values
67
- X = self._validate_data(X)
68
-
65
+ X = validate_data(
66
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
67
+ )
69
68
  return self
70
69
 
71
70
  def transform(self, X: np.ndarray, y=None):
@@ -89,8 +88,15 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
89
88
  check_is_fitted(self, "n_features_in_")
90
89
 
91
90
  # Check that X is a 2D array and has only finite values
92
- X = check_input(X)
93
- X_ = X.copy()
91
+ X_ = validate_data(
92
+ self,
93
+ X,
94
+ y="no_validation",
95
+ ensure_2d=True,
96
+ copy=True,
97
+ reset=False,
98
+ dtype=np.float64,
99
+ )
94
100
 
95
101
  if X_.shape[1] != self.n_features_in_:
96
102
  raise ValueError(
@@ -1,12 +1,12 @@
1
+ from typing import Literal
2
+
1
3
  import numpy as np
2
4
  from scipy.signal import savgol_filter
3
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
4
- from sklearn.utils.validation import check_is_fitted
5
-
6
- from chemotools.utils.check_inputs import check_input
6
+ from sklearn.utils.validation import check_is_fitted, validate_data
7
7
 
8
8
 
9
- class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
9
+ class SavitzkyGolay(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
10
10
  """
11
11
  A transformer that calculates the Savitzky-Golay derivative of the input data.
12
12
 
@@ -41,7 +41,7 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
41
41
  window_size: int = 3,
42
42
  polynomial_order: int = 1,
43
43
  derivate_order: int = 1,
44
- mode: str = "nearest",
44
+ mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
45
45
  ) -> None:
46
46
  self.window_size = window_size
47
47
  self.polynomial_order = polynomial_order
@@ -66,8 +66,9 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
66
66
  The fitted transformer.
67
67
  """
68
68
  # Check that X is a 2D array and has only finite values
69
- X = self._validate_data(X)
70
-
69
+ X = validate_data(
70
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
71
+ )
71
72
  return self
72
73
 
73
74
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -91,26 +92,29 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
91
92
  check_is_fitted(self, "n_features_in_")
92
93
 
93
94
  # Check that X is a 2D array and has only finite values
94
- X = check_input(X)
95
- X_ = X.copy()
95
+ X_ = validate_data(
96
+ self,
97
+ X,
98
+ y="no_validation",
99
+ ensure_2d=True,
100
+ copy=True,
101
+ reset=False,
102
+ dtype=np.float64,
103
+ )
96
104
 
97
105
  if X_.shape[1] != self.n_features_in_:
98
106
  raise ValueError(
99
107
  f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
100
108
  )
101
-
102
109
  # Calculate the Savitzky-Golay derivative of each spectrum
103
110
  for i, x in enumerate(X_):
104
- X_[i] = self._calculate_derivative(x)
111
+ X_[i] = savgol_filter(
112
+ x,
113
+ self.window_size,
114
+ self.polynomial_order,
115
+ deriv=self.derivate_order,
116
+ axis=0,
117
+ mode=self.mode,
118
+ )
105
119
 
106
120
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
107
-
108
- def _calculate_derivative(self, x) -> np.ndarray:
109
- return savgol_filter(
110
- x,
111
- self.window_size,
112
- self.polynomial_order,
113
- deriv=self.derivate_order,
114
- axis=0,
115
- mode=self.mode,
116
- )
@@ -1,2 +1,4 @@
1
1
  from ._index_selector import IndexSelector
2
2
  from ._range_cut import RangeCut
3
+
4
+ __all__ = ["IndexSelector", "RangeCut"]