chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. chemotools/augmentation/__init__.py +16 -0
  2. chemotools/augmentation/baseline_shift.py +119 -0
  3. chemotools/augmentation/exponential_noise.py +117 -0
  4. chemotools/augmentation/index_shift.py +120 -0
  5. chemotools/augmentation/normal_noise.py +118 -0
  6. chemotools/augmentation/spectrum_scale.py +120 -0
  7. chemotools/augmentation/uniform_noise.py +124 -0
  8. chemotools/baseline/__init__.py +20 -8
  9. chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
  10. chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
  11. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
  12. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
  13. chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
  14. chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
  15. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
  16. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
  17. chemotools/datasets/__init__.py +5 -0
  18. chemotools/datasets/_base.py +122 -0
  19. chemotools/datasets/data/coffee_labels.csv +61 -0
  20. chemotools/datasets/data/coffee_spectra.csv +61 -0
  21. chemotools/datasets/data/fermentation_hplc.csv +35 -0
  22. chemotools/datasets/data/fermentation_spectra.csv +1630 -0
  23. chemotools/datasets/data/train_hplc.csv +22 -0
  24. chemotools/datasets/data/train_spectra.csv +22 -0
  25. chemotools/derivative/__init__.py +4 -2
  26. chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
  27. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
  28. chemotools/feature_selection/__init__.py +4 -0
  29. chemotools/feature_selection/_index_selector.py +113 -0
  30. chemotools/feature_selection/_range_cut.py +111 -0
  31. chemotools/scale/__init__.py +5 -3
  32. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
  33. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
  34. chemotools/scale/_point_scaler.py +115 -0
  35. chemotools/scatter/__init__.py +13 -2
  36. chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
  37. chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
  38. chemotools/scatter/_robust_normal_variate.py +101 -0
  39. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
  40. chemotools/smooth/__init__.py +6 -4
  41. chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
  42. chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
  43. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
  44. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
  45. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
  46. chemotools-0.1.6.dist-info/RECORD +51 -0
  47. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
  48. chemotools/scale/index_scaler.py +0 -97
  49. chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
  50. chemotools/scatter/multiplicative_scatter_correction.py +0 -123
  51. chemotools/utils/check_inputs.py +0 -14
  52. chemotools/variable_selection/__init__.py +0 -1
  53. chemotools/variable_selection/range_cut.py +0 -121
  54. chemotools-0.0.22.dist-info/RECORD +0 -39
  55. chemotools-0.0.22.dist-info/top_level.txt +0 -2
  56. tests/fixtures.py +0 -89
  57. tests/test_functionality.py +0 -397
  58. tests/test_sklearn_compliance.py +0 -192
  59. {tests → chemotools/datasets/data}/__init__.py +0 -0
  60. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
chemotools/scatter/_extended_multiplicative_scatter_correction.py
@@ -0,0 +1,183 @@
+ from typing import Optional
+
+ import numpy as np
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+ from sklearn.utils.validation import check_is_fitted, validate_data
+
+
+ class ExtendedMultiplicativeScatterCorrection(
+     TransformerMixin, OneToOneFeatureMixin, BaseEstimator
+ ):
+     """Extended multiplicative scatter correction (EMSC) is a preprocessing technique for
+     removing non-linear scatter effects from spectra. It is based on fitting a polynomial
+     regression model to the spectrum using a reference spectrum. The reference spectrum
+     can be the mean or median spectrum of a set of spectra or a selected reference.
+
+     Note that this implementation does not include further extensions of the model using
+     orthogonal subspace models.
+
+     Parameters
+     ----------
+     reference : np.ndarray, optional
+         The reference spectrum to use for the correction. If None, the mean
+         spectrum will be used. The default is None.
+     use_mean : bool, optional
+         Whether to use the mean spectrum as the reference. The default is True.
+     use_median : bool, optional
+         Whether to use the median spectrum as the reference. The default is False.
+     order : int, optional
+         The order of the polynomial to fit to the spectrum. The default is 2.
+     weights : np.ndarray, optional
+         The weights to use for the weighted EMSC. If None, the standard EMSC
+         will be used. The default is None.
+
+     Attributes
+     ----------
+     reference_ : np.ndarray
+         The reference spectrum used for the correction.
+
+     References
+     ----------
+     Nils Kristian Afseth, Achim Kohler. Extended multiplicative signal correction
+     in vibrational spectroscopy, a tutorial. doi:10.1016/j.chemolab.2012.03.004
+
+     Valeria Tafintseva et al. Correcting replicate variation in spectroscopic data
+     by machine learning and model-based pre-processing.
+     doi:10.1016/j.chemolab.2021.104350
+     """
+
+     def __init__(
+         self,
+         reference: Optional[np.ndarray] = None,
+         use_mean: bool = True,
+         use_median: bool = False,
+         order: int = 2,
+         weights: Optional[np.ndarray] = None,
+     ):
+         self.reference = reference
+         self.use_mean = use_mean
+         self.use_median = use_median
+         self.order = order
+         self.weights = weights
+
+     def fit(self, X: np.ndarray, y=None) -> "ExtendedMultiplicativeScatterCorrection":
+         """
+         Fit the transformer to the input data. If no reference is provided, the
+         mean or median spectrum will be calculated from the input data.
+
+         Parameters
+         ----------
+         X : np.ndarray of shape (n_samples, n_features)
+             The input data to fit the transformer to.
+
+         y : None
+             Ignored.
+
+         Returns
+         -------
+         self : ExtendedMultiplicativeScatterCorrection
+             The fitted transformer.
+         """
+         # Check that X is a 2D array and has only finite values
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         # Check that the length of the reference is the same as the number of features
+         if self.reference is not None:
+             if len(self.reference) != self.n_features_in_:
+                 raise ValueError(
+                     f"Expected {self.n_features_in_} features in reference but got {len(self.reference)}"
+                 )
+
+         if self.weights is not None:
+             if len(self.weights) != self.n_features_in_:
+                 raise ValueError(
+                     f"Expected {self.n_features_in_} features in weights but got {len(self.weights)}"
+                 )
+
+         # Set the reference
+         if self.reference is not None:
+             self.reference_ = np.array(self.reference)
+             self.indices_ = self._calculate_indices(self.reference_)
+             self.A_ = self._calculate_A(self.indices_, self.reference_)
+             self.weights_ = np.array(self.weights)
+             return self
+
+         if self.use_median:
+             self.reference_ = np.median(X, axis=0)
+             self.indices_ = self._calculate_indices(X[0])
+             self.A_ = self._calculate_A(self.indices_, self.reference_)
+             self.weights_ = np.array(self.weights)
+             return self
+
+         if self.use_mean:
+             self.reference_ = X.mean(axis=0)
+             self.indices_ = self._calculate_indices(X[0])
+             self.A_ = self._calculate_A(self.indices_, self.reference_)
+             self.weights_ = np.array(self.weights)
+             return self
+
+         raise ValueError("No reference was provided")
+
+     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+         """
+         Transform the input data by applying the extended multiplicative
+         scatter correction.
+
+         Parameters
+         ----------
+         X : np.ndarray of shape (n_samples, n_features)
+             The input data to transform.
+
+         y : None
+             Ignored.
+
+         Returns
+         -------
+         X_ : np.ndarray of shape (n_samples, n_features)
+             The transformed data.
+         """
+         # Check that the estimator is fitted
+         check_is_fitted(self, "n_features_in_")
+
+         # Check that X is a 2D array and has only finite values
+         X_ = validate_data(
+             self,
+             X,
+             y="no_validation",
+             ensure_2d=True,
+             copy=True,
+             reset=False,
+             dtype=np.float64,
+         )
+
+         if self.weights is None:
+             for i, x in enumerate(X_):
+                 X_[i] = self._calculate_emsc(x)
+             return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+
+         if self.weights is not None:
+             for i, x in enumerate(X_):
+                 X_[i] = self._calculate_weighted_emsc(x)
+             return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+
+     def _calculate_weighted_emsc(self, x):
+         # Weighted least-squares fit of the design matrix to the spectrum
+         reg = np.linalg.lstsq(
+             np.diag(self.weights_) @ self.A_, x * self.weights_, rcond=None
+         )[0]
+         # Subtract the fitted polynomial part, divide by the reference coefficient
+         x_ = (x - np.dot(self.A_[:, 0:-1], reg[0:-1])) / reg[-1]
+         return x_
+
+     def _calculate_emsc(self, x):
+         reg = np.linalg.lstsq(self.A_, x, rcond=None)[0]
+         x_ = (x - np.dot(self.A_[:, 0:-1], reg[0:-1])) / reg[-1]
+         return x_
+
+     def _calculate_indices(self, reference):
+         return np.linspace(0, len(reference) - 1, len(reference))
+
+     def _calculate_A(self, indices, reference):
+         # Design matrix columns: 1, t, t**2, ..., t**order, reference
+         return np.vstack(
+             [[np.power(indices, o) for o in range(self.order + 1)], reference]
+         ).T
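To make the fitted model concrete: `_calculate_A` builds a design matrix whose columns are 1, t, t², …, t^order plus the reference spectrum, and each spectrum is corrected by subtracting the fitted polynomial part and dividing by the fitted reference coefficient. A minimal usage sketch with synthetic spectra; it assumes `chemotools.scatter` re-exports the class, following the same `__init__.py` re-export pattern the `chemotools/smooth/__init__.py` hunk further down shows:

```python
import numpy as np
from chemotools.scatter import ExtendedMultiplicativeScatterCorrection

# Illustrative data: 10 spectra = random multiplicative gain and additive
# offset applied to a shared Gaussian band over 200 "wavelengths".
rng = np.random.default_rng(0)
band = np.exp(-0.5 * ((np.arange(200) - 100) / 15) ** 2)
X = rng.uniform(0.5, 2.0, (10, 1)) * band + rng.uniform(-0.1, 0.1, (10, 1))

# Default settings: order-2 polynomial, mean spectrum as the reference.
emsc = ExtendedMultiplicativeScatterCorrection(order=2)
X_corrected = emsc.fit_transform(X)
print(X_corrected.shape)  # (10, 200)
```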
chemotools/scatter/_multiplicative_scatter_correction.py
@@ -0,0 +1,169 @@
+ from typing import Optional
+
+ import numpy as np
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+ from sklearn.utils.validation import check_is_fitted, validate_data
+
+
+ class MultiplicativeScatterCorrection(
+     TransformerMixin, OneToOneFeatureMixin, BaseEstimator
+ ):
+     """Multiplicative scatter correction (MSC) is a preprocessing technique for
+     removing scatter effects from spectra. It is based on fitting a linear
+     regression model to the spectrum using a reference spectrum. The reference
+     spectrum is usually a mean or median spectrum of a set of spectra.
+
+     Parameters
+     ----------
+     reference : np.ndarray of shape (n_features,), optional
+         The reference spectrum to use for the correction. If None, the mean
+         spectrum will be used. The default is None.
+     use_mean : bool, optional
+         Whether to use the mean spectrum as the reference. The default is True.
+     use_median : bool, optional
+         Whether to use the median spectrum as the reference. The default is False.
+     weights : np.ndarray of shape (n_features,), optional
+         The weights to use for the weighted MSC. If None, the standard MSC
+         will be used. The default is None.
+
+     Attributes
+     ----------
+     reference_ : np.ndarray
+         The reference spectrum used for the correction.
+     n_features_in_ : int
+         The number of features in the training data.
+
+     Raises
+     ------
+     ValueError
+         If no reference is provided.
+     """
+
+     def __init__(
+         self,
+         reference: Optional[np.ndarray] = None,
+         use_mean: bool = True,
+         use_median: bool = False,
+         weights: Optional[np.ndarray] = None,
+     ):
+         self.reference = reference
+         self.use_mean = use_mean
+         self.use_median = use_median
+         self.weights = weights
+
+     def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
+         """
+         Fit the transformer to the input data. If no reference is provided, the
+         mean or median spectrum will be calculated from the input data.
+
+         Parameters
+         ----------
+         X : np.ndarray of shape (n_samples, n_features)
+             The input data to fit the transformer to.
+
+         y : None
+             Ignored.
+
+         Returns
+         -------
+         self : MultiplicativeScatterCorrection
+             The fitted transformer.
+         """
+         # Check that X is a 2D array and has only finite values
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+         # Check that the length of the reference is the same as the number of features
+         if self.reference is not None:
+             if len(self.reference) != self.n_features_in_:
+                 raise ValueError(
+                     f"Expected {self.n_features_in_} features in reference but got {len(self.reference)}"
+                 )
+
+         if self.weights is not None:
+             if len(self.weights) != self.n_features_in_:
+                 raise ValueError(
+                     f"Expected {self.n_features_in_} features in weights but got {len(self.weights)}"
+                 )
+
+         # Set the reference
+         if self.reference is not None:
+             self.reference_ = np.array(self.reference)
+             self.A_ = self._calculate_A(self.reference_)
+             self.weights_ = np.array(self.weights)
+             return self
+
+         if self.use_median:
+             self.reference_ = np.median(X, axis=0)
+             self.A_ = self._calculate_A(self.reference_)
+             self.weights_ = np.array(self.weights)
+             return self
+
+         if self.use_mean:
+             self.reference_ = X.mean(axis=0)
+             self.A_ = self._calculate_A(self.reference_)
+             self.weights_ = np.array(self.weights)
+             return self
+
+         raise ValueError("No reference was provided")
+
+     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+         """
+         Transform the input data by applying the multiplicative scatter
+         correction.
+
+         Parameters
+         ----------
+         X : np.ndarray of shape (n_samples, n_features)
+             The input data to transform.
+
+         y : None
+             Ignored.
+
+         Returns
+         -------
+         X_ : np.ndarray of shape (n_samples, n_features)
+             The transformed data.
+         """
+         # Check that the estimator is fitted
+         check_is_fitted(self, "n_features_in_")
+
+         # Check that X is a 2D array and has only finite values
+         X_ = validate_data(
+             self,
+             X,
+             y="no_validation",
+             ensure_2d=True,
+             copy=True,
+             reset=False,
+             dtype=np.float64,
+         )
+
+         # Check that the number of features is the same as the fitted data
+         if X_.shape[1] != self.n_features_in_:
+             raise ValueError(
+                 f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+             )
+
+         # Calculate the multiplicative signal correction
+         if self.weights is None:
+             for i, x in enumerate(X_):
+                 X_[i] = self._calculate_multiplicative_correction(x)
+             return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+
+         if self.weights is not None:
+             for i, x in enumerate(X_):
+                 X_[i] = self._calculate_weighted_multiplicative_correction(x)
+             return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+
+     def _calculate_weighted_multiplicative_correction(self, x) -> np.ndarray:
+         # Weighted least-squares fit of x against [reference, 1]
+         m, c = np.linalg.lstsq(
+             np.diag(self.weights_) @ self.A_, x * self.weights_, rcond=None
+         )[0]
+         return (x - c) / m
+
+     def _calculate_multiplicative_correction(self, x) -> np.ndarray:
+         # Fit x = m * reference + c, then invert the distortion
+         m, c = np.linalg.lstsq(self.A_, x, rcond=None)[0]
+         return (x - c) / m
+
+     def _calculate_A(self, reference):
+         ones = np.ones(reference.shape[0])
+         return np.vstack([reference, ones]).T
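MSC is effectively the order-0 special case of EMSC: each spectrum x is fit as x ≈ m·reference + c and corrected as (x − c) / m, which is exactly what `_calculate_multiplicative_correction` does. A short sketch with an explicit, illustrative reference (import path assumed as above):

```python
import numpy as np
from chemotools.scatter import MultiplicativeScatterCorrection

# Illustrative data: 5 spectra distorted from a known reference spectrum.
rng = np.random.default_rng(1)
reference = np.sin(np.linspace(0, np.pi, 50))
X = rng.uniform(0.8, 1.2, (5, 1)) * reference + rng.uniform(-0.05, 0.05, (5, 1))

# With an explicit reference, fit() skips the mean/median branches entirely.
msc = MultiplicativeScatterCorrection(reference=reference)
X_corrected = msc.fit(X).transform(X)
```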
chemotools/scatter/_robust_normal_variate.py
@@ -0,0 +1,101 @@
+ import numpy as np
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+ from sklearn.utils.validation import check_is_fitted, validate_data
+
+
+ class RobustNormalVariate(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
+     """
+     A transformer that calculates the robust normal variate of the input data.
+
+     Parameters
+     ----------
+     percentile : float, optional
+         The percentile to use for the robust normal variate. The value should be
+         between 0 and 100. The default is 25.
+
+     Methods
+     -------
+     fit(X, y=None)
+         Fit the transformer to the input data.
+
+     transform(X, y=None)
+         Transform the input data by calculating the robust normal variate.
+
+     References
+     ----------
+     Q. Guo, W. Wu, D.L. Massart. The robust normal variate transform for pattern
+     recognition with near-infrared data. doi:10.1016/S0003-2670(98)00737-5
+     """
+
+     def __init__(self, percentile: float = 25):
+         self.percentile = percentile
+
+     def fit(self, X: np.ndarray, y=None) -> "RobustNormalVariate":
+         """
+         Fit the transformer to the input data.
+
+         Parameters
+         ----------
+         X : np.ndarray of shape (n_samples, n_features)
+             The input data to fit the transformer to.
+
+         y : None
+             Ignored.
+
+         Returns
+         -------
+         self : RobustNormalVariate
+             The fitted transformer.
+         """
+         # Check that X is a 2D array and has only finite values
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+         return self
+
+     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+         """
+         Transform the input data by calculating the robust normal variate.
+
+         Parameters
+         ----------
+         X : np.ndarray of shape (n_samples, n_features)
+             The input data to transform.
+
+         y : None
+             Ignored.
+
+         Returns
+         -------
+         X_ : np.ndarray of shape (n_samples, n_features)
+             The transformed data.
+         """
+         # Check that the estimator is fitted
+         check_is_fitted(self, "n_features_in_")
+
+         # Check that X is a 2D array and has only finite values
+         X_ = validate_data(
+             self,
+             X,
+             y="no_validation",
+             ensure_2d=True,
+             copy=True,
+             reset=False,
+             dtype=np.float64,
+         )
+
+         # Check that the number of features is the same as the fitted data
+         if X_.shape[1] != self.n_features_in_:
+             raise ValueError(
+                 f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+             )
+
+         # Calculate the robust normal variate
+         for i, x in enumerate(X_):
+             X_[i] = self._calculate_robust_normal_variate(x)
+
+         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+
+     def _calculate_robust_normal_variate(self, x) -> np.ndarray:
+         # Center on the chosen percentile and scale by the standard deviation
+         # of the values at or below it
+         percentile = np.percentile(x, self.percentile)
+         return (x - percentile) / np.std(x[x <= percentile])
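RNV centers each spectrum on its chosen percentile and scales by the standard deviation of only the values at or below that percentile, which makes it less sensitive to strong peaks than SNV. A quick sketch checking the transformer against a hand calculation (illustrative data; import path assumed as above):

```python
import numpy as np
from chemotools.scatter import RobustNormalVariate

rng = np.random.default_rng(2)
X = rng.normal(1.0, 0.2, (3, 100))  # illustrative spectra

rnv = RobustNormalVariate(percentile=25)
X_rnv = rnv.fit_transform(X)

# Hand calculation for the first spectrum, mirroring
# _calculate_robust_normal_variate:
x = X[0]
p = np.percentile(x, 25)
assert np.allclose(X_rnv[0], (x - p) / np.std(x[x <= p]))
```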
chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py}
@@ -1,22 +1,12 @@
  import numpy as np
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
- from sklearn.utils.validation import check_is_fitted
+ from sklearn.utils.validation import check_is_fitted, validate_data

- from chemotools.utils.check_inputs import check_input

-
- class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+ class StandardNormalVariate(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
      """
      A transformer that calculates the standard normal variate of the input data.

-     Attributes
-     ----------
-     n_features_in_ : int
-         The number of features in the input data.
-
-     _is_fitted : bool
-         Whether the transformer has been fitted to data.
-
      Methods
      -------
      fit(X, y=None)
@@ -25,10 +15,11 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
      transform(X, y=0, copy=True)
          Transform the input data by calculating the standard normal variate.
      """
+
      def fit(self, X: np.ndarray, y=None) -> "StandardNormalVariate":
          """
          Fit the transformer to the input data.
-
+
          Parameters
          ----------
          X : np.ndarray of shape (n_samples, n_features)
@@ -43,14 +34,9 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
              The fitted transformer.
          """
          # Check that X is a 2D array and has only finite values
-         X = check_input(X)
-
-         # Set the number of features
-         self.n_features_in_ = X.shape[1]
-
-         # Set the fitted attribute to True
-         self._is_fitted = True
-
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
          return self

      def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -71,15 +57,24 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
              The transformed data.
          """
          # Check that the estimator is fitted
-         check_is_fitted(self, "_is_fitted")
+         check_is_fitted(self, "n_features_in_")

          # Check that X is a 2D array and has only finite values
-         X = check_input(X)
-         X_ = X.copy()
+         X_ = validate_data(
+             self,
+             X,
+             y="no_validation",
+             ensure_2d=True,
+             copy=True,
+             reset=False,
+             dtype=np.float64,
+         )

          # Check that the number of features is the same as the fitted data
          if X_.shape[1] != self.n_features_in_:
-             raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+             raise ValueError(
+                 f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+             )

          # Calculate the standard normal variate
          for i, x in enumerate(X_):
@@ -88,4 +83,4 @@ class StandardNormalVariate(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
          return X_.reshape(-1, 1) if X_.ndim == 1 else X_

      def _calculate_standard_normal_variate(self, x) -> np.ndarray:
-         return (x - x.mean()) / x.std()
+         return (x - x.mean()) / x.std()
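This hunk shows the release's recurring refactor: the custom `check_input` helper and `_is_fitted` flag give way to scikit-learn's `validate_data` (which sets `n_features_in_` when called with `reset=True`) and `check_is_fitted(self, "n_features_in_")`, and the mixin order is normalized to `TransformerMixin, OneToOneFeatureMixin, BaseEstimator`. The SNV math itself is untouched; a small sanity check of the row-wise formula (illustrative data; import path assumed as above):

```python
import numpy as np
from chemotools.scatter import StandardNormalVariate

X = np.array([[1.0, 2.0, 3.0, 4.0], [10.0, 20.0, 30.0, 40.0]])
X_snv = StandardNormalVariate().fit_transform(X)

# Each row is centered by its own mean and scaled by its own
# (population) standard deviation.
expected = (X - X.mean(axis=1, keepdims=True)) / X.std(axis=1, keepdims=True)
assert np.allclose(X_snv, expected)
```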
chemotools/smooth/__init__.py
@@ -1,4 +1,6 @@
- from .mean_filter import MeanFilter
- from .median_filter import MedianFilter
- from .savitzky_golay_filter import SavitzkyGolayFilter
- from .whittaker_smooth import WhittakerSmooth
+ from ._mean_filter import MeanFilter
+ from ._median_filter import MedianFilter
+ from ._savitzky_golay_filter import SavitzkyGolayFilter
+ from ._whittaker_smooth import WhittakerSmooth
+
+ __all__ = ["MeanFilter", "MedianFilter", "SavitzkyGolayFilter", "WhittakerSmooth"]
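As this hunk illustrates, the module renames across the release (e.g. `mean_filter.py` → `_mean_filter.py`) make the file layout private while each package `__init__` keeps re-exporting the public classes, so imports through the subpackage are unaffected:

```python
# Public import path, unchanged from 0.0.22 to 0.1.6:
from chemotools.smooth import MeanFilter

# Direct module imports would break, since the module is now private:
# from chemotools.smooth.mean_filter import MeanFilter  # removed in 0.1.6
```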
chemotools/smooth/{mean_filter.py → _mean_filter.py}
@@ -1,12 +1,10 @@
  import numpy as np
  from scipy.ndimage import uniform_filter1d
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
- from sklearn.utils.validation import check_is_fitted
+ from sklearn.utils.validation import check_is_fitted, validate_data

- from chemotools.utils.check_inputs import check_input

-
- class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+ class MeanFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
      """
      A transformer that calculates the mean filter of the input data.

@@ -14,19 +12,11 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
      ----------
      window_size : int, optional
          The size of the window to use for the mean filter. Must be odd. Default is 3.
-
+
      mode : str, optional
          The mode to use for the mean filter. Can be "nearest", "constant", "reflect",
          "wrap", "mirror" or "interp". Default is "nearest".

-     Attributes
-     ----------
-     n_features_in_ : int
-         The number of features in the input data.
-
-     _is_fitted : bool
-         Whether the transformer has been fitted to data.
-
      Methods
      -------
      fit(X, y=None)
@@ -35,7 +25,8 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
      transform(X, y=0, copy=True)
          Transform the input data by calculating the mean filter.
      """
-     def __init__(self, window_size: int = 3, mode='nearest') -> None:
+
+     def __init__(self, window_size: int = 3, mode="nearest") -> None:
          self.window_size = window_size
          self.mode = mode

@@ -57,14 +48,9 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
          The fitted transformer.
          """
          # Check that X is a 2D array and has only finite values
-         X = check_input(X)
-
-         # Set the number of features
-         self.n_features_in_ = X.shape[1]
-
-         # Set the fitted attribute to True
-         self._is_fitted = True
-
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
          return self

      def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -85,11 +71,18 @@ class MeanFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
          The transformed data.
          """
          # Check that the estimator is fitted
-         check_is_fitted(self, "_is_fitted")
+         check_is_fitted(self, "n_features_in_")

          # Check that X is a 2D array and has only finite values
-         X = check_input(X)
-         X_ = X.copy()
+         X_ = validate_data(
+             self,
+             X,
+             y="no_validation",
+             ensure_2d=True,
+             copy=True,
+             reset=False,
+             dtype=np.float64,
+         )

          if X_.shape[1] != self.n_features_in_:
              raise ValueError(