chemotools 0.0.27__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. chemotools/augmentation/__init__.py +16 -0
  2. chemotools/augmentation/baseline_shift.py +119 -0
  3. chemotools/augmentation/exponential_noise.py +117 -0
  4. chemotools/augmentation/index_shift.py +120 -0
  5. chemotools/augmentation/normal_noise.py +118 -0
  6. chemotools/augmentation/spectrum_scale.py +120 -0
  7. chemotools/augmentation/uniform_noise.py +124 -0
  8. chemotools/baseline/__init__.py +20 -8
  9. chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
  10. chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
  11. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +22 -30
  12. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
  13. chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
  14. chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
  15. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
  16. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
  17. chemotools/datasets/__init__.py +3 -0
  18. chemotools/datasets/_base.py +85 -15
  19. chemotools/datasets/data/coffee_labels.csv +61 -0
  20. chemotools/datasets/data/coffee_spectra.csv +61 -0
  21. chemotools/derivative/__init__.py +4 -2
  22. chemotools/derivative/{norris_william.py → _norris_william.py} +17 -24
  23. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
  24. chemotools/feature_selection/__init__.py +4 -0
  25. chemotools/{variable_selection/select_features.py → feature_selection/_index_selector.py} +32 -56
  26. chemotools/{variable_selection/range_cut.py → feature_selection/_range_cut.py} +25 -50
  27. chemotools/scale/__init__.py +5 -3
  28. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +20 -27
  29. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
  30. chemotools/scale/{point_scaler.py → _point_scaler.py} +27 -32
  31. chemotools/scatter/__init__.py +13 -4
  32. chemotools/scatter/{extended_multiplicative_scatter_correction.py → _extended_multiplicative_scatter_correction.py} +19 -28
  33. chemotools/scatter/{multiplicative_scatter_correction.py → _multiplicative_scatter_correction.py} +19 -17
  34. chemotools/scatter/{robust_normal_variate.py → _robust_normal_variate.py} +15 -23
  35. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
  36. chemotools/smooth/__init__.py +6 -4
  37. chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
  38. chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
  39. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
  40. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
  41. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -16
  42. chemotools-0.1.6.dist-info/RECORD +51 -0
  43. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
  44. chemotools/utils/check_inputs.py +0 -14
  45. chemotools/variable_selection/__init__.py +0 -2
  46. chemotools-0.0.27.dist-info/RECORD +0 -49
  47. chemotools-0.0.27.dist-info/top_level.txt +0 -2
  48. tests/__init__.py +0 -0
  49. tests/fixtures.py +0 -89
  50. tests/test_datasets.py +0 -30
  51. tests/test_functionality.py +0 -616
  52. tests/test_sklearn_compliance.py +0 -220
  53. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
@@ -0,0 +1,124 @@
1
+ from typing import Optional
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
+
7
+
8
+ class UniformNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
+ """
10
+ Add uniform noise to the input data.
11
+
12
+ Parameters
13
+ ----------
14
+ min : float, default=0.0
15
+ The lower bound of the uniform distribution.
16
+
17
+ max : float, default=0.0
18
+ The upper bound of the uniform distribution.
19
+
20
+ random_state : int, default=None
21
+ The random state to use for the random number generator.
22
+
23
+ Attributes
24
+ ----------
25
+ n_features_in_ : int
26
+ The number of features in the input data.
27
+
28
+ _is_fitted : bool
29
+ Whether the transformer has been fitted to data.
30
+
31
+ Methods
32
+ -------
33
+ fit(X, y=None)
34
+ Fit the transformer to the input data.
35
+
36
+ transform(X, y=None)
37
+ Transform the input data by adding random noise.
38
+ """
39
+
40
+ def __init__(
41
+ self, min: float = 0.0, max: float = 0.0, random_state: Optional[int] = None
42
+ ):
43
+ self.min = min
44
+ self.max = max
45
+ self.random_state = random_state
46
+
47
+ def fit(self, X: np.ndarray, y=None) -> "UniformNoise":
48
+ """
49
+ Fit the transformer to the input data.
50
+
51
+ Parameters
52
+ ----------
53
+ X : np.ndarray of shape (n_samples, n_features)
54
+ The input data to fit the transformer to.
55
+
56
+ y : None
57
+ Ignored.
58
+
59
+ Returns
60
+ -------
61
+ self : UniformNoise
62
+ The fitted transformer.
63
+ """
64
+ # Check that X is a 2D array and has only finite values
65
+ X = validate_data(
66
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
67
+ )
68
+
69
+ # Set the number of features
70
+ self.n_features_in_ = X.shape[1]
71
+
72
+ # Set the fitted attribute to True
73
+ self._is_fitted = True
74
+
75
+ # Instantiate the random number generator
76
+ self._rng = np.random.default_rng(self.random_state)
77
+
78
+ return self
79
+
80
+ def transform(self, X: np.ndarray, y=None) -> np.ndarray:
81
+ """
82
+ Transform the input data by adding random uniform noise.
83
+
84
+ Parameters
85
+ ----------
86
+ X : np.ndarray of shape (n_samples, n_features)
87
+ The input data to transform.
88
+
89
+ y : None
90
+ Ignored.
91
+
92
+ Returns
93
+ -------
94
+ X_ : np.ndarray of shape (n_samples, n_features)
95
+ The transformed data.
96
+ """
97
+ # Check that the estimator is fitted
98
+ check_is_fitted(self, "_is_fitted")
99
+
100
+ # Check that X is a 2D array and has only finite values
101
+ X_ = validate_data(
102
+ self,
103
+ X,
104
+ y="no_validation",
105
+ ensure_2d=True,
106
+ copy=True,
107
+ reset=False,
108
+ dtype=np.float64,
109
+ )
110
+
111
+ # Check that the number of features is the same as the fitted data
112
+ if X_.shape[1] != self.n_features_in_:
113
+ raise ValueError(
114
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
115
+ )
116
+
117
+ # Add uniform noise to each sample (row) of X_
118
+ for i, x in enumerate(X_):
119
+ X_[i] = self._add_random_noise(x)
120
+
121
+ return X_.reshape(-1, 1) if X_.ndim == 1 else X_
122
+
123
+ def _add_random_noise(self, x) -> np.ndarray:
124
+ return x + self._rng.uniform(self.min, self.max, size=x.shape)
@@ -1,8 +1,20 @@
1
- from .air_pls import AirPls
2
- from .ar_pls import ArPls
3
- from .constant_baseline_correction import ConstantBaselineCorrection
4
- from .cubic_spline_correction import CubicSplineCorrection
5
- from .linear_correction import LinearCorrection
6
- from .non_negative import NonNegative
7
- from .polynomial_correction import PolynomialCorrection
8
- from .subtract_reference import SubtractReference
1
+ from ._air_pls import AirPls
2
+ from ._ar_pls import ArPls
3
+ from ._constant_baseline_correction import ConstantBaselineCorrection
4
+ from ._cubic_spline_correction import CubicSplineCorrection
5
+ from ._linear_correction import LinearCorrection
6
+ from ._non_negative import NonNegative
7
+ from ._polynomial_correction import PolynomialCorrection
8
+ from ._subtract_reference import SubtractReference
9
+
10
+
11
+ __all__ = [
12
+ "AirPls",
13
+ "ArPls",
14
+ "ConstantBaselineCorrection",
15
+ "CubicSplineCorrection",
16
+ "LinearCorrection",
17
+ "NonNegative",
18
+ "PolynomialCorrection",
19
+ "SubtractReference",
20
+ ]
@@ -3,14 +3,12 @@ import numpy as np
3
3
  from scipy.sparse import csc_matrix, eye, diags
4
4
  from scipy.sparse.linalg import spsolve
5
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
6
- from sklearn.utils.validation import check_is_fitted
7
-
8
- from chemotools.utils.check_inputs import check_input
6
+ from sklearn.utils.validation import check_is_fitted, validate_data
9
7
 
10
8
  logger = logging.getLogger(__name__)
11
9
 
12
10
 
13
- class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
11
+ class AirPls(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
14
12
  """
15
13
  This class implements the AirPLS (Adaptive Iteratively Reweighted Penalized Least Squares) algorithm for baseline
16
14
  correction of spectra data. AirPLS is a common approach for removing the baseline from spectra, which can be useful
@@ -30,14 +28,6 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
30
28
  The number of iterations used to calculate the baseline. Increasing the number of iterations can improve the
31
29
  accuracy of the baseline correction, but also increases the computation time.
32
30
 
33
- Attributes
34
- ----------
35
- n_features_in_ : int
36
- The number of features in the input data.
37
-
38
- _is_fitted : bool
39
- A flag indicating whether the estimator has been fitted to data.
40
-
41
31
  Methods
42
32
  -------
43
33
  fit(X, y=None)
@@ -48,7 +38,7 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
48
38
 
49
39
  _calculate_whittaker_smooth(x, w)
50
40
  Calculate the Whittaker smooth of a given input vector x, with weights w.
51
-
41
+
52
42
  _calculate_air_pls(x)
53
43
  Calculate the AirPLS baseline of a given input vector x.
54
44
 
@@ -84,14 +74,11 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
84
74
  self : AirPls
85
75
  Returns the instance itself.
86
76
  """
87
- # Check that X is a 2D array and has only finite values
88
- X = check_input(X)
89
-
90
- # Set the number of features
91
- self.n_features_in_ = X.shape[1]
92
77
 
93
- # Set the fitted attribute to True
94
- self._is_fitted = True
78
+ # Check that X is a 2D array and has only finite values
79
+ X = validate_data(
80
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
81
+ )
95
82
 
96
83
  return self
97
84
 
@@ -113,17 +100,18 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
113
100
  """
114
101
 
115
102
  # Check that the estimator is fitted
116
- check_is_fitted(self, "_is_fitted")
103
+ check_is_fitted(self, "n_features_in_")
117
104
 
118
105
  # Check that X is a 2D array and has only finite values
119
- X = check_input(X)
120
- X_ = X.copy()
121
-
122
- # Check that the number of features is the same as the fitted data
123
- if X_.shape[1] != self.n_features_in_:
124
- raise ValueError(
125
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
126
- )
106
+ X_ = validate_data(
107
+ self,
108
+ X,
109
+ y="no_validation",
110
+ ensure_2d=True,
111
+ copy=True,
112
+ reset=False,
113
+ dtype=np.float64,
114
+ )
127
115
 
128
116
  # Calculate the air pls smooth
129
117
  for i, x in enumerate(X_):
@@ -132,14 +120,14 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
132
120
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
133
121
 
134
122
  def _calculate_whittaker_smooth(self, x, w):
135
- X = np.matrix(x)
123
+ X = np.array(x)
136
124
  m = X.size
137
125
  E = eye(m, format="csc")
138
126
  for i in range(self.polynomial_order):
139
127
  E = E[1:] - E[:-1]
140
128
  W = diags(w, 0, shape=(m, m))
141
- A = csc_matrix(W + (self.lam * E.T * E))
142
- B = csc_matrix(W * X.T)
129
+ A = csc_matrix(W + (self.lam * E.T @ E))
130
+ B = csc_matrix(W @ X.T).toarray().ravel()
143
131
  background = spsolve(A, B)
144
132
  return np.array(background)
145
133
 
@@ -5,14 +5,12 @@ from scipy.sparse import spdiags, csc_matrix
5
5
  from scipy.sparse.linalg import splu
6
6
 
7
7
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
8
- from sklearn.utils.validation import check_is_fitted
9
-
10
- from chemotools.utils.check_inputs import check_input
8
+ from sklearn.utils.validation import check_is_fitted, validate_data
11
9
 
12
10
  logger = logging.getLogger(__name__)
13
11
 
14
12
 
15
- class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
13
+ class ArPls(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
16
14
  """
17
15
  This class implements the Asymmetrically Reweighted Penalized Least Squares (ArPls) algorithm, a baseline
18
16
  correction method for spectroscopy data. It uses an iterative process
@@ -29,13 +27,6 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
29
27
  nr_iterations : int, optional (default=100)
30
28
  The maximum number of iterations for the weight updating scheme.
31
29
 
32
- Attributes
33
- ----------
34
- n_features_in_ : int
35
- The number of input features.
36
-
37
- _is_fitted : bool
38
- Whether the estimator has been fitted.
39
30
 
40
31
  Methods
41
32
  -------
@@ -53,8 +44,8 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
53
44
 
54
45
  References
55
46
  ----------
56
- - Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo
57
- Baseline correction using asymmetrically reweighted penalized
47
+ - Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo
48
+ Baseline correction using asymmetrically reweighted penalized
58
49
  least squares smoothing
59
50
  """
60
51
 
@@ -86,13 +77,9 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
86
77
  """
87
78
 
88
79
  # Check that X is a 2D array and has only finite values
89
- X = check_input(X)
90
-
91
- # Set the number of features
92
- self.n_features_in_ = X.shape[1]
93
-
94
- # Set the fitted attribute to True
95
- self._is_fitted = True
80
+ X = validate_data(
81
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
82
+ )
96
83
 
97
84
  return self
98
85
 
@@ -114,17 +101,17 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
114
101
  """
115
102
 
116
103
  # Check that the estimator is fitted
117
- check_is_fitted(self, "_is_fitted")
104
+ check_is_fitted(self, "n_features_in_")
118
105
 
119
106
  # Check that X is a 2D array and has only finite values
120
- X = check_input(X)
121
- X_ = X.copy()
122
-
123
- # Check that the number of features is the same as the fitted data
124
- if X_.shape[1] != self.n_features_in_:
125
- raise ValueError(
126
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
127
- )
107
+ X_ = validate_data(
108
+ self,
109
+ X,
110
+ y="no_validation",
111
+ ensure_2d=True,
112
+ copy=True,
113
+ reset=False,
114
+ )
128
115
 
129
116
  # Calculate the ar pls baseline
130
117
  for i, x in enumerate(X_):
@@ -133,9 +120,9 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
133
120
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
134
121
 
135
122
  def _calculate_diff(self, N):
136
- I = sp.eye(N, format="csc")
123
+ identity_matrix = sp.eye(N, format="csc")
137
124
  D2 = sp.diags([1, -2, 1], [0, 1, 2], shape=(N - 2, N), format="csc")
138
- return D2.dot(I).T
125
+ return D2.dot(identity_matrix).T
139
126
 
140
127
  def _calculate_ar_pls(self, x):
141
128
  N = len(x)
@@ -1,11 +1,11 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
8
+ class ConstantBaselineCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  A transformer that corrects a baseline by subtracting a constant value.
11
11
  The constant value is taken by the mean of the features between the start
@@ -30,12 +30,6 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
30
30
  end_index_ : int
31
31
  The index of the end of the range. It is 1 if the wavenumbers are not provided.
32
32
 
33
- n_features_in_ : int
34
- The number of features in the input data.
35
-
36
- _is_fitted : bool
37
- Whether the transformer has been fitted to data.
38
-
39
33
  Methods
40
34
  -------
41
35
  fit(X, y=None)
@@ -46,7 +40,10 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
46
40
  """
47
41
 
48
42
  def __init__(
49
- self, start: int = 0, end: int = 1, wavenumbers: np.ndarray = None,
43
+ self,
44
+ start: int = 0,
45
+ end: int = 1,
46
+ wavenumbers: Optional[np.ndarray] = None,
50
47
  ) -> None:
51
48
  self.start = start
52
49
  self.end = end
@@ -70,13 +67,9 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
70
67
  The fitted transformer.
71
68
  """
72
69
  # Check that X is a 2D array and has only finite values
73
- X = check_input(X)
74
-
75
- # Set the number of features
76
- self.n_features_in_ = X.shape[1]
77
-
78
- # Set the fitted attribute to True
79
- self._is_fitted = True
70
+ X = validate_data(
71
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
72
+ )
80
73
 
81
74
  # Set the start and end indices
82
75
  if self.wavenumbers is None:
@@ -109,17 +102,18 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
109
102
  The transformed input data.
110
103
  """
111
104
  # Check that the estimator is fitted
112
- check_is_fitted(self, "_is_fitted")
105
+ check_is_fitted(self, "n_features_in_")
113
106
 
114
107
  # Check that X is a 2D array and has only finite values
115
- X = check_input(X)
116
- X_ = X.copy()
117
-
118
- # Check that the number of features is the same as the fitted data
119
- if X_.shape[1] != self.n_features_in_:
120
- raise ValueError(
121
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
122
- )
108
+ X_ = validate_data(
109
+ self,
110
+ X,
111
+ y="no_validation",
112
+ ensure_2d=True,
113
+ copy=True,
114
+ reset=False,
115
+ dtype=np.float64,
116
+ )
123
117
 
124
118
  # Base line correct the spectra
125
119
  for i, x in enumerate(X_):
@@ -128,7 +122,5 @@ class ConstantBaselineCorrection(OneToOneFeatureMixin, BaseEstimator, Transforme
128
122
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
129
123
 
130
124
  def _find_index(self, target: float) -> int:
131
- if self.wavenumbers is None:
132
- return target
133
125
  wavenumbers = np.array(self.wavenumbers)
134
- return np.argmin(np.abs(wavenumbers - target))
126
+ return np.argmin(np.abs(wavenumbers - target)).astype(int)
@@ -1,13 +1,14 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from scipy.interpolate import CubicSpline
3
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
4
- from sklearn.utils.validation import check_is_fitted
6
+ from sklearn.utils.validation import check_is_fitted, validate_data
5
7
 
6
- from chemotools.utils.check_inputs import check_input
7
8
 
8
- class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
9
+ class CubicSplineCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
10
  """
10
- A transformer that corrects a baseline by subtracting a cubic spline through the
11
+ A transformer that corrects a baseline by subtracting a cubic spline through the
11
12
  points defined by the indices.
12
13
 
13
14
  Parameters
@@ -32,7 +33,8 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
32
33
  Transform the input data by subtracting the cubic spline baseline.
33
34
 
34
35
  """
35
- def __init__(self, indices: list = None) -> None:
36
+
37
+ def __init__(self, indices: Optional[list] = None) -> None:
36
38
  self.indices = indices
37
39
 
38
40
  def fit(self, X: np.ndarray, y=None) -> "CubicSplineCorrection":
@@ -53,13 +55,9 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
53
55
  The fitted transformer.
54
56
  """
55
57
  # Check that X is a 2D array and has only finite values
56
- X = check_input(X)
57
-
58
- # Set the number of features
59
- self.n_features_in_ = X.shape[1]
60
-
61
- # Set the fitted attribute to True
62
- self._is_fitted = True
58
+ X = validate_data(
59
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
60
+ )
63
61
 
64
62
  if self.indices is None:
65
63
  self.indices_ = [0, len(X[0]) - 1]
@@ -89,15 +87,24 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
89
87
  The transformed data.
90
88
  """
91
89
  # Check that the estimator is fitted
92
- check_is_fitted(self, "_is_fitted")
90
+ check_is_fitted(self, "indices_")
93
91
 
94
92
  # Check that X is a 2D array and has only finite values
95
- X = check_input(X)
96
- X_ = X.copy()
93
+ X_ = validate_data(
94
+ self,
95
+ X,
96
+ y="no_validation",
97
+ ensure_2d=True,
98
+ copy=True,
99
+ reset=False,
100
+ dtype=np.float64,
101
+ )
97
102
 
98
103
  # Check that the number of features is the same as the fitted data
99
104
  if X_.shape[1] != self.n_features_in_:
100
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
105
+ raise ValueError(
106
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
107
+ )
101
108
 
102
109
  # Calculate spline baseline correction
103
110
  for i, x in enumerate(X_):
@@ -106,7 +113,7 @@ class CubicSplineCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixi
106
113
 
107
114
  def _spline_baseline_correct(self, x: np.ndarray) -> np.ndarray:
108
115
  indices = self.indices_
109
- intensity = x[indices]
116
+ intensity = x[indices]
110
117
  spl = CubicSpline(indices, intensity)
111
- baseline = spl(range(len(x)))
112
- return x - baseline
118
+ baseline = spl(range(len(x)))
119
+ return x - baseline
@@ -1,26 +1,13 @@
1
1
  import numpy as np
2
2
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
3
+ from sklearn.utils.validation import check_is_fitted, validate_data
4
4
 
5
- from chemotools.utils.check_inputs import check_input
6
5
 
7
-
8
- class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
6
+ class LinearCorrection(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
7
  """
10
8
  A transformer that corrects a baseline by subtracting a linear baseline through the
11
9
  initial and final points of the spectrum.
12
10
 
13
- Parameters
14
- ----------
15
-
16
- Attributes
17
- ----------
18
- n_features_in_ : int
19
- The number of features in the input data.
20
-
21
- _is_fitted : bool
22
- Whether the transformer has been fitted to data.
23
-
24
11
  Methods
25
12
  -------
26
13
  fit(X, y=None)
@@ -31,7 +18,6 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
31
18
  """
32
19
 
33
20
  def _drift_correct_spectrum(self, x: np.ndarray) -> np.ndarray:
34
-
35
21
  # Can take any array and returns with a linear baseline correction
36
22
  # Find the x values at the edges of the spectrum
37
23
  y1: float = x[0]
@@ -68,13 +54,9 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
68
54
  The fitted transformer.
69
55
  """
70
56
  # Check that X is a 2D array and has only finite values
71
- X = check_input(X)
72
-
73
- # Set the number of features
74
- self.n_features_in_ = X.shape[1]
75
-
76
- # Set the fitted attribute to True
77
- self._is_fitted = True
57
+ X = validate_data(
58
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
59
+ )
78
60
 
79
61
  return self
80
62
 
@@ -99,17 +81,26 @@ class LinearCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
99
81
  The transformed data.
100
82
  """
101
83
  # Check that the estimator is fitted
102
- check_is_fitted(self, "_is_fitted")
84
+ check_is_fitted(self, "n_features_in_")
103
85
 
104
86
  # Check that X is a 2D array and has only finite values
105
- X = check_input(X)
106
- X_ = X.copy()
87
+ X_ = validate_data(
88
+ self,
89
+ X,
90
+ y="no_validation",
91
+ ensure_2d=True,
92
+ copy=True,
93
+ reset=False,
94
+ dtype=np.float64,
95
+ )
107
96
 
108
97
  # Check that the number of features is the same as the fitted data
109
98
  if X_.shape[1] != self.n_features_in_:
110
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
99
+ raise ValueError(
100
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
101
+ )
111
102
 
112
103
  # Apply the linear baseline correction to each spectrum
113
104
  for i, x in enumerate(X_):
114
105
  X_[i, :] = self._drift_correct_spectrum(x)
115
- return X_.reshape(-1, 1) if X_.ndim == 1 else X_
106
+ return X_.reshape(-1, 1) if X_.ndim == 1 else X_
@@ -1,11 +1,9 @@
1
1
  import numpy as np
2
2
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
3
+ from sklearn.utils.validation import check_is_fitted, validate_data
4
4
 
5
- from chemotools.utils.check_inputs import check_input
6
5
 
7
-
8
- class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
6
+ class NonNegative(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
7
  """
10
8
  A transformer that sets all negative values to zero or to abs.
11
9
 
@@ -14,14 +12,6 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
14
12
  mode : str, optional
15
13
  The mode to use for the non-negative values. Can be "zero" or "abs".
16
14
 
17
- Attributes
18
- ----------
19
- n_features_in_ : int
20
- The number of features in the input data.
21
-
22
- _is_fitted : bool
23
- Whether the transformer has been fitted to data.
24
-
25
15
  Methods
26
16
  -------
27
17
  fit(X, y=None)
@@ -52,14 +42,9 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
52
42
  The fitted transformer.
53
43
  """
54
44
  # Check that X is a 2D array and has only finite values
55
- X = check_input(X)
56
-
57
- # Set the number of features
58
- self.n_features_in_ = X.shape[1]
59
-
60
- # Set the fitted attribute to True
61
- self._is_fitted = True
62
-
45
+ X = validate_data(
46
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
47
+ )
63
48
  return self
64
49
 
65
50
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -80,11 +65,18 @@ class NonNegative(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
80
65
  The transformed data.
81
66
  """
82
67
  # Check that the estimator is fitted
83
- check_is_fitted(self, "_is_fitted")
68
+ check_is_fitted(self, "n_features_in_")
84
69
 
85
70
  # Check that X is a 2D array and has only finite values
86
- X = check_input(X)
87
- X_ = X.copy()
71
+ X_ = validate_data(
72
+ self,
73
+ X,
74
+ y="no_validation",
75
+ ensure_2d=True,
76
+ copy=True,
77
+ reset=False,
78
+ dtype=np.float64,
79
+ )
88
80
 
89
81
  # Check that the number of features is the same as the fitted data
90
82
  if X_.shape[1] != self.n_features_in_: