chemotools 0.0.27__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. chemotools/augmentation/__init__.py +16 -0
  2. chemotools/augmentation/baseline_shift.py +119 -0
  3. chemotools/augmentation/exponential_noise.py +117 -0
  4. chemotools/augmentation/index_shift.py +120 -0
  5. chemotools/augmentation/normal_noise.py +118 -0
  6. chemotools/augmentation/spectrum_scale.py +120 -0
  7. chemotools/augmentation/uniform_noise.py +124 -0
  8. chemotools/baseline/__init__.py +20 -8
  9. chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
  10. chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
  11. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +22 -30
  12. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
  13. chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
  14. chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
  15. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
  16. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
  17. chemotools/datasets/__init__.py +3 -0
  18. chemotools/datasets/_base.py +85 -15
  19. chemotools/datasets/data/coffee_labels.csv +61 -0
  20. chemotools/datasets/data/coffee_spectra.csv +61 -0
  21. chemotools/derivative/__init__.py +4 -2
  22. chemotools/derivative/{norris_william.py → _norris_william.py} +17 -24
  23. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
  24. chemotools/feature_selection/__init__.py +4 -0
  25. chemotools/{variable_selection/select_features.py → feature_selection/_index_selector.py} +32 -56
  26. chemotools/{variable_selection/range_cut.py → feature_selection/_range_cut.py} +25 -50
  27. chemotools/scale/__init__.py +5 -3
  28. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +20 -27
  29. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
  30. chemotools/scale/{point_scaler.py → _point_scaler.py} +27 -32
  31. chemotools/scatter/__init__.py +13 -4
  32. chemotools/scatter/{extended_multiplicative_scatter_correction.py → _extended_multiplicative_scatter_correction.py} +19 -28
  33. chemotools/scatter/{multiplicative_scatter_correction.py → _multiplicative_scatter_correction.py} +19 -17
  34. chemotools/scatter/{robust_normal_variate.py → _robust_normal_variate.py} +15 -23
  35. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
  36. chemotools/smooth/__init__.py +6 -4
  37. chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
  38. chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
  39. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
  40. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
  41. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -16
  42. chemotools-0.1.6.dist-info/RECORD +51 -0
  43. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
  44. chemotools/utils/check_inputs.py +0 -14
  45. chemotools/variable_selection/__init__.py +0 -2
  46. chemotools-0.0.27.dist-info/RECORD +0 -49
  47. chemotools-0.0.27.dist-info/top_level.txt +0 -2
  48. tests/__init__.py +0 -0
  49. tests/fixtures.py +0 -89
  50. tests/test_datasets.py +0 -30
  51. tests/test_functionality.py +0 -616
  52. tests/test_sklearn_compliance.py +0 -220
  53. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
@@ -1,2 +1,4 @@
1
- from .norris_william import NorrisWilliams
2
- from .savitzky_golay import SavitzkyGolay
1
+ from ._norris_william import NorrisWilliams
2
+ from ._savitzky_golay import SavitzkyGolay
3
+
4
+ __all__ = ["NorrisWilliams", "SavitzkyGolay"]
@@ -1,12 +1,10 @@
1
1
  import numpy as np
2
2
  from scipy.ndimage import convolve1d
3
3
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
4
- from sklearn.utils.validation import check_is_fitted
4
+ from sklearn.utils.validation import check_is_fitted, validate_data
5
5
 
6
- from chemotools.utils.check_inputs import check_input
7
6
 
8
-
9
- class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
7
+ class NorrisWilliams(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
10
8
  """
11
9
  A transformer that calculates the Norris-Williams derivative of the input data.
12
10
 
@@ -22,17 +20,9 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
22
20
  The order of the derivative to calculate. Can be 1 or 2. Default is 1.
23
21
 
24
22
  mode : str, optional
25
- The mode to use for the derivative calculation. Can be "nearest", "constant",
23
+ The mode to use for the derivative calculation. Can be "nearest", "constant",
26
24
  "reflect", "wrap", "mirror" or "interp". Default is "nearest".
27
25
 
28
- Attributes
29
- ----------
30
- n_features_in_ : int
31
- The number of features in the input data.
32
-
33
- _is_fitted : bool
34
- Whether the transformer has been fitted to data.
35
-
36
26
  Methods
37
27
  -------
38
28
  fit(X, y=None)
@@ -41,6 +31,7 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
41
31
  transform(X, y=None)
42
32
  Transform the input data by calculating the Norris-Williams derivative.
43
33
  """
34
+
44
35
  def __init__(
45
36
  self,
46
37
  window_size: int = 5,
@@ -71,14 +62,9 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
71
62
  The fitted transformer.
72
63
  """
73
64
  # Check that X is a 2D array and has only finite values
74
- X = check_input(X)
75
-
76
- # Set the number of features
77
- self.n_features_in_ = X.shape[1]
78
-
79
- # Set the fitted attribute to True
80
- self._is_fitted = True
81
-
65
+ X = validate_data(
66
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
67
+ )
82
68
  return self
83
69
 
84
70
  def transform(self, X: np.ndarray, y=None):
@@ -99,11 +85,18 @@ class NorrisWilliams(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
99
85
  The transformed data.
100
86
  """
101
87
  # Check that the estimator is fitted
102
- check_is_fitted(self, "_is_fitted")
88
+ check_is_fitted(self, "n_features_in_")
103
89
 
104
90
  # Check that X is a 2D array and has only finite values
105
- X = check_input(X)
106
- X_ = X.copy()
91
+ X_ = validate_data(
92
+ self,
93
+ X,
94
+ y="no_validation",
95
+ ensure_2d=True,
96
+ copy=True,
97
+ reset=False,
98
+ dtype=np.float64,
99
+ )
107
100
 
108
101
  if X_.shape[1] != self.n_features_in_:
109
102
  raise ValueError(
@@ -1,12 +1,12 @@
1
+ from typing import Literal
2
+
1
3
  import numpy as np
2
4
  from scipy.signal import savgol_filter
3
5
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
4
- from sklearn.utils.validation import check_is_fitted
5
-
6
- from chemotools.utils.check_inputs import check_input
6
+ from sklearn.utils.validation import check_is_fitted, validate_data
7
7
 
8
8
 
9
- class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
9
+ class SavitzkyGolay(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
10
10
  """
11
11
  A transformer that calculates the Savitzky-Golay derivative of the input data.
12
12
 
@@ -27,14 +27,6 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
27
27
  The mode to use for the derivative calculation. Can be "nearest", "constant",
28
28
  "reflect", "wrap", "mirror" or "interp". Default is "nearest".
29
29
 
30
- Attributes
31
- ----------
32
- n_features_in_ : int
33
- The number of features in the input data.
34
-
35
- _is_fitted : bool
36
- Whether the transformer has been fitted to data.
37
-
38
30
  Methods
39
31
  -------
40
32
  fit(X, y=None)
@@ -49,7 +41,7 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
49
41
  window_size: int = 3,
50
42
  polynomial_order: int = 1,
51
43
  derivate_order: int = 1,
52
- mode: str = "nearest",
44
+ mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
53
45
  ) -> None:
54
46
  self.window_size = window_size
55
47
  self.polynomial_order = polynomial_order
@@ -74,14 +66,9 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
74
66
  The fitted transformer.
75
67
  """
76
68
  # Check that X is a 2D array and has only finite values
77
- X = check_input(X)
78
-
79
- # Set the number of features
80
- self.n_features_in_ = X.shape[1]
81
-
82
- # Set the fitted attribute to True
83
- self._is_fitted = True
84
-
69
+ X = validate_data(
70
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
71
+ )
85
72
  return self
86
73
 
87
74
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -102,29 +89,32 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
102
89
  The transformed data.
103
90
  """
104
91
  # Check that the estimator is fitted
105
- check_is_fitted(self, "_is_fitted")
92
+ check_is_fitted(self, "n_features_in_")
106
93
 
107
94
  # Check that X is a 2D array and has only finite values
108
- X = check_input(X)
109
- X_ = X.copy()
95
+ X_ = validate_data(
96
+ self,
97
+ X,
98
+ y="no_validation",
99
+ ensure_2d=True,
100
+ copy=True,
101
+ reset=False,
102
+ dtype=np.float64,
103
+ )
110
104
 
111
105
  if X_.shape[1] != self.n_features_in_:
112
106
  raise ValueError(
113
107
  f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
114
108
  )
115
-
116
109
  # Calculate the standard normal variate
117
110
  for i, x in enumerate(X_):
118
- X_[i] = self._calculate_derivative(x)
111
+ X_[i] = savgol_filter(
112
+ x,
113
+ self.window_size,
114
+ self.polynomial_order,
115
+ deriv=self.derivate_order,
116
+ axis=0,
117
+ mode=self.mode,
118
+ )
119
119
 
120
120
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
121
-
122
- def _calculate_derivative(self, x) -> np.ndarray:
123
- return savgol_filter(
124
- x,
125
- self.window_size,
126
- self.polynomial_order,
127
- deriv=self.derivate_order,
128
- axis=0,
129
- mode=self.mode,
130
- )
@@ -0,0 +1,4 @@
1
+ from ._index_selector import IndexSelector
2
+ from ._range_cut import RangeCut
3
+
4
+ __all__ = ["IndexSelector", "RangeCut"]
@@ -1,11 +1,13 @@
1
+ from typing import Optional, Union
2
+
1
3
  import numpy as np
2
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
+ from sklearn.base import BaseEstimator
5
+ from sklearn.feature_selection._base import SelectorMixin
4
6
 
5
- from chemotools.utils.check_inputs import check_input
7
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
8
 
7
9
 
8
- class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
10
+ class IndexSelector(SelectorMixin, BaseEstimator):
9
11
  """
10
12
  A transformer that restricts the spectral data to a specified array of features. This
11
13
  array can be continuous or discontinuous. The array of features is specified by:
@@ -29,12 +31,6 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
29
31
  features_index_ : int
30
32
  The index of the features to select.
31
33
 
32
- n_features_in_ : int
33
- The number of features in the input data.
34
-
35
- _is_fitted : bool
36
- Whether the transformer has been fitted to data.
37
-
38
34
  Methods
39
35
  -------
40
36
  fit(X, y=None)
@@ -46,13 +42,13 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
46
42
 
47
43
  def __init__(
48
44
  self,
49
- features: np.ndarray = None,
50
- wavenumbers: np.ndarray = None,
45
+ features: Optional[np.ndarray] = None,
46
+ wavenumbers: Optional[np.ndarray] = None,
51
47
  ):
52
48
  self.features = features
53
49
  self.wavenumbers = wavenumbers
54
50
 
55
- def fit(self, X: np.ndarray, y=None) -> "SelectFeatures":
51
+ def fit(self, X: np.ndarray, y=None) -> "IndexSelector":
56
52
  """
57
53
  Fit the transformer to the input data.
58
54
 
@@ -66,15 +62,13 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
66
62
 
67
63
  Returns
68
64
  -------
69
- self : SelectFeatures
65
+ self : IndexSelector
70
66
  The fitted transformer.
71
67
  """
72
- # Check that X is a 2D array and has only finite values
73
- X = check_input(X)
74
-
75
- # Set the number of features
76
- self.n_features_in_ = X.shape[1]
77
-
68
+ # validate that X is a 2D array and has only finite values
69
+ X = validate_data(
70
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
71
+ )
78
72
  # Set the fitted attribute to True
79
73
  self._is_fitted = True
80
74
 
@@ -83,55 +77,37 @@ class SelectFeatures(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
83
77
  self.features_index_ = self.features
84
78
  return self
85
79
 
86
- if self.wavenumbers is None:
80
+ elif self.wavenumbers is None:
87
81
  self.features_index_ = self.features
88
82
  return self
89
83
 
90
- self.features_index_ = self._find_indices()
91
-
92
- return self
84
+ else:
85
+ self.features_index_ = self._find_indices(self.features)
86
+ return self
93
87
 
94
- def transform(self, X: np.ndarray, y=None) -> np.ndarray:
88
+ def _get_support_mask(self):
95
89
  """
96
- Transform the input data by cutting it to the specified range.
97
-
98
- Parameters
99
- ----------
100
- X : array-like of shape (n_samples, n_features)
101
- The input data to transform.
102
-
103
- y : None
104
- Ignored.
90
+ Get the boolean mask indicating which features are selected.
105
91
 
106
92
  Returns
107
93
  -------
108
- X_ : np.ndarray of shape (n_samples, n_features)
109
- The transformed data.
94
+ mask : ndarray of shape (n_features_in_,)
95
+ The mask indicating the selected features.
110
96
  """
111
97
  # Check that the estimator is fitted
112
- check_is_fitted(self, "_is_fitted")
113
-
114
- # Check that X is a 2D array and has only finite values
115
- X = check_input(X)
116
- X_ = X.copy()
98
+ check_is_fitted(self)
117
99
 
118
- # Check that the number of features is the same as the fitted data
119
- if X_.shape[1] != self.n_features_in_:
120
- raise ValueError(
121
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
122
- )
123
-
124
- # Select the features
125
- if self.features is None:
126
- return X_
100
+ # Create the mask
101
+ mask = np.zeros(self.n_features_in_, dtype=bool)
102
+ mask[self.features_index_] = True
127
103
 
128
- return X_[:, self.features_index_]
104
+ return mask
129
105
 
130
- def _find_index(self, target: float) -> int:
106
+ def _find_index(self, target: Union[float, int]) -> int:
131
107
  if self.wavenumbers is None:
132
- return target
108
+ return int(target)
133
109
  wavenumbers = np.array(self.wavenumbers)
134
- return np.argmin(np.abs(wavenumbers - target))
110
+ return int(np.argmin(np.abs(wavenumbers - target)))
135
111
 
136
- def _find_indices(self) -> np.ndarray:
137
- return np.array([self._find_index(feature) for feature in self.features])
112
+ def _find_indices(self, features: np.ndarray) -> np.ndarray:
113
+ return np.array([self._find_index(feature) for feature in features])
@@ -1,13 +1,14 @@
1
- import numpy as np
2
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
1
+ from typing import Optional
4
2
 
5
- from chemotools.utils.check_inputs import check_input
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator
5
+ from sklearn.feature_selection._base import SelectorMixin
6
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
7
 
7
8
 
8
- class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
9
+ class RangeCut(SelectorMixin, BaseEstimator):
9
10
  """
10
- A transformer that cuts the input data to a specified range. The range is specified:
11
+ A selector that cuts the input data to a specified range. The range is specified:
11
12
  - by the indices of the start and end of the range,
12
13
  - by the wavenumbers of the start and end of the range. In this case, the wavenumbers
13
14
  must be provided to the transformer when it is initialised. If the wavenumbers
@@ -35,26 +36,20 @@ class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
35
36
  end_index_ : int
36
37
  The index of the end of the range. It is -1 if the wavenumbers are not provided.
37
38
 
38
- n_features_in_ : int
39
- The number of features in the input data.
40
-
41
- _is_fitted : bool
42
- Whether the transformer has been fitted to data.
39
+ wavenumbers_ : array-like
40
+ The cut wavenumbers of the input data.
43
41
 
44
42
  Methods
45
43
  -------
46
44
  fit(X, y=None)
47
45
  Fit the transformer to the input data.
48
-
49
- transform(X, y=0, copy=True)
50
- Transform the input data by cutting it to the specified range.
51
46
  """
52
47
 
53
48
  def __init__(
54
49
  self,
55
50
  start: int = 0,
56
51
  end: int = -1,
57
- wavenumbers: np.ndarray = None,
52
+ wavenumbers: Optional[np.ndarray] = None,
58
53
  ):
59
54
  self.start = start
60
55
  self.end = end
@@ -78,59 +73,39 @@ class RangeCut(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
78
73
  The fitted transformer.
79
74
  """
80
75
  # Check that X is a 2D array and has only finite values
81
- X = check_input(X)
82
-
83
- # Set the number of features
84
- self.n_features_in_ = X.shape[1]
85
-
86
- # Set the fitted attribute to True
87
- self._is_fitted = True
88
-
76
+ X = validate_data(
77
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
78
+ )
89
79
  # Set the start and end indices
90
80
  if self.wavenumbers is None:
91
81
  self.start_index_ = self.start
92
82
  self.end_index_ = self.end
83
+ self.wavenumbers_ = None
93
84
  else:
94
85
  self.start_index_ = self._find_index(self.start)
95
86
  self.end_index_ = self._find_index(self.end)
87
+ self.wavenumbers_ = self.wavenumbers[self.start_index_ : self.end_index_]
96
88
 
97
89
  return self
98
90
 
99
- def transform(self, X: np.ndarray, y=None) -> np.ndarray:
91
+ def _get_support_mask(self):
100
92
  """
101
- Transform the input data by cutting it to the specified range.
102
-
103
- Parameters
104
- ----------
105
- X : array-like of shape (n_samples, n_features)
106
- The input data to transform.
107
-
108
- y : None
109
- Ignored.
93
+ Get the boolean mask indicating which features are selected.
110
94
 
111
95
  Returns
112
96
  -------
113
- X_ : np.ndarray of shape (n_samples, n_features)
114
- The transformed data.
97
+ mask : np.ndarray of shape (n_features,)
98
+ The boolean mask indicating which features are selected.
115
99
  """
116
100
  # Check that the estimator is fitted
117
- check_is_fitted(self, "_is_fitted")
118
-
119
- # Check that X is a 2D array and has only finite values
120
- X = check_input(X)
121
- X_ = X.copy()
101
+ check_is_fitted(self, ["start_index_", "end_index_"])
122
102
 
123
- # Check that the number of features is the same as the fitted data
124
- if X_.shape[1] != self.n_features_in_:
125
- raise ValueError(
126
- f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
127
- )
103
+ # Create the mask
104
+ mask = np.zeros(self.n_features_in_, dtype=bool)
105
+ mask[self.start_index_ : self.end_index_] = True
128
106
 
129
- # Range cut the spectra
130
- return X_[:, self.start_index_ : self.end_index_]
107
+ return mask
131
108
 
132
109
  def _find_index(self, target: float) -> int:
133
- if self.wavenumbers is None:
134
- return target
135
110
  wavenumbers = np.array(self.wavenumbers)
136
- return np.argmin(np.abs(wavenumbers - target))
111
+ return int(np.argmin(np.abs(wavenumbers - target)))
@@ -1,3 +1,5 @@
1
- from .min_max_scaler import MinMaxScaler
2
- from .norm_scaler import NormScaler
3
- from .point_scaler import PointScaler
1
+ from ._min_max_scaler import MinMaxScaler
2
+ from ._norm_scaler import NormScaler
3
+ from ._point_scaler import PointScaler
4
+
5
+ __all__ = ["MinMaxScaler", "NormScaler", "PointScaler"]
@@ -1,30 +1,20 @@
1
1
  import numpy as np
2
2
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
3
+ from sklearn.utils.validation import check_is_fitted, validate_data
4
4
 
5
- from chemotools.utils.check_inputs import check_input
6
5
 
7
-
8
- class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
6
+ class MinMaxScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
7
  """
10
8
  A transformer that scales the input data by subtracting the minimum and dividing by
11
- the difference between the maximum and the minimum. When the use_min parameter is False,
9
+ the difference between the maximum and the minimum. When the use_min parameter is False,
12
10
  the data is scaled by the maximum.
13
11
 
14
12
  Parameters
15
13
  ----------
16
14
  use_min : bool, default=True
17
- The normalization to use. If True, the data is subtracted by the minimum and
15
+ The normalization to use. If True, the data is subtracted by the minimum and
18
16
  scaled by the maximum. If False, the data is scaled by the maximum.
19
17
 
20
- Attributes
21
- ----------
22
- n_features_in_ : int
23
- The number of features in the input data.
24
-
25
- _is_fitted : bool
26
- Whether the transformer has been fitted to data.
27
-
28
18
  Methods
29
19
  -------
30
20
  fit(X, y=None)
@@ -55,14 +45,9 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
55
45
  The fitted transformer.
56
46
  """
57
47
  # Check that X is a 2D array and has only finite values
58
- X = check_input(X)
59
-
60
- # Set the number of features
61
- self.n_features_in_ = X.shape[1]
62
-
63
- # Set the fitted attribute to True
64
- self._is_fitted = True
65
-
48
+ X = validate_data(
49
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
50
+ )
66
51
  return self
67
52
 
68
53
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -83,11 +68,18 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
83
68
  The transformed data.
84
69
  """
85
70
  # Check that the estimator is fitted
86
- check_is_fitted(self, "_is_fitted")
71
+ check_is_fitted(self, "n_features_in_")
87
72
 
88
73
  # Check that X is a 2D array and has only finite values
89
- X = check_input(X)
90
- X_ = X.copy()
74
+ X_ = validate_data(
75
+ self,
76
+ X,
77
+ y="no_validation",
78
+ ensure_2d=True,
79
+ copy=True,
80
+ reset=False,
81
+ dtype=np.float64,
82
+ )
91
83
 
92
84
  # Check that the number of features is the same as the fitted data
93
85
  if X_.shape[1] != self.n_features_in_:
@@ -97,8 +89,9 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
97
89
 
98
90
  # Normalize the data by the maximum value
99
91
  if self.use_min:
100
- X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (np.max(
101
- X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True))
92
+ X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (
93
+ np.max(X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True)
94
+ )
102
95
 
103
96
  else:
104
97
  X_ = X_ / np.max(X_, axis=1, keepdims=True)
@@ -1,26 +1,16 @@
1
1
  import numpy as np
2
2
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
3
+ from sklearn.utils.validation import check_is_fitted, validate_data
4
4
 
5
- from chemotools.utils.check_inputs import check_input
6
5
 
7
-
8
- class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
6
+ class NormScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
7
  """
10
8
  A transformer that scales the input data by the L-norm of the spectrum.
11
9
 
12
10
  Parameters
13
11
  ----------
14
12
  l_norm : int, optional
15
- The L-norm to use. Default is 2.
16
-
17
- Attributes
18
- ----------
19
- n_features_in_ : int
20
- The number of features in the input data.
21
-
22
- _is_fitted : bool
23
- Whether the transformer has been fitted to data.
13
+ The L-norm to use. Default is 2.
24
14
 
25
15
  Methods
26
16
  -------
@@ -30,13 +20,14 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
30
20
  transform(X, y=None)
31
21
  Transform the input data by scaling by the L-norm.
32
22
  """
23
+
33
24
  def __init__(self, l_norm: int = 2):
34
25
  self.l_norm = l_norm
35
26
 
36
27
  def fit(self, X: np.ndarray, y=None) -> "NormScaler":
37
28
  """
38
29
  Fit the transformer to the input data.
39
-
30
+
40
31
  Parameters
41
32
  ----------
42
33
  X : np.ndarray of shape (n_samples, n_features)
@@ -51,14 +42,9 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
51
42
  The fitted transformer.
52
43
  """
53
44
  # Check that X is a 2D array and has only finite values
54
- X = check_input(X)
55
-
56
- # Set the number of features
57
- self.n_features_in_ = X.shape[1]
58
-
59
- # Set the fitted attribute to True
60
- self._is_fitted = True
61
-
45
+ X = validate_data(
46
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
47
+ )
62
48
  return self
63
49
 
64
50
  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -79,11 +65,18 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
79
65
  The transformed data.
80
66
  """
81
67
  # Check that the estimator is fitted
82
- check_is_fitted(self, "_is_fitted")
68
+ check_is_fitted(self, "n_features_in_")
83
69
 
84
70
  # Check that X is a 2D array and has only finite values
85
- X = check_input(X)
86
- X_ = X.copy()
71
+ X_ = validate_data(
72
+ self,
73
+ X,
74
+ y="no_validation",
75
+ ensure_2d=True,
76
+ copy=True,
77
+ reset=False,
78
+ dtype=np.float64,
79
+ )
87
80
 
88
81
  # Check that the number of features is the same as the fitted data
89
82
  if X_.shape[1] != self.n_features_in_: