chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. chemotools/augmentation/__init__.py +16 -0
  2. chemotools/augmentation/baseline_shift.py +119 -0
  3. chemotools/augmentation/exponential_noise.py +117 -0
  4. chemotools/augmentation/index_shift.py +120 -0
  5. chemotools/augmentation/normal_noise.py +118 -0
  6. chemotools/augmentation/spectrum_scale.py +120 -0
  7. chemotools/augmentation/uniform_noise.py +124 -0
  8. chemotools/baseline/__init__.py +20 -8
  9. chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
  10. chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
  11. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
  12. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
  13. chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
  14. chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
  15. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
  16. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
  17. chemotools/datasets/__init__.py +5 -0
  18. chemotools/datasets/_base.py +122 -0
  19. chemotools/datasets/data/coffee_labels.csv +61 -0
  20. chemotools/datasets/data/coffee_spectra.csv +61 -0
  21. chemotools/datasets/data/fermentation_hplc.csv +35 -0
  22. chemotools/datasets/data/fermentation_spectra.csv +1630 -0
  23. chemotools/datasets/data/train_hplc.csv +22 -0
  24. chemotools/datasets/data/train_spectra.csv +22 -0
  25. chemotools/derivative/__init__.py +4 -2
  26. chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
  27. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
  28. chemotools/feature_selection/__init__.py +4 -0
  29. chemotools/feature_selection/_index_selector.py +113 -0
  30. chemotools/feature_selection/_range_cut.py +111 -0
  31. chemotools/scale/__init__.py +5 -3
  32. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
  33. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
  34. chemotools/scale/_point_scaler.py +115 -0
  35. chemotools/scatter/__init__.py +13 -2
  36. chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
  37. chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
  38. chemotools/scatter/_robust_normal_variate.py +101 -0
  39. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
  40. chemotools/smooth/__init__.py +6 -4
  41. chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
  42. chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
  43. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
  44. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
  45. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
  46. chemotools-0.1.6.dist-info/RECORD +51 -0
  47. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
  48. chemotools/scale/index_scaler.py +0 -97
  49. chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
  50. chemotools/scatter/multiplicative_scatter_correction.py +0 -123
  51. chemotools/utils/check_inputs.py +0 -14
  52. chemotools/variable_selection/__init__.py +0 -1
  53. chemotools/variable_selection/range_cut.py +0 -121
  54. chemotools-0.0.22.dist-info/RECORD +0 -39
  55. chemotools-0.0.22.dist-info/top_level.txt +0 -2
  56. tests/fixtures.py +0 -89
  57. tests/test_functionality.py +0 -397
  58. tests/test_sklearn_compliance.py +0 -192
  59. {tests → chemotools/datasets/data}/__init__.py +0 -0
  60. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
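The renames above follow one pattern: implementation modules become underscore-prefixed (private), each subpackage re-exports its public classes through __init__.py with an explicit __all__, and the variable_selection subpackage is replaced by feature_selection. A hedged before/after sketch of user-facing imports (the old RangeCut path is inferred from the removed variable_selection package; IndexScaler is removed in 0.1.6):

# chemotools 0.0.22 -- these imports no longer work in 0.1.6
# from chemotools.variable_selection import RangeCut
# from chemotools.scale import IndexScaler

# chemotools 0.1.6 -- public classes are re-exported from each subpackage's __init__.py
from chemotools.feature_selection import IndexSelector, RangeCut
from chemotools.scale import MinMaxScaler, NormScaler, PointScaler
from chemotools.scatter import RobustNormalVariate, StandardNormalVariate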
chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py}
@@ -1,12 +1,12 @@
+from typing import Literal
+
 import numpy as np
 from scipy.signal import savgol_filter
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
-
-from chemotools.utils.check_inputs import check_input
+from sklearn.utils.validation import check_is_fitted, validate_data
 
 
-class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class SavitzkyGolay(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that calculates the Savitzky-Golay derivative of the input data.
 
@@ -27,14 +27,6 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         The mode to use for the derivative calculation. Can be "nearest", "constant",
         "reflect", "wrap", "mirror" or "interp". Default is "nearest".
 
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
-
     Methods
     -------
     fit(X, y=None)
@@ -49,7 +41,7 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
         window_size: int = 3,
         polynomial_order: int = 1,
         derivate_order: int = 1,
-        mode: str = "nearest",
+        mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
     ) -> None:
         self.window_size = window_size
         self.polynomial_order = polynomial_order
@@ -74,14 +66,9 @@ class SavitzkyGolay(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -102,29 +89,32 @@
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
         if X_.shape[1] != self.n_features_in_:
             raise ValueError(
                 f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
             )
-
         # Calculate the standard normal variate
         for i, x in enumerate(X_):
-            X_[i] = self._calculate_derivative(x)
+            X_[i] = savgol_filter(
+                x,
+                self.window_size,
+                self.polynomial_order,
+                deriv=self.derivate_order,
+                axis=0,
+                mode=self.mode,
+            )
 
         return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-
-    def _calculate_derivative(self, x) -> np.ndarray:
-        return savgol_filter(
-            x,
-            self.window_size,
-            self.polynomial_order,
-            deriv=self.derivate_order,
-            axis=0,
-            mode=self.mode,
-        )
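The refactor inlines the old _calculate_derivative helper into transform and delegates input checking to scikit-learn's validate_data. A minimal usage sketch (data and parameter values are illustrative, not taken from the package's tests):

import numpy as np
from chemotools.derivative import SavitzkyGolay

X = np.random.default_rng(0).random((5, 100))  # 5 spectra x 100 wavenumbers
sg = SavitzkyGolay(window_size=7, polynomial_order=2, derivate_order=1, mode="nearest")
X_deriv = sg.fit_transform(X)  # validate_data casts to float64 and rejects non-finite values
assert X_deriv.shape == X.shape

Note that the stray comment "# Calculate the standard normal variate" in transform is a leftover from another transformer; the loop actually applies scipy.signal.savgol_filter row by row.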
chemotools/feature_selection/__init__.py (new file)
@@ -0,0 +1,4 @@
+from ._index_selector import IndexSelector
+from ._range_cut import RangeCut
+
+__all__ = ["IndexSelector", "RangeCut"]
chemotools/feature_selection/_index_selector.py (new file)
@@ -0,0 +1,113 @@
+from typing import Optional, Union
+
+import numpy as np
+from sklearn.base import BaseEstimator
+from sklearn.feature_selection._base import SelectorMixin
+
+from sklearn.utils.validation import check_is_fitted, validate_data
+
+
+class IndexSelector(SelectorMixin, BaseEstimator):
+    """
+    A transformer that Selects the spectral data to a specified array of features. This
+    array can be continuous or discontinuous. The array of features is specified by:
+    - by the indices of the wavenumbers to select,
+    - by the wavenumbers to select, the wavenumbers must be provided to the transformer
+      when it is initialised. If the wavenumbers are not provided, the indices will be
+      used instead. The wavenumbers must be provided in ascending order.
+
+    Parameters
+    ----------
+    features : narray-like, optional
+        The index of the features to select. Default is None.
+
+    wavenumbers : array-like, optional
+        The wavenumbers of the input data. If not provided, the indices will be used
+        instead. Default is None. If provided, the wavenumbers must be provided in
+        ascending order.
+
+    Attributes
+    ----------
+    features_index_ : int
+        The index of the features to select.
+
+    Methods
+    -------
+    fit(X, y=None)
+        Fit the transformer to the input data.
+
+    transform(X, y=0, copy=True)
+        Transform the input data by cutting it to the specified range.
+    """
+
+    def __init__(
+        self,
+        features: Optional[np.ndarray] = None,
+        wavenumbers: Optional[np.ndarray] = None,
+    ):
+        self.features = features
+        self.wavenumbers = wavenumbers
+
+    def fit(self, X: np.ndarray, y=None) -> "IndexSelector":
+        """
+        Fit the transformer to the input data.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input data to fit the transformer to.
+
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        self : IndexSelector
+            The fitted transformer.
+        """
+        # validate that X is a 2D array and has only finite values
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
+        # Set the fitted attribute to True
+        self._is_fitted = True
+
+        # Set the start and end indices
+        if self.features is None:
+            self.features_index_ = self.features
+            return self
+
+        elif self.wavenumbers is None:
+            self.features_index_ = self.features
+            return self
+
+        else:
+            self.features_index_ = self._find_indices(self.features)
+            return self
+
+    def _get_support_mask(self):
+        """
+        Get the boolean mask indicating which features are selected.
+
+        Returns
+        -------
+        mask : ndarray of shape (n_features_in_,)
+            The mask indicating the selected features.
+        """
+        # Check that the estimator is fitted
+        check_is_fitted(self)
+
+        # Create the mask
+        mask = np.zeros(self.n_features_in_, dtype=bool)
+        mask[self.features_index_] = True
+
+        return mask
+
+    def _find_index(self, target: Union[float, int]) -> int:
+        if self.wavenumbers is None:
+            return int(target)
+        wavenumbers = np.array(self.wavenumbers)
+        return int(np.argmin(np.abs(wavenumbers - target)))
+
+    def _find_indices(self, features: np.ndarray) -> np.ndarray:
+        return np.array([self._find_index(feature) for feature in features])
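A hedged usage sketch of the new selector. With wavenumbers provided, each entry of features is matched to the nearest wavenumber via np.argmin(np.abs(...)); without them, features are treated as raw column indices (values below are illustrative):

import numpy as np
from chemotools.feature_selection import IndexSelector

wavenumbers = np.linspace(400.0, 4000.0, 100)  # ascending axis, as the docstring requires
X = np.random.default_rng(1).random((3, 100))

selector = IndexSelector(
    features=np.array([450.0, 1650.0, 3200.0]),  # target wavenumbers, not indices
    wavenumbers=wavenumbers,
)
X_selected = selector.fit_transform(X)  # transform() is inherited from SelectorMixin
assert X_selected.shape == (3, 3)

Because IndexSelector subclasses SelectorMixin rather than TransformerMixin, transform, get_support, and inverse_transform all derive from the _get_support_mask shown above.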
chemotools/feature_selection/_range_cut.py (new file)
@@ -0,0 +1,111 @@
+from typing import Optional
+
+import numpy as np
+from sklearn.base import BaseEstimator
+from sklearn.feature_selection._base import SelectorMixin
+from sklearn.utils.validation import check_is_fitted, validate_data
+
+
+class RangeCut(SelectorMixin, BaseEstimator):
+    """
+    A selector that cuts the input data to a specified range. The range is specified:
+    - by the indices of the start and end of the range,
+    - by the wavenumbers of the start and end of the range. In this case, the wavenumbers
+      must be provided to the transformer when it is initialised. If the wavenumbers
+      are not provided, the indices will be used instead. The wavenumbers must be
+      provided in ascending order.
+
+    Parameters
+    ----------
+    start : int, optional
+        The index or wavenumber of the start of the range. Default is 0.
+
+    end : int, optional
+        The index or wavenumber of the end of the range. Default is -1.
+
+    wavenumbers : array-like, optional
+        The wavenumbers of the input data. If not provided, the indices will be used
+        instead. Default is None. If provided, the wavenumbers must be provided in
+        ascending order.
+
+    Attributes
+    ----------
+    start_index_ : int
+        The index of the start of the range. It is 0 if the wavenumbers are not provided.
+
+    end_index_ : int
+        The index of the end of the range. It is -1 if the wavenumbers are not provided.
+
+    wavenuumbers_ : array-like
+        The cut wavenumbers of the input data.
+
+    Methods
+    -------
+    fit(X, y=None)
+        Fit the transformer to the input data.
+    """
+
+    def __init__(
+        self,
+        start: int = 0,
+        end: int = -1,
+        wavenumbers: Optional[np.ndarray] = None,
+    ):
+        self.start = start
+        self.end = end
+        self.wavenumbers = wavenumbers
+
+    def fit(self, X: np.ndarray, y=None) -> "RangeCut":
+        """
+        Fit the transformer to the input data.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input data to fit the transformer to.
+
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        self : RangeCut
+            The fitted transformer.
+        """
+        # Check that X is a 2D array and has only finite values
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
+        # Set the start and end indices
+        if self.wavenumbers is None:
+            self.start_index_ = self.start
+            self.end_index_ = self.end
+            self.wavenumbers_ = None
+        else:
+            self.start_index_ = self._find_index(self.start)
+            self.end_index_ = self._find_index(self.end)
+            self.wavenumbers_ = self.wavenumbers[self.start_index_ : self.end_index_]
+
+        return self
+
+    def _get_support_mask(self):
+        """
+        Get the boolean mask indicating which features are selected.
+
+        Returns
+        -------
+        mask : np.ndarray of shape (n_features,)
+            The boolean mask indicating which features are selected.
+        """
+        # Check that the estimator is fitted
+        check_is_fitted(self, ["start_index_", "end_index_"])
+
+        # Create the mask
+        mask = np.zeros(self.n_features_in_, dtype=bool)
+        mask[self.start_index_ : self.end_index_] = True
+
+        return mask
+
+    def _find_index(self, target: float) -> int:
+        wavenumbers = np.array(self.wavenumbers)
+        return int(np.argmin(np.abs(wavenumbers - target)))
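RangeCut keeps a contiguous band of the spectrum, and the fitted wavenumbers_ attribute stores the retained axis for plotting. A hedged sketch (values illustrative):

import numpy as np
from chemotools.feature_selection import RangeCut

wavenumbers = np.linspace(400.0, 4000.0, 100)
X = np.random.default_rng(2).random((3, 100))

# By wavenumber: start/end are mapped to the nearest indices
rc = RangeCut(start=950.0, end=1550.0, wavenumbers=wavenumbers)
X_cut = rc.fit_transform(X)
assert X_cut.shape[1] == rc.end_index_ - rc.start_index_
axis = rc.wavenumbers_  # the cut wavenumber axis, same length as X_cut.shape[1]

# By index: omit wavenumbers and pass integer positions directly
X_cut2 = RangeCut(start=10, end=50).fit_transform(X)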
chemotools/scale/__init__.py
@@ -1,3 +1,5 @@
-from .index_scaler import IndexScaler
-from .min_max_scaler import MinMaxScaler
-from .norm_scaler import NormScaler
+from ._min_max_scaler import MinMaxScaler
+from ._norm_scaler import NormScaler
+from ._point_scaler import PointScaler
+
+__all__ = ["MinMaxScaler", "NormScaler", "PointScaler"]
chemotools/scale/{min_max_scaler.py → _min_max_scaler.py}
@@ -1,27 +1,19 @@
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data
 
-from chemotools.utils.check_inputs import check_input
 
-
-class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class MinMaxScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
-    A transformer that scales the input data by the maximum value or minimum
-    value in the spectrum.
+    A transformer that scales the input data by subtracting the minimum and dividing by
+    the difference between the maximum and the minimum. When the use_min parameter is False,
+    the data is scaled by the maximum.
 
     Parameters
     ----------
-    norm : str, optional
-        The normalization to use. Can be "max" or "min". Default is "max".
-
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
+    use_min : bool, default=True
+        The normalization to use. If True, the data is subtracted by the minimum and
+        scaled by the maximum. If False, the data is scaled by the maximum.
 
     Methods
     -------
@@ -31,9 +23,9 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     transform(X, y=0, copy=True)
         Transform the input data by scaling by the maximum value.
     """
-    def __init__(self, norm: str = 'max'):
-        self.norm = norm
 
+    def __init__(self, use_min: bool = True):
+        self.use_min = use_min
 
     def fit(self, X: np.ndarray, y=None) -> "MinMaxScaler":
         """
@@ -53,19 +45,14 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
            The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
         """
-        Transform the input data by scaling by the maximum or minimum value.
+        Transform the input data by scaling it.
 
         Parameters
         ----------
@@ -81,22 +68,32 @@ class MinMaxScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
-            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
 
         # Normalize the data by the maximum value
-        for i, x in enumerate(X_):
-            if self.norm == 'max':
-                X_[i] = x / np.max(x)
-
-            if self.norm == 'min':
-                X_[i] = x / np.min(x)
-
-        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+        if self.use_min:
+            X_ = (X_ - np.min(X_, axis=1, keepdims=True)) / (
+                np.max(X_, axis=1, keepdims=True) - np.min(X_, axis=1, keepdims=True)
+            )
+
+        else:
+            X_ = X_ / np.max(X_, axis=1, keepdims=True)
+
+        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
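The behavior change here is worth noting: in 0.0.22, norm='max' divided each spectrum by its maximum and norm='min' by its minimum; in 0.1.6 the default (use_min=True) is a true per-spectrum min-max scaling, and the per-row loop is replaced by vectorized, row-wise NumPy operations. A small worked example (values illustrative):

import numpy as np
from chemotools.scale import MinMaxScaler

X = np.array([[2.0, 4.0, 6.0]])

# use_min=True: (x - min) / (max - min) per spectrum
print(MinMaxScaler(use_min=True).fit_transform(X))   # [[0.  0.5 1. ]]

# use_min=False: x / max per spectrum
print(MinMaxScaler(use_min=False).fit_transform(X))  # [[0.333... 0.666... 1. ]]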
chemotools/scale/{norm_scaler.py → _norm_scaler.py}
@@ -1,26 +1,16 @@
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data
 
-from chemotools.utils.check_inputs import check_input
 
-
-class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+class NormScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
     """
     A transformer that scales the input data by the L-norm of the spectrum.
 
     Parameters
     ----------
     l_norm : int, optional
-        The L-norm to use. Default is 2.
-
-    Attributes
-    ----------
-    n_features_in_ : int
-        The number of features in the input data.
-
-    _is_fitted : bool
-        Whether the transformer has been fitted to data.
+        The L-norm to use. Default is 2.
 
     Methods
     -------
@@ -30,13 +20,14 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
     transform(X, y=0, copy=True)
         Transform the input data by scaling by the L-norm.
     """
+
     def __init__(self, l_norm: int = 2):
         self.l_norm = l_norm
 
     def fit(self, X: np.ndarray, y=None) -> "NormScaler":
         """
         Fit the transformer to the input data.
-
+
         Parameters
         ----------
         X : np.ndarray of shape (n_samples, n_features)
@@ -51,14 +42,9 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
            The fitted transformer.
         """
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-
-        # Set the number of features
-        self.n_features_in_ = X.shape[1]
-
-        # Set the fitted attribute to True
-        self._is_fitted = True
-
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
         return self
 
     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -79,11 +65,18 @@ class NormScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
             The transformed data.
         """
         # Check that the estimator is fitted
-        check_is_fitted(self, "_is_fitted")
+        check_is_fitted(self, "n_features_in_")
 
         # Check that X is a 2D array and has only finite values
-        X = check_input(X)
-        X_ = X.copy()
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
 
         # Check that the number of features is the same as the fitted data
         if X_.shape[1] != self.n_features_in_:
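The NormScaler hunks only touch validation and docstrings; per its docstring the transform still divides each spectrum by its L-norm. A hedged sketch, assuming l_norm=2 means the Euclidean norm (the scaling math is not shown in this diff):

import numpy as np
from chemotools.scale import NormScaler

X = np.array([[3.0, 4.0]])  # L2 norm is 5.0
print(NormScaler(l_norm=2).fit_transform(X))  # expected: [[0.6 0.8]]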
chemotools/scale/_point_scaler.py (new file)
@@ -0,0 +1,115 @@
+from typing import Optional
+
+import numpy as np
+from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+from sklearn.utils.validation import check_is_fitted, validate_data
+
+
+class PointScaler(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
+    """
+    A transformer that scales the input data by the intensity value at a given point.
+    The point can be specified by an index or by a wavenumber.
+
+    Parameters
+    ----------
+    point : int,
+        The point to scale the data by. It can be an index or a wavenumber.
+
+    wavenumber : array-like, optional
+        The wavenumbers of the input data. If not provided, the indices will be used
+        instead. Default is None. If provided, the wavenumbers must be provided in
+        ascending order.
+
+    Attributes
+    ----------
+    point_index_ : int
+        The index of the point to scale the data by. It is 0 if the wavenumbers are not provided.
+
+    Methods
+    -------
+    fit(X, y=None)
+        Fit the transformer to the input data.
+
+    transform(X, y=0, copy=True)
+        Transform the input data by scaling by the value at a given Point.
+    """
+
+    def __init__(self, point: int = 0, wavenumbers: Optional[np.ndarray] = None):
+        self.point = point
+        self.wavenumbers = wavenumbers
+
+    def fit(self, X: np.ndarray, y=None) -> "PointScaler":
+        """
+        Fit the transformer to the input data.
+
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_features)
+            The input data to fit the transformer to.
+
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        self : PointScaler
+            The fitted transformer.
+        """
+        # Check that X is a 2D array and has only finite values
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
+        # Set the point index
+        if self.wavenumbers is None:
+            self.point_index_ = self.point
+        else:
+            self.point_index_ = self._find_index(self.point)
+
+        return self
+
+    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+        """
+        Transform the input data by scaling by the value at a given Point.
+
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_features)
+            The input data to transform.
+
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        X_ : np.ndarray of shape (n_samples, n_features)
+            The transformed data.
+        """
+        # Check that the estimator is fitted
+        check_is_fitted(self, "point_index_")
+
+        # Check that X is a 2D array and has only finite values
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
+
+        # Check that the number of features is the same as the fitted data
+        if X_.shape[1] != self.n_features_in_:
+            raise ValueError(
+                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
+            )
+
+        # Scale the data by Point
+        for i, x in enumerate(X_):
+            X_[i] = x / x[self.point_index_]
+
+        return X_.reshape(-1, 1) if X_.ndim == 1 else X_
+
+    def _find_index(self, target: float) -> int:
+        wavenumbers = np.array(self.wavenumbers)
+        return int(np.argmin(np.abs(wavenumbers - target)))
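PointScaler divides each spectrum by its intensity at one reference position, so that position should not be zero-valued. A hedged sketch (values illustrative):

import numpy as np
from chemotools.scale import PointScaler

wavenumbers = np.linspace(400.0, 4000.0, 100)
X = np.random.default_rng(3).random((3, 100)) + 0.5  # keep intensities away from zero

# Scale by the intensity nearest to 1650 (a wavenumber here; an index if wavenumbers=None)
ps = PointScaler(point=1650, wavenumbers=wavenumbers)
X_scaled = ps.fit_transform(X)
assert np.allclose(X_scaled[:, ps.point_index_], 1.0)  # the reference point becomes 1.0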
chemotools/scatter/__init__.py
@@ -1,2 +1,13 @@
-from .multiplicative_scatter_correction import MultiplicativeScatterCorrection
-from .standard_normal_variate import StandardNormalVariate
+from ._extended_multiplicative_scatter_correction import (
+    ExtendedMultiplicativeScatterCorrection,
+)
+from ._multiplicative_scatter_correction import MultiplicativeScatterCorrection
+from ._robust_normal_variate import RobustNormalVariate
+from ._standard_normal_variate import StandardNormalVariate
+
+__all__ = [
+    "ExtendedMultiplicativeScatterCorrection",
+    "MultiplicativeScatterCorrection",
+    "RobustNormalVariate",
+    "StandardNormalVariate",
+]
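Since every transformer in the release implements the scikit-learn estimator API (fit/transform plus validate_data-based checks), the new classes compose directly into pipelines. A closing sketch, with default constructor arguments assumed for StandardNormalVariate:

import numpy as np
from sklearn.pipeline import make_pipeline
from chemotools.feature_selection import RangeCut
from chemotools.scatter import StandardNormalVariate
from chemotools.derivative import SavitzkyGolay

X = np.random.default_rng(4).random((10, 200))
pipe = make_pipeline(
    RangeCut(start=20, end=180),
    StandardNormalVariate(),
    SavitzkyGolay(window_size=11, polynomial_order=2),
)
X_processed = pipe.fit_transform(X)  # shape (10, 160)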