chemotools 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {chemotools-0.1.5 → chemotools-0.1.7}/PKG-INFO +18 -17
  2. {chemotools-0.1.5 → chemotools-0.1.7}/README.md +2 -0
  3. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/__init__.py +10 -0
  4. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/baseline_shift.py +23 -15
  5. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/exponential_noise.py +24 -15
  6. chemotools-0.1.7/chemotools/augmentation/index_shift.py +199 -0
  7. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/normal_noise.py +24 -14
  8. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/spectrum_scale.py +24 -15
  9. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/augmentation/uniform_noise.py +26 -14
  10. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/__init__.py +13 -1
  11. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/_air_pls.py +16 -14
  12. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/_ar_pls.py +17 -17
  13. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/_constant_baseline_correction.py +19 -16
  14. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/_cubic_spline_correction.py +17 -8
  15. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/_linear_correction.py +18 -10
  16. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/_non_negative.py +14 -8
  17. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/_polynomial_correction.py +19 -11
  18. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/baseline/_subtract_reference.py +17 -9
  19. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/__init__.py +2 -0
  20. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/_base.py +3 -3
  21. chemotools-0.1.7/chemotools/derivative/__init__.py +4 -0
  22. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/derivative/_norris_william.py +14 -8
  23. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/derivative/_savitzky_golay.py +25 -21
  24. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/feature_selection/__init__.py +2 -0
  25. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/feature_selection/_index_selector.py +18 -17
  26. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/feature_selection/_range_cut.py +9 -7
  27. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/scale/__init__.py +2 -0
  28. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/scale/_min_max_scaler.py +14 -8
  29. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/scale/_norm_scaler.py +14 -8
  30. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/scale/_point_scaler.py +18 -10
  31. chemotools-0.1.7/chemotools/scatter/__init__.py +13 -0
  32. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/scatter/_extended_multiplicative_scatter_correction.py +33 -29
  33. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/scatter/_multiplicative_scatter_correction.py +33 -18
  34. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/scatter/_robust_normal_variate.py +14 -8
  35. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/scatter/_standard_normal_variate.py +14 -8
  36. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/smooth/__init__.py +3 -1
  37. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/smooth/_mean_filter.py +14 -8
  38. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/smooth/_median_filter.py +31 -9
  39. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/smooth/_savitzky_golay_filter.py +20 -9
  40. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/smooth/_whittaker_smooth.py +20 -11
  41. chemotools-0.1.7/pyproject.toml +38 -0
  42. chemotools-0.1.5/chemotools/augmentation/index_shift.py +0 -111
  43. chemotools-0.1.5/chemotools/derivative/__init__.py +0 -2
  44. chemotools-0.1.5/chemotools/scatter/__init__.py +0 -4
  45. chemotools-0.1.5/chemotools/utils/check_inputs.py +0 -14
  46. chemotools-0.1.5/chemotools.egg-info/PKG-INFO +0 -104
  47. chemotools-0.1.5/chemotools.egg-info/SOURCES.txt +0 -65
  48. chemotools-0.1.5/chemotools.egg-info/dependency_links.txt +0 -1
  49. chemotools-0.1.5/chemotools.egg-info/requires.txt +0 -6
  50. chemotools-0.1.5/chemotools.egg-info/top_level.txt +0 -2
  51. chemotools-0.1.5/pyproject.toml +0 -6
  52. chemotools-0.1.5/setup.cfg +0 -4
  53. chemotools-0.1.5/setup.py +0 -38
  54. chemotools-0.1.5/tests/__init__.py +0 -0
  55. chemotools-0.1.5/tests/fixtures.py +0 -89
  56. chemotools-0.1.5/tests/test_datasets.py +0 -111
  57. chemotools-0.1.5/tests/test_functionality.py +0 -777
  58. chemotools-0.1.5/tests/test_sklearn_compliance.py +0 -277
  59. {chemotools-0.1.5 → chemotools-0.1.7}/LICENSE +0 -0
  60. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/__init__.py +0 -0
  61. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/data/__init__.py +0 -0
  62. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/data/coffee_labels.csv +0 -0
  63. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/data/coffee_spectra.csv +0 -0
  64. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/data/fermentation_hplc.csv +0 -0
  65. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/data/fermentation_spectra.csv +0 -0
  66. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/data/train_hplc.csv +0 -0
  67. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/datasets/data/train_spectra.csv +0 -0
  68. {chemotools-0.1.5 → chemotools-0.1.7}/chemotools/utils/__init__.py +0 -0
@@ -1,23 +1,22 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: chemotools
3
- Version: 0.1.5
4
- Summary: Package to integrate chemometrics in scikit-learn pipelines
5
- Home-page: https://github.com/paucablop/chemotools
6
- Author: Pau Cabaneros Lopez
7
- Author-email: pau.cabaneros@gmail.com
8
- Project-URL: Bug Tracker, https://github.com/paucablop/chemotools/issues/
9
- Classifier: Programming Language :: Python :: 3
3
+ Version: 0.1.7
4
+ Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
+ License: MIT
6
+ Author: Pau Cabaneros
7
+ Requires-Python: >=3.10,<4.0
10
8
  Classifier: License :: OSI Approved :: MIT License
11
- Classifier: Operating System :: OS Independent
12
- Requires-Python: >=3.9
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Dist: numpy (>=2.0.0,<3.0.0)
15
+ Requires-Dist: pandas (>=2.0.0,<3.0.0)
16
+ Requires-Dist: polars (>=1.17.0,<2.0.0)
17
+ Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
18
+ Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
13
19
  Description-Content-Type: text/markdown
14
- License-File: LICENSE
15
- Requires-Dist: numpy
16
- Requires-Dist: pandas
17
- Requires-Dist: polars
18
- Requires-Dist: pyarrow
19
- Requires-Dist: scipy
20
- Requires-Dist: scikit-learn>=1.4.0
21
20
 
22
21
  ![chemotools](assets/images/logo_pixel.png)
23
22
 
@@ -27,6 +26,8 @@ Requires-Dist: scikit-learn>=1.4.0
27
26
  [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
28
27
  [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
29
28
  [![Downloads](https://static.pepy.tech/badge/chemotools)](https://pepy.tech/project/chemotools)
29
+ [![DOI](https://joss.theoj.org/papers/10.21105/joss.06802/status.svg)](https://doi.org/10.21105/joss.06802)
30
+
30
31
 
31
32
  # __chemotools__
32
33
 
@@ -6,6 +6,8 @@
6
6
  [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
7
7
  [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
8
8
  [![Downloads](https://static.pepy.tech/badge/chemotools)](https://pepy.tech/project/chemotools)
9
+ [![DOI](https://joss.theoj.org/papers/10.21105/joss.06802/status.svg)](https://doi.org/10.21105/joss.06802)
10
+
9
11
 
10
12
  # __chemotools__
11
13
 
@@ -4,3 +4,13 @@ from .normal_noise import NormalNoise
4
4
  from .index_shift import IndexShift
5
5
  from .spectrum_scale import SpectrumScale
6
6
  from .uniform_noise import UniformNoise
7
+
8
+
9
+ __all__ = [
10
+ "BaselineShift",
11
+ "ExponentialNoise",
12
+ "NormalNoise",
13
+ "IndexShift",
14
+ "SpectrumScale",
15
+ "UniformNoise",
16
+ ]
@@ -1,11 +1,11 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
8
+ class BaselineShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  Adds a constant baseline to the data. The baseline is drawn from a one-sided
11
11
  uniform distribution between 0 and 0 + scale.
@@ -17,7 +17,7 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
17
17
 
18
18
  random_state : int, default=None
19
19
  The random state to use for the random number generator.
20
-
20
+
21
21
  Attributes
22
22
  ----------
23
23
  n_features_in_ : int
@@ -25,7 +25,7 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
25
25
 
26
26
  _is_fitted : bool
27
27
  Whether the transformer has been fitted to data.
28
-
28
+
29
29
  Methods
30
30
  -------
31
31
  fit(X, y=None)
@@ -35,15 +35,14 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
35
35
  Transform the input data by adding a baseline to the spectrum.
36
36
  """
37
37
 
38
-
39
- def __init__(self, scale: int = 0.0, random_state: int = None):
38
+ def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
40
39
  self.scale = scale
41
40
  self.random_state = random_state
42
41
 
43
42
  def fit(self, X: np.ndarray, y=None) -> "BaselineShift":
44
43
  """
45
44
  Fit the transformer to the input data.
46
-
45
+
47
46
  Parameters
48
47
  ----------
49
48
  X : np.ndarray of shape (n_samples, n_features)
@@ -58,8 +57,9 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
58
57
  The fitted transformer.
59
58
  """
60
59
  # Check that X is a 2D array and has only finite values
61
- X = check_input(X)
62
-
60
+ X = validate_data(
61
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
62
+ )
63
63
  # Set the number of features
64
64
  self.n_features_in_ = X.shape[1]
65
65
 
@@ -92,12 +92,21 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
92
92
  check_is_fitted(self, "_is_fitted")
93
93
 
94
94
  # Check that X is a 2D array and has only finite values
95
- X = check_input(X)
96
- X_ = X.copy()
95
+ X_ = validate_data(
96
+ self,
97
+ X,
98
+ y="no_validation",
99
+ ensure_2d=True,
100
+ copy=True,
101
+ reset=False,
102
+ dtype=np.float64,
103
+ )
97
104
 
98
105
  # Check that the number of features is the same as the fitted data
99
106
  if X_.shape[1] != self.n_features_in_:
100
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
107
+ raise ValueError(
108
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
109
+ )
101
110
 
102
111
  # Add a random baseline to each spectrum
103
112
  for i, x in enumerate(X_):
@@ -108,4 +117,3 @@ class BaselineShift(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
108
117
  def _add_baseline(self, x) -> np.ndarray:
109
118
  adding_factor = self._rng.uniform(low=0, high=self.scale)
110
119
  return np.add(x, adding_factor)
111
-
@@ -1,11 +1,11 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
8
+ class ExponentialNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  Add exponential noise to the input data.
11
11
 
@@ -16,7 +16,7 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
16
16
 
17
17
  random_state : int, default=None
18
18
  The random state to use for the random number generator.
19
-
19
+
20
20
  Attributes
21
21
  ----------
22
22
  n_features_in_ : int
@@ -24,7 +24,7 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
24
24
 
25
25
  _is_fitted : bool
26
26
  Whether the transformer has been fitted to data.
27
-
27
+
28
28
  Methods
29
29
  -------
30
30
  fit(X, y=None)
@@ -34,15 +34,14 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
34
34
  Transform the input data by adding random noise.
35
35
  """
36
36
 
37
-
38
- def __init__(self, scale: float = 0.0, random_state: int = None):
37
+ def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
39
38
  self.scale = scale
40
39
  self.random_state = random_state
41
40
 
42
41
  def fit(self, X: np.ndarray, y=None) -> "ExponentialNoise":
43
42
  """
44
43
  Fit the transformer to the input data.
45
-
44
+
46
45
  Parameters
47
46
  ----------
48
47
  X : np.ndarray of shape (n_samples, n_features)
@@ -57,8 +56,9 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
57
56
  The fitted transformer.
58
57
  """
59
58
  # Check that X is a 2D array and has only finite values
60
- X = check_input(X)
61
-
59
+ X = validate_data(
60
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
61
+ )
62
62
  # Set the number of features
63
63
  self.n_features_in_ = X.shape[1]
64
64
 
@@ -91,12 +91,21 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
91
91
  check_is_fitted(self, "_is_fitted")
92
92
 
93
93
  # Check that X is a 2D array and has only finite values
94
- X = check_input(X)
95
- X_ = X.copy()
94
+ X_ = validate_data(
95
+ self,
96
+ X,
97
+ y="no_validation",
98
+ ensure_2d=True,
99
+ copy=True,
100
+ reset=False,
101
+ dtype=np.float64,
102
+ )
96
103
 
97
104
  # Check that the number of features is the same as the fitted data
98
105
  if X_.shape[1] != self.n_features_in_:
99
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
106
+ raise ValueError(
107
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
108
+ )
100
109
 
101
110
  # Add exponential noise to each spectrum
102
111
  for i, x in enumerate(X_):
@@ -105,4 +114,4 @@ class ExponentialNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
105
114
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
106
115
 
107
116
  def _add_random_noise(self, x) -> np.ndarray:
108
- return x + self._rng.exponential(self.scale, size=x.shape)
117
+ return x + self._rng.exponential(self.scale, size=x.shape)
@@ -0,0 +1,199 @@
1
+ from typing import Literal, Optional
2
+
3
+ import numpy as np
4
+ from numpy.polynomial import polynomial as poly
5
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
6
+ from sklearn.utils.validation import check_is_fitted, validate_data
7
+
8
+
9
+ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
10
+ """
11
+ Shift the spectrum by a random number of indices drawn from a discrete
12
+ uniform distribution between -shift and +shift (both inclusive).
13
+
14
+ Parameters
15
+ ----------
16
+ shift : int, default=0
17
+ Shifts the data by a random integer between -shift and shift.
18
+
19
+ random_state : int, default=None
20
+ The random state to use for the random number generator.
21
+
22
+ Attributes
23
+ ----------
24
+ n_features_in_ : int
25
+ The number of features in the input data.
26
+
27
+ _is_fitted : bool
28
+ Whether the transformer has been fitted to data.
29
+
30
+ Methods
31
+ -------
32
+ fit(X, y=None)
33
+ Fit the transformer to the input data.
34
+
35
+ transform(X, y=None)
36
+ Transform the input data by shifting the spectrum.
37
+ """
38
+
39
+ def __init__(
40
+ self,
41
+ shift: int = 0,
42
+ fill_method: Literal["constant", "linear", "quadratic"] = "constant",
43
+ random_state: Optional[int] = None,
44
+ ):
45
+ self.shift = shift
46
+ self.fill_method = fill_method
47
+ self.random_state = random_state
48
+
49
+ def fit(self, X: np.ndarray, y=None) -> "IndexShift":
50
+ """
51
+ Fit the transformer to the input data.
52
+
53
+ Parameters
54
+ ----------
55
+ X : np.ndarray of shape (n_samples, n_features)
56
+ The input data to fit the transformer to.
57
+
58
+ y : None
59
+ Ignored.
60
+
61
+ Returns
62
+ -------
63
+ self : IndexShift
64
+ The fitted transformer.
65
+ """
66
+ # Check that X is a 2D array and has only finite values
67
+ X = validate_data(
68
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
69
+ )
70
+
71
+ # Set the number of features
72
+ self.n_features_in_ = X.shape[1]
73
+
74
+ # Set the fitted attribute to True
75
+ self._is_fitted = True
76
+
77
+ # Instantiate the random number generator
78
+ self._rng = np.random.default_rng(self.random_state)
79
+
80
+ return self
81
+
82
+ def transform(self, X: np.ndarray, y=None) -> np.ndarray:
83
+ """
84
+ Transform the input data by shifting the spectrum.
85
+
86
+ Parameters
87
+ ----------
88
+ X : np.ndarray of shape (n_samples, n_features)
89
+ The input data to transform.
90
+
91
+ y : None
92
+ Ignored.
93
+
94
+ Returns
95
+ -------
96
+ X_ : np.ndarray of shape (n_samples, n_features)
97
+ The transformed data.
98
+ """
99
+ # Check that the estimator is fitted
100
+ check_is_fitted(self, "_is_fitted")
101
+
102
+ # Check that X is a 2D array and has only finite values
103
+ X_ = validate_data(
104
+ self,
105
+ X,
106
+ y="no_validation",
107
+ ensure_2d=True,
108
+ copy=True,
109
+ reset=False,
110
+ dtype=np.float64,
111
+ )
112
+
113
+ # Check that the number of features is the same as the fitted data
114
+ if X_.shape[1] != self.n_features_in_:
115
+ raise ValueError(
116
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
117
+ )
118
+
119
+ # Shift each spectrum by a random number of indices
120
+ for i, x in enumerate(X_):
121
+ X_[i] = self._shift_vector(x)
122
+
123
+ return X_.reshape(-1, 1) if X_.ndim == 1 else X_
124
+
125
+ def _shift_spectrum(self, x) -> np.ndarray:
126
+ shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
127
+ return np.roll(x, shift_amount)
128
+
129
+ def _shift_vector(
130
+ self,
131
+ x: np.ndarray,
132
+ ) -> np.ndarray:
133
+ """
134
+ Shift vector with option to fill missing values.
135
+
136
+ Args:
137
+ x: Input numpy array (one spectrum)
138
+ self.shift: Maximum shift; the applied shift is drawn from [-shift, shift]
139
+ self.fill_method: Method to fill the positions vacated by the shift
140
+ 'constant': fill with first/last value
141
+ 'linear': fill using linear regression
142
+ 'quadratic': fill using quadratic regression
143
+
144
+ Returns:
145
+ Shifted numpy array
146
+ """
147
+ shift = self._rng.integers(-self.shift, self.shift, endpoint=True)
148
+
149
+ result = np.roll(x, shift)
150
+
151
+ if self.fill_method == "constant":
152
+ if shift > 0:
153
+ result[:shift] = x[0]
154
+ elif shift < 0:
155
+ result[shift:] = x[-1]
156
+
157
+ elif self.fill_method == "linear":
158
+ if shift > 0:
159
+ x_ = np.arange(5)
160
+ coeffs = poly.polyfit(x_, x[:5], 1)
161
+
162
+ extrapolate_x = np.arange(-shift, 0)
163
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
164
+
165
+ result[:shift] = extrapolated_values
166
+
167
+ elif shift < 0:
168
+ x_ = np.arange(5)
169
+ coeffs = poly.polyfit(x_, x[-5:], 1)
170
+
171
+ extrapolate_x = np.arange(len(x_), len(x_) - shift)
172
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
173
+
174
+ result[shift:] = extrapolated_values
175
+
176
+ elif self.fill_method == "quadratic":
177
+ if shift > 0:
178
+ # Use first 5 values for quadratic regression
179
+ x_ = np.arange(5)
180
+ coeffs = poly.polyfit(x_, x[:5], 2)
181
+
182
+ # Extrapolate to fill shifted region
183
+ extrapolate_x = np.arange(-shift, 0)
184
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
185
+
186
+ result[:shift] = extrapolated_values
187
+
188
+ elif shift < 0:
189
+ # Use last 5 values for quadratic regression
190
+ x_ = np.arange(5)
191
+ coeffs = poly.polyfit(x_, x[-5:], 2)
192
+
193
+ # Extrapolate to fill shifted region
194
+ extrapolate_x = np.arange(len(x_), len(x_) - shift)
195
+ extrapolated_values = poly.polyval(extrapolate_x, coeffs)
196
+
197
+ result[shift:] = extrapolated_values
198
+
199
+ return result
@@ -1,11 +1,11 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
8
+ class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  Add normal noise to the input data.
11
11
 
@@ -16,7 +16,7 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
16
16
 
17
17
  random_state : int, default=None
18
18
  The random state to use for the random number generator.
19
-
19
+
20
20
  Attributes
21
21
  ----------
22
22
  n_features_in_ : int
@@ -24,7 +24,7 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
24
24
 
25
25
  _is_fitted : bool
26
26
  Whether the transformer has been fitted to data.
27
-
27
+
28
28
  Methods
29
29
  -------
30
30
  fit(X, y=None)
@@ -34,15 +34,14 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
34
34
  Transform the input data by adding random noise.
35
35
  """
36
36
 
37
-
38
- def __init__(self, scale: float = 0.0, random_state: int = None):
37
+ def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
39
38
  self.scale = scale
40
39
  self.random_state = random_state
41
40
 
42
41
  def fit(self, X: np.ndarray, y=None) -> "NormalNoise":
43
42
  """
44
43
  Fit the transformer to the input data.
45
-
44
+
46
45
  Parameters
47
46
  ----------
48
47
  X : np.ndarray of shape (n_samples, n_features)
@@ -57,7 +56,9 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
57
56
  The fitted transformer.
58
57
  """
59
58
  # Check that X is a 2D array and has only finite values
60
- X = check_input(X)
59
+ X = validate_data(
60
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
61
+ )
61
62
 
62
63
  # Set the number of features
63
64
  self.n_features_in_ = X.shape[1]
@@ -91,12 +92,21 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
91
92
  check_is_fitted(self, "_is_fitted")
92
93
 
93
94
  # Check that X is a 2D array and has only finite values
94
- X = check_input(X)
95
- X_ = X.copy()
95
+ X_ = validate_data(
96
+ self,
97
+ X,
98
+ y="no_validation",
99
+ ensure_2d=True,
100
+ copy=True,
101
+ reset=False,
102
+ dtype=np.float64,
103
+ )
96
104
 
97
105
  # Check that the number of features is the same as the fitted data
98
106
  if X_.shape[1] != self.n_features_in_:
99
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
107
+ raise ValueError(
108
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
109
+ )
100
110
 
101
111
  # Add normal noise to each spectrum
102
112
  for i, x in enumerate(X_):
@@ -105,4 +115,4 @@ class NormalNoise(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
105
115
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
106
116
 
107
117
  def _add_random_noise(self, x) -> np.ndarray:
108
- return x + self._rng.normal(0, self.scale, size=x.shape)
118
+ return x + self._rng.normal(0, self.scale, size=x.shape)
@@ -1,11 +1,11 @@
1
+ from typing import Optional
2
+
1
3
  import numpy as np
2
4
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
3
- from sklearn.utils.validation import check_is_fitted
4
-
5
- from chemotools.utils.check_inputs import check_input
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
6
 
7
7
 
8
- class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
8
+ class SpectrumScale(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
9
9
  """
10
10
  Scales the data by a value drawn from the uniform distribution centered
11
11
  around 1.0.
@@ -17,7 +17,7 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
17
17
 
18
18
  random_state : int, default=None
19
19
  The random state to use for the random number generator.
20
-
20
+
21
21
  Attributes
22
22
  ----------
23
23
  n_features_in_ : int
@@ -25,7 +25,7 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
25
25
 
26
26
  _is_fitted : bool
27
27
  Whether the transformer has been fitted to data.
28
-
28
+
29
29
  Methods
30
30
  -------
31
31
  fit(X, y=None)
@@ -35,15 +35,14 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
35
35
  Transform the input data by scaling the spectrum.
36
36
  """
37
37
 
38
-
39
- def __init__(self, scale: int = 0.0, random_state: int = None):
38
+ def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
40
39
  self.scale = scale
41
40
  self.random_state = random_state
42
41
 
43
42
  def fit(self, X: np.ndarray, y=None) -> "SpectrumScale":
44
43
  """
45
44
  Fit the transformer to the input data.
46
-
45
+
47
46
  Parameters
48
47
  ----------
49
48
  X : np.ndarray of shape (n_samples, n_features)
@@ -58,7 +57,9 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
58
57
  The fitted transformer.
59
58
  """
60
59
  # Check that X is a 2D array and has only finite values
61
- X = check_input(X)
60
+ X = validate_data(
61
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
62
+ )
62
63
 
63
64
  # Set the number of features
64
65
  self.n_features_in_ = X.shape[1]
@@ -92,12 +93,21 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
92
93
  check_is_fitted(self, "_is_fitted")
93
94
 
94
95
  # Check that X is a 2D array and has only finite values
95
- X = check_input(X)
96
- X_ = X.copy()
96
+ X_ = validate_data(
97
+ self,
98
+ X,
99
+ y="no_validation",
100
+ ensure_2d=True,
101
+ copy=True,
102
+ reset=False,
103
+ dtype=np.float64,
104
+ )
97
105
 
98
106
  # Check that the number of features is the same as the fitted data
99
107
  if X_.shape[1] != self.n_features_in_:
100
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
108
+ raise ValueError(
109
+ f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
110
+ )
101
111
 
102
112
  # Calculate the scaled spectrum
103
113
  for i, x in enumerate(X_):
@@ -106,6 +116,5 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
106
116
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
107
117
 
108
118
  def _scale_spectrum(self, x) -> np.ndarray:
109
- scaling_factor = self._rng.uniform(low=1-self.scale, high=1+self.scale)
119
+ scaling_factor = self._rng.uniform(low=1 - self.scale, high=1 + self.scale)
110
120
  return np.multiply(x, scaling_factor)
111
-