chemotools 0.0.27__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. chemotools/augmentation/__init__.py +16 -0
  2. chemotools/augmentation/baseline_shift.py +119 -0
  3. chemotools/augmentation/exponential_noise.py +117 -0
  4. chemotools/augmentation/index_shift.py +120 -0
  5. chemotools/augmentation/normal_noise.py +118 -0
  6. chemotools/augmentation/spectrum_scale.py +120 -0
  7. chemotools/augmentation/uniform_noise.py +124 -0
  8. chemotools/baseline/__init__.py +20 -8
  9. chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
  10. chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
  11. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +22 -30
  12. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
  13. chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
  14. chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
  15. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
  16. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
  17. chemotools/datasets/__init__.py +3 -0
  18. chemotools/datasets/_base.py +85 -15
  19. chemotools/datasets/data/coffee_labels.csv +61 -0
  20. chemotools/datasets/data/coffee_spectra.csv +61 -0
  21. chemotools/derivative/__init__.py +4 -2
  22. chemotools/derivative/{norris_william.py → _norris_william.py} +17 -24
  23. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
  24. chemotools/feature_selection/__init__.py +4 -0
  25. chemotools/{variable_selection/select_features.py → feature_selection/_index_selector.py} +32 -56
  26. chemotools/{variable_selection/range_cut.py → feature_selection/_range_cut.py} +25 -50
  27. chemotools/scale/__init__.py +5 -3
  28. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +20 -27
  29. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
  30. chemotools/scale/{point_scaler.py → _point_scaler.py} +27 -32
  31. chemotools/scatter/__init__.py +13 -4
  32. chemotools/scatter/{extended_multiplicative_scatter_correction.py → _extended_multiplicative_scatter_correction.py} +19 -28
  33. chemotools/scatter/{multiplicative_scatter_correction.py → _multiplicative_scatter_correction.py} +19 -17
  34. chemotools/scatter/{robust_normal_variate.py → _robust_normal_variate.py} +15 -23
  35. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
  36. chemotools/smooth/__init__.py +6 -4
  37. chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
  38. chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
  39. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
  40. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
  41. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -16
  42. chemotools-0.1.6.dist-info/RECORD +51 -0
  43. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
  44. chemotools/utils/check_inputs.py +0 -14
  45. chemotools/variable_selection/__init__.py +0 -2
  46. chemotools-0.0.27.dist-info/RECORD +0 -49
  47. chemotools-0.0.27.dist-info/top_level.txt +0 -2
  48. tests/__init__.py +0 -0
  49. tests/fixtures.py +0 -89
  50. tests/test_datasets.py +0 -30
  51. tests/test_functionality.py +0 -616
  52. tests/test_sklearn_compliance.py +0 -220
  53. {chemotools-0.0.27.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
@@ -0,0 +1,16 @@
1
+ from .baseline_shift import BaselineShift
2
+ from .exponential_noise import ExponentialNoise
3
+ from .normal_noise import NormalNoise
4
+ from .index_shift import IndexShift
5
+ from .spectrum_scale import SpectrumScale
6
+ from .uniform_noise import UniformNoise
7
+
8
+
9
+ __all__ = [
10
+ "BaselineShift",
11
+ "ExponentialNoise",
12
+ "NormalNoise",
13
+ "IndexShift",
14
+ "SpectrumScale",
15
+ "UniformNoise",
16
+ ]
@@ -0,0 +1,119 @@
1
+ from typing import Optional
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
+
7
+
8
class BaselineShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add a constant baseline to each spectrum.

    For every sample (row) a single offset is drawn from a one-sided uniform
    distribution between 0 and ``scale`` and added to all features of that
    sample.

    Parameters
    ----------
    scale : float, default=0.0
        Upper bound of the uniform distribution the baseline offset is drawn
        from. The lower bound is always 0.

    random_state : int, default=None
        Seed passed to ``numpy.random.default_rng`` when the transformer is
        fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by adding a baseline to the spectrum.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "BaselineShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : BaselineShift
            The fitted transformer.
        """
        # Validate X as a 2D float64 array and (re)set the fitted metadata
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Instantiate the random number generator; it is re-created on every
        # call to fit, so refitting restarts the random sequence
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding a baseline to the spectrum.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data. The input array is not modified.

        Raises
        ------
        ValueError
            If the number of features differs from the data seen in ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Validate X as a 2D float64 array; copy=True keeps the caller's data intact
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Add an independently drawn baseline offset to every sample
        for i, x in enumerate(X_):
            X_[i] = self._add_baseline(x)

        # ensure_2d=True guarantees a 2D array, so no reshaping is required
        return X_

    def _add_baseline(self, x) -> np.ndarray:
        """Return ``x`` plus one offset drawn uniformly between 0 and ``scale``."""
        adding_factor = self._rng.uniform(low=0, high=self.scale)
        return np.add(x, adding_factor)
@@ -0,0 +1,117 @@
1
+ from typing import Optional
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
+
7
+
8
class ExponentialNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add exponential noise to the input data.

    An independent value drawn from an exponential distribution with scale
    ``scale`` is added to every element of the input.

    Parameters
    ----------
    scale : float, default=0.0
        The scale of the exponential distribution the noise is drawn from.

    random_state : int, default=None
        Seed passed to ``numpy.random.default_rng`` when the transformer is
        fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by adding random noise.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "ExponentialNoise":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : ExponentialNoise
            The fitted transformer.
        """
        # Validate X as a 2D float64 array and (re)set the fitted metadata
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Instantiate the random number generator; it is re-created on every
        # call to fit, so refitting restarts the random sequence
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding random exponential noise.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data. The input array is not modified.

        Raises
        ------
        ValueError
            If the number of features differs from the data seen in ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Validate X as a 2D float64 array; copy=True keeps the caller's data intact
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Add exponential noise to every sample, one row at a time
        for i, x in enumerate(X_):
            X_[i] = self._add_random_noise(x)

        # ensure_2d=True guarantees a 2D array, so no reshaping is required
        return X_

    def _add_random_noise(self, x) -> np.ndarray:
        """Return ``x`` plus element-wise exponential noise of scale ``scale``."""
        return x + self._rng.exponential(self.scale, size=x.shape)
@@ -0,0 +1,120 @@
1
+ from typing import Optional
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
+
7
+
8
class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Shift each spectrum by a random number of indices.

    For every sample (row) an integer is drawn from a discrete uniform
    distribution between ``-shift`` and ``+shift`` (both inclusive) and the
    spectrum is rolled by that many positions. The shift is circular: values
    moved past one end of the spectrum re-enter at the other end.

    Parameters
    ----------
    shift : int, default=0
        Maximum absolute number of indices to shift by. The actual shift is a
        random integer between -shift and shift.

    random_state : int, default=None
        Seed passed to ``numpy.random.default_rng`` when the transformer is
        fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by shifting the spectrum.
    """

    def __init__(self, shift: int = 0, random_state: Optional[int] = None):
        self.shift = shift
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "IndexShift":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : IndexShift
            The fitted transformer.
        """
        # Validate X as a 2D float64 array and (re)set the fitted metadata
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Instantiate the random number generator; it is re-created on every
        # call to fit, so refitting restarts the random sequence
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by shifting the spectrum.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data. The input array is not modified.

        Raises
        ------
        ValueError
            If the number of features differs from the data seen in ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Validate X as a 2D float64 array; copy=True keeps the caller's data intact
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Roll every sample by its own independently drawn shift
        for i, x in enumerate(X_):
            X_[i] = self._shift_spectrum(x)

        # ensure_2d=True guarantees a 2D array, so no reshaping is required
        return X_

    def _shift_spectrum(self, x) -> np.ndarray:
        """Return ``x`` circularly rolled by a random integer in [-shift, shift]."""
        # endpoint=True makes the upper bound inclusive, so +shift can be drawn
        shift_amount = self._rng.integers(-self.shift, self.shift, endpoint=True)
        return np.roll(x, shift_amount)
@@ -0,0 +1,118 @@
1
+ from typing import Optional
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
+
7
+
8
class NormalNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Add normal noise to the input data.

    An independent value drawn from a normal distribution with mean 0 and
    standard deviation ``scale`` is added to every element of the input.

    Parameters
    ----------
    scale : float, default=0.0
        The standard deviation of the normal distribution the noise is drawn
        from.

    random_state : int, default=None
        Seed passed to ``numpy.random.default_rng`` when the transformer is
        fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by adding random noise.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "NormalNoise":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : NormalNoise
            The fitted transformer.
        """
        # Validate X as a 2D float64 array and (re)set the fitted metadata
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Instantiate the random number generator; it is re-created on every
        # call to fit, so refitting restarts the random sequence
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by adding random normal noise.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data. The input array is not modified.

        Raises
        ------
        ValueError
            If the number of features differs from the data seen in ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Validate X as a 2D float64 array; copy=True keeps the caller's data intact
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Add zero-mean normal noise to every sample, one row at a time
        for i, x in enumerate(X_):
            X_[i] = self._add_random_noise(x)

        # ensure_2d=True guarantees a 2D array, so no reshaping is required
        return X_

    def _add_random_noise(self, x) -> np.ndarray:
        """Return ``x`` plus element-wise normal noise with std ``scale``."""
        return x + self._rng.normal(0, self.scale, size=x.shape)
@@ -0,0 +1,120 @@
1
+ from typing import Optional
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
5
+ from sklearn.utils.validation import check_is_fitted, validate_data
6
+
7
+
8
class SpectrumScale(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """
    Scale each spectrum by a random factor close to 1.0.

    For every sample (row) a single factor is drawn from a uniform
    distribution between ``1 - scale`` and ``1 + scale`` and all features of
    that sample are multiplied by it.

    Parameters
    ----------
    scale : float, default=0.0
        Half-width of the uniform distribution, centered around 1.0, that the
        scaling factor is drawn from.

    random_state : int, default=None
        Seed passed to ``numpy.random.default_rng`` when the transformer is
        fitted.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    _is_fitted : bool
        Whether the transformer has been fitted to data.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data.

    transform(X, y=None)
        Transform the input data by scaling the spectrum.
    """

    def __init__(self, scale: float = 0.0, random_state: Optional[int] = None):
        self.scale = scale
        self.random_state = random_state

    def fit(self, X: np.ndarray, y=None) -> "SpectrumScale":
        """
        Fit the transformer to the input data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : SpectrumScale
            The fitted transformer.
        """
        # Validate X as a 2D float64 array and (re)set the fitted metadata
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Instantiate the random number generator; it is re-created on every
        # call to fit, so refitting restarts the random sequence
        self._rng = np.random.default_rng(self.random_state)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by scaling the spectrum.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored.

        Returns
        -------
        X_ : np.ndarray of shape (n_samples, n_features)
            The transformed data. The input array is not modified.

        Raises
        ------
        ValueError
            If the number of features differs from the data seen in ``fit``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Validate X as a 2D float64 array; copy=True keeps the caller's data intact
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(
                f"Expected {self.n_features_in_} features but got {X_.shape[1]}"
            )

        # Multiply every sample by its own independently drawn scaling factor
        for i, x in enumerate(X_):
            X_[i] = self._scale_spectrum(x)

        # ensure_2d=True guarantees a 2D array, so no reshaping is required
        return X_

    def _scale_spectrum(self, x) -> np.ndarray:
        """Return ``x`` multiplied by one factor drawn around 1.0."""
        scaling_factor = self._rng.uniform(low=1 - self.scale, high=1 + self.scale)
        return np.multiply(x, scaling_factor)