chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +16 -0
- chemotools/augmentation/baseline_shift.py +119 -0
- chemotools/augmentation/exponential_noise.py +117 -0
- chemotools/augmentation/index_shift.py +120 -0
- chemotools/augmentation/normal_noise.py +118 -0
- chemotools/augmentation/spectrum_scale.py +120 -0
- chemotools/augmentation/uniform_noise.py +124 -0
- chemotools/baseline/__init__.py +20 -8
- chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
- chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
- chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
- chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
- chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
- chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
- chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
- chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
- chemotools/datasets/__init__.py +5 -0
- chemotools/datasets/_base.py +122 -0
- chemotools/datasets/data/coffee_labels.csv +61 -0
- chemotools/datasets/data/coffee_spectra.csv +61 -0
- chemotools/datasets/data/fermentation_hplc.csv +35 -0
- chemotools/datasets/data/fermentation_spectra.csv +1630 -0
- chemotools/datasets/data/train_hplc.csv +22 -0
- chemotools/datasets/data/train_spectra.csv +22 -0
- chemotools/derivative/__init__.py +4 -2
- chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
- chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
- chemotools/feature_selection/__init__.py +4 -0
- chemotools/feature_selection/_index_selector.py +113 -0
- chemotools/feature_selection/_range_cut.py +111 -0
- chemotools/scale/__init__.py +5 -3
- chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
- chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
- chemotools/scale/_point_scaler.py +115 -0
- chemotools/scatter/__init__.py +13 -2
- chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
- chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
- chemotools/scatter/_robust_normal_variate.py +101 -0
- chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
- chemotools/smooth/__init__.py +6 -4
- chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
- chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
- chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
- chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
- chemotools-0.1.6.dist-info/RECORD +51 -0
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
- chemotools/scale/index_scaler.py +0 -97
- chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
- chemotools/scatter/multiplicative_scatter_correction.py +0 -123
- chemotools/utils/check_inputs.py +0 -14
- chemotools/variable_selection/__init__.py +0 -1
- chemotools/variable_selection/range_cut.py +0 -121
- chemotools-0.0.22.dist-info/RECORD +0 -39
- chemotools-0.0.22.dist-info/top_level.txt +0 -2
- tests/fixtures.py +0 -89
- tests/test_functionality.py +0 -397
- tests/test_sklearn_compliance.py +0 -192
- {tests → chemotools/datasets/data}/__init__.py +0 -0
- {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
@@ -1,12 +1,12 @@
|
|
1
|
+
from typing import Literal
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from scipy.ndimage import median_filter
|
3
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
5
|
-
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
7
7
|
|
8
8
|
|
9
|
-
class MedianFilter(OneToOneFeatureMixin, BaseEstimator
|
9
|
+
class MedianFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
10
10
|
"""
|
11
11
|
A transformer that calculates the median filter of the input data.
|
12
12
|
|
@@ -19,14 +19,6 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
19
19
|
The mode to use for the median filter. Can be "nearest", "constant", "reflect",
|
20
20
|
"wrap", "mirror" or "interp". Default is "nearest".
|
21
21
|
|
22
|
-
Attributes
|
23
|
-
----------
|
24
|
-
n_features_in_ : int
|
25
|
-
The number of features in the input data.
|
26
|
-
|
27
|
-
_is_fitted : bool
|
28
|
-
Whether the transformer has been fitted to data.
|
29
|
-
|
30
22
|
Methods
|
31
23
|
-------
|
32
24
|
fit(X, y=None)
|
@@ -35,7 +27,21 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
35
27
|
transform(X, y=0, copy=True)
|
36
28
|
Transform the input data by calculating the median filter.
|
37
29
|
"""
|
38
|
-
|
30
|
+
|
31
|
+
def __init__(
|
32
|
+
self,
|
33
|
+
window_size: int = 3,
|
34
|
+
mode: Literal[
|
35
|
+
"reflect",
|
36
|
+
"constant",
|
37
|
+
"nearest",
|
38
|
+
"mirror",
|
39
|
+
"wrap",
|
40
|
+
"grid-constant",
|
41
|
+
"grid-mirror",
|
42
|
+
"grid-wrap",
|
43
|
+
] = "nearest",
|
44
|
+
) -> None:
|
39
45
|
self.window_size = window_size
|
40
46
|
self.mode = mode
|
41
47
|
|
@@ -57,14 +63,9 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
57
63
|
The fitted transformer.
|
58
64
|
"""
|
59
65
|
# Check that X is a 2D array and has only finite values
|
60
|
-
X =
|
61
|
-
|
62
|
-
|
63
|
-
self.n_features_in_ = X.shape[1]
|
64
|
-
|
65
|
-
# Set the fitted attribute to True
|
66
|
-
self._is_fitted = True
|
67
|
-
|
66
|
+
X = validate_data(
|
67
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
68
|
+
)
|
68
69
|
return self
|
69
70
|
|
70
71
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -85,11 +86,18 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
85
86
|
The transformed data.
|
86
87
|
"""
|
87
88
|
# Check that the estimator is fitted
|
88
|
-
check_is_fitted(self, "
|
89
|
+
check_is_fitted(self, "n_features_in_")
|
89
90
|
|
90
91
|
# Check that X is a 2D array and has only finite values
|
91
|
-
|
92
|
-
|
92
|
+
X_ = validate_data(
|
93
|
+
self,
|
94
|
+
X,
|
95
|
+
y="no_validation",
|
96
|
+
ensure_2d=True,
|
97
|
+
copy=True,
|
98
|
+
reset=False,
|
99
|
+
dtype=np.float64,
|
100
|
+
)
|
93
101
|
|
94
102
|
if X_.shape[1] != self.n_features_in_:
|
95
103
|
raise ValueError(
|
@@ -1,12 +1,12 @@
|
|
1
|
+
from typing import Literal
|
2
|
+
|
1
3
|
import numpy as np
|
2
4
|
from scipy.signal import savgol_filter
|
3
5
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
4
|
-
from sklearn.utils.validation import check_is_fitted
|
5
|
-
|
6
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
+
from sklearn.utils.validation import check_is_fitted, validate_data
|
7
7
|
|
8
8
|
|
9
|
-
class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator
|
9
|
+
class SavitzkyGolayFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
10
10
|
"""
|
11
11
|
A transformer that calculates the Savitzky-Golay filter of the input data.
|
12
12
|
|
@@ -24,14 +24,6 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
|
|
24
24
|
The mode to use for the Savitzky-Golay filter. Can be "nearest", "constant",
|
25
25
|
"reflect", "wrap", "mirror" or "interp". Default is "nearest".
|
26
26
|
|
27
|
-
Attributes
|
28
|
-
----------
|
29
|
-
n_features_in_ : int
|
30
|
-
The number of features in the input data.
|
31
|
-
|
32
|
-
_is_fitted : bool
|
33
|
-
Whether the transformer has been fitted to data.
|
34
|
-
|
35
27
|
Methods
|
36
28
|
-------
|
37
29
|
fit(X, y=None)
|
@@ -40,8 +32,12 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
|
|
40
32
|
transform(X, y=0, copy=True)
|
41
33
|
Transform the input data by calculating the Savitzky-Golay filter.
|
42
34
|
"""
|
35
|
+
|
43
36
|
def __init__(
|
44
|
-
self,
|
37
|
+
self,
|
38
|
+
window_size: int = 3,
|
39
|
+
polynomial_order: int = 1,
|
40
|
+
mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
|
45
41
|
) -> None:
|
46
42
|
self.window_size = window_size
|
47
43
|
self.polynomial_order = polynomial_order
|
@@ -65,14 +61,9 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
|
|
65
61
|
The fitted transformer.
|
66
62
|
"""
|
67
63
|
# Check that X is a 2D array and has only finite values
|
68
|
-
X =
|
69
|
-
|
70
|
-
|
71
|
-
self.n_features_in_ = X.shape[1]
|
72
|
-
|
73
|
-
# Set the fitted attribute to True
|
74
|
-
self._is_fitted = True
|
75
|
-
|
64
|
+
X = validate_data(
|
65
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
66
|
+
)
|
76
67
|
return self
|
77
68
|
|
78
69
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -93,11 +84,18 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
|
|
93
84
|
The transformed data.
|
94
85
|
"""
|
95
86
|
# Check that the estimator is fitted
|
96
|
-
check_is_fitted(self, "
|
87
|
+
check_is_fitted(self, "n_features_in_")
|
97
88
|
|
98
89
|
# Check that X is a 2D array and has only finite values
|
99
|
-
|
100
|
-
|
90
|
+
X_ = validate_data(
|
91
|
+
self,
|
92
|
+
X,
|
93
|
+
y="no_validation",
|
94
|
+
ensure_2d=True,
|
95
|
+
copy=True,
|
96
|
+
reset=False,
|
97
|
+
dtype=np.float64,
|
98
|
+
)
|
101
99
|
|
102
100
|
if X_.shape[1] != self.n_features_in_:
|
103
101
|
raise ValueError(
|
@@ -2,17 +2,17 @@ import numpy as np
|
|
2
2
|
from scipy.sparse import csc_matrix, eye, diags
|
3
3
|
from scipy.sparse.linalg import spsolve
|
4
4
|
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
5
|
-
from sklearn.utils.validation import
|
5
|
+
from sklearn.utils.validation import (
|
6
|
+
check_is_fitted,
|
7
|
+
validate_data,
|
8
|
+
) # This code is adapted from the following source:
|
6
9
|
|
7
|
-
|
8
|
-
|
9
|
-
# This code is adapted from the following source:
|
10
|
-
# Z.-M. Zhang, S. Chen, and Y.-Z. Liang,
|
11
|
-
# Baseline correction using adaptive iteratively reweighted penalized least squares.
|
10
|
+
# Z.-M. Zhang, S. Chen, and Y.-Z. Liang,
|
11
|
+
# Baseline correction using adaptive iteratively reweighted penalized least squares.
|
12
12
|
# Analyst 135 (5), 1138-1146 (2010).
|
13
13
|
|
14
14
|
|
15
|
-
class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator
|
15
|
+
class WhittakerSmooth(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
|
16
16
|
"""
|
17
17
|
A transformer that calculates the Whittaker smooth of the input data.
|
18
18
|
|
@@ -24,14 +24,6 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
24
24
|
differences : int, optional
|
25
25
|
The number of differences to use for the Whittaker smooth. Default is 1.
|
26
26
|
|
27
|
-
Attributes
|
28
|
-
----------
|
29
|
-
n_features_in_ : int
|
30
|
-
The number of features in the input data.
|
31
|
-
|
32
|
-
_is_fitted : bool
|
33
|
-
Whether the transformer has been fitted to data.
|
34
|
-
|
35
27
|
Methods
|
36
28
|
-------
|
37
29
|
fit(X, y=None)
|
@@ -40,6 +32,7 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
40
32
|
transform(X, y=0, copy=True)
|
41
33
|
Transform the input data by calculating the Whittaker smooth.
|
42
34
|
"""
|
35
|
+
|
43
36
|
def __init__(
|
44
37
|
self,
|
45
38
|
lam: float = 1e2,
|
@@ -66,14 +59,9 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
66
59
|
The fitted transformer.
|
67
60
|
"""
|
68
61
|
# Check that X is a 2D array and has only finite values
|
69
|
-
X =
|
70
|
-
|
71
|
-
|
72
|
-
self.n_features_in_ = X.shape[1]
|
73
|
-
|
74
|
-
# Set the fitted attribute to True
|
75
|
-
self._is_fitted = True
|
76
|
-
|
62
|
+
X = validate_data(
|
63
|
+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
|
64
|
+
)
|
77
65
|
return self
|
78
66
|
|
79
67
|
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
@@ -94,11 +82,18 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
94
82
|
The transformed data.
|
95
83
|
"""
|
96
84
|
# Check that the estimator is fitted
|
97
|
-
check_is_fitted(self, "
|
85
|
+
check_is_fitted(self, "n_features_in_")
|
98
86
|
|
99
87
|
# Check that X is a 2D array and has only finite values
|
100
|
-
|
101
|
-
|
88
|
+
X_ = validate_data(
|
89
|
+
self,
|
90
|
+
X,
|
91
|
+
y="no_validation",
|
92
|
+
ensure_2d=True,
|
93
|
+
copy=True,
|
94
|
+
reset=False,
|
95
|
+
dtype=np.float64,
|
96
|
+
)
|
102
97
|
|
103
98
|
# Check that the number of features is the same as the fitted data
|
104
99
|
if X_.shape[1] != self.n_features_in_:
|
@@ -113,14 +108,14 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
|
113
108
|
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
114
109
|
|
115
110
|
def _calculate_whittaker_smooth(self, x):
|
116
|
-
X = np.
|
111
|
+
X = np.array(x)
|
117
112
|
m = X.size
|
118
113
|
E = eye(m, format="csc")
|
119
114
|
w = np.ones(m)
|
120
115
|
for i in range(self.differences):
|
121
116
|
E = E[1:] - E[:-1]
|
122
117
|
W = diags(w, 0, shape=(m, m))
|
123
|
-
A = csc_matrix(W + (self.lam * E.T
|
124
|
-
B = csc_matrix(W
|
118
|
+
A = csc_matrix(W + (self.lam * E.T @ E))
|
119
|
+
B = csc_matrix(W @ X.T).toarray().ravel()
|
125
120
|
background = spsolve(A, B)
|
126
121
|
return np.array(background)
|
@@ -1,22 +1,24 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.3
|
2
2
|
Name: chemotools
|
3
|
-
Version: 0.
|
4
|
-
Summary: Package
|
5
|
-
|
6
|
-
Author: Pau Cabaneros
|
7
|
-
|
8
|
-
Project-URL: Bug Tracker, https://github.com/paucablop/chemotools/issues/
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
3
|
+
Version: 0.1.6
|
4
|
+
Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
|
5
|
+
License: MIT
|
6
|
+
Author: Pau Cabaneros
|
7
|
+
Requires-Python: >=3.10,<4.0
|
10
8
|
Classifier: License :: OSI Approved :: MIT License
|
11
|
-
Classifier:
|
12
|
-
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
14
|
+
Requires-Dist: numpy (>=2.0.0,<3.0.0)
|
15
|
+
Requires-Dist: pandas (>=2.0.0,<3.0.0)
|
16
|
+
Requires-Dist: polars (>=1.17.0,<2.0.0)
|
17
|
+
Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
|
18
|
+
Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
|
13
19
|
Description-Content-Type: text/markdown
|
14
|
-
License-File: LICENSE
|
15
|
-
Requires-Dist: numpy
|
16
|
-
Requires-Dist: scipy
|
17
|
-
Requires-Dist: scikit-learn
|
18
20
|
|
19
|
-

|
20
22
|
|
21
23
|
|
22
24
|
[](https://pypi.org/project/chemotools)
|
@@ -24,6 +26,8 @@ Requires-Dist: scikit-learn
|
|
24
26
|
[](https://github.com/paucablop/chemotools/blob/main/LICENSE)
|
25
27
|
[](https://codecov.io/github/paucablop/chemotools)
|
26
28
|
[](https://pepy.tech/project/chemotools)
|
29
|
+
[](https://doi.org/10.21105/joss.06802)
|
30
|
+
|
27
31
|
|
28
32
|
# __chemotools__
|
29
33
|
|
@@ -0,0 +1,51 @@
|
|
1
|
+
chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
chemotools/augmentation/__init__.py,sha256=xIUoWov_aluoW5L3zpVAahyPdkWA5erApW-duzdE_9A,385
|
3
|
+
chemotools/augmentation/baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
|
4
|
+
chemotools/augmentation/exponential_noise.py,sha256=fhZ4zQGGqmW-OiSu388th6IhgXrFj1xOguqKYAgj8Y4,3348
|
5
|
+
chemotools/augmentation/index_shift.py,sha256=DWVfnxCUgm2NNQfASTpqNoMkfhlW1WZT8EoWVsSSF4c,3459
|
6
|
+
chemotools/augmentation/normal_noise.py,sha256=-se2Xv1pAWt9HY7H5yC4XlxRArPKZWGeTy2MdyN4lBE,3318
|
7
|
+
chemotools/augmentation/spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
|
8
|
+
chemotools/augmentation/uniform_noise.py,sha256=8a-AYzEDIkLckL6FK2i8mr_jXnQGcFaKXh_roGCICaQ,3456
|
9
|
+
chemotools/baseline/__init__.py,sha256=VzoblGg8Hx_FkTc_n7a-ZjGvtKP8JE_NwJKWenGFQkM,584
|
10
|
+
chemotools/baseline/_air_pls.py,sha256=eotXuIEsus7Z-c17oLx8UbiwOHM7DzQJ6rruHnwCGPQ,5067
|
11
|
+
chemotools/baseline/_ar_pls.py,sha256=Cl0tN0DGQA8JpnbIge4cBqT7aGQ7yltppYEDI6tWqiM,4385
|
12
|
+
chemotools/baseline/_constant_baseline_correction.py,sha256=2ARXIma3m_He5KJs0t0Bz3m0Hd7CNHDR4Dd4XfjMWgs,3893
|
13
|
+
chemotools/baseline/_cubic_spline_correction.py,sha256=Qr8jLwAM4JIcD-8G6BBU2vLSLyi44iHiIpJrHyZ6qJE,3432
|
14
|
+
chemotools/baseline/_linear_correction.py,sha256=jYUy1q5hlBIhoQr5yPWbqr65pTK8NCVPdJdjVg1SFtg,3258
|
15
|
+
chemotools/baseline/_non_negative.py,sha256=0Huq4fKAzAoX9nr6Fk-Awx5xBqmah4jTcn0TY31FJQc,2741
|
16
|
+
chemotools/baseline/_polynomial_correction.py,sha256=jzoTyj5a9dHBtefTKVer8CVpCwWqV25Ruj7mq7Ra_PI,4005
|
17
|
+
chemotools/baseline/_subtract_reference.py,sha256=B92DAYJmJR5VtWTM7Q6_orvIl2xaadmvbGr1r_ZJALA,3379
|
18
|
+
chemotools/datasets/__init__.py,sha256=WcchczWPH-A22DmYEnz2-u8A6vfVviJ6tOCBB0zaIAU,196
|
19
|
+
chemotools/datasets/_base.py,sha256=g_-R6c9WI5lt_j40FgA_mvEFzFHM9eGW6hj9d1e29P4,4883
|
20
|
+
chemotools/datasets/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
+
chemotools/datasets/data/coffee_labels.csv,sha256=ZXQWQIf8faLHjdnHfRoXfxMR56kq9Q1BGPZBkQyhGlY,487
|
22
|
+
chemotools/datasets/data/coffee_spectra.csv,sha256=VA-sN4u0hC5iALlRxxkj-K87Lz3b3mmUHBJPoDXychI,2206147
|
23
|
+
chemotools/datasets/data/fermentation_hplc.csv,sha256=AMmiFQxwaXrH8aN310-3h1YQDiDrT8JNRv1RDvhEvg4,2140
|
24
|
+
chemotools/datasets/data/fermentation_spectra.csv,sha256=MaaNMQP0lygJgFbEoUX0OUqdA-id8mF5Llvf_vj9tJk,15237508
|
25
|
+
chemotools/datasets/data/train_hplc.csv,sha256=DjtmqiePOWB-F6TsOGFngE1pKyXkb7Xmsi-1CLxsTnE,249
|
26
|
+
chemotools/datasets/data/train_spectra.csv,sha256=iVF19W52NHlbqq8BbLomn8n47kSPT0QxJv7wtQX4yjQ,203244
|
27
|
+
chemotools/derivative/__init__.py,sha256=FkckdzO30jrRWPGpIU3cfnaTtxPtNT5Tb2G9F9PmVTw,134
|
28
|
+
chemotools/derivative/_norris_william.py,sha256=rMY_yntpiB5fbSM1tPph4AaGmF1k-HqJp7o48ijePBs,4958
|
29
|
+
chemotools/derivative/_savitzky_golay.py,sha256=CuCrKoLmrB1YmJ4ihIykgkL3tO3frqkStMogtsVhO3A,3632
|
30
|
+
chemotools/feature_selection/__init__.py,sha256=1_i28hIxijjwhMypTy1w2fLbzXXVkKD5IYzzY8ZSuHw,117
|
31
|
+
chemotools/feature_selection/_index_selector.py,sha256=lNTP2b7P3doWl30KiAr3Xd2HOMxeUmj24MuqoXl4Voc,3556
|
32
|
+
chemotools/feature_selection/_range_cut.py,sha256=lVVVC30ZsK2z9jsDGb_z6l8Ty2I89yM05_dIDbMP73Q,3564
|
33
|
+
chemotools/scale/__init__.py,sha256=eztqcHg-TKE1Rr0N9ArfytHk8teuqVfi4SZi2DS96vc,175
|
34
|
+
chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZHRLqgKQ,3011
|
35
|
+
chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
|
36
|
+
chemotools/scale/_point_scaler.py,sha256=je-vomAk7g3Q7yxmisQK4-3ndKEKI2wDwLrUiNuwzzA,3505
|
37
|
+
chemotools/scatter/__init__.py,sha256=ftyC_MGurzxpWMie8WlFDGh5ylalK2K3aCSN4qUzQAw,459
|
38
|
+
chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=7OpOcvWX1hlMUR18tC29pkSiADLZViDrTh-wro738E4,6560
|
39
|
+
chemotools/scatter/_multiplicative_scatter_correction.py,sha256=nPMPYKHl6-U--GAuQdZL8KVNPlr3V52teUAoJ0iRs3g,5801
|
40
|
+
chemotools/scatter/_robust_normal_variate.py,sha256=nPfcvjHEpwkcSCjdvD86WN9q2wVMCeZ2Z8wMzcBpM3Y,3110
|
41
|
+
chemotools/scatter/_standard_normal_variate.py,sha256=22mJzbbZoXQY-_hHAhGO0vzfYwr3oMqaR6xPjJryHtk,2582
|
42
|
+
chemotools/smooth/__init__.py,sha256=G8JvAoBK9d18-k6XgukqN6dbJP-dsEgeDdbKbZdCIkA,265
|
43
|
+
chemotools/smooth/_mean_filter.py,sha256=KVAqOzYWv-SnDX2HD3zLWSSDNePi2Zy3EV9NwIX2H38,2827
|
44
|
+
chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYNSXaw14,3111
|
45
|
+
chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
|
46
|
+
chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
|
47
|
+
chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
48
|
+
chemotools-0.1.6.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
|
49
|
+
chemotools-0.1.6.dist-info/METADATA,sha256=79TZ--QC_SOHj3ou6bDaRYsJsQoFS0sx2Rfe2BUOrG4,5239
|
50
|
+
chemotools-0.1.6.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
51
|
+
chemotools-0.1.6.dist-info/RECORD,,
|
chemotools/scale/index_scaler.py
DELETED
@@ -1,97 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
-
|
7
|
-
|
8
|
-
class IndexScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
9
|
-
"""
|
10
|
-
A transformer that scales the input data by the value at a given index.
|
11
|
-
|
12
|
-
Parameters
|
13
|
-
----------
|
14
|
-
index : int, optional
|
15
|
-
The index to scale the data by.
|
16
|
-
|
17
|
-
Attributes
|
18
|
-
----------
|
19
|
-
n_features_in_ : int
|
20
|
-
The number of features in the input data.
|
21
|
-
|
22
|
-
_is_fitted : bool
|
23
|
-
Whether the transformer has been fitted to data.
|
24
|
-
|
25
|
-
Methods
|
26
|
-
-------
|
27
|
-
fit(X, y=None)
|
28
|
-
Fit the transformer to the input data.
|
29
|
-
|
30
|
-
transform(X, y=0, copy=True)
|
31
|
-
Transform the input data by scaling by the value at a given index.
|
32
|
-
"""
|
33
|
-
def __init__(self, index: int = 0):
|
34
|
-
self.index = index
|
35
|
-
|
36
|
-
|
37
|
-
def fit(self, X: np.ndarray, y=None) -> "IndexScaler":
|
38
|
-
"""
|
39
|
-
Fit the transformer to the input data.
|
40
|
-
|
41
|
-
Parameters
|
42
|
-
----------
|
43
|
-
X : np.ndarray of shape (n_samples, n_features)
|
44
|
-
The input data to fit the transformer to.
|
45
|
-
|
46
|
-
y : None
|
47
|
-
Ignored.
|
48
|
-
|
49
|
-
Returns
|
50
|
-
-------
|
51
|
-
self : IndexScaler
|
52
|
-
The fitted transformer.
|
53
|
-
"""
|
54
|
-
# Check that X is a 2D array and has only finite values
|
55
|
-
X = check_input(X)
|
56
|
-
|
57
|
-
# Set the number of features
|
58
|
-
self.n_features_in_ = X.shape[1]
|
59
|
-
|
60
|
-
# Set the fitted attribute to True
|
61
|
-
self._is_fitted = True
|
62
|
-
|
63
|
-
return self
|
64
|
-
|
65
|
-
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
66
|
-
"""
|
67
|
-
Transform the input data by scaling by the value at a given index.
|
68
|
-
|
69
|
-
Parameters
|
70
|
-
----------
|
71
|
-
X : np.ndarray of shape (n_samples, n_features)
|
72
|
-
The input data to transform.
|
73
|
-
|
74
|
-
y : None
|
75
|
-
Ignored.
|
76
|
-
|
77
|
-
Returns
|
78
|
-
-------
|
79
|
-
X_ : np.ndarray of shape (n_samples, n_features)
|
80
|
-
The transformed data.
|
81
|
-
"""
|
82
|
-
# Check that the estimator is fitted
|
83
|
-
check_is_fitted(self, "_is_fitted")
|
84
|
-
|
85
|
-
# Check that X is a 2D array and has only finite values
|
86
|
-
X = check_input(X)
|
87
|
-
X_ = X.copy()
|
88
|
-
|
89
|
-
# Check that the number of features is the same as the fitted data
|
90
|
-
if X_.shape[1] != self.n_features_in_:
|
91
|
-
raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
|
92
|
-
|
93
|
-
# Scale the data by index
|
94
|
-
for i, x in enumerate(X_):
|
95
|
-
X_[i] = x / x[self.index]
|
96
|
-
|
97
|
-
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
@@ -1,33 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
from sklearn.base import BaseEstimator, TransformerMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
-
|
7
|
-
class ExtendedMultiplicativeScatterCorrection(BaseEstimator, TransformerMixin):
|
8
|
-
def __init__(self):
|
9
|
-
self.ref_spec = None
|
10
|
-
self.coeffs = None
|
11
|
-
|
12
|
-
def fit(self, X, ref_spec=None):
|
13
|
-
if ref_spec is None:
|
14
|
-
# Use mean spectrum as reference if none provided
|
15
|
-
ref_spec = np.mean(X, axis=0)
|
16
|
-
self.ref_spec = ref_spec
|
17
|
-
|
18
|
-
# Calculate the mean spectrum
|
19
|
-
mean_spec = np.mean(X, axis=0)
|
20
|
-
|
21
|
-
# Fit a linear model to the reference spectrum
|
22
|
-
coeffs = np.polyfit(mean_spec, ref_spec, deg=1)
|
23
|
-
self.coeffs = coeffs
|
24
|
-
|
25
|
-
def transform(self, X):
|
26
|
-
# Divide the spectra by the linear model
|
27
|
-
X_emsc = X / np.polyval(self.coeffs, X.mean(axis=1))
|
28
|
-
return X_emsc
|
29
|
-
|
30
|
-
def fit_transform(self, X, ref_spec=None):
|
31
|
-
self.fit(X, ref_spec=ref_spec)
|
32
|
-
X_emsc = self.transform(X)
|
33
|
-
return X_emsc
|
@@ -1,123 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
|
3
|
-
from sklearn.utils.validation import check_is_fitted
|
4
|
-
|
5
|
-
from chemotools.utils.check_inputs import check_input
|
6
|
-
|
7
|
-
|
8
|
-
class MultiplicativeScatterCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
|
9
|
-
"""Multiplicative scatter correction (MSC) is a preprocessing technique for
|
10
|
-
removing scatter effects from spectra. It is based on fitting a linear
|
11
|
-
regression model to the spectrum using a reference spectrum. The reference
|
12
|
-
spectrum is usually a mean or median spectrum of a set of spectra.
|
13
|
-
|
14
|
-
Parameters
|
15
|
-
----------
|
16
|
-
reference : np.ndarray, optional
|
17
|
-
The reference spectrum to use for the correction. If None, the mean
|
18
|
-
spectrum will be used. The default is None.
|
19
|
-
use_mean : bool, optional
|
20
|
-
Whether to use the mean spectrum as the reference. The default is True.
|
21
|
-
use_median : bool, optional
|
22
|
-
Whether to use the median spectrum as the reference. The default is False.
|
23
|
-
|
24
|
-
Attributes
|
25
|
-
----------
|
26
|
-
reference_ : np.ndarray
|
27
|
-
The reference spectrum used for the correction.
|
28
|
-
n_features_in_ : int
|
29
|
-
The number of features in the training data.
|
30
|
-
|
31
|
-
Raises
|
32
|
-
------
|
33
|
-
ValueError
|
34
|
-
If no reference is provided.
|
35
|
-
|
36
|
-
"""
|
37
|
-
def __init__(
|
38
|
-
self,
|
39
|
-
reference: np.ndarray = None,
|
40
|
-
use_mean: bool = True,
|
41
|
-
use_median: bool = False,
|
42
|
-
):
|
43
|
-
self.reference = reference
|
44
|
-
self.use_mean = use_mean
|
45
|
-
self.use_median = use_median
|
46
|
-
|
47
|
-
def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
|
48
|
-
"""
|
49
|
-
Fit the transformer to the input data. If no reference is provided, the
|
50
|
-
mean or median spectrum will be calculated from the input data.
|
51
|
-
|
52
|
-
Parameters
|
53
|
-
----------
|
54
|
-
X : np.ndarray of shape (n_samples, n_features)
|
55
|
-
The input data to fit the transformer to.
|
56
|
-
|
57
|
-
y : None
|
58
|
-
Ignored.
|
59
|
-
|
60
|
-
Returns
|
61
|
-
-------
|
62
|
-
self : MultiplicativeScatterCorrection
|
63
|
-
The fitted transformer.
|
64
|
-
"""
|
65
|
-
# Check that X is a 2D array and has only finite values
|
66
|
-
X = check_input(X)
|
67
|
-
|
68
|
-
# Set the number of features
|
69
|
-
self.n_features_in_ = X.shape[1]
|
70
|
-
|
71
|
-
# Set the fitted attribute to True
|
72
|
-
self._is_fitted = True
|
73
|
-
|
74
|
-
# Set the reference
|
75
|
-
if self.reference is None and self.use_mean:
|
76
|
-
self.reference_ = X.mean(axis=0)
|
77
|
-
return self
|
78
|
-
|
79
|
-
if self.reference is None and self.use_median:
|
80
|
-
self.reference_ = np.median(X, axis=0)
|
81
|
-
return self
|
82
|
-
|
83
|
-
if self.reference is not None:
|
84
|
-
self.reference_ = self.reference.copy()
|
85
|
-
return self
|
86
|
-
|
87
|
-
raise ValueError("No reference was provided")
|
88
|
-
|
89
|
-
def transform(self, X: np.ndarray, y=None) -> np.ndarray:
|
90
|
-
"""
|
91
|
-
Transform the input data by applying the multiplicative scatter
|
92
|
-
correction.
|
93
|
-
|
94
|
-
Parameters
|
95
|
-
----------
|
96
|
-
X : np.ndarray of shape (n_samples, n_features)
|
97
|
-
The input data to transform.
|
98
|
-
|
99
|
-
y : None
|
100
|
-
Ignored.
|
101
|
-
|
102
|
-
Returns
|
103
|
-
-------
|
104
|
-
X_ : np.ndarray of shape (n_samples, n_features)
|
105
|
-
The transformed data.
|
106
|
-
"""
|
107
|
-
# Check that the estimator is fitted
|
108
|
-
check_is_fitted(self, "_is_fitted")
|
109
|
-
|
110
|
-
# Check that X is a 2D array and has only finite values
|
111
|
-
X = check_input(X)
|
112
|
-
X_ = X.copy()
|
113
|
-
|
114
|
-
# Calculate the multiplicative signal correction
|
115
|
-
ones = np.ones(X.shape[1])
|
116
|
-
for i, x in enumerate(X_):
|
117
|
-
X_[i] = self._calculate_multiplicative_correction(x, ones)
|
118
|
-
return X_.reshape(-1, 1) if X_.ndim == 1 else X_
|
119
|
-
|
120
|
-
def _calculate_multiplicative_correction(self, x, ones) -> np.ndarray:
|
121
|
-
A = np.vstack([self.reference_, ones]).T
|
122
|
-
m, c = np.linalg.lstsq(A, x, rcond=None)[0]
|
123
|
-
return (x - c) / m
|
chemotools/utils/check_inputs.py
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
from sklearn.utils.validation import check_array
|
2
|
-
|
3
|
-
|
4
|
-
def check_input(X, y=None):
|
5
|
-
# Check that X is a 2D array and has only finite values
|
6
|
-
X = check_array(X, ensure_2d=True, force_all_finite=True)
|
7
|
-
|
8
|
-
# Check that y is None or a 1D array of the same length as X
|
9
|
-
if y is not None:
|
10
|
-
y = y.reshape(-1, 1) if y.ndim == 1 else y
|
11
|
-
y = check_array(y, force_all_finite=True)
|
12
|
-
if len(y) != X.shape[0]:
|
13
|
-
raise ValueError("y must have the same number of samples as X")
|
14
|
-
return X
|
@@ -1 +0,0 @@
|
|
1
|
-
from .range_cut import RangeCut
|