chemotools 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chemotools/augmentation/__init__.py +4 -0
- chemotools/augmentation/_add_noise.py +70 -49
- chemotools/augmentation/_fractional_shift.py +203 -0
- chemotools/augmentation/_gaussian_broadening.py +136 -0
- chemotools/augmentation/_index_shift.py +116 -101
- chemotools/outliers/__init__.py +7 -0
- chemotools/outliers/_base.py +180 -0
- chemotools/outliers/_utils.py +91 -0
- chemotools/outliers/dmodx.py +146 -0
- chemotools/outliers/hotelling_t2.py +155 -0
- chemotools/outliers/leverage.py +150 -0
- chemotools/outliers/q_residuals.py +225 -0
- chemotools/outliers/studentized_residuals.py +197 -0
- {chemotools-0.1.8.dist-info → chemotools-0.1.10.dist-info}/METADATA +1 -1
- {chemotools-0.1.8.dist-info → chemotools-0.1.10.dist-info}/RECORD +17 -7
- {chemotools-0.1.8.dist-info → chemotools-0.1.10.dist-info}/WHEEL +1 -1
- {chemotools-0.1.8.dist-info → chemotools-0.1.10.dist-info}/LICENSE +0 -0
@@ -0,0 +1,197 @@
|
|
1
|
+
from typing import Optional, Union
|
2
|
+
import numpy as np
|
3
|
+
|
4
|
+
from sklearn.cross_decomposition._pls import _PLS
|
5
|
+
from sklearn.pipeline import Pipeline
|
6
|
+
from sklearn.utils.validation import validate_data, check_is_fitted
|
7
|
+
|
8
|
+
|
9
|
+
from ._base import _ModelResidualsBase, ModelTypes
|
10
|
+
from .leverage import calculate_leverage
|
11
|
+
|
12
|
+
|
13
|
+
class StudentizedResiduals(_ModelResidualsBase):
    """
    Detect outliers from the studentized residuals of a _PLS model's predictions.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        A fitted _PLS model or Pipeline ending with such a model

    confidence : float, default=0.95
        Confidence level. ``confidence * 100`` is used as the percentile of the
        studentized residuals seen during ``fit`` that defines the critical value.

    Attributes
    ----------
    model_ : ModelTypes
        The fitted model of type _BasePCA or _PLS (set by the base class)

    preprocessing_ : Optional[Pipeline]
        Preprocessing steps before the model (set by the base class)

    critical_value_ : float
        Threshold computed in ``fit``; samples whose studentized residual is
        strictly greater than this value are labelled as outliers by ``predict``.
    """

    def __init__(self, model: Union[_PLS, Pipeline], confidence=0.95) -> None:
        super().__init__(model, confidence)

    def fit(self, X: np.ndarray, y: Optional[np.ndarray]) -> "StudentizedResiduals":
        """
        Compute the critical value from the studentized residuals of (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target data

        Returns
        -------
        self : StudentizedResiduals
            The fitted detector, with ``critical_value_`` set.

        Raises
        ------
        TypeError
            If ``y`` is None.
        """
        # Validate the input data (reset=True: fit defines the expected features)
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Preprocess the data
        if self.preprocessing_:
            X = self.preprocessing_.transform(X)

        # Calculate the studentized residuals
        y_residuals = self._calculate_y_residuals(X, y)
        studentized_residuals = calculate_studentized_residuals(
            self.model_, X, y_residuals
        )

        # Calculate the critical threshold
        self.critical_value_ = self._calculate_critical_value(studentized_residuals)

        return self

    def predict(self, X: np.ndarray, y: Optional[np.ndarray]) -> np.ndarray:
        """Label samples as outliers based on their studentized residuals.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target data

        Returns
        -------
        ndarray of shape (n_samples,)
            -1 where the studentized residual exceeds ``critical_value_``
            (outlier), 1 otherwise. For multi-target ``y`` the residuals are
            flattened, so the output has one entry per (sample, target) pair.

        Raises
        ------
        TypeError
            If ``y`` is None.
        """
        # Check the estimator has been fitted
        check_is_fitted(self, ["critical_value_"])

        # Validate the input data. reset=False keeps the feature count learned
        # in fit and checks X against it instead of silently overwriting it.
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=False, dtype=np.float64
        )

        # Preprocess the data
        if self.preprocessing_:
            X = self.preprocessing_.transform(X)

        # Calculate the studentized residuals
        y_residuals = self._calculate_y_residuals(X, y)
        studentized_residuals = calculate_studentized_residuals(
            self.model_, X, y_residuals
        )

        # NOTE(review): the comparison is one-sided on the signed residuals, so
        # large negative residuals are never flagged -- confirm this is intended.
        return np.where(studentized_residuals > self.critical_value_, -1, 1)

    def predict_residuals(
        self, X: np.ndarray, y: Optional[np.ndarray], validate: bool = True
    ) -> np.ndarray:
        """Calculate the studentized residuals of the model predictions.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values

        validate : bool, default=True
            Whether to validate ``X`` before computing the residuals.

        Returns
        -------
        ndarray of shape (n_samples,)
            Studentized residuals of the model predictions (flattened when
            ``y`` has several targets).

        Raises
        ------
        TypeError
            If ``y`` is None.
        """
        # Check the estimator has been fitted
        check_is_fitted(self, ["critical_value_"])

        # Validate the input data without resetting what was learned in fit
        if validate:
            X = validate_data(
                self, X, ensure_2d=True, reset=False, dtype=np.float64
            )

        # Apply preprocessing if available
        if self.preprocessing_:
            X = self.preprocessing_.transform(X)

        y_residuals = self._calculate_y_residuals(X, y)

        return calculate_studentized_residuals(self.model_, X, y_residuals)

    def _calculate_y_residuals(
        self, X: np.ndarray, y: Optional[np.ndarray]
    ) -> np.ndarray:
        """Return ``y - model_.predict(X)`` as a 2-D column-oriented array.

        Both operands are promoted to 2-D *before* subtracting. Subtracting a
        1-D array of shape (n,) from an (n, 1) array (or vice versa) would
        otherwise broadcast to an (n, n) matrix instead of n residuals.

        Parameters
        ----------
        X : ndarray
            Already validated and preprocessed input data.

        y : array-like
            Target values.

        Returns
        -------
        ndarray of shape (n_samples, n_targets)
            Residuals of the model predictions.

        Raises
        ------
        TypeError
            If ``y`` is None.
        """
        if y is None:
            raise TypeError(
                "y must be provided to calculate studentized residuals, got None."
            )

        y_true = np.asarray(y)
        y_pred = np.asarray(self.model_.predict(X))

        if y_true.ndim == 1:
            y_true = y_true.reshape(-1, 1)
        if y_pred.ndim == 1:
            y_pred = y_pred.reshape(-1, 1)

        return y_true - y_pred

    def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
        """Calculate the critical value for outlier detection.

        Parameters
        ----------
        X : array-like of shape (n_samples,)
            Studentized residuals

        Returns
        -------
        float
            The ``confidence * 100`` percentile of ``X``, or 0.0 when ``X`` is
            None.
        """

        return np.percentile(X, self.confidence * 100) if X is not None else 0.0
|
167
|
+
|
168
|
+
|
169
|
+
def calculate_studentized_residuals(
    model: ModelTypes, X: np.ndarray, y_residuals: np.ndarray
) -> np.ndarray:
    """Scale prediction residuals by their standard deviation and leverage.

    Each residual is divided by ``std * sqrt(1 - leverage)``, where ``std`` is
    computed per target column with ``n_samples - model.n_components`` in the
    denominator and ``leverage`` comes from ``calculate_leverage(model, X)``.

    Parameters
    ----------
    model : ModelTypes
        A fitted model exposing ``n_components``

    X : array-like of shape (n_samples, n_features)
        Input data

    y_residuals : ndarray of shape (n_samples, n_targets)
        Residuals of the model predictions (observed minus predicted)

    Returns
    -------
    ndarray
        Flattened studentized residuals; shape (n_samples,) for a single
        target column.
    """
    # Per-sample leverage, as a column so it broadcasts across target columns
    sample_leverage = calculate_leverage(model, X)
    leverage_column = sample_leverage.reshape(-1, 1)

    # Residual standard deviation per target column
    # NOTE(review): no guard for n_samples <= n_components or leverage >= 1.
    degrees_of_freedom = X.shape[0] - model.n_components
    squared_error_sum = np.sum(y_residuals**2, axis=0)
    residual_std = np.sqrt(squared_error_sum / degrees_of_freedom)

    scale = residual_std * np.sqrt(1 - leverage_column)
    studentized = y_residuals / scale
    return studentized.flatten()
|
@@ -1,8 +1,10 @@
|
|
1
1
|
chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
chemotools/augmentation/__init__.py,sha256=
|
3
|
-
chemotools/augmentation/_add_noise.py,sha256=
|
2
|
+
chemotools/augmentation/__init__.py,sha256=ohlRHgRWTkvNpO3RikKtowzboqunQqx0WqtNccuWOHw,397
|
3
|
+
chemotools/augmentation/_add_noise.py,sha256=fkTJfIYtZXezcjy6Vz8asIhpBoVp4oaIifppK9vZpM8,4362
|
4
4
|
chemotools/augmentation/_baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
|
5
|
-
chemotools/augmentation/
|
5
|
+
chemotools/augmentation/_fractional_shift.py,sha256=dJ0Vuc-U02HhjKkOwc48qnOksZYgbHwL2ko7tWCZTQU,6916
|
6
|
+
chemotools/augmentation/_gaussian_broadening.py,sha256=dJsPlTKqpecKaCDU3vOvedIb-t_HyCkQprxNv0DmYZQ,4236
|
7
|
+
chemotools/augmentation/_index_shift.py,sha256=BTtadweDvvMtiF8t7ldwsE6Kl6FmKLCkVJjSzSWyIDs,6904
|
6
8
|
chemotools/augmentation/_spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
|
7
9
|
chemotools/baseline/__init__.py,sha256=VzoblGg8Hx_FkTc_n7a-ZjGvtKP8JE_NwJKWenGFQkM,584
|
8
10
|
chemotools/baseline/_air_pls.py,sha256=eotXuIEsus7Z-c17oLx8UbiwOHM7DzQJ6rruHnwCGPQ,5067
|
@@ -28,6 +30,14 @@ chemotools/derivative/_savitzky_golay.py,sha256=CuCrKoLmrB1YmJ4ihIykgkL3tO3frqkS
|
|
28
30
|
chemotools/feature_selection/__init__.py,sha256=1_i28hIxijjwhMypTy1w2fLbzXXVkKD5IYzzY8ZSuHw,117
|
29
31
|
chemotools/feature_selection/_index_selector.py,sha256=lNTP2b7P3doWl30KiAr3Xd2HOMxeUmj24MuqoXl4Voc,3556
|
30
32
|
chemotools/feature_selection/_range_cut.py,sha256=lVVVC30ZsK2z9jsDGb_z6l8Ty2I89yM05_dIDbMP73Q,3564
|
33
|
+
chemotools/outliers/__init__.py,sha256=wpdlyqU34n1Pb9kGCM4idhcok35WAakxEhzP0xeKaZw,272
|
34
|
+
chemotools/outliers/_base.py,sha256=zx9z_Snkvq5YWBoRi_-kRr3a-Q7jTz1zVlrGWycUTb4,5232
|
35
|
+
chemotools/outliers/_utils.py,sha256=SDrrDvgEVQyPuKdh0Rw0DD4a8LShbNAQLRwSLICtiYU,2720
|
36
|
+
chemotools/outliers/dmodx.py,sha256=R9LaQpUJeDv4GJ0hroKOlFcFbsfQRtrHWD_EI3-TX7Y,4521
|
37
|
+
chemotools/outliers/hotelling_t2.py,sha256=Ga1qmlurF_fps9igaTUGOrnUOctIJEYqoCdb468KhY4,5006
|
38
|
+
chemotools/outliers/leverage.py,sha256=zgxG2F7ZCf5wRVJeezHSJ2gaUDTP2CvK38Rr-hR6niA,4215
|
39
|
+
chemotools/outliers/q_residuals.py,sha256=6_h73A1YxHBcQtjAXOAp1Rb7egHJwj0DQ0MKdnj6aBQ,7647
|
40
|
+
chemotools/outliers/studentized_residuals.py,sha256=rF0wObKQV5DCa8THkZcuwdu7u4mBk-dbOHth5tj5cqM,5830
|
31
41
|
chemotools/scale/__init__.py,sha256=eztqcHg-TKE1Rr0N9ArfytHk8teuqVfi4SZi2DS96vc,175
|
32
42
|
chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZHRLqgKQ,3011
|
33
43
|
chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
|
@@ -43,7 +53,7 @@ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYN
|
|
43
53
|
chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
|
44
54
|
chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
|
45
55
|
chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
46
|
-
chemotools-0.1.
|
47
|
-
chemotools-0.1.
|
48
|
-
chemotools-0.1.
|
49
|
-
chemotools-0.1.
|
56
|
+
chemotools-0.1.10.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
|
57
|
+
chemotools-0.1.10.dist-info/METADATA,sha256=fRgOO8cS2JNtNWz_CEG0uKvncSHEJ8myfhm2IOz3y-4,5240
|
58
|
+
chemotools-0.1.10.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
59
|
+
chemotools-0.1.10.dist-info/RECORD,,
|
File without changes
|