chemotools 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,197 @@
1
+ from typing import Optional, Union
2
+ import numpy as np
3
+
4
+ from sklearn.cross_decomposition._pls import _PLS
5
+ from sklearn.pipeline import Pipeline
6
+ from sklearn.utils.validation import validate_data, check_is_fitted
7
+
8
+
9
+ from ._base import _ModelResidualsBase, ModelTypes
10
+ from .leverage import calculate_leverage
11
+
12
+
13
class StudentizedResiduals(_ModelResidualsBase):
    """
    Detect outliers from the studentized residuals of a _PLS model's predictions.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        A fitted _PLS model or Pipeline ending with such a model.

    confidence : float, default=0.95
        Confidence level forwarded to the base class. ``fit`` uses the
        ``confidence * 100`` percentile of the training studentized residuals
        as the critical value.

    Attributes
    ----------
    model_ : _PLS
        The fitted model used for prediction (read by this class; presumably
        set by ``_ModelResidualsBase.__init__`` -- confirm in ``_base.py``).

    preprocessing_ : Optional[Pipeline]
        Preprocessing steps applied to X before the model, if any (read by
        this class; presumably set by ``_ModelResidualsBase.__init__``).

    critical_value_ : float
        Threshold on the studentized residuals, set by ``fit``.
    """

    def __init__(self, model: Union[_PLS, Pipeline], confidence=0.95) -> None:
        super().__init__(model, confidence)

    def fit(self, X: np.ndarray, y: Optional[np.ndarray]) -> "StudentizedResiduals":
        """
        Compute the critical value from the studentized residuals of (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target data

        Returns
        -------
        self : StudentizedResiduals
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``y`` is None or its shape does not match the model predictions.
        """
        # Validate the input data; fit is the one place where the expected
        # number of features is (re)recorded.
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        studentized_residuals = self._studentized_residuals(X, y)

        # Calculate the critical threshold
        self.critical_value_ = self._calculate_critical_value(studentized_residuals)

        return self

    def predict(self, X: np.ndarray, y: Optional[np.ndarray]) -> np.ndarray:
        """Label samples as outliers (-1) or inliers (1).

        A value is labelled -1 when its studentized residual is strictly
        greater than ``critical_value_``; residuals are compared as signed
        values, not by magnitude.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target data

        Returns
        -------
        ndarray
            Flattened array of -1 (outlier) / 1 (inlier) labels, one per
            studentized residual (shape (n_samples,) for a single target).

        Raises
        ------
        ValueError
            If ``y`` is None or its shape does not match the model predictions.
        """
        # Check the estimator has been fitted
        check_is_fitted(self, ["critical_value_"])

        # reset=False: check X against the feature count recorded in fit
        # instead of overwriting it at prediction time.
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=False, dtype=np.float64
        )

        studentized_residuals = self._studentized_residuals(X, y)
        return np.where(studentized_residuals > self.critical_value_, -1, 1)

    def predict_residuals(
        self, X: np.ndarray, y: Optional[np.ndarray], validate: bool = True
    ) -> np.ndarray:
        """Calculate the studentized residuals of the model predictions.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values

        validate : bool, default=True
            Whether to validate X before computing the residuals.

        Returns
        -------
        ndarray
            Flattened studentized residuals of the model predictions
            (shape (n_samples,) for a single target).

        Raises
        ------
        ValueError
            If ``y`` is None or its shape does not match the model predictions.
        """
        # Check the estimator has been fitted
        check_is_fitted(self, ["critical_value_"])

        # Validate the input data without touching the fitted feature count
        if validate:
            X = validate_data(
                self,
                X,
                y="no_validation",
                ensure_2d=True,
                reset=False,
                dtype=np.float64,
            )

        return self._studentized_residuals(X, y)

    def _studentized_residuals(
        self, X: np.ndarray, y: Optional[np.ndarray]
    ) -> np.ndarray:
        """Preprocess X, predict with the model and studentize ``y - y_pred``."""
        if y is None:
            raise ValueError(
                "y must be provided to calculate the studentized residuals."
            )

        # Apply preprocessing if available
        if self.preprocessing_:
            X = self.preprocessing_.transform(X)

        y_true = np.asarray(y)
        y_pred = np.asarray(self.model_.predict(X))

        # Bring both operands to column form BEFORE subtracting: a (n,) array
        # minus a (n, 1) array would silently broadcast to (n, n).
        if y_true.ndim == 1:
            y_true = y_true.reshape(-1, 1)
        if y_pred.ndim == 1:
            y_pred = y_pred.reshape(-1, 1)
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"y has shape {y_true.shape} but the model predictions have "
                f"shape {y_pred.shape}."
            )

        return calculate_studentized_residuals(self.model_, X, y_true - y_pred)

    def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
        """Calculate the critical value for outlier detection.

        Parameters
        ----------
        X : array-like of shape (n_samples,)
            Studentized residuals

        Returns
        -------
        float
            The ``confidence * 100`` percentile of ``X``, or 0.0 when ``X``
            is None.
        """

        return np.percentile(X, self.confidence * 100) if X is not None else 0.0
167
+
168
+
169
def calculate_studentized_residuals(
    model: ModelTypes, X: np.ndarray, y_residuals: np.ndarray
) -> np.ndarray:
    """Scale prediction residuals by their standard deviation and leverage.

    Parameters
    ----------
    model : ModelTypes
        A fitted model exposing ``n_components``

    X : array-like of shape (n_samples, n_features)
        Input data, passed to ``calculate_leverage`` together with ``model``

    y_residuals : ndarray of shape (n_samples, n_targets)
        Residuals between the target values and the model predictions

    Returns
    -------
    ndarray
        Flattened studentized residuals (shape (n_samples,) for a single
        target)

    Notes
    -----
    The residual standard deviation uses ``X.shape[0] - model.n_components``
    as its denominator; when that difference is zero or negative the result
    is not finite.
    """

    # Leverage of each sample, as a column so it lines up with the residual rows
    sample_leverage = calculate_leverage(model, X)
    leverage_column = sample_leverage.reshape(-1, 1)

    # Per-target standard deviation of the residuals
    degrees_of_freedom = X.shape[0] - model.n_components
    squared_error_sum = np.sum(y_residuals**2, axis=0)
    residual_std = np.sqrt(squared_error_sum / degrees_of_freedom)

    scaled_residuals = y_residuals / (residual_std * np.sqrt(1 - leverage_column))
    return scaled_residuals.flatten()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: chemotools
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
5
  License: MIT
6
6
  Author: Pau Cabaneros
@@ -1,8 +1,10 @@
1
1
  chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- chemotools/augmentation/__init__.py,sha256=_DiyO7M0xztix8Ea_esxe0xjEYHTneJVJZ52bu5WFpg,248
3
- chemotools/augmentation/_add_noise.py,sha256=4SQFiU9Snl0Dz5EfvRjimpndlNGdXxW2ya3YplHL2fg,3502
2
+ chemotools/augmentation/__init__.py,sha256=ohlRHgRWTkvNpO3RikKtowzboqunQqx0WqtNccuWOHw,397
3
+ chemotools/augmentation/_add_noise.py,sha256=fkTJfIYtZXezcjy6Vz8asIhpBoVp4oaIifppK9vZpM8,4362
4
4
  chemotools/augmentation/_baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
5
- chemotools/augmentation/_index_shift.py,sha256=w1maDHGLAKSiGAQ8c9yYHofs_PJnxeN0nB1RU-pINcE,6042
5
+ chemotools/augmentation/_fractional_shift.py,sha256=dJ0Vuc-U02HhjKkOwc48qnOksZYgbHwL2ko7tWCZTQU,6916
6
+ chemotools/augmentation/_gaussian_broadening.py,sha256=dJsPlTKqpecKaCDU3vOvedIb-t_HyCkQprxNv0DmYZQ,4236
7
+ chemotools/augmentation/_index_shift.py,sha256=BTtadweDvvMtiF8t7ldwsE6Kl6FmKLCkVJjSzSWyIDs,6904
6
8
  chemotools/augmentation/_spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
7
9
  chemotools/baseline/__init__.py,sha256=VzoblGg8Hx_FkTc_n7a-ZjGvtKP8JE_NwJKWenGFQkM,584
8
10
  chemotools/baseline/_air_pls.py,sha256=eotXuIEsus7Z-c17oLx8UbiwOHM7DzQJ6rruHnwCGPQ,5067
@@ -28,6 +30,14 @@ chemotools/derivative/_savitzky_golay.py,sha256=CuCrKoLmrB1YmJ4ihIykgkL3tO3frqkS
28
30
  chemotools/feature_selection/__init__.py,sha256=1_i28hIxijjwhMypTy1w2fLbzXXVkKD5IYzzY8ZSuHw,117
29
31
  chemotools/feature_selection/_index_selector.py,sha256=lNTP2b7P3doWl30KiAr3Xd2HOMxeUmj24MuqoXl4Voc,3556
30
32
  chemotools/feature_selection/_range_cut.py,sha256=lVVVC30ZsK2z9jsDGb_z6l8Ty2I89yM05_dIDbMP73Q,3564
33
+ chemotools/outliers/__init__.py,sha256=wpdlyqU34n1Pb9kGCM4idhcok35WAakxEhzP0xeKaZw,272
34
+ chemotools/outliers/_base.py,sha256=zx9z_Snkvq5YWBoRi_-kRr3a-Q7jTz1zVlrGWycUTb4,5232
35
+ chemotools/outliers/_utils.py,sha256=SDrrDvgEVQyPuKdh0Rw0DD4a8LShbNAQLRwSLICtiYU,2720
36
+ chemotools/outliers/dmodx.py,sha256=R9LaQpUJeDv4GJ0hroKOlFcFbsfQRtrHWD_EI3-TX7Y,4521
37
+ chemotools/outliers/hotelling_t2.py,sha256=Ga1qmlurF_fps9igaTUGOrnUOctIJEYqoCdb468KhY4,5006
38
+ chemotools/outliers/leverage.py,sha256=zgxG2F7ZCf5wRVJeezHSJ2gaUDTP2CvK38Rr-hR6niA,4215
39
+ chemotools/outliers/q_residuals.py,sha256=6_h73A1YxHBcQtjAXOAp1Rb7egHJwj0DQ0MKdnj6aBQ,7647
40
+ chemotools/outliers/studentized_residuals.py,sha256=rF0wObKQV5DCa8THkZcuwdu7u4mBk-dbOHth5tj5cqM,5830
31
41
  chemotools/scale/__init__.py,sha256=eztqcHg-TKE1Rr0N9ArfytHk8teuqVfi4SZi2DS96vc,175
32
42
  chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZHRLqgKQ,3011
33
43
  chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
@@ -43,7 +53,7 @@ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYN
43
53
  chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
44
54
  chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
45
55
  chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- chemotools-0.1.8.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
47
- chemotools-0.1.8.dist-info/METADATA,sha256=gK71zOTZyaFxCqjxXGGKfQi4TvN43AXhBIaWdMWVJh4,5239
48
- chemotools-0.1.8.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
49
- chemotools-0.1.8.dist-info/RECORD,,
56
+ chemotools-0.1.10.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
57
+ chemotools-0.1.10.dist-info/METADATA,sha256=fRgOO8cS2JNtNWz_CEG0uKvncSHEJ8myfhm2IOz3y-4,5240
58
+ chemotools-0.1.10.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
59
+ chemotools-0.1.10.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.0.1
2
+ Generator: poetry-core 2.1.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any