chemotools 0.1.9__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
  from ._add_noise import AddNoise
  from ._baseline_shift import BaselineShift
  from ._fractional_shift import FractionalShift
+ from ._gaussian_broadening import GaussianBroadening
  from ._index_shift import IndexShift
  from ._spectrum_scale import SpectrumScale
 
@@ -9,6 +10,7 @@ __all__ = [
      "AddNoise",
      "BaselineShift",
      "FractionalShift",
+     "GaussianBroadening",
      "IndexShift",
      "SpectrumScale",
  ]
@@ -0,0 +1,136 @@
+ from typing import Literal, Optional
+ import numpy as np
+ from scipy.ndimage import gaussian_filter1d
+ from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+ from sklearn.utils.validation import check_is_fitted, validate_data
+
+
+ class GaussianBroadening(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
+     """
+     Transform spectral data by broadening peaks using Gaussian convolution.
+
+     This transformer applies Gaussian smoothing to broaden peaks in spectral data.
+     For each signal, a random sigma is chosen between 0 and the specified sigma value.
+
+     Parameters
+     ----------
+     sigma : float, default=1.0
+         Maximum standard deviation for the Gaussian kernel.
+         The actual sigma used will be randomly chosen between 0 and this value.
+
+     mode : {'reflect', 'constant', 'nearest', 'mirror', 'wrap'}, default='reflect'
+         The mode parameter determines how the input array is extended when
+         the filter overlaps a border. Default is 'reflect'.
+
+     pad_value : float, default=0.0
+         Value to fill past edges of input if mode is 'constant'.
+
+     random_state : int, optional, default=None
+         Random state for reproducible sigma selection.
+
+     truncate : float, default=4.0
+         Truncate the filter at this many standard deviations.
+         Larger values increase computation time but improve accuracy.
+     """
+
+     def __init__(
+         self,
+         sigma: float = 1.0,
+         mode: Literal["reflect", "constant", "nearest", "mirror", "wrap"] = "reflect",
+         pad_value: float = 0.0,
+         random_state: Optional[int] = None,
+         truncate: float = 4.0,
+     ):
+         self.sigma = sigma
+         self.mode = mode
+         self.pad_value = pad_value
+         self.random_state = random_state
+         self.truncate = truncate
+
+     def fit(self, X: np.ndarray, y=None) -> "GaussianBroadening":
+         """
+         Fit the transformer to the data (in this case, only validates input).
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data to validate.
+
+         y : None
+             Ignored.
+
+         Returns
+         -------
+         self : GaussianBroadening
+             The fitted transformer.
+         """
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         # Validate sigma parameter
+         if not isinstance(self.sigma, (int, float)):
+             raise ValueError("sigma must be a number")
+         if self.sigma < 0:
+             raise ValueError("sigma must be non-negative")
+
+         # Initialize random number generator
+         self._rng = np.random.default_rng(self.random_state)
+
+         return self
+
+     def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+         """
+         Apply Gaussian broadening to the input data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The data to transform.
+
+         y : None
+             Ignored.
+
+         Returns
+         -------
+         X_transformed : ndarray of shape (n_samples, n_features)
+             The transformed data with broadened peaks.
+         """
+         check_is_fitted(self, "n_features_in_")
+         X_ = validate_data(
+             self,
+             X,
+             y="no_validation",
+             ensure_2d=True,
+             copy=True,
+             reset=False,
+             dtype=np.float64,
+         )
+
+         # Transform each sample
+         for i, x in enumerate(X_):
+             X_[i] = self._broaden_signal(x)
+
+         return X_
+
+     def _broaden_signal(self, x: np.ndarray) -> np.ndarray:
+         """
+         Apply Gaussian broadening to a single signal.
+
+         Parameters
+         ----------
+         x : ndarray of shape (n_features,)
+             The input signal to broaden.
+
+         Returns
+         -------
+         broadened_signal : ndarray of shape (n_features,)
+             The broadened signal.
+         """
+         # Randomly choose sigma between 0 and max sigma
+         sigma = self._rng.uniform(0, self.sigma)
+
+         # Apply Gaussian filter
+         return gaussian_filter1d(
+             x, sigma=sigma, mode=self.mode, cval=self.pad_value, truncate=self.truncate
+         )
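A minimal usage sketch of the new transformer, assuming the public `chemotools.augmentation` import path added in the first hunk (the spectra below are synthetic placeholders):

```python
import numpy as np
from chemotools.augmentation import GaussianBroadening

# Synthetic stand-in for real spectra: 3 samples x 100 wavelengths
rng = np.random.default_rng(0)
X = rng.random((3, 100))

# Each row is smoothed with its own random sigma drawn from U(0, 2.0)
broadener = GaussianBroadening(sigma=2.0, random_state=42)
X_broadened = broadener.fit_transform(X)
print(X_broadened.shape)  # (3, 100): the shape is preserved
```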
@@ -0,0 +1,7 @@
+ from .dmodx import DModX
+ from .hotelling_t2 import HotellingT2
+ from .q_residuals import QResiduals
+ from .leverage import Leverage
+ from .studentized_residuals import StudentizedResiduals
+
+ __all__ = ["DModX", "HotellingT2", "QResiduals", "Leverage", "StudentizedResiduals"]
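The new `chemotools.outliers` subpackage groups five detectors behind one import surface. Per the scikit-learn `OutlierMixin` convention used throughout the modules below, `predict` returns -1 for outliers and 1 for inliers. A quick sketch of the public surface declared above:

```python
# All five detectors wrap an already-fitted PCA/PLS model
# (or a Pipeline ending in one) rather than raw data.
from chemotools.outliers import (
    DModX,
    HotellingT2,
    Leverage,
    QResiduals,
    StudentizedResiduals,
)
```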
@@ -0,0 +1,180 @@
+ from abc import ABC, abstractmethod
+ from typing import Union, Optional
+
+ import numpy as np
+
+ from sklearn.base import BaseEstimator, OutlierMixin
+ from sklearn.decomposition._base import _BasePCA
+ from sklearn.cross_decomposition._pls import _PLS
+ from sklearn.pipeline import Pipeline
+ from sklearn.utils.validation import check_is_fitted
+
+ from ._utils import validate_confidence, validate_and_extract_model
+
+ ModelTypes = Union[_BasePCA, _PLS]
+
+
+ class _ModelResidualsBase(ABC, BaseEstimator, OutlierMixin):
+     """Base class for model outlier calculations.
+
+     Implements statistical calculations for outlier detection in dimensionality
+     reduction models such as PCA and PLS.
+
+     Parameters
+     ----------
+     model : Union[ModelTypes, Pipeline]
+         A fitted _BasePCA or _PLS model, or a Pipeline ending with such a model
+
+     confidence : float
+         Confidence level for statistical calculations (between 0 and 1)
+
+     Attributes
+     ----------
+     model_ : ModelTypes
+         The fitted model of type _BasePCA or _PLS
+
+     preprocessing_ : Optional[Pipeline]
+         Preprocessing steps before the model
+
+     n_features_in_ : int
+         Number of features in the input data
+
+     n_components_ : int
+         Number of components in the model
+
+     n_samples_ : int
+         Number of samples used to train the model
+
+     critical_value_ : float
+         The calculated critical value for outlier detection
+     """
+
+     def __init__(
+         self,
+         model: Union[ModelTypes, Pipeline],
+         confidence: float,
+     ) -> None:
+         (
+             self.model_,
+             self.preprocessing_,
+             self.n_features_in_,
+             self.n_components_,
+             self.n_samples_,
+         ) = validate_and_extract_model(model)
+         self.confidence = validate_confidence(confidence)
+
+     def fit_predict_residuals(
+         self, X: np.ndarray, y: Optional[np.ndarray] = None
+     ) -> np.ndarray:
+         """Fit the model to the input data and calculate the residuals.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         y : array-like of shape (n_samples,), default=None
+             Target values
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             The residuals of the model
+         """
+         self.fit(X, y)
+         return self.predict_residuals(X, y, validate=True)
+
+     @abstractmethod
+     def predict_residuals(
+         self, X: np.ndarray, y: Optional[np.ndarray], validate: bool
+     ) -> np.ndarray:
+         """Calculate the residuals of the model.
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             The residuals of the model
+         """
+
+     @abstractmethod
+     def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
+         """Calculate the critical value for outlier detection.
+
+         Returns
+         -------
+         float
+             The calculated critical value for outlier detection
+         """
+
+
+ class _ModelDiagnosticsBase(ABC):
+     """Base class for model diagnostics methods.
+
+     This does not implement outlier detection algorithms, but rather methods
+     that are used to assess trained models.
+
+     Parameters
+     ----------
+     model : Union[ModelTypes, Pipeline]
+         A fitted PCA/PLS model or Pipeline ending with such a model
+
+     Attributes
+     ----------
+     model_ : ModelTypes
+         The fitted model of type _BasePCA or _PLS
+
+     preprocessing_ : Optional[Pipeline]
+         Preprocessing steps before the model
+     """
+
+     def __init__(self, model: Union[ModelTypes, Pipeline]):
+         self.model_, self.preprocessing_ = self._validate_and_extract_model(model)
+
+     def _validate_and_extract_model(self, model):
+         """Validate and extract the model and preprocessing steps.
+
+         Parameters
+         ----------
+         model : Union[ModelTypes, Pipeline]
+             A fitted PCA/PLS model or Pipeline ending with such a model
+
+         Returns
+         -------
+         Tuple[ModelTypes, Optional[Pipeline]]
+             The extracted model and preprocessing steps
+
+         Raises
+         ------
+         ValueError
+             If the model is not of type _BasePCA or _PLS (or a Pipeline ending
+             with one of these types), or if the model is not fitted
+         """
+         if isinstance(model, Pipeline):
+             preprocessing = model[:-1]
+             model = model[-1]
+         else:
+             preprocessing = None
+
+         if not isinstance(model, (_BasePCA, _PLS)):
+             raise ValueError(
+                 "Model must be of type _BasePCA or _PLS, or a Pipeline ending "
+                 "with one of these types."
+             )
+
+         check_is_fitted(model)
+         return model, preprocessing
+
+     @abstractmethod
+     def predict(self, X: np.ndarray, y: Optional[np.ndarray]) -> np.ndarray:
+         """Predict the output of the model.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         y : array-like of shape (n_samples,), default=None
+             Target values
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Predicted values
+         """
@@ -0,0 +1,91 @@
+ from typing import Optional, Tuple, Union
+
+ from sklearn.cross_decomposition._pls import _PLS
+ from sklearn.decomposition._base import _BasePCA
+ from sklearn.pipeline import Pipeline
+ from sklearn.utils.validation import check_is_fitted
+
+ ModelTypes = Union[_BasePCA, _PLS]
+
+
+ def get_model_parameters(model: ModelTypes) -> Tuple[int, int, int]:
+     """
+     Get the number of features, components, and samples from a fitted
+     PCA- or PLS-type model.
+
+     Parameters
+     ----------
+     model : ModelTypes
+         A fitted model of type _BasePCA or _PLS
+
+     Returns
+     -------
+     Tuple[int, int, int]
+         The number of features, components and samples in the model
+     """
+     if isinstance(model, _BasePCA):
+         return model.n_features_in_, model.n_components_, model.n_samples_
+     elif isinstance(model, _PLS):
+         return model.n_features_in_, model.n_components, len(model.x_scores_)
+     else:
+         raise ValueError(
+             "Model must be of type _BasePCA or _PLS, or a Pipeline ending "
+             "with one of these types."
+         )
+
+
+ def validate_confidence(confidence: float) -> float:
+     """Validate the confidence level using sklearn conventions.
+
+     Parameters
+     ----------
+     confidence : float
+         Confidence level for statistical calculations (between 0 and 1)
+
+     Returns
+     -------
+     float
+         The validated confidence level
+
+     Raises
+     ------
+     ValueError
+         If confidence is not between 0 and 1
+     """
+     if not 0 < confidence < 1:
+         raise ValueError("Confidence must be between 0 and 1")
+     return confidence
+
+
+ def validate_and_extract_model(
+     model: Union[ModelTypes, Pipeline],
+ ) -> Tuple[ModelTypes, Optional[Pipeline], int, int, int]:
+     """Validate and extract the model and preprocessing steps.
+
+     Parameters
+     ----------
+     model : Union[ModelTypes, Pipeline]
+         A fitted PCA/PLS model or Pipeline ending with such a model
+
+     Returns
+     -------
+     Tuple[ModelTypes, Optional[Pipeline], int, int, int]
+         The extracted model, the preprocessing steps, and the number of
+         features, components and samples in the model
+
+     Raises
+     ------
+     ValueError
+         If the model is not of type _BasePCA or _PLS (or a Pipeline ending
+         with one of these types), or if the model is not fitted
+     """
+     if isinstance(model, Pipeline):
+         preprocessing = model[:-1]
+         model = model[-1]
+     else:
+         preprocessing = None
+
+     if not isinstance(model, (_BasePCA, _PLS)):
+         raise ValueError(
+             "Model must be of type _BasePCA or _PLS, or a Pipeline ending "
+             "with one of these types."
+         )
+
+     check_is_fitted(model)
+     n_features_in, n_components, n_samples = get_model_parameters(model)
+     return model, preprocessing, n_features_in, n_components, n_samples
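A sketch of how these helpers behave on a scikit-learn Pipeline (`StandardScaler` and `PCA` are standard scikit-learn estimators; the data is made up):

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from chemotools.outliers._utils import validate_and_extract_model

X = np.random.default_rng(0).random((50, 10))
pipe = make_pipeline(StandardScaler(), PCA(n_components=3)).fit(X)

# The final estimator is split off; everything before it is returned
# as the preprocessing sub-pipeline.
model, preprocessing, n_features, n_components, n_samples = validate_and_extract_model(pipe)
print(type(model).__name__, n_features, n_components, n_samples)  # PCA 10 3 50
```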
@@ -0,0 +1,146 @@
+ from typing import Optional, Union
+ import numpy as np
+
+ from sklearn.pipeline import Pipeline
+ from sklearn.utils.validation import validate_data, check_is_fitted
+ from scipy.stats import f as f_distribution
+
+
+ from ._base import _ModelResidualsBase, ModelTypes
+
+
+ class DModX(_ModelResidualsBase):
+     """Calculate Distance to Model (DModX) statistics.
+
+     DModX measures the distance between an observation and the model plane
+     in the X-space, useful for detecting outliers.
+
+     Parameters
+     ----------
+     model : Union[ModelTypes, Pipeline]
+         A fitted PCA/PLS model or Pipeline ending with such a model
+
+     confidence : float, default=0.95
+         Confidence level for statistical calculations (between 0 and 1)
+
+     Attributes
+     ----------
+     model_ : ModelTypes
+         The fitted model of type _BasePCA or _PLS
+
+     preprocessing_ : Optional[Pipeline]
+         Preprocessing steps before the model
+
+     n_features_in_ : int
+         Number of features in the input data
+
+     n_components_ : int
+         Number of components in the model
+
+     n_samples_ : int
+         Number of samples used to train the model
+
+     critical_value_ : float
+         The calculated critical value for outlier detection
+     """
+
+     def __init__(
+         self,
+         model: Union[ModelTypes, Pipeline],
+         confidence: float = 0.95,
+     ) -> None:
+         super().__init__(model, confidence)
+
+     def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "DModX":
+         """
+         Fit the model to the input data.
+
+         This step calculates the critical value for outlier detection. In the
+         DModX method, the critical value does not depend on the input data,
+         only on the model parameters.
+         """
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         self.critical_value_ = self._calculate_critical_value()
+         return self
+
+     def predict(self, X: np.ndarray) -> np.ndarray:
+         """Identify outliers in the input data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Array with -1 for outliers and 1 for inliers
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         # Calculate outliers based on the DModX statistics
+         dmodx_values = self.predict_residuals(X, validate=False)
+         return np.where(dmodx_values > self.critical_value_, -1, 1)
+
+     def predict_residuals(
+         self, X: np.ndarray, y: Optional[np.ndarray] = None, validate: bool = True
+     ) -> np.ndarray:
+         """Calculate DModX statistics for input data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         validate : bool, default=True
+             Whether to validate the input data
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             DModX statistics for each sample
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         if validate:
+             X = validate_data(
+                 self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+             )
+
+         # Apply preprocessing if available
+         if self.preprocessing_:
+             X = self.preprocessing_.transform(X)
+
+         # Calculate the DModX statistics
+         X_transformed = self.model_.transform(X)
+         X_reconstructed = self.model_.inverse_transform(X_transformed)
+         squared_errors = np.sum((X - X_reconstructed) ** 2, axis=1)
+
+         return np.sqrt(squared_errors / (self.n_features_in_ - self.n_components_))
+
+     def _calculate_critical_value(self, X: Optional[np.ndarray] = None) -> float:
+         """Calculate F-distribution based critical value.
+
+         Returns
+         -------
+         float
+             The critical value for outlier detection
+         """
+         dof_numerator = self.n_features_in_ - self.n_components_
+         dof_denominator = self.n_features_in_ - self.n_components_ - 1
+
+         upper_control_limit = f_distribution.ppf(
+             self.confidence, dof_numerator, dof_denominator
+         )
+         return np.sqrt(upper_control_limit)
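Concretely, the statistic computed above is DModX_i = sqrt(SPE_i / (K - A)) for K input features and A model components, compared against an F-based control limit. A usage sketch on synthetic data (parameters are illustrative):

```python
import numpy as np
from sklearn.decomposition import PCA
from chemotools.outliers import DModX

X = np.random.default_rng(1).random((100, 20))
pca = PCA(n_components=5).fit(X)

dmodx = DModX(pca, confidence=0.95).fit(X)
labels = dmodx.predict(X)               # -1 = outlier, 1 = inlier
distances = dmodx.predict_residuals(X)  # raw DModX values per sample
```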
@@ -0,0 +1,155 @@
+ from typing import Optional, Union
+ import numpy as np
+
+ from sklearn.cross_decomposition._pls import _PLS
+ from sklearn.decomposition._base import _BasePCA
+ from sklearn.pipeline import Pipeline
+ from sklearn.utils.validation import validate_data, check_is_fitted
+ from scipy.stats import f as f_distribution
+
+ from ._base import _ModelResidualsBase, ModelTypes
+
+
+ class HotellingT2(_ModelResidualsBase):
+     """
+     Calculate Hotelling's T-squared statistics for PCA or PLS like models.
+
+     Parameters
+     ----------
+     model : Union[ModelTypes, Pipeline]
+         A fitted PCA/PLS model or Pipeline ending with such a model
+
+     confidence : float, default=0.95
+         Confidence level for statistical calculations (between 0 and 1)
+
+     Attributes
+     ----------
+     model_ : ModelTypes
+         The fitted model of type _BasePCA or _PLS
+
+     preprocessing_ : Optional[Pipeline]
+         Preprocessing steps before the model
+
+     n_features_in_ : int
+         Number of features in the input data
+
+     n_components_ : int
+         Number of components in the model
+
+     n_samples_ : int
+         Number of samples used to train the model
+
+     critical_value_ : float
+         The calculated critical value for outlier detection
+
+     References
+     ----------
+     Johan A. Westerhuis, Stephen P. Gurden, Age K. Smilde (2000). Generalized
+     contribution plots in multivariate statistical process monitoring.
+     Chemometrics and Intelligent Laboratory Systems 51, 95–114.
+     """
+
+     def __init__(
+         self, model: Union[ModelTypes, Pipeline], confidence: float = 0.95
+     ) -> None:
+         super().__init__(model, confidence)
+
+     def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "HotellingT2":
+         """
+         Fit the model to the input data.
+
+         This step calculates the critical value for outlier detection. In the
+         Hotelling's T-squared method, the critical value does not depend on the
+         input data, only on the model parameters.
+         """
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         self.critical_value_ = self._calculate_critical_value()
+         return self
+
+     def predict(self, X: np.ndarray) -> np.ndarray:
+         """Identify outliers in the input data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Array with -1 for outliers and 1 for inliers
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         # Calculate the Hotelling's T-squared statistics
+         hotelling_t2_values = self.predict_residuals(X, y=None, validate=False)
+         return np.where(hotelling_t2_values > self.critical_value_, -1, 1)
+
+     def predict_residuals(
+         self, X: np.ndarray, y: Optional[np.ndarray], validate: bool = True
+     ) -> np.ndarray:
+         """Calculate Hotelling's T-squared statistics for input data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Hotelling's T-squared statistics for each sample
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         if validate:
+             X = validate_data(
+                 self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+             )
+
+         # Apply preprocessing steps
+         if self.preprocessing_:
+             X = self.preprocessing_.transform(X)
+
+         # Calculate the Hotelling's T-squared statistics
+         if isinstance(self.model_, _BasePCA):
+             # For PCA-like models
+             variances = self.model_.explained_variance_
+         elif isinstance(self.model_, _PLS):
+             # For PLS-like models
+             variances = np.var(self.model_.x_scores_, axis=0)
+
+         # Equivalent to X @ model.components_.T for _BasePCA and
+         # X @ model.x_rotations_ for _PLS
+         X_transformed = self.model_.transform(X)
+
+         return np.sum((X_transformed**2) / variances, axis=1)
+
+     def _calculate_critical_value(self, X: Optional[np.ndarray] = None) -> float:
+         """
+         Calculate the critical value for the Hotelling's T-squared statistics.
+
+         Returns
+         -------
+         float
+             The critical value for the Hotelling's T-squared statistics
+         """
+         critical_value = f_distribution.ppf(
+             self.confidence, self.n_components_, self.n_samples_ - self.n_components_
+         )
+         return (
+             critical_value
+             * self.n_components_
+             * (self.n_samples_ - 1)
+             / (self.n_samples_ - self.n_components_)
+         )
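A usage sketch, with the T-squared statistic also computed by hand to show what `predict_residuals` does for a PCA-like model (synthetic data, illustrative parameters):

```python
import numpy as np
from sklearn.decomposition import PCA
from chemotools.outliers import HotellingT2

X = np.random.default_rng(2).random((100, 20))
pca = PCA(n_components=5).fit(X)

t2 = HotellingT2(pca, confidence=0.95).fit(X)
labels = t2.predict(X)  # -1 = outlier, 1 = inlier

# The same statistic by hand: scores scaled by per-component variance
scores = pca.transform(X)
t2_manual = np.sum(scores**2 / pca.explained_variance_, axis=1)
```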
@@ -0,0 +1,150 @@
+ from typing import Optional, Union
+ import numpy as np
+
+ from sklearn.pipeline import Pipeline
+ from sklearn.utils.validation import validate_data, check_is_fitted
+
+
+ from ._base import _ModelResidualsBase, ModelTypes
+
+
+ class Leverage(_ModelResidualsBase):
+     """
+     Calculate the leverage of the training samples on the latent space of
+     PCA or PLS models. This allows detecting data points with a high
+     leverage on the model.
+
+     Parameters
+     ----------
+     model : Union[ModelTypes, Pipeline]
+         A fitted PCA/PLS model or Pipeline ending with such a model
+
+     confidence : float, default=0.95
+         Confidence level used as the percentile of the training leverage
+         distribution (between 0 and 1)
+
+     Attributes
+     ----------
+     model_ : ModelTypes
+         The fitted model of type _BasePCA or _PLS
+
+     preprocessing_ : Optional[Pipeline]
+         Preprocessing steps before the model
+     """
+
+     def __init__(
+         self, model: Union[ModelTypes, Pipeline], confidence: float = 0.95
+     ) -> None:
+         super().__init__(model, confidence)
+
+     def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "Leverage":
+         """
+         Fit the model to the input data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         y : None
+             Ignored.
+         """
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         if self.preprocessing_:
+             X = self.preprocessing_.fit_transform(X)
+
+         # Compute the critical threshold
+         self.critical_value_ = self._calculate_critical_value(X)
+
+         return self
+
+     def predict(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
+         """Identify samples with a leverage above the critical value.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Array with -1 for samples with a leverage above the critical
+             value and 1 otherwise
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         # Preprocess the data
+         if self.preprocessing_:
+             X = self.preprocessing_.transform(X)
+
+         # Calculate outliers based on samples with too high leverage
+         leverage = calculate_leverage(self.model_, X)
+         return np.where(leverage > self.critical_value_, -1, 1)
+
+     def predict_residuals(
+         self, X: np.ndarray, y: Optional[np.ndarray], validate: bool = True
+     ) -> np.ndarray:
+         """Calculate the leverage of the samples.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         Returns
+         -------
+         np.ndarray
+             Leverage of the samples
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         if validate:
+             X = validate_data(self, X, ensure_2d=True, dtype=np.float64)
+
+         # Apply preprocessing if available
+         if self.preprocessing_:
+             X = self.preprocessing_.transform(X)
+
+         # Calculate the leverage
+         return calculate_leverage(self.model_, X)
+
+     def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
+         """Calculate the critical value for outlier detection using the
+         percentile method."""
+         # Calculate the leverage of the training samples
+         leverage = calculate_leverage(self.model_, X)
+
+         # Calculate the critical value
+         return np.percentile(leverage, self.confidence * 100)
+
+
+ def calculate_leverage(model: ModelTypes, X: Optional[np.ndarray]) -> np.ndarray:
+     """
+     Calculate the leverage of the samples in a PLS/PCA-like model.
+
+     Parameters
+     ----------
+     model : Union[_BasePCA, _PLS]
+         A fitted PCA/PLS model
+
+     X : np.ndarray
+         Preprocessed input data
+
+     Returns
+     -------
+     np.ndarray
+         Leverage of the samples
+     """
+     X_transformed = model.transform(X)
+
+     X_hat = (
+         X_transformed @ np.linalg.inv(X_transformed.T @ X_transformed) @ X_transformed.T
+     )
+
+     return np.diag(X_hat)
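The leverage here is the diagonal of the hat matrix H = T (TᵀT)⁻¹ Tᵀ computed on the latent scores T, so the leverages sum to the number of components. A small sketch on synthetic data:

```python
import numpy as np
from sklearn.decomposition import PCA
from chemotools.outliers.leverage import calculate_leverage

X = np.random.default_rng(3).random((50, 10))
pca = PCA(n_components=3).fit(X)

h = calculate_leverage(pca, X)  # shape (50,), one leverage per sample
print(round(h.sum(), 3))        # trace(H) equals n_components, here ~3.0
```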
@@ -0,0 +1,225 @@
+ from typing import Optional, Literal, Union
+
+ import numpy as np
+
+ from scipy.stats import norm, chi2
+ from sklearn.pipeline import Pipeline
+ from sklearn.utils.validation import validate_data, check_is_fitted
+
+ from ._base import _ModelResidualsBase, ModelTypes
+
+
+ class QResiduals(_ModelResidualsBase):
+     """
+     Calculate Q residuals (Squared Prediction Error - SPE) for PCA or PLS models.
+
+     Parameters
+     ----------
+     model : Union[ModelTypes, Pipeline]
+         A fitted PCA/PLS model or Pipeline ending with such a model.
+
+     confidence : float, default=0.95
+         Confidence level for statistical calculations (between 0 and 1).
+
+     method : str, default="percentile"
+         The method used to compute the confidence threshold for Q residuals.
+         Options:
+         - "chi-square" : Uses mean and standard deviation to approximate the Q residuals threshold.
+         - "jackson-mudholkar" : Uses an eigenvalue-based analytical approximation.
+         - "percentile" : Uses an empirical percentile threshold.
+
+     Attributes
+     ----------
+     model_ : ModelTypes
+         The fitted model of type _BasePCA or _PLS.
+
+     preprocessing_ : Optional[Pipeline]
+         Preprocessing steps before the model.
+
+     n_features_in_ : int
+         Number of features in the input data.
+
+     n_components_ : int
+         Number of components in the model.
+
+     n_samples_ : int
+         Number of samples used to train the model.
+
+     critical_value_ : float
+         The calculated critical value for outlier detection.
+
+     References
+     ----------
+     Johan A. Westerhuis, Stephen P. Gurden, Age K. Smilde (2000). Generalized
+     contribution plots in multivariate statistical process monitoring.
+     Chemometrics and Intelligent Laboratory Systems 51, 95–114.
+     """
+
+     def __init__(
+         self,
+         model: Union[ModelTypes, Pipeline],
+         confidence: float = 0.95,
+         method: Literal["chi-square", "jackson-mudholkar", "percentile"] = "percentile",
+     ) -> None:
+         self.method = method
+         super().__init__(model, confidence)
+
+     def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "QResiduals":
+         """
+         Fit the Q residuals model by computing residuals from the training set.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Training data.
+
+         Returns
+         -------
+         self : object
+             Fitted instance of QResiduals.
+         """
+         X = validate_data(self, X, ensure_2d=True, dtype=np.float64)
+
+         if self.preprocessing_:
+             X = self.preprocessing_.fit_transform(X)
+
+         # Compute the critical threshold using the chosen method
+         self.critical_value_ = self._calculate_critical_value(X)
+
+         return self
+
+     def predict(self, X: np.ndarray) -> np.ndarray:
+         """Identify outliers in the input data based on the Q residuals threshold.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data.
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Array with -1 for outliers and 1 for inliers.
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         # Calculate outliers based on the Q residuals
+         Q_residuals = self.predict_residuals(X, validate=False)
+         return np.where(Q_residuals > self.critical_value_, -1, 1)
+
+     def predict_residuals(
+         self, X: np.ndarray, y: Optional[np.ndarray] = None, validate: bool = True
+     ) -> np.ndarray:
+         """Calculate Q residuals (Squared Prediction Error - SPE) for input data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data.
+
+         validate : bool, default=True
+             Whether to validate the input data.
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Q residuals for each sample.
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         if validate:
+             X = validate_data(self, X, ensure_2d=True, dtype=np.float64)
+
+         # Apply preprocessing if available
+         if self.preprocessing_:
+             X = self.preprocessing_.transform(X)
+
+         # Compute reconstruction error (Q residuals)
+         X_transformed = self.model_.transform(X)
+         X_reconstructed = self.model_.inverse_transform(X_transformed)
+         Q_residuals = np.sum((X - X_reconstructed) ** 2, axis=1)
+
+         return Q_residuals
+
+     def _calculate_critical_value(
+         self,
+         X: Optional[np.ndarray] = None,
+     ) -> float:
+         """Calculate the critical value for outlier detection.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Training data used to compute the residuals.
+
+         Returns
+         -------
+         float
+             The calculated critical value for outlier detection.
+         """
+         # Compute residuals for the training data
+         X_transformed = self.model_.transform(X)
+         X_reconstructed = self.model_.inverse_transform(X_transformed)
+         residuals = X - X_reconstructed
+
+         if self.method == "chi-square":
+             return self._chi_square_threshold(residuals)
+         elif self.method == "jackson-mudholkar":
+             return self._jackson_mudholkar_threshold(residuals)
+         elif self.method == "percentile":
+             Q_residuals = np.sum(residuals**2, axis=1)
+             return self._percentile_threshold(Q_residuals)
+         else:
+             raise ValueError(
+                 "Invalid method. Choose from 'chi-square', 'jackson-mudholkar', or 'percentile'."
+             )
+
+     def _chi_square_threshold(self, residuals: np.ndarray) -> float:
+         """Compute the Q residual threshold using the chi-square approximation."""
+         # Eigenvalues of the residual covariance matrix
+         eigenvalues = np.linalg.eigvalsh(np.cov(residuals.T))
+
+         theta_1 = np.sum(eigenvalues)
+         theta_2 = np.sum(eigenvalues**2)
+
+         # Degrees of freedom approximation
+         g = theta_2 / theta_1
+         h = (2 * theta_1**2) / theta_2
+
+         # Compute chi-square critical value at the given confidence level
+         chi_critical = chi2.ppf(self.confidence, df=h)
+
+         # Compute the final Q residual threshold
+         return g * chi_critical
+
+     def _jackson_mudholkar_threshold(self, residuals: np.ndarray) -> float:
+         """Compute the Q residual threshold using Jackson & Mudholkar’s analytical method."""
+         # Eigenvalues of the residual covariance matrix
+         eigenvalues = np.linalg.eigvalsh(np.cov(residuals.T))
+         theta_1 = np.sum(eigenvalues)
+         theta_2 = np.sum(eigenvalues**2)
+         theta_3 = np.sum(eigenvalues**3)
+         z_alpha = norm.ppf(self.confidence)
+
+         h0 = 1 - (2 * theta_1 * theta_3) / (3 * theta_2**2)
+
+         term1 = theta_2 * h0 * (1 - h0) / theta_1**2
+         term2 = z_alpha * np.sqrt(2 * theta_2 * h0**2) / theta_1
+
+         return theta_1 * (1 - term1 + term2) ** (1 / h0)
+
+     def _percentile_threshold(self, Q_residuals: np.ndarray) -> float:
+         """Compute the Q residual threshold using the empirical percentile method."""
+         return np.percentile(Q_residuals, self.confidence * 100)
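A usage sketch with the default empirical percentile threshold (synthetic data; swap `method` for "chi-square" or "jackson-mudholkar" to use the analytical approximations):

```python
import numpy as np
from sklearn.decomposition import PCA
from chemotools.outliers import QResiduals

X = np.random.default_rng(4).random((100, 20))
pca = PCA(n_components=5).fit(X)

q = QResiduals(pca, confidence=0.95, method="percentile").fit(X)
labels = q.predict(X)         # -1 where SPE exceeds the threshold
spe = q.predict_residuals(X)  # squared prediction error per sample
```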
@@ -0,0 +1,197 @@
+ from typing import Optional, Union
+ import numpy as np
+
+ from sklearn.cross_decomposition._pls import _PLS
+ from sklearn.pipeline import Pipeline
+ from sklearn.utils.validation import validate_data, check_is_fitted
+
+
+ from ._base import _ModelResidualsBase, ModelTypes
+ from .leverage import calculate_leverage
+
+
+ class StudentizedResiduals(_ModelResidualsBase):
+     """
+     Calculate the studentized residuals of the predictions of a _PLS model.
+
+     Parameters
+     ----------
+     model : Union[_PLS, Pipeline]
+         A fitted _PLS model or Pipeline ending with such a model
+
+     Attributes
+     ----------
+     model_ : ModelTypes
+         The fitted model of type _BasePCA or _PLS
+
+     preprocessing_ : Optional[Pipeline]
+         Preprocessing steps before the model
+     """
+
+     def __init__(self, model: Union[_PLS, Pipeline], confidence=0.95) -> None:
+         super().__init__(model, confidence)
+
+     def fit(self, X: np.ndarray, y: Optional[np.ndarray]) -> "StudentizedResiduals":
+         """
+         Fit the model to the input data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         y : array-like of shape (n_samples,)
+             Target data
+         """
+         # Validate the input data
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         # Preprocess the data
+         if self.preprocessing_:
+             X = self.preprocessing_.transform(X)
+
+         # Calculate y residuals
+         y_residuals = y - self.model_.predict(X)
+         y_residuals = (
+             y_residuals.reshape(-1, 1) if len(y_residuals.shape) == 1 else y_residuals
+         )
+
+         # Calculate the studentized residuals
+         studentized_residuals = calculate_studentized_residuals(
+             self.model_, X, y_residuals
+         )
+
+         # Calculate the critical threshold
+         self.critical_value_ = self._calculate_critical_value(studentized_residuals)
+
+         return self
+
+     def predict(self, X: np.ndarray, y: Optional[np.ndarray]) -> np.ndarray:
+         """Calculate the studentized residuals of the model predictions and
+         flag outliers.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         y : array-like of shape (n_samples,)
+             Target data
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Array with -1 for outliers and 1 for inliers
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         X = validate_data(
+             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+         )
+
+         # Preprocess the data
+         if self.preprocessing_:
+             X = self.preprocessing_.transform(X)
+
+         # Calculate y residuals
+         y_residuals = y - self.model_.predict(X)
+         y_residuals = (
+             y_residuals.reshape(-1, 1) if len(y_residuals.shape) == 1 else y_residuals
+         )
+
+         # Calculate the studentized residuals
+         studentized_residuals = calculate_studentized_residuals(
+             self.model_, X, y_residuals
+         )
+         return np.where(studentized_residuals > self.critical_value_, -1, 1)
+
+     def predict_residuals(
+         self, X: np.ndarray, y: Optional[np.ndarray], validate: bool = True
+     ) -> np.ndarray:
+         """Calculate the studentized residuals of the model predictions.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Input data
+
+         y : array-like of shape (n_samples,)
+             Target values
+
+         Returns
+         -------
+         ndarray of shape (n_samples,)
+             Studentized residuals of the model predictions
+         """
+         # Check the estimator has been fitted
+         check_is_fitted(self, ["critical_value_"])
+
+         # Validate the input data
+         if validate:
+             X = validate_data(self, X, ensure_2d=True, dtype=np.float64)
+
+         # Apply preprocessing if available
+         if self.preprocessing_:
+             X = self.preprocessing_.transform(X)
+
+         # Calculate y residuals
+         y_residuals = y - self.model_.predict(X)
+         y_residuals = (
+             y_residuals.reshape(-1, 1) if len(y_residuals.shape) == 1 else y_residuals
+         )
+
+         return calculate_studentized_residuals(self.model_, X, y_residuals)
+
+     def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
+         """Calculate the critical value for outlier detection.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples,)
+             Studentized residuals
+
+         Returns
+         -------
+         float
+             The calculated critical value for outlier detection
+         """
+         return np.percentile(X, self.confidence * 100) if X is not None else 0.0
+
+
+ def calculate_studentized_residuals(
+     model: ModelTypes, X: np.ndarray, y_residuals: np.ndarray
+ ) -> np.ndarray:
+     """Calculate the studentized residuals of the model predictions.
+
+     Parameters
+     ----------
+     model : ModelTypes
+         A fitted model
+
+     X : array-like of shape (n_samples, n_features)
+         Preprocessed input data
+
+     y_residuals : array-like of shape (n_samples, n_targets)
+         Residuals of the model predictions
+
+     Returns
+     -------
+     ndarray of shape (n_samples,)
+         Studentized residuals of the model predictions
+     """
+     # Calculate the leverage of the samples
+     leverage = calculate_leverage(model, X)
+
+     # Calculate the standard deviation of the residuals
+     std = np.sqrt(np.sum(y_residuals**2, axis=0) / (X.shape[0] - model.n_components))
+
+     return (y_residuals / (std * np.sqrt(1 - leverage.reshape(-1, 1)))).flatten()
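A usage sketch, assuming scikit-learn's `PLSRegression` (a `_PLS` subclass) and synthetic regression data:

```python
import numpy as np
from sklearn.cross_decomposition import PLSRegression
from chemotools.outliers import StudentizedResiduals

rng = np.random.default_rng(5)
X = rng.random((80, 15))
y = rng.random(80)

pls = PLSRegression(n_components=3).fit(X, y)
sr = StudentizedResiduals(pls).fit(X, y)
labels = sr.predict(X, y)  # -1 flags predictions with unusually large residuals
```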
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: chemotools
- Version: 0.1.9
+ Version: 0.1.10
  Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
  License: MIT
  Author: Pau Cabaneros
@@ -1,8 +1,9 @@
  chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- chemotools/augmentation/__init__.py,sha256=iRltqvskLJAhwxxlTtGPWJfR5XiwvQfqbv4QdUMF9BU,318
+ chemotools/augmentation/__init__.py,sha256=ohlRHgRWTkvNpO3RikKtowzboqunQqx0WqtNccuWOHw,397
  chemotools/augmentation/_add_noise.py,sha256=fkTJfIYtZXezcjy6Vz8asIhpBoVp4oaIifppK9vZpM8,4362
  chemotools/augmentation/_baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
  chemotools/augmentation/_fractional_shift.py,sha256=dJ0Vuc-U02HhjKkOwc48qnOksZYgbHwL2ko7tWCZTQU,6916
+ chemotools/augmentation/_gaussian_broadening.py,sha256=dJsPlTKqpecKaCDU3vOvedIb-t_HyCkQprxNv0DmYZQ,4236
  chemotools/augmentation/_index_shift.py,sha256=BTtadweDvvMtiF8t7ldwsE6Kl6FmKLCkVJjSzSWyIDs,6904
  chemotools/augmentation/_spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
  chemotools/baseline/__init__.py,sha256=VzoblGg8Hx_FkTc_n7a-ZjGvtKP8JE_NwJKWenGFQkM,584
@@ -29,6 +30,14 @@ chemotools/derivative/_savitzky_golay.py,sha256=CuCrKoLmrB1YmJ4ihIykgkL3tO3frqkS
  chemotools/feature_selection/__init__.py,sha256=1_i28hIxijjwhMypTy1w2fLbzXXVkKD5IYzzY8ZSuHw,117
  chemotools/feature_selection/_index_selector.py,sha256=lNTP2b7P3doWl30KiAr3Xd2HOMxeUmj24MuqoXl4Voc,3556
  chemotools/feature_selection/_range_cut.py,sha256=lVVVC30ZsK2z9jsDGb_z6l8Ty2I89yM05_dIDbMP73Q,3564
+ chemotools/outliers/__init__.py,sha256=wpdlyqU34n1Pb9kGCM4idhcok35WAakxEhzP0xeKaZw,272
+ chemotools/outliers/_base.py,sha256=zx9z_Snkvq5YWBoRi_-kRr3a-Q7jTz1zVlrGWycUTb4,5232
+ chemotools/outliers/_utils.py,sha256=SDrrDvgEVQyPuKdh0Rw0DD4a8LShbNAQLRwSLICtiYU,2720
+ chemotools/outliers/dmodx.py,sha256=R9LaQpUJeDv4GJ0hroKOlFcFbsfQRtrHWD_EI3-TX7Y,4521
+ chemotools/outliers/hotelling_t2.py,sha256=Ga1qmlurF_fps9igaTUGOrnUOctIJEYqoCdb468KhY4,5006
+ chemotools/outliers/leverage.py,sha256=zgxG2F7ZCf5wRVJeezHSJ2gaUDTP2CvK38Rr-hR6niA,4215
+ chemotools/outliers/q_residuals.py,sha256=6_h73A1YxHBcQtjAXOAp1Rb7egHJwj0DQ0MKdnj6aBQ,7647
+ chemotools/outliers/studentized_residuals.py,sha256=rF0wObKQV5DCa8THkZcuwdu7u4mBk-dbOHth5tj5cqM,5830
  chemotools/scale/__init__.py,sha256=eztqcHg-TKE1Rr0N9ArfytHk8teuqVfi4SZi2DS96vc,175
  chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZHRLqgKQ,3011
  chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
@@ -44,7 +53,7 @@ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYN
  chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
  chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
  chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- chemotools-0.1.9.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
- chemotools-0.1.9.dist-info/METADATA,sha256=9sih25qSOTJX36gib96hxm-e86TorD040r_WNFzbc9U,5239
- chemotools-0.1.9.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
- chemotools-0.1.9.dist-info/RECORD,,
+ chemotools-0.1.10.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
+ chemotools-0.1.10.dist-info/METADATA,sha256=fRgOO8cS2JNtNWz_CEG0uKvncSHEJ8myfhm2IOz3y-4,5240
+ chemotools-0.1.10.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+ chemotools-0.1.10.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 2.0.1
+ Generator: poetry-core 2.1.1
  Root-Is-Purelib: true
  Tag: py3-none-any