chemotools 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  from ._index_selector import IndexSelector
2
2
  from ._range_cut import RangeCut
3
+ from ._sr_selector import SRSelector
4
+ from ._vip_selector import VIPSelector
3
5
 
4
- __all__ = ["IndexSelector", "RangeCut"]
6
+ __all__ = ["IndexSelector", "RangeCut", "SRSelector", "VIPSelector"]
@@ -0,0 +1,88 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Union
3
+
4
+ import numpy as np
5
+
6
+ from sklearn.base import BaseEstimator
7
+ from sklearn.cross_decomposition._pls import _PLS
8
+ from sklearn.feature_selection._base import SelectorMixin
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.utils.validation import check_is_fitted
11
+
12
+ ModelTypes = Union[_PLS, Pipeline]
13
+
14
+
15
class _PLSFeatureSelectorBase(ABC, BaseEstimator, SelectorMixin):
    """Feature selection base class for _PLS-like models.

    Subclasses implement ``_calculate_features`` and are responsible for any
    selection parameters (for example a threshold); this base class only
    validates the model and keeps the extracted estimator.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        A fitted _PLS model or Pipeline ending with such a model

    Attributes
    ----------
    estimator_ : _PLS
        The fitted _PLS model returned by ``_validate_and_extract_model``
        (the model itself, or the last step when a Pipeline is given)
    """

    def __init__(
        self,
        model: Union[_PLS, Pipeline],
    ) -> None:
        # Validation happens at construction time, so an invalid or unfitted
        # model raises here rather than on the first call to fit.
        self.estimator_ = _validate_and_extract_model(model)

    @abstractmethod
    def _calculate_features(self, X: np.ndarray) -> np.ndarray:
        """Calculate the feature scores used for the selection.

        Parameters
        ----------
        X : np.ndarray
            The input data handed over by the subclass

        Returns
        -------
        np.ndarray
            The calculated feature scores
        """
55
+
56
+
57
def _validate_and_extract_model(
    model: Union[_PLS, Pipeline],
) -> _PLS:
    """Validate the model and return the underlying _PLS estimator.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        A fitted _PLS model or Pipeline ending with such a model

    Returns
    -------
    _PLS
        The extracted estimator (the last step when a Pipeline is given)

    Raises
    ------
    TypeError
        If the model is not of type _PLS or a Pipeline ending with a _PLS model

    NotFittedError
        Raised by ``check_is_fitted`` if the extracted estimator is not fitted
    """
    estimator = model[-1] if isinstance(model, Pipeline) else model

    if not isinstance(estimator, _PLS):
        # Only _PLS models are accepted here, so the message must not
        # advertise PCA-like models as valid input.
        raise TypeError(
            "Model not a valid model. Must be of base type _PLS or a Pipeline ending with a _PLS model."
        )

    # Check the estimator that is actually returned and used, not the
    # (possibly wrapping) Pipeline object.
    check_is_fitted(estimator)
    return estimator
@@ -0,0 +1,137 @@
1
+ import numpy as np
2
+ from sklearn.utils.validation import validate_data
3
+
4
+ from ._base import _PLSFeatureSelectorBase
5
+
6
+
7
class SRSelector(_PLSFeatureSelectorBase):
    """
    This selector is used to select features that contribute significantly
    to the latent variables in a PLS regression model using the Selectivity
    Ratio (SR) method.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        The PLS regression model or a pipeline with a PLS regression model as last step.

    threshold : float, default=1.0
        The threshold for feature selection. Features with importance
        above this threshold will be selected.

    Attributes
    ----------
    estimator_ : _PLS
        The fitted _PLS model extracted from ``model``

    feature_scores_ : np.ndarray
        The calculated selectivity ratios, set by ``fit``.

    support_mask_ : np.ndarray
        The boolean mask indicating which features are selected, set by ``fit``.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data. It calculates the feature scores and the support mask.

    Notes
    -----
    ``fit`` uses ``X`` exactly as given together with the extracted estimator;
    when ``model`` is a Pipeline, the steps before the last one are not
    applied to ``X`` by this class.
    """

    def __init__(
        self,
        model,
        threshold: float = 1.0,
    ):
        self.model = model
        self.threshold = threshold
        # The base class validates the model and stores it as ``estimator_``
        super().__init__(self.model)

    def fit(self, X: np.ndarray, y=None) -> "SRSelector":
        """
        Fit the transformer to calculate the feature scores and the support mask.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : SRSelector
            The fitted transformer.
        """
        # Check that X is a 2D array and convert it to float64
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Calculate the SR scores
        self.feature_scores_ = self._calculate_features(X)

        # Calculate the support mask
        self.support_mask_ = self._get_support_mask()

        return self

    def _get_support_mask(self) -> np.ndarray:
        """
        Get the support mask based on the feature scores and threshold.
        Features with scores above the threshold are selected.

        Returns
        -------
        support_mask_ : np.ndarray
            The boolean mask indicating which features are selected.
        """
        return self.feature_scores_ > self.threshold

    def _calculate_features(self, X: np.ndarray) -> np.ndarray:
        """
        Vectorized Selectivity Ratio calculation from a fitted _PLS
        like model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input training data to calculate the feature scores from.

        Returns
        -------
        feature_scores_ : np.ndarray
            The ratio of explained to residual sum of squares for each column of X.
        """
        bpls = self.estimator_.coef_
        bpls_norm = bpls.T / np.linalg.norm(bpls)

        # Handle 1D case correctly
        if bpls.ndim == 1:
            bpls_norm = bpls_norm.reshape(-1, 1)

        # Project X onto the regression vector
        ttp = X @ bpls_norm
        ptp = X.T @ np.linalg.pinv(ttp).T

        # Predicted part of X
        X_hat = ttp @ ptp.T

        # Compute squared norms directly
        total_ss = np.linalg.norm(X, axis=0) ** 2
        explained_ss = np.linalg.norm(X_hat, axis=0) ** 2

        # Calculate residual sum of squares. Round-off can push the difference
        # marginally below zero for columns that are (almost) fully explained;
        # without clipping, the epsilon below would not prevent a negative or
        # infinite ratio.
        residual_ss = np.maximum(total_ss - explained_ss, 0.0)

        # Stability: avoid division by zero
        epsilon = 1e-12

        # Calculate Selectivity Ratio
        return explained_ss / (residual_ss + epsilon)
@@ -0,0 +1,129 @@
1
+ import numpy as np
2
+ from sklearn.utils.validation import validate_data
3
+
4
+ from ._base import _PLSFeatureSelectorBase
5
+
6
+
7
class VIPSelector(_PLSFeatureSelectorBase):
    """
    Feature selector based on the Variables Importance in Projection (VIP)
    scores of a PLS regression model.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        The PLS regression model or a pipeline with a PLS regression model as last step.

    threshold : float, default=1.0
        The threshold for feature selection. Features with importance
        above this threshold will be selected.

    Attributes
    ----------
    estimator_ : _PLS
        The fitted _PLS model extracted from ``model``

    feature_scores_ : np.ndarray
        The calculated VIP scores, set by ``fit``.

    support_mask_ : np.ndarray
        The boolean mask indicating which features are selected, set by ``fit``.

    Methods
    -------
    fit(X, y=None)
        Fit the transformer to the input data. It calculates the feature scores and the support mask.
    """

    def __init__(self, model, threshold: float = 1.0):
        self.model = model
        self.threshold = threshold
        # The base class validates the model and stores it as ``estimator_``
        super().__init__(self.model)

    def fit(self, X: np.ndarray, y=None) -> "VIPSelector":
        """
        Compute the VIP scores and the resulting support mask.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored.

        Returns
        -------
        self : VIPSelector
            The fitted transformer.
        """
        # Validate X as a 2D float64 array and record the input metadata
        X = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            reset=True,
            dtype=np.float64,
        )

        # The scores must be stored first: the mask is derived from them
        self.feature_scores_ = self._calculate_features(X)
        self.support_mask_ = self._get_support_mask()
        return self

    def _get_support_mask(self) -> np.ndarray:
        """
        Build the boolean selection mask from the stored scores.

        Returns
        -------
        support_mask_ : np.ndarray
            True where the feature score is strictly above the threshold.
        """
        return self.feature_scores_ > self.threshold

    def _calculate_features(self, X: np.ndarray) -> np.ndarray:
        """
        Calculate the VIP scores based on the fitted model.

        Parameters
        ----------
        X : np.ndarray
            Not used by the calculation; the scores depend only on the
            attributes of the fitted estimator.

        Returns
        -------
        feature_scores_ : np.ndarray
            The calculated VIP scores.
        """
        pls = self.estimator_

        # Column-wise sums of squares of the y loadings and the x scores
        ss_y_loadings = np.linalg.norm(pls.y_loadings_, ord=2, axis=0) ** 2
        ss_x_scores = np.linalg.norm(pls.x_scores_, ord=2, axis=0) ** 2

        # Combined sum of squares
        component_ss = ss_y_loadings * ss_x_scores

        # Squared x weights scaled by the combined sum of squares, summed per row
        weighted = component_ss * pls.x_weights_**2
        numerator = pls.n_features_in_ * np.sum(weighted, axis=1)

        # Total of the combined sum of squares
        denominator = np.sum(component_ss, axis=0)

        return np.sqrt(numerator / denominator)
@@ -1,5 +1,5 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Union, Optional
2
+ from typing import Optional, Tuple, Union
3
3
 
4
4
  import numpy as np
5
5
 
@@ -9,7 +9,6 @@ from sklearn.cross_decomposition._pls import _PLS
9
9
  from sklearn.pipeline import Pipeline
10
10
  from sklearn.utils.validation import check_is_fitted
11
11
 
12
- from ._utils import validate_confidence, validate_and_extract_model
13
12
 
14
13
  ModelTypes = Union[_BasePCA, _PLS]
15
14
 
@@ -29,10 +28,10 @@ class _ModelResidualsBase(ABC, BaseEstimator, OutlierMixin):
29
28
 
30
29
  Attributes
31
30
  ----------
32
- model_ : ModelTypes
31
+ estimator_ : ModelTypes
33
32
  The fitted model of type _BasePCA or _PLS
34
33
 
35
- preprocessing_ : Optional[Pipeline]
34
+ transformer_ : Optional[Pipeline]
36
35
  Preprocessing steps before the model
37
36
 
38
37
  n_features_in_ : int
@@ -54,13 +53,13 @@ class _ModelResidualsBase(ABC, BaseEstimator, OutlierMixin):
54
53
  confidence: float,
55
54
  ) -> None:
56
55
  (
57
- self.model_,
58
- self.preprocessing_,
56
+ self.estimator_,
57
+ self.transformer_,
59
58
  self.n_features_in_,
60
59
  self.n_components_,
61
60
  self.n_samples_,
62
- ) = validate_and_extract_model(model)
63
- self.confidence = validate_confidence(confidence)
61
+ ) = _validate_and_extract_model(model)
62
+ self.confidence = _validate_confidence(confidence)
64
63
 
65
64
  def fit_predict_residuals(
66
65
  self, X: np.ndarray, y: Optional[np.ndarray] = None
@@ -96,7 +95,7 @@ class _ModelResidualsBase(ABC, BaseEstimator, OutlierMixin):
96
95
  """
97
96
 
98
97
  @abstractmethod
99
- def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
98
+ def _calculate_critical_value(self, X: np.ndarray) -> float:
100
99
  """Calculate the critical value for outlier detection.
101
100
 
102
101
  Returns
@@ -106,75 +105,84 @@ class _ModelResidualsBase(ABC, BaseEstimator, OutlierMixin):
106
105
  """
107
106
 
108
107
 
109
- class _ModelDiagnosticsBase(ABC):
110
- """Base class for model diagnostics methods. This does not implement outlier detection algorithms,
111
- but rather implements methods that are used to assess trained models.
108
+ def _get_model_parameters(model: ModelTypes) -> Tuple[int, int, int]:
109
+ """
110
+ Get the number of features, components and samples from a model with PLS or PCA. types.
112
111
 
113
112
  Parameters
114
113
  ----------
115
- model : Union[ModelTypes, Pipeline]
116
- A fitted PCA/PLS model or Pipeline ending with such a model
117
-
118
- Attributes
119
- ----------
120
- model_ : ModelTypes
121
- The fitted model of type _BasePCA or _PLS
122
-
123
- preprocessing_ : Optional[Pipeline]
124
- Preprocessing steps before the model
114
+ model : ModelType
115
+ A fitted model of type _BasePCA or _PLS
125
116
 
117
+ Returns
118
+ -------
119
+ Tuple[int, int, int]
120
+ The number of features, components and samples in the model
126
121
  """
122
+ if isinstance(model, _BasePCA):
123
+ return model.n_features_in_, model.n_components_, model.n_samples_
124
+ elif isinstance(model, _PLS):
125
+ return model.n_features_in_, model.n_components, len(model.x_scores_)
126
+ else:
127
+ raise ValueError(
128
+ "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
129
+ )
127
130
 
128
- def __init__(self, model: Union[ModelTypes, Pipeline]):
129
- self.model_, self.preprocessing_ = self._validate_and_extract_model(model)
130
131
 
131
- def _validate_and_extract_model(self, model):
132
- """Validate and extract the model and preprocessing steps.
132
+ def _validate_confidence(confidence: float) -> float:
133
+ """Validate parameters using sklearn conventions.
133
134
 
134
- Parameters
135
- ----------
136
- model : Union[ModelTypes, Pipeline]
137
- A fitted PCA/PLS model or Pipeline ending with such a model
135
+ Parameters
136
+ ----------
137
+ confidence : float
138
+ Confidence level for statistical calculations (between 0 and 1)
138
139
 
139
- Returns
140
- -------
141
- Tuple[ModelTypes, Optional[Pipeline]]
142
- The extracted model and preprocessing steps
140
+ Returns
141
+ -------
142
+ float
143
+ The validated confidence level
143
144
 
144
- Raises
145
- ------
146
- ValueError
147
- If the model is not of type _BasePCA or _PLS or a Pipeline ending with one of these types or if the model is not fitted
148
- """
149
- if isinstance(model, Pipeline):
150
- preprocessing = model[:-1]
151
- model = model[-1]
152
- else:
153
- preprocessing = None
154
-
155
- if isinstance(model, (_BasePCA, _PLS)):
156
- check_is_fitted(model)
157
- else:
158
- raise ValueError(
159
- "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
160
- )
161
- check_is_fitted(model)
162
- return model, preprocessing
145
+ Raises
146
+ ------
147
+ ValueError
148
+ If confidence is not between 0 and 1
149
+ """
150
+ if not 0 < confidence < 1:
151
+ raise ValueError("Confidence must be between 0 and 1")
152
+ return confidence
163
153
 
164
- @abstractmethod
165
- def predict(self, X: np.ndarray, y: Optional[np.ndarray]) -> np.ndarray:
166
- """Predict the output of the model.
167
154
 
168
- Parameters
169
- ----------
170
- X : array-like of shape (n_samples, n_features)
171
- Input data
155
+ def _validate_and_extract_model(
156
+ model: Union[ModelTypes, Pipeline],
157
+ ) -> Tuple[ModelTypes, Optional[Pipeline], int, int, int]:
158
+ """Validate and extract the model and preprocessing steps.
172
159
 
173
- y : array-like of shape (n_samples,), default=None
174
- Target values
160
+ Parameters
161
+ ----------
162
+ model : Union[ModelTypes, Pipeline]
163
+ A fitted PCA/PLS model or Pipeline ending with such a model
175
164
 
176
- Returns
177
- -------
178
- ndarray of shape (n_samples,)
179
- Predicted values
180
- """
165
+ Returns
166
+ -------
167
+ Tuple[ModelTypes, Optional[Pipeline]]
168
+ The extracted model and preprocessing steps
169
+
170
+ Raises
171
+ ------
172
+ ValueError
173
+ If the model is not of type _BasePCA or _PLS or a Pipeline ending with one of these types or if the model is not fitted
174
+ """
175
+ if isinstance(model, Pipeline):
176
+ preprocessing = model[:-1]
177
+ model = model[-1]
178
+ else:
179
+ preprocessing = None
180
+
181
+ if not isinstance(model, (_BasePCA, _PLS)):
182
+ raise ValueError(
183
+ "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
184
+ )
185
+
186
+ check_is_fitted(model)
187
+ n_features_in, n_components, n_samples = _get_model_parameters(model)
188
+ return model, preprocessing, n_features_in, n_components, n_samples
@@ -7,6 +7,7 @@ from scipy.stats import f as f_distribution
7
7
 
8
8
 
9
9
  from ._base import _ModelResidualsBase, ModelTypes
10
+ from .utils import calculate_residual_spectrum
10
11
 
11
12
 
12
13
  class DModX(_ModelResidualsBase):
@@ -25,10 +26,10 @@ class DModX(_ModelResidualsBase):
25
26
 
26
27
  Attributes
27
28
  ----------
28
- model_ : ModelType
29
+ estimator_ : ModelType
29
30
  The fitted model of type _BasePCA or _PLS
30
31
 
31
- preprocessing_ : Optional[Pipeline]
32
+ transformer_ : Optional[Pipeline]
32
33
  Preprocessing steps before the model
33
34
 
34
35
  n_features_in_ : int
@@ -42,6 +43,9 @@ class DModX(_ModelResidualsBase):
42
43
 
43
44
  critical_value_ : float
44
45
  The calculated critical value for outlier detection
46
+
47
+ train_spe_: float
48
+ The training sum of squared errors (SSE) for the model normalized by degrees of freedom
45
49
  """
46
50
 
47
51
  def __init__(
@@ -49,6 +53,7 @@ class DModX(_ModelResidualsBase):
49
53
  model: Union[ModelTypes, Pipeline],
50
54
  confidence: float = 0.95,
51
55
  ) -> None:
56
+ model, confidence = model, confidence
52
57
  super().__init__(model, confidence)
53
58
 
54
59
  def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "DModX":
@@ -62,7 +67,18 @@ class DModX(_ModelResidualsBase):
62
67
  self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
63
68
  )
64
69
 
70
+ # Calculate the critical value
65
71
  self.critical_value_ = self._calculate_critical_value()
72
+
73
+ # Calculate the degrees of freedom normalized SPE of the training set
74
+ residuals = calculate_residual_spectrum(X, self.estimator_)
75
+ squared_errors = np.sum((residuals) ** 2, axis=1)
76
+ self.train_spe_ = np.sqrt(
77
+ squared_errors
78
+ / (self.n_samples_ - self.n_components_ - 1)
79
+ * (self.n_features_in_ - self.n_components_)
80
+ )
81
+
66
82
  return self
67
83
 
68
84
  def predict(self, X: np.ndarray) -> np.ndarray:
@@ -118,15 +134,17 @@ class DModX(_ModelResidualsBase):
118
134
  )
119
135
 
120
136
  # Apply preprocessing if available
121
- if self.preprocessing_:
122
- X = self.preprocessing_.transform(X)
137
+ if self.transformer_:
138
+ X = self.transformer_.transform(X)
123
139
 
124
140
  # Calculate the DModX statistics
125
- X_transformed = self.model_.transform(X)
126
- X_reconstructed = self.model_.inverse_transform(X_transformed)
127
- squared_errors = np.sum((X - X_reconstructed) ** 2, axis=1)
141
+ residual = calculate_residual_spectrum(X, self.estimator_)
142
+ squared_errors = np.sum((residual) ** 2, axis=1)
128
143
 
129
- return np.sqrt(squared_errors / (self.n_features_in_ - self.n_components_))
144
+ return (
145
+ np.sqrt(squared_errors / (self.n_features_in_ - self.n_components_))
146
+ / self.train_spe_
147
+ )
130
148
 
131
149
  def _calculate_critical_value(self, X: Optional[np.ndarray] = None) -> float:
132
150
  """Calculate F-distribution based critical value.
@@ -24,10 +24,10 @@ class HotellingT2(_ModelResidualsBase):
24
24
 
25
25
  Attributes
26
26
  ----------
27
- model_ : ModelType
27
+ estimator_ : ModelType
28
28
  The fitted model of type _BasePCA or _PLS
29
29
 
30
- preprocessing_ : Optional[Pipeline]
30
+ transformer_ : Optional[Pipeline]
31
31
  Preprocessing steps before the model
32
32
 
33
33
  n_features_in_ : int
@@ -51,6 +51,7 @@ class HotellingT2(_ModelResidualsBase):
51
51
  def __init__(
52
52
  self, model: Union[ModelTypes, Pipeline], confidence: float = 0.95
53
53
  ) -> None:
54
+ self.model, self.confidence = model, confidence
54
55
  super().__init__(model, confidence)
55
56
 
56
57
  def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "HotellingT2":
@@ -93,7 +94,7 @@ class HotellingT2(_ModelResidualsBase):
93
94
  return np.where(hotelling_t2_values > self.critical_value_, -1, 1)
94
95
 
95
96
  def predict_residuals(
96
- self, X: np.ndarray, y: Optional[np.ndarray], validate: bool = True
97
+ self, X: np.ndarray, y: Optional[np.ndarray] = None, validate: bool = True
97
98
  ) -> np.ndarray:
98
99
  """Calculate Hotelling's T-squared statistics for input data.
99
100
 
@@ -117,20 +118,20 @@ class HotellingT2(_ModelResidualsBase):
117
118
  )
118
119
 
119
120
  # Apply preprocessing steps
120
- if self.preprocessing_:
121
- X = self.preprocessing_.transform(X)
121
+ if self.transformer_:
122
+ X = self.transformer_.transform(X)
122
123
 
123
124
  # Calculate the Hotelling's T-squared statistics
124
- if isinstance(self.model_, _BasePCA):
125
+ if isinstance(self.estimator_, _BasePCA):
125
126
  # For PCA-like models
126
- variances = self.model_.explained_variance_
127
+ variances = self.estimator_.explained_variance_
127
128
 
128
- if isinstance(self.model_, _PLS):
129
+ if isinstance(self.estimator_, _PLS):
129
130
  # For PLS-like models
130
- variances = np.var(self.model_.x_scores_, axis=0)
131
+ variances = np.var(self.estimator_.x_scores_, axis=0)
131
132
 
132
133
  # Equivalent to X @ model.components_.T for _BasePCA and X @ model.x_rotations_ for _PLS
133
- X_transformed = self.model_.transform(X)
134
+ X_transformed = self.estimator_.transform(X)
134
135
 
135
136
  return np.sum((X_transformed**2) / variances, axis=1)
136
137
 
@@ -20,10 +20,10 @@ class Leverage(_ModelResidualsBase):
20
20
 
21
21
  Attributes
22
22
  ----------
23
- model_ : ModelType
23
+ estimator_ : ModelType
24
24
  The fitted model of type _BasePCA or _PLS
25
25
 
26
- preprocessing_ : Optional[Pipeline]
26
+ transformer_ : Optional[Pipeline]
27
27
  Preprocessing steps before the model
28
28
 
29
29
  References
@@ -34,6 +34,7 @@ class Leverage(_ModelResidualsBase):
34
34
  def __init__(
35
35
  self, model: Union[ModelTypes, Pipeline], confidence: float = 0.95
36
36
  ) -> None:
37
+ model, confidence = model, confidence
37
38
  super().__init__(model, confidence)
38
39
 
39
40
  def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "Leverage":
@@ -47,8 +48,8 @@ class Leverage(_ModelResidualsBase):
47
48
  self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
48
49
  )
49
50
 
50
- if self.preprocessing_:
51
- X = self.preprocessing_.fit_transform(X)
51
+ if self.transformer_:
52
+ X = self.transformer_.fit_transform(X)
52
53
 
53
54
  # Compute the critical threshold
54
55
  self.critical_value_ = self._calculate_critical_value(X)
@@ -77,15 +78,15 @@ class Leverage(_ModelResidualsBase):
77
78
  )
78
79
 
79
80
  # Preprocess the data
80
- if self.preprocessing_:
81
- X = self.preprocessing_.transform(X)
81
+ if self.transformer_:
82
+ X = self.transformer_.transform(X)
82
83
 
83
84
  # Calculate outliers based on samples with too high leverage
84
- leverage = calculate_leverage(self.model_, X)
85
+ leverage = calculate_leverage(X, self.estimator_)
85
86
  return np.where(leverage > self.critical_value_, -1, 1)
86
87
 
87
88
  def predict_residuals(
88
- self, X: np.ndarray, y: Optional[np.ndarray], validate: bool = True
89
+ self, X: np.ndarray, y: Optional[np.ndarray] = None, validate: bool = True
89
90
  ) -> np.ndarray:
90
91
  """Calculate the leverage of the samples.
91
92
 
@@ -107,23 +108,23 @@ class Leverage(_ModelResidualsBase):
107
108
  X = validate_data(self, X, ensure_2d=True, dtype=np.float64)
108
109
 
109
110
  # Apply preprocessing if available
110
- if self.preprocessing_:
111
- X = self.preprocessing_.transform(X)
111
+ if self.transformer_:
112
+ X = self.transformer_.transform(X)
112
113
 
113
114
  # Calculate the leverage
114
- return calculate_leverage(self.model_, X)
115
+ return calculate_leverage(X, self.estimator_)
115
116
 
116
- def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
117
+ def _calculate_critical_value(self, X: np.ndarray) -> float:
117
118
  """Calculate the critical value for outlier detection using the percentile outlier method."""
118
119
 
119
120
  # Calculate the leverage of the samples
120
- leverage = calculate_leverage(self.model_, X)
121
+ leverage = calculate_leverage(X, self.estimator_)
121
122
 
122
123
  # Calculate the critical value
123
124
  return np.percentile(leverage, self.confidence * 100)
124
125
 
125
126
 
126
- def calculate_leverage(model: ModelTypes, X: Optional[np.ndarray]) -> np.ndarray:
127
+ def calculate_leverage(X: np.ndarray, model: ModelTypes) -> np.ndarray:
127
128
  """
128
129
  Calculate the leverage of the training samples in a PLS/PCA-like model.
129
130
 
@@ -7,6 +7,7 @@ from sklearn.pipeline import Pipeline
7
7
  from sklearn.utils.validation import validate_data, check_is_fitted
8
8
 
9
9
  from ._base import _ModelResidualsBase, ModelTypes
10
+ from .utils import calculate_residual_spectrum
10
11
 
11
12
 
12
13
  class QResiduals(_ModelResidualsBase):
@@ -21,7 +22,7 @@ class QResiduals(_ModelResidualsBase):
21
22
  confidence : float, default=0.95
22
23
  Confidence level for statistical calculations (between 0 and 1).
23
24
 
24
- method : str, default="chi-square"
25
+ method : str, default="jackson-mudholkar"
25
26
  The method used to compute the confidence threshold for Q residuals.
26
27
  Options:
27
28
  - "chi-square" : Uses mean and standard deviation to approximate Q residuals threshold.
@@ -30,10 +31,10 @@ class QResiduals(_ModelResidualsBase):
30
31
 
31
32
  Attributes
32
33
  ----------
33
- model_ : ModelType
34
+ estimator_ : ModelType
34
35
  The fitted model of type _BasePCA or _PLS.
35
36
 
36
- preprocessing_ : Optional[Pipeline]
37
+ transformer_ : Optional[Pipeline]
37
38
  Preprocessing steps before the model.
38
39
 
39
40
  n_features_in_ : int
@@ -58,9 +59,11 @@ class QResiduals(_ModelResidualsBase):
58
59
  self,
59
60
  model: Union[ModelTypes, Pipeline],
60
61
  confidence: float = 0.95,
61
- method: Literal["chi-square", "jackson-mudholkar", "percentile"] = "percentile",
62
+ method: Literal[
63
+ "chi-square", "jackson-mudholkar", "percentile"
64
+ ] = "jackson-mudholkar",
62
65
  ) -> None:
63
- self.method = method
66
+ self.model, self.confidence, self.method = model, confidence, method
64
67
  super().__init__(model, confidence)
65
68
 
66
69
  def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "QResiduals":
@@ -79,8 +82,8 @@ class QResiduals(_ModelResidualsBase):
79
82
  """
80
83
  X = validate_data(self, X, ensure_2d=True, dtype=np.float64)
81
84
 
82
- if self.preprocessing_:
83
- X = self.preprocessing_.fit_transform(X)
85
+ if self.transformer_:
86
+ X = self.transformer_.fit_transform(X)
84
87
 
85
88
  # Compute the critical threshold using the chosen method
86
89
  self.critical_value_ = self._calculate_critical_value(X)
@@ -138,19 +141,18 @@ class QResiduals(_ModelResidualsBase):
138
141
  X = validate_data(self, X, ensure_2d=True, dtype=np.float64)
139
142
 
140
143
  # Apply preprocessing if available
141
- if self.preprocessing_:
142
- X = self.preprocessing_.transform(X)
144
+ if self.transformer_:
145
+ X = self.transformer_.transform(X)
143
146
 
144
147
  # Compute reconstruction error (Q residuals)
145
- X_transformed = self.model_.transform(X)
146
- X_reconstructed = self.model_.inverse_transform(X_transformed)
147
- Q_residuals = np.sum((X - X_reconstructed) ** 2, axis=1)
148
+ residual = calculate_residual_spectrum(X, self.estimator_)
149
+ Q_residuals = np.sum(residual**2, axis=1)
148
150
 
149
151
  return Q_residuals
150
152
 
151
153
  def _calculate_critical_value(
152
154
  self,
153
- X: Optional[np.ndarray] = None,
155
+ X: np.ndarray,
154
156
  ) -> float:
155
157
  """Calculate the critical value for outlier detection.
156
158
 
@@ -172,17 +174,18 @@ class QResiduals(_ModelResidualsBase):
172
174
 
173
175
  """
174
176
  # Compute Q residuals for training data
175
- X_transformed = self.model_.transform(X)
176
- X_reconstructed = self.model_.inverse_transform(X_transformed)
177
- residuals = X - X_reconstructed
177
+ residuals = calculate_residual_spectrum(X, self.estimator_)
178
178
 
179
179
  if self.method == "chi-square":
180
180
  return self._chi_square_threshold(residuals)
181
+
181
182
  elif self.method == "jackson-mudholkar":
182
183
  return self._jackson_mudholkar_threshold(residuals)
184
+
183
185
  elif self.method == "percentile":
184
186
  Q_residuals = np.sum((residuals) ** 2, axis=1)
185
187
  return self._percentile_threshold(Q_residuals)
188
+
186
189
  else:
187
190
  raise ValueError(
188
191
  "Invalid method. Choose from 'chi-square', 'jackson-mudholkar', or 'percentile'."
@@ -21,10 +21,10 @@ class StudentizedResiduals(_ModelResidualsBase):
21
21
 
22
22
  Attributes
23
23
  ----------
24
- model_ : ModelType
24
+ estimator_ : ModelType
25
25
  The fitted model of type _BasePCA or _PLS
26
26
 
27
- preprocessing_ : Optional[Pipeline]
27
+ transformer_ : Optional[Pipeline]
28
28
  Preprocessing steps before the model
29
29
 
30
30
  References
@@ -33,6 +33,7 @@ class StudentizedResiduals(_ModelResidualsBase):
33
33
  """
34
34
 
35
35
  def __init__(self, model: Union[_PLS, Pipeline], confidence=0.95) -> None:
36
+ self.model, self.confidence = model, confidence
36
37
  super().__init__(model, confidence)
37
38
 
38
39
  def fit(self, X: np.ndarray, y: Optional[np.ndarray]) -> "StudentizedResiduals":
@@ -53,18 +54,18 @@ class StudentizedResiduals(_ModelResidualsBase):
53
54
  )
54
55
 
55
56
  # Preprocess the data
56
- if self.preprocessing_:
57
- X = self.preprocessing_.transform(X)
57
+ if self.transformer_:
58
+ X = self.transformer_.transform(X)
58
59
 
59
60
  # Calculate y residuals
60
- y_residuals = y - self.model_.predict(X)
61
+ y_residuals = y - self.estimator_.predict(X)
61
62
  y_residuals = (
62
63
  y_residuals.reshape(-1, 1) if len(y_residuals.shape) == 1 else y_residuals
63
64
  )
64
65
 
65
66
  # Calculate the studentized residuals
66
67
  studentized_residuals = calculate_studentized_residuals(
67
- self.model_, X, y_residuals
68
+ self.estimator_, X, y_residuals
68
69
  )
69
70
 
70
71
  # Calculate the critical threshold
@@ -97,18 +98,18 @@ class StudentizedResiduals(_ModelResidualsBase):
97
98
  )
98
99
 
99
100
  # Preprocess the data
100
- if self.preprocessing_:
101
- X = self.preprocessing_.transform(X)
101
+ if self.transformer_:
102
+ X = self.transformer_.transform(X)
102
103
 
103
104
  # Calculate y residuals
104
- y_residuals = y - self.model_.predict(X)
105
+ y_residuals = y - self.estimator_.predict(X)
105
106
  y_residuals = (
106
107
  y_residuals.reshape(-1, 1) if len(y_residuals.shape) == 1 else y_residuals
107
108
  )
108
109
 
109
110
  # Calculate the studentized residuals
110
111
  studentized_residuals = calculate_studentized_residuals(
111
- self.model_, X, y_residuals
112
+ self.estimator_, X, y_residuals
112
113
  )
113
114
  return np.where(studentized_residuals > self.critical_value_, -1, 1)
114
115
 
@@ -138,18 +139,18 @@ class StudentizedResiduals(_ModelResidualsBase):
138
139
  X = validate_data(self, X, ensure_2d=True, dtype=np.float64)
139
140
 
140
141
  # Apply preprocessing if available
141
- if self.preprocessing_:
142
- X = self.preprocessing_.transform(X)
142
+ if self.transformer_:
143
+ X = self.transformer_.transform(X)
143
144
 
144
145
  # Calculate y residuals
145
- y_residuals = y - self.model_.predict(X)
146
+ y_residuals = y - self.estimator_.predict(X)
146
147
  y_residuals = (
147
148
  y_residuals.reshape(-1, 1) if len(y_residuals.shape) == 1 else y_residuals
148
149
  )
149
150
 
150
- return calculate_studentized_residuals(self.model_, X, y_residuals)
151
+ return calculate_studentized_residuals(self.estimator_, X, y_residuals)
151
152
 
152
- def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
153
+ def _calculate_critical_value(self, X: np.ndarray) -> float:
153
154
  """Calculate the critical value for outlier detection.
154
155
 
155
156
  Parameters
@@ -189,7 +190,7 @@ def calculate_studentized_residuals(
189
190
  """
190
191
 
191
192
  # Calculate the leverage of the samples
192
- leverage = calculate_leverage(model, X)
193
+ leverage = calculate_leverage(X, model)
193
194
 
194
195
  # Calculate the standard deviation of the residuals
195
196
  std = np.sqrt(np.sum(y_residuals**2, axis=0) / (X.shape[0] - model.n_components))
@@ -0,0 +1,51 @@
1
+ import numpy as np
2
+
3
+ from ._base import ModelTypes
4
+
5
+
6
+ def calculate_decoded_spectrum(X: np.ndarray, estimator: ModelTypes):
7
+ """
8
+ Decode an already-preprocessed spectrum by projecting it onto the estimator's latent space and mapping it back to the original feature space.
9
+
10
+ Parameters
11
+ ----------
12
+ X : np.ndarray
13
+ The transformed spectrum data.
14
+
15
+ estimator : ModelTypes
16
+ The fitted PCA or PLS model.
17
+
18
+ Returns
19
+ -------
20
+ np.ndarray
21
+ The decoded spectrum.
22
+ """
23
+ # Project the transformed spectrum onto the latent space
24
+ X_transformed = estimator.transform(X)
25
+
26
+ # Decode the spectrum back to the original space
27
+ return estimator.inverse_transform(X_transformed)
28
+
29
+
30
+ def calculate_residual_spectrum(X: np.ndarray, estimator: ModelTypes):
31
+ """
32
+ Calculate the residual spectrum (the input minus its reconstruction) for an already-preprocessed spectrum and estimator.
33
+
34
+ Parameters
35
+ ----------
36
+ X : np.ndarray
37
+ The transformed spectrum data.
38
+
39
+ estimator : ModelTypes
40
+ The fitted PCA or PLS model.
41
+
42
+ Returns
43
+ -------
44
+ np.ndarray
45
+ The residual spectrum.
46
+ """
47
+ # Reconstruct the spectrum from its latent-space representation
48
+ decoded_spectrum = calculate_decoded_spectrum(X, estimator)
49
+
50
+ # Calculate the residual
51
+ return X - decoded_spectrum
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: chemotools
3
- Version: 0.1.10
3
+ Version: 0.1.11
4
4
  Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
5
  License: MIT
6
6
  Author: Pau Cabaneros
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.13
14
14
  Requires-Dist: numpy (>=2.0.0,<3.0.0)
15
15
  Requires-Dist: pandas (>=2.0.0,<3.0.0)
16
16
  Requires-Dist: polars (>=1.17.0,<2.0.0)
17
- Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
17
+ Requires-Dist: pyarrow (>=18,<21)
18
18
  Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
19
19
  Description-Content-Type: text/markdown
20
20
 
@@ -27,17 +27,20 @@ chemotools/datasets/data/train_spectra.csv,sha256=iVF19W52NHlbqq8BbLomn8n47kSPT0
27
27
  chemotools/derivative/__init__.py,sha256=FkckdzO30jrRWPGpIU3cfnaTtxPtNT5Tb2G9F9PmVTw,134
28
28
  chemotools/derivative/_norris_william.py,sha256=rMY_yntpiB5fbSM1tPph4AaGmF1k-HqJp7o48ijePBs,4958
29
29
  chemotools/derivative/_savitzky_golay.py,sha256=CuCrKoLmrB1YmJ4ihIykgkL3tO3frqkStMogtsVhO3A,3632
30
- chemotools/feature_selection/__init__.py,sha256=1_i28hIxijjwhMypTy1w2fLbzXXVkKD5IYzzY8ZSuHw,117
30
+ chemotools/feature_selection/__init__.py,sha256=e_GFVawlDNEQv3EqrGSXUr5cvDN1jckoxe2C2jRwVl8,222
31
+ chemotools/feature_selection/_base.py,sha256=SIH6kl9AePVWTByL0OvJFfc2j3idqs7lm_7Zi1YMp4Y,2311
31
32
  chemotools/feature_selection/_index_selector.py,sha256=lNTP2b7P3doWl30KiAr3Xd2HOMxeUmj24MuqoXl4Voc,3556
32
33
  chemotools/feature_selection/_range_cut.py,sha256=lVVVC30ZsK2z9jsDGb_z6l8Ty2I89yM05_dIDbMP73Q,3564
34
+ chemotools/feature_selection/_sr_selector.py,sha256=OaXkt3t_NvymgDy6R15ig87jhcb-vM7i63LgtsNdfZo,3969
35
+ chemotools/feature_selection/_vip_selector.py,sha256=ZK3bhdpl3nBYt6xmuHq2IvWtpgJ8ZdElH06xnCFA-Xs,3835
33
36
  chemotools/outliers/__init__.py,sha256=wpdlyqU34n1Pb9kGCM4idhcok35WAakxEhzP0xeKaZw,272
34
- chemotools/outliers/_base.py,sha256=zx9z_Snkvq5YWBoRi_-kRr3a-Q7jTz1zVlrGWycUTb4,5232
35
- chemotools/outliers/_utils.py,sha256=SDrrDvgEVQyPuKdh0Rw0DD4a8LShbNAQLRwSLICtiYU,2720
36
- chemotools/outliers/dmodx.py,sha256=R9LaQpUJeDv4GJ0hroKOlFcFbsfQRtrHWD_EI3-TX7Y,4521
37
- chemotools/outliers/hotelling_t2.py,sha256=Ga1qmlurF_fps9igaTUGOrnUOctIJEYqoCdb468KhY4,5006
38
- chemotools/outliers/leverage.py,sha256=zgxG2F7ZCf5wRVJeezHSJ2gaUDTP2CvK38Rr-hR6niA,4215
39
- chemotools/outliers/q_residuals.py,sha256=6_h73A1YxHBcQtjAXOAp1Rb7egHJwj0DQ0MKdnj6aBQ,7647
40
- chemotools/outliers/studentized_residuals.py,sha256=rF0wObKQV5DCa8THkZcuwdu7u4mBk-dbOHth5tj5cqM,5830
37
+ chemotools/outliers/_base.py,sha256=zl0LhRKjpvj5IbYc3su6zEZ7YZ0pDSR3yqNWt2qBjNA,5374
38
+ chemotools/outliers/dmodx.py,sha256=sgizal_BDlqWTZNT8y2D_ImcKAJejXt6vqvFYk4Vqi0,5152
39
+ chemotools/outliers/hotelling_t2.py,sha256=g_IOQD_rhKb3cjIJkn5OTto6bYClQtqXunG_02BSIs8,5087
40
+ chemotools/outliers/leverage.py,sha256=hNQ_x68LPPTDZvSJP_eRqu3GoeV3OBU37VC_XTFEzvw,4250
41
+ chemotools/outliers/q_residuals.py,sha256=sg7u8ockQvSSnXwNM4U-GITB-5OcbsDMX6Oig_TcONM,7598
42
+ chemotools/outliers/studentized_residuals.py,sha256=1L-GiutuO1x9s3UKMOBpmhs2Q-UuDtfG2YLELIxiiao,5890
43
+ chemotools/outliers/utils.py,sha256=SAjvtjl9oWHrQnkqGnDfYE4WWAgiL1RwnKmW-ql5TIc,1304
41
44
  chemotools/scale/__init__.py,sha256=eztqcHg-TKE1Rr0N9ArfytHk8teuqVfi4SZi2DS96vc,175
42
45
  chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZHRLqgKQ,3011
43
46
  chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
@@ -53,7 +56,7 @@ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYN
53
56
  chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
54
57
  chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
55
58
  chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- chemotools-0.1.10.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
57
- chemotools-0.1.10.dist-info/METADATA,sha256=fRgOO8cS2JNtNWz_CEG0uKvncSHEJ8myfhm2IOz3y-4,5240
58
- chemotools-0.1.10.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
59
- chemotools-0.1.10.dist-info/RECORD,,
59
+ chemotools-0.1.11.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
60
+ chemotools-0.1.11.dist-info/METADATA,sha256=Ne8xEa1cZUhbP-I4D1CFVvy8fhJANUjsY5cXRpNVV1k,5232
61
+ chemotools-0.1.11.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
62
+ chemotools-0.1.11.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.1
2
+ Generator: poetry-core 2.1.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,91 +0,0 @@
1
- from typing import Optional, Tuple, Union
2
-
3
- from sklearn.cross_decomposition._pls import _PLS
4
- from sklearn.decomposition._base import _BasePCA
5
- from sklearn.pipeline import Pipeline
6
- from sklearn.utils.validation import check_is_fitted
7
-
8
- ModelTypes = Union[_BasePCA, _PLS]
9
-
10
-
11
- def get_model_parameters(model: ModelTypes) -> Tuple[int, int, int]:
12
- """
13
- Get the number of features, components and samples from a fitted PCA or PLS model.
14
-
15
- Parameters
16
- ----------
17
- model : ModelType
18
- A fitted model of type _BasePCA or _PLS
19
-
20
- Returns
21
- -------
22
- Tuple[int, int, int]
23
- The number of features, components and samples in the model
24
- """
25
- if isinstance(model, _BasePCA):
26
- return model.n_features_in_, model.n_components_, model.n_samples_
27
- elif isinstance(model, _PLS):
28
- return model.n_features_in_, model.n_components, len(model.x_scores_)
29
- else:
30
- raise ValueError(
31
- "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
32
- )
33
-
34
-
35
- def validate_confidence(confidence: float) -> float:
36
- """Validate parameters using sklearn conventions.
37
-
38
- Parameters
39
- ----------
40
- confidence : float
41
- Confidence level for statistical calculations (between 0 and 1)
42
-
43
- Returns
44
- -------
45
- float
46
- The validated confidence level
47
-
48
- Raises
49
- ------
50
- ValueError
51
- If confidence is not between 0 and 1
52
- """
53
- if not 0 < confidence < 1:
54
- raise ValueError("Confidence must be between 0 and 1")
55
- return confidence
56
-
57
-
58
- def validate_and_extract_model(
59
- model: Union[ModelTypes, Pipeline],
60
- ) -> Tuple[ModelTypes, Optional[Pipeline], int, int, int]:
61
- """Validate and extract the model and preprocessing steps.
62
-
63
- Parameters
64
- ----------
65
- model : Union[ModelTypes, Pipeline]
66
- A fitted PCA/PLS model or Pipeline ending with such a model
67
-
68
- Returns
69
- -------
70
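- Tuple[ModelTypes, Optional[Pipeline], int, int, int]
71
- The extracted model, the preprocessing steps (None when the input is not a Pipeline), and the number of features, components and samples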
72
-
73
- Raises
74
- ------
75
- ValueError
76
- If the model is not of type _BasePCA or _PLS or a Pipeline ending with one of these types or if the model is not fitted
77
- """
78
- if isinstance(model, Pipeline):
79
- preprocessing = model[:-1]
80
- model = model[-1]
81
- else:
82
- preprocessing = None
83
-
84
- if not isinstance(model, (_BasePCA, _PLS)):
85
- raise ValueError(
86
- "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
87
- )
88
-
89
- check_is_fitted(model)
90
- n_features_in, n_components, n_samples = get_model_parameters(model)
91
- return model, preprocessing, n_features_in, n_components, n_samples