mlquantify 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mlquantify/__init__.py +0 -29
  2. mlquantify/adjust_counting/__init__.py +14 -0
  3. mlquantify/adjust_counting/_adjustment.py +365 -0
  4. mlquantify/adjust_counting/_base.py +247 -0
  5. mlquantify/adjust_counting/_counting.py +145 -0
  6. mlquantify/adjust_counting/_utils.py +114 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +335 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +161 -0
  13. mlquantify/likelihood/_classes.py +414 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +761 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +153 -0
  22. mlquantify/mixture/_classes.py +400 -0
  23. mlquantify/mixture/_utils.py +112 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +198 -0
  31. mlquantify/neighbors/_classes.py +159 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
  33. mlquantify/neighbors/_kde.py +270 -0
  34. mlquantify/neighbors/_utils.py +135 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +61 -0
  50. {mlquantify-0.1.8.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.9.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -289
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.8.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.8.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.8.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
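
Note on the restructuring: most public classes move from the removed flat modules (mlquantify.methods.*, mlquantify.classification, mlquantify.evaluation) into the new subpackages listed above. As rough orientation only, here are the import paths visible in this diff; the public re-exports depend on the new __init__.py files, which are not shown in full, so the aliases below are an assumption.

    # Paths taken from the file list and the imports in the hunks below;
    # public aliases (e.g. via mlquantify.neighbors) may differ.
    from mlquantify.adjust_counting import CC                      # Classify & Count
    from mlquantify.neighbors._classes import PWK                  # was mlquantify.methods.aggregative.PWK
    from mlquantify.neighbors._classification import PWKCLF        # was mlquantify/classification/methods.py
    from mlquantify.neighbors._kde import KDEyML, KDEyHD, KDEyCS   # new KDE-based quantifiers
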
@@ -0,0 +1,159 @@
+ import numpy as np
+ from mlquantify.utils._constraints import Interval, Options
+ from mlquantify.neighbors._classification import PWKCLF
+ from mlquantify.base_aggregative import AggregationMixin, CrispLearnerQMixin
+ from mlquantify.base import BaseQuantifier
+ from mlquantify.utils._decorators import _fit_context
+ from mlquantify.adjust_counting import CC
+ from mlquantify.utils import validate_y, validate_data
+ from mlquantify.utils._validation import validate_prevalences
+
+
+ class PWK(BaseQuantifier):
+     """
+     Probabilistic Weighted k-Nearest Neighbor (PWK) Quantifier.
+
+     This quantifier leverages the PWKCLF classifier to perform quantification by estimating
+     class prevalences through a probabilistically weighted k-nearest neighbor approach.
+
+     The method internally uses a weighted k-NN classifier where neighbors' contributions
+     are adjusted by class-specific weights designed to correct for class imbalance,
+     controlled by the hyperparameter alpha.
+
+     Parameters
+     ----------
+     alpha : float, default=1
+         Imbalance correction exponent for class weights. Higher values increase
+         the influence of minority classes.
+     n_neighbors : int, default=10
+         Number of nearest neighbors considered.
+     algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
+         Algorithm used to compute nearest neighbors.
+     metric : str, default='euclidean'
+         Distance metric for nearest neighbor search.
+     leaf_size : int, default=30
+         Leaf size for tree-based neighbors algorithms.
+     p : int, default=2
+         Power parameter for the Minkowski metric (when metric='minkowski').
+     metric_params : dict or None, default=None
+         Additional parameters for the metric function.
+     n_jobs : int or None, default=None
+         Number of parallel jobs for neighbors search.
+
+     Attributes
+     ----------
+     cc : object
+         Internally used Classify & Count quantifier wrapping PWKCLF.
+     learner : PWKCLF
+         Underlying probabilistic weighted k-NN classifier.
+
+     Methods
+     -------
+     fit(X, y)
+         Fits the quantifier by training the internal PWKCLF and wrapping it with
+         Classify & Count quantification.
+     predict(X)
+         Predicts class prevalences for input data using the trained model.
+     classify(X)
+         Returns label predictions by applying the trained PWKCLF classifier.
+
+     Examples
+     --------
+     >>> q = PWK(alpha=1.5, n_neighbors=5)
+     >>> q.fit(X_train, y_train)
+     >>> prevalences = q.predict(X_test)
+     """
+
+     _parameter_constraints = {
+         "alpha": [Interval(1, None, inclusive_right=False)],
+         "n_neighbors": [Interval(1, None, inclusive_right=False)],
+         "algorithm": [Options(["auto", "ball_tree", "kd_tree", "brute"])],
+         "metric": [str],
+         "leaf_size": [Interval(1, None, inclusive_right=False)],
+         "p": [Interval(1, None, inclusive_right=False)],
+         "metric_params": [dict, type(None)],
+         "n_jobs": [Interval(1, None, inclusive_right=False), type(None)],
+     }
+
+     def __init__(self,
+                  alpha=1,
+                  n_neighbors=10,
+                  algorithm="auto",
+                  metric="euclidean",
+                  leaf_size=30,
+                  p=2,
+                  metric_params=None,
+                  n_jobs=None):
+         learner = PWKCLF(alpha=alpha,
+                          n_neighbors=n_neighbors,
+                          algorithm=algorithm,
+                          metric=metric,
+                          leaf_size=leaf_size,
+                          p=p,
+                          metric_params=metric_params,
+                          n_jobs=n_jobs)
+         self.algorithm = algorithm
+         self.alpha = alpha
+         self.n_neighbors = n_neighbors
+         self.metric = metric
+         self.leaf_size = leaf_size
+         self.p = p
+         self.metric_params = metric_params
+         self.n_jobs = n_jobs
+         self.learner = learner
+
+     @_fit_context(prefer_skip_nested_validation=True)
+     def fit(self, X, y):
+         """Fit the PWK quantifier to the training data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Training features.
+
+         y : array-like of shape (n_samples,)
+             Training labels.
+
+         Returns
+         -------
+         self : object
+             The fitted instance.
+         """
+         X, y = validate_data(self, X, y, ensure_2d=True, ensure_min_samples=2)
+         validate_y(self, y)
+         self.classes_ = np.unique(y)
+         self.cc = CC(self.learner)
+         return self.cc.fit(X, y)
+
+     def predict(self, X):
+         """Predict prevalences for the given data.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Features for which to predict prevalences.
+
+         Returns
+         -------
+         prevalences : array of shape (n_classes,)
+             Predicted class prevalences.
+         """
+         prevalences = self.cc.predict(X)
+         prevalences = validate_prevalences(self, prevalences, self.classes_)
+         return prevalences
+
+     def classify(self, X):
+         """Classify samples using the underlying learner.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Features to classify.
+
+         Returns
+         -------
+         labels : array of shape (n_samples,)
+             Predicted class labels.
+         """
+         return self.learner.predict(X)
+
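
For orientation, a minimal end-to-end sketch of the new PWK quantifier, mirroring the example removed from the old PWKCLF docstring (see the next hunk). The import path is the private module added above; a public alias may exist but is not shown in this diff, and the scikit-learn breast-cancer data is used purely as a stand-in.

    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from mlquantify.neighbors._classes import PWK  # module added in this hunk

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=32)

    q = PWK(alpha=1.5, n_neighbors=5)   # builds the PWKCLF learner internally
    q.fit(X_train, y_train)             # wraps the learner in a CC quantifier
    print(q.predict(X_test))            # estimated class prevalences, one value per class
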
@@ -1,73 +1,61 @@
- from sklearn.neighbors import NearestNeighbors
- from sklearn.base import BaseEstimator
- import numpy as np
- import pandas as pd
-
- class PWKCLF(BaseEstimator):
-     """
-     Learner based on k-Nearest Neighbors (KNN) to use in the PWK method.
-
-     This classifier adjusts the influence of neighbors using class weights
-     derived from the `alpha` parameter. The `alpha` parameter controls the
-     influence of class imbalance.
-
-     Parameters
-     ----------
-     alpha : float, default=1
-         Controls the influence of class imbalance. Must be >= 1.
-
-     n_neighbors : int, default=10
-         Number of neighbors to use.
 
-     algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
-         Algorithm to compute nearest neighbors.
+ import numpy as np
+ from sklearn.neighbors import NearestNeighbors
 
-     metric : str, default='euclidean'
-         Distance metric to use.
 
-     leaf_size : int, default=30
-         Leaf size passed to the tree-based algorithms.
 
-     p : int, default=2
-         Power parameter for the Minkowski metric.
+ class PWKCLF:
+     """
+     Probabilistic Weighted k-Nearest Neighbor Classifier (PWKCLF).
 
-     metric_params : dict, optional
-         Additional keyword arguments for the metric function.
+     A weighted k-nearest neighbor classifier that assigns class probabilities to
+     instances based on neighbor counts weighted by class-specific inverse frequency
+     factors adjusted by a hyperparameter alpha controlling imbalance compensation.
 
-     n_jobs : int, optional
-         Number of parallel jobs to run for neighbors search.
+     Attributes
+     ----------
+     alpha : float
+         Exponent controlling the degree of imbalance compensation.
+     n_neighbors : int
+         Number of nearest neighbors considered.
+     nbrs : sklearn.neighbors.NearestNeighbors
+         The underlying k-NN structure used for neighbor queries.
+     classes_ : ndarray
+         Unique classes observed during training.
+     class_to_index : dict
+         Mapping from class label to index used in internal arrays.
+     class_weights : ndarray
+         Per-class weights computed based on class frequency and alpha.
+     y_train : ndarray
+         Labels of training samples.
+
+     Methods
+     -------
+     fit(X, y)
+         Fits the k-NN structure and computes class weights.
+     predict(X)
+         Predicts class labels by weighted voting among neighbors.
+
+     Notes
+     -----
+     The class weights are defined as:
+
+     \[
+     w_c = \left( \frac{N_c}{\min_{c'} N_{c'}} \right)^{-\frac{1}{\alpha}},
+     \]
+
+     where \( N_c \) is the count of class \( c \) in the training set.
+
+     This weighting scheme reduces bias towards majority classes by downweighting them
+     in the voting process.
 
       Examples
       --------
-     >>> from sklearn.datasets import load_breast_cancer
-     >>> from sklearn.model_selection import train_test_split
-     >>> from mlquantify.methods.aggregative import PWK
-     >>> from mlquantify.utils.general import get_real_prev
-     >>> from mlquantify.classification import PWKCLF
-     >>>
-     >>> # Load dataset
-     >>> features, target = load_breast_cancer(return_X_y=True)
-     >>>
-     >>> # Split into training and testing sets
-     >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=32)
-     >>>
-     >>> # Create and configure the PWKCLF learner
-     >>> learner = PWKCLF(alpha=1, n_neighbors=10)
-     >>>
-     >>> # Create the PWK quantifier
-     >>> model = PWK(learner=learner)
-     >>>
-     >>> # Train the model
-     >>> model.fit(X_train, y_train)
-     >>>
-     >>> # Predict prevalences
-     >>> y_pred = model.predict(X_test)
-     >>>
-     >>> # Display results
-     >>> print("Real:", get_real_prev(y_test))
-     >>> print("PWK:", y_pred)
+     >>> clf = PWKCLF(alpha=2.0, n_neighbors=7)
+     >>> clf.fit(X_train, y_train)
+     >>> labels = clf.predict(X_test)
       """
-
+
       def __init__(self,
                    alpha=1,
                    n_neighbors=10,
@@ -77,9 +65,6 @@ class PWKCLF(BaseEstimator):
                    p=2,
                    metric_params=None,
                    n_jobs=None):
-         if alpha < 1:
-             raise ValueError("alpha must not be smaller than 1")
-
           self.alpha = alpha
           self.n_neighbors = n_neighbors
 
@@ -119,9 +104,6 @@ class PWKCLF(BaseEstimator):
 
           self.y_train = y
 
-         if isinstance(y, pd.DataFrame):
-             self.y_train = y.reset_index(drop=True)
-
           unique_classes, class_counts = np.unique(y, return_counts=True)
           self.classes_ = unique_classes
           self.class_to_index = dict(zip(self.classes_, range(len(self.classes_))))
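
The weighting rule in the Notes section of the new PWKCLF docstring is simple enough to state directly. Below is a minimal standalone sketch in plain NumPy, independent of the PWKCLF internals (which are not fully shown in this hunk); the helper name is illustrative only.

    import numpy as np

    def pwk_class_weights(y, alpha=1.0):
        # w_c = (N_c / min_c' N_c') ** (-1 / alpha), as in the PWKCLF Notes above
        classes, counts = np.unique(y, return_counts=True)
        weights = (counts / counts.min()) ** (-1.0 / alpha)
        return dict(zip(classes, weights))

    # With a 90/10 class split and alpha=2, the majority class is downweighted to 9**-0.5 ~ 0.33
    print(pwk_class_weights(np.array([0] * 90 + [1] * 10), alpha=2.0))
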
@@ -0,0 +1,270 @@
+ import numpy as np
+ from sklearn.neighbors import KernelDensity
+ from mlquantify.utils._constraints import Interval
+ from mlquantify.neighbors._base import BaseKDE
+ from mlquantify.neighbors._utils import (
+     gaussian_kernel,
+     negative_log_likelihood,
+     EPS,
+ )
+ from mlquantify.utils import check_random_state
+ from scipy.optimize import minimize
+
+
+ # ============================================================
+ # Auxiliary functions
+ # ============================================================
+
+ def _optimize_on_simplex(objective, n_classes, x0=None):
+     """
+     Optimize an objective function over the probability simplex.
+
+     This function performs constrained optimization to find the mixture weights
+     \( \alpha \) on the simplex \( \Delta^{n-1} = \{ \alpha \in \mathbb{R}^n : \alpha_i \geq 0, \sum_i \alpha_i = 1 \} \)
+     that minimize the given objective function.
+
+     Parameters
+     ----------
+     objective : callable
+         Function from \( \mathbb{R}^n \to \mathbb{R} \) to minimize.
+     n_classes : int
+         Dimensionality of the simplex (number of classes).
+     x0 : array-like, optional
+         Initial guess for the optimization, defaults to uniform vector.
+
+     Returns
+     -------
+     alpha_opt : ndarray of shape (n_classes,)
+         Optimized weights summing to 1.
+     min_loss : float
+         Objective function value at optimum.
+
+     Notes
+     -----
+     The optimization uses scipy's `minimize` with bounds and equality constraint.
+     """
+     if x0 is None:
+         x0 = np.ones(n_classes) / n_classes
+
+     constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
+     bounds = [(0, 1)] * n_classes
+
+     res = minimize(objective, x0, bounds=bounds, constraints=constraints)
+     alpha_opt = res.x / np.sum(res.x)
+     return alpha_opt, res.fun
+
+
+ # ============================================================
+ # KDEy-ML — Maximum Likelihood
+ # ============================================================
+
+ class KDEyML(BaseKDE):
+     """KDEy Maximum Likelihood quantifier.
+
+     Models class-conditional densities of posterior probabilities via Kernel Density
+     Estimation (KDE) and estimates class prevalences by maximizing the likelihood of
+     test data under a mixture model of these KDEs.
+
+     The mixture weights correspond to class prevalences, optimized under the simplex
+     constraint. The optimization minimizes the negative log-likelihood of the mixture
+     density evaluated at test posteriors.
+
+     This approach generalizes EM-based quantification methods by using KDE instead
+     of discrete histograms, allowing smooth multivariate density estimation over
+     the probability simplex.
+
+     References
+     ----------
+     The method is based on ideas presented by Moreo et al. (2023), extending KDE-based
+     approaches for distribution matching and maximum likelihood estimation.
+     """
+
+     def _precompute_training(self, train_predictions, train_y_values):
+         """
+         Fit KDE models on class-specific training posterior predictions.
+         """
+         super()._fit_kde_models(train_predictions, train_y_values)
+
+     def _solve_prevalences(self, predictions):
+         """
+         Estimate class prevalences by maximizing log-likelihood under KDE mixture.
+
+         Parameters
+         ----------
+         predictions : ndarray, shape (n_samples, n_features)
+             Posterior probabilities of test set instances.
+
+         Returns
+         -------
+         alpha_opt : ndarray, shape (n_classes,)
+             Estimated class prevalences.
+         min_loss : float
+             Minimum negative log-likelihood achieved.
+
+         Notes
+         -----
+         The optimization is solved over the probability simplex.
+         """
+         n_classes = len(self._class_kdes)
+         class_likelihoods = np.array([
+             np.exp(kde.score_samples(predictions)) + EPS for kde in self._class_kdes
+         ])  # (n_classes, n_samples)
+
+         def objective(alpha):
+             mixture = np.dot(alpha, class_likelihoods)
+             return negative_log_likelihood(mixture)
+
+         alpha_opt, min_loss = _optimize_on_simplex(objective, n_classes)
+
+         self.best_distance = min_loss
+
+         return alpha_opt, min_loss
+
+
+ # ============================================================
+ # KDEy-HD — Hellinger Distance Minimization
+ # ============================================================
+
+ class KDEyHD(BaseKDE):
+     r"""KDEy Hellinger Distance Minimization quantifier.
+
+     Estimates class prevalences by minimizing the Hellinger distance \( HD \) between
+     the KDE mixture of class-conditional densities and the KDE of test data, estimated
+     via Monte Carlo sampling and importance weighting.
+
+     This stochastic approximation enables practical optimization of complex divergence
+     measures otherwise lacking closed-form expressions for Gaussian Mixture Models.
+
+     Parameters
+     ----------
+     montecarlo_trials : int
+         Number of Monte Carlo samples used in approximation.
+     random_state : int or None
+         Seed or random state for reproducibility.
+
+     References
+     ----------
+     Builds on f-divergence Monte Carlo approximations for KDE mixtures as detailed
+     by Moreo et al. (2023) and importance sampling techniques.
+     """
+
+     _parameter_constraints = {
+         "montecarlo_trials": [Interval(1, None)],
+     }
+
+     def __init__(self, learner=None, bandwidth=0.1, kernel="gaussian", montecarlo_trials=1000, random_state=None):
+         super().__init__(learner, bandwidth, kernel)
+         self.montecarlo_trials = montecarlo_trials
+         self.random_state = random_state
+
+     def _precompute_training(self, train_predictions, train_y_values):
+         """
+         Precompute reference samples from class KDEs and their densities.
+         """
+         super()._fit_kde_models(train_predictions, train_y_values)
+         n_class = len(self._class_kdes)
+         trials = int(self.montecarlo_trials)
+         rng = check_random_state(self.random_state)
+         # Convert to integer seed for sklearn compatibility
+         seed = rng.integers(0, 2**31 - 1) if hasattr(rng, 'integers') else self.random_state
+
+         samples = np.vstack([
+             kde.sample(max(1, trials // n_class), random_state=seed)
+             for kde in self._class_kdes
+         ])
+
+         ref_classwise = np.array([np.exp(k.score_samples(samples)) + EPS for k in self._class_kdes])
+         ref_density = np.mean(ref_classwise, axis=0) + EPS
+
+         self._ref_samples = samples
+         self._ref_classwise = ref_classwise
+         self._ref_density = ref_density
+
+     def _solve_prevalences(self, predictions):
+         """
+         Minimize Hellinger distance between test KDE and mixture KDE via importance sampling.
+         """
+         test_kde = KernelDensity(bandwidth=self.bandwidth).fit(predictions)
+         qs = np.exp(test_kde.score_samples(self._ref_samples)) + EPS
+         iw = qs / self._ref_density
+         fracs = self._ref_classwise / qs
+
+         def objective(alpha):
+             alpha = np.clip(alpha, EPS, None)
+             alpha /= np.sum(alpha)
+             ps_div_qs = np.dot(alpha, fracs)
+             vals = (np.sqrt(ps_div_qs) - 1.0) ** 2 * iw
+             return np.mean(vals)
+
+         alpha_opt, min_loss = _optimize_on_simplex(objective, len(self._class_kdes))
+
+         self.best_distance = min_loss
+
+         return alpha_opt, min_loss
+
+
+ # ============================================================
+ # KDEy-CS — Cauchy–Schwarz Divergence
+ # ============================================================
+
+ class KDEyCS(BaseKDE):
+     """
+     KDEy Cauchy-Schwarz Divergence quantifier.
+
+     Uses a closed-form solution for minimizing the Cauchy-Schwarz (CS) divergence between
+     Gaussian Mixture Models representing class-conditional densities fitted via KDE.
+
+     This mathematically efficient approach leverages precomputed kernel Gram matrices
+     of train-train, train-test, and test-test instances for fast divergence evaluation,
+     enabling scalable multiclass quantification.
+
+     References
+     ----------
+     Based on closed-form CS divergence derivations by Kampa et al. (2011) and KDEy
+     density representations, as discussed by Moreo et al. (2023).
+     """
+
+     def _precompute_training(self, train_predictions, train_y_values):
+         """
+         Precompute kernel sums and Gram matrices needed for CS divergence evaluation.
+         """
+         P = np.atleast_2d(train_predictions)
+         y = np.asarray(train_y_values)
+         centers = [P[y == c] for c in self.classes_]
+         counts = np.array([len(x) if len(x) > 0 else 1 for x in centers])
+         h_eff = np.sqrt(2) * self.bandwidth
+
+         B_bar = np.zeros((len(self.classes_), len(self.classes_)))
+         for i, Xi in enumerate(centers):
+             for j, Xj in enumerate(centers[i:], start=i):
+                 val = np.sum(gaussian_kernel(Xi, Xj, h_eff))
+                 B_bar[i, j] = B_bar[j, i] = val
+         self._centers = centers
+         self._counts = counts
+         self._B_bar = B_bar
+         self._h_eff = h_eff
+
+     def _solve_prevalences(self, predictions):
+         """
+         Minimize Cauchy-Schwarz divergence over class mixture weights on the probability simplex.
+         """
+         Pte = np.atleast_2d(predictions)
+         n = len(self.classes_)
+         a_bar = np.array([np.sum(gaussian_kernel(Xi, Pte, self._h_eff)) for Xi in self._centers])
+         counts = self._counts + EPS
+         B_bar = self._B_bar + EPS
+         t = 1.0 / max(1, Pte.shape[0])
+
+         def objective(alpha):
+             alpha = np.clip(alpha, EPS, None)
+             alpha /= np.sum(alpha)
+             rbar = alpha / counts
+             partA = -np.log(np.dot(rbar, a_bar) * t + EPS)
+             partB = 0.5 * np.log(rbar @ (B_bar @ rbar) + EPS)
+             return partA + partB
+
+         alpha_opt, min_loss = _optimize_on_simplex(objective, n)
+
+         self.best_distance = min_loss
+
+         return alpha_opt, min_loss
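
To make the KDEy-ML objective concrete, here is a self-contained toy version of the same computation (per-class KDEs, mixture likelihood, negative log-likelihood minimized on the simplex). It reimplements the objective with plain scikit-learn and SciPy rather than calling the quantifier itself, since the BaseKDE fit/predict interface lives in mlquantify/neighbors/_base.py and is not shown in this hunk.

    import numpy as np
    from sklearn.neighbors import KernelDensity
    from scipy.optimize import minimize

    rng = np.random.default_rng(0)
    # Toy 1-D "posterior scores": two classes with distinct score distributions
    train = {0: rng.normal(0.3, 0.05, (200, 1)), 1: rng.normal(0.7, 0.05, (200, 1))}
    # Unlabeled test scores drawn with true prevalences [0.2, 0.8]
    test = np.vstack([rng.normal(0.3, 0.05, (20, 1)), rng.normal(0.7, 0.05, (80, 1))])

    kdes = [KernelDensity(bandwidth=0.1).fit(train[c]) for c in (0, 1)]
    lik = np.array([np.exp(k.score_samples(test)) + 1e-12 for k in kdes])  # (2, n_test)

    def nll(alpha):
        # negative log-likelihood of the KDE mixture, as in KDEyML._solve_prevalences
        return -np.sum(np.log(alpha @ lik + 1e-12))

    res = minimize(nll, x0=np.array([0.5, 0.5]), bounds=[(0, 1)] * 2,
                   constraints={"type": "eq", "fun": lambda a: np.sum(a) - 1})
    print(res.x)  # close to [0.2, 0.8]
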
@@ -0,0 +1,135 @@
+ import numpy as np
+ from sklearn.metrics import pairwise_distances
+ from math import pi
+ from scipy.optimize import minimize
+
+
+ EPS = 1e-12
+
+ # ============================================================
+ # Utilities
+ # ============================================================
+
+ def gaussian_kernel(X, Y, bandwidth):
+     """
+     Compute the Gaussian kernel matrix K(x, y) with specified bandwidth.
+
+     This kernel matrix represents the similarity between each pair of points in X and Y,
+     computed using the Gaussian (RBF) kernel function:
+
+     \[
+     K(x, y) = \frac{1}{(2 \pi)^{D/2} h^D} \exp\left(- \frac{\|x - y\|^2}{2 h^2}\right)
+     \]
+
+     where \( h \) is the bandwidth (smoothing parameter), and \( D \) is the dimensionality
+     of the input feature space.
+
+     Parameters
+     ----------
+     X : array-like of shape (n_samples_X, n_features)
+         Input data points.
+     Y : array-like of shape (n_samples_Y, n_features) or None
+         Input data points for kernel computation. If None, defaults to X.
+     bandwidth : float
+         Kernel bandwidth parameter \( h \).
+
+     Returns
+     -------
+     K : ndarray of shape (n_samples_X, n_samples_Y)
+         Gaussian kernel matrix.
+     """
+     X = np.atleast_2d(X)
+     if Y is None:
+         Y = X
+     else:
+         Y = np.atleast_2d(Y)
+     sqd = pairwise_distances(X, Y, metric="euclidean") ** 2
+     D = X.shape[1]
+     norm = (bandwidth ** D) * ((2 * pi) ** (D / 2))
+     return np.exp(-sqd / (2 * (bandwidth ** 2))) / (norm + EPS)
+
+
+ def negative_log_likelihood(mixture_likelihoods):
+     """
+     Compute the negative log-likelihood of given mixture likelihoods in a numerically stable way.
+
+     Given mixture likelihood values \( p_i \) for samples, the negative log-likelihood is:
+
+     \[
+     - \sum_i \log(p_i)
+     \]
+
+     Numerical stability is achieved by clipping likelihoods below a small epsilon.
+
+     Parameters
+     ----------
+     mixture_likelihoods : array-like
+         Likelihood values for the mixture distribution evaluated at samples.
+
+     Returns
+     -------
+     nll : float
+         Negative log-likelihood value.
+     """
+     mixture_likelihoods = np.clip(mixture_likelihoods, EPS, None)
+     return -np.sum(np.log(mixture_likelihoods))
+
+
+ def _simplex_constraints(n):
+     """
+     Define constraints and bounds for optimization over the probability simplex.
+
+     The simplex is defined as all vectors \( \alpha \in \mathbb{R}^n \) such that:
+
+     \[
+     \alpha_i \geq 0, \quad \sum_{i=1}^n \alpha_i = 1
+     \]
+
+     Parameters
+     ----------
+     n : int
+         Dimensionality of the simplex (number of mixture components).
+
+     Returns
+     -------
+     constraints : list of dict
+         List containing equality constraint for sum of elements equaling 1.
+     bounds : list of tuple
+         Bounds for each element to lie between 0 and 1.
+     """
+     cons = [{"type": "eq", "fun": lambda a: np.sum(a) - 1.0}]
+     bounds = [(0.0, 1.0) for _ in range(n)]
+     return cons, bounds
+
+
+ def _optimize_on_simplex(objective, n, x0=None):
+     """
+     Minimize an objective function over the probability simplex.
+
+     This function solves for mixture weights \( \boldsymbol{\alpha} \) that minimize the
+     objective function under the constraints \(\alpha_i \geq 0\) and \(\sum_i \alpha_i = 1\).
+
+     The optimization uses Sequential Least SQuares Programming (SLSQP).
+
+     Parameters
+     ----------
+     objective : callable
+         The objective function to minimize. It should accept a vector of length n and
+         return a scalar loss.
+     n : int
+         Number of mixture components (dimension of \( \boldsymbol{\alpha} \)).
+     x0 : array-like, optional
+         Initial guess for \( \boldsymbol{\alpha} \). If None, defaults to uniform.
+
+     Returns
+     -------
+     alpha_opt : ndarray of shape (n,)
+         Optimized mixture weights summing to one.
+     """
+     if x0 is None:
+         x0 = np.ones(n) / n
+     cons, bounds = _simplex_constraints(n)
+     res = minimize(objective, x0, method="SLSQP", constraints=cons, bounds=bounds)
+     x = np.clip(getattr(res, "x", x0), 0.0, None)
+     s = np.sum(x)
+     return x / s if s > 0 else np.ones(n) / n
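
A quick usage sketch for the simplex helper above (a private utility, so the import path may change between releases): it recovers a known target distribution from a quadratic loss and shows that the result stays on the simplex.

    import numpy as np
    from mlquantify.neighbors._utils import _optimize_on_simplex  # private helper added above

    target = np.array([0.1, 0.3, 0.6])
    alpha = _optimize_on_simplex(lambda a: np.sum((a - target) ** 2), n=3)
    print(alpha)          # approximately [0.1, 0.3, 0.6]
    print(alpha.sum())    # 1.0 (within numerical tolerance): the simplex constraint holds
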