mlquantify 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mlquantify/__init__.py +10 -29
  2. mlquantify/adjust_counting/__init__.py +24 -0
  3. mlquantify/adjust_counting/_adjustment.py +648 -0
  4. mlquantify/adjust_counting/_base.py +245 -0
  5. mlquantify/adjust_counting/_counting.py +153 -0
  6. mlquantify/adjust_counting/_utils.py +109 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +329 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +147 -0
  13. mlquantify/likelihood/_classes.py +430 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +785 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +51 -36
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +147 -0
  22. mlquantify/mixture/_classes.py +458 -0
  23. mlquantify/mixture/_utils.py +163 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +168 -0
  31. mlquantify/neighbors/_classes.py +150 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +37 -62
  33. mlquantify/neighbors/_kde.py +268 -0
  34. mlquantify/neighbors/_utils.py +131 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +64 -0
  50. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.10.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -289
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.8.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/top_level.txt +0 -0
@@ -1,73 +1,54 @@
- from sklearn.neighbors import NearestNeighbors
- from sklearn.base import BaseEstimator
+
  import numpy as np
- import pandas as pd
+ from sklearn.neighbors import NearestNeighbors

- class PWKCLF(BaseEstimator):
-     """
-     Learner based on k-Nearest Neighbors (KNN) to use in the PWK method.
-
-     This classifier adjusts the influence of neighbors using class weights
-     derived from the `alpha` parameter. The `alpha` parameter controls the
-     influence of class imbalance.

-     Parameters
-     ----------
-     alpha : float, default=1
-         Controls the influence of class imbalance. Must be >= 1.

-     n_neighbors : int, default=10
-         Number of neighbors to use.
+ class PWKCLF:
+     r"""Probabilistic Weighted k-Nearest Neighbor Classifier (PWKCLF).

-     algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
-         Algorithm to compute nearest neighbors.
+     A weighted k-nearest neighbor classifier that assigns class probabilities to
+     instances based on neighbor counts weighted by class-specific inverse frequency
+     factors adjusted by a hyperparameter alpha controlling imbalance compensation.

-     metric : str, default='euclidean'
-         Distance metric to use.
+     Attributes
+     ----------
+     alpha : float
+         Exponent controlling the degree of imbalance compensation.
+     n_neighbors : int
+         Number of nearest neighbors considered.
+     nbrs : sklearn.neighbors.NearestNeighbors
+         The underlying k-NN structure used for neighbor queries.
+     classes_ : ndarray
+         Unique classes observed during training.
+     class_to_index : dict
+         Mapping from class label to index used in internal arrays.
+     class_weights : ndarray
+         Per-class weights computed based on class frequency and alpha.
+     y_train : ndarray
+         Labels of training samples.

-     leaf_size : int, default=30
-         Leaf size passed to the tree-based algorithms.

-     p : int, default=2
-         Power parameter for the Minkowski metric.
+     Notes
+     -----
+     The class weights are defined as:

-     metric_params : dict, optional
-         Additional keyword arguments for the metric function.
+     .. math::

-     n_jobs : int, optional
-         Number of parallel jobs to run for neighbors search.
+         w_c = \left( \frac{N_c}{\min_{c'} N_{c'}} \right)^{-\frac{1}{\alpha}},
+
+     where :math:`N_c` is the count of class :math:`c` in the training set.
+
+     This weighting scheme reduces bias towards majority classes by downweighting them
+     in the voting process.

      Examples
      --------
-     >>> from sklearn.datasets import load_breast_cancer
-     >>> from sklearn.model_selection import train_test_split
-     >>> from mlquantify.methods.aggregative import PWK
-     >>> from mlquantify.utils.general import get_real_prev
-     >>> from mlquantify.classification import PWKCLF
-     >>>
-     >>> # Load dataset
-     >>> features, target = load_breast_cancer(return_X_y=True)
-     >>>
-     >>> # Split into training and testing sets
-     >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=32)
-     >>>
-     >>> # Create and configure the PWKCLF learner
-     >>> learner = PWKCLF(alpha=1, n_neighbors=10)
-     >>>
-     >>> # Create the PWK quantifier
-     >>> model = PWK(learner=learner)
-     >>>
-     >>> # Train the model
-     >>> model.fit(X_train, y_train)
-     >>>
-     >>> # Predict prevalences
-     >>> y_pred = model.predict(X_test)
-     >>>
-     >>> # Display results
-     >>> print("Real:", get_real_prev(y_test))
-     >>> print("PWK:", y_pred)
+     >>> clf = PWKCLF(alpha=2.0, n_neighbors=7)
+     >>> clf.fit(X_train, y_train)
+     >>> labels = clf.predict(X_test)
      """
-
+
      def __init__(self,
                   alpha=1,
                   n_neighbors=10,
@@ -77,9 +58,6 @@ class PWKCLF(BaseEstimator):
                   p=2,
                   metric_params=None,
                   n_jobs=None):
-         if alpha < 1:
-             raise ValueError("alpha must not be smaller than 1")
-
          self.alpha = alpha
          self.n_neighbors = n_neighbors

@@ -119,9 +97,6 @@ class PWKCLF(BaseEstimator):

          self.y_train = y

-         if isinstance(y, pd.DataFrame):
-             self.y_train = y.reset_index(drop=True)
-
          unique_classes, class_counts = np.unique(y, return_counts=True)
          self.classes_ = unique_classes
          self.class_to_index = dict(zip(self.classes_, range(len(self.classes_))))
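
The new PWKCLF docstring above defines the per-class voting weights as w_c = (N_c / min_c' N_c')^(-1/alpha). A minimal sketch of that formula on toy label counts, assuming only NumPy; the variable names below are local to the example, not mlquantify internals.

import numpy as np

y = np.array([0, 0, 0, 0, 1, 1, 2])   # toy labels with class imbalance
alpha = 2.0                            # imbalance-compensation exponent

classes, counts = np.unique(y, return_counts=True)
# w_c = (N_c / min_c' N_c')^(-1/alpha): the rarest class gets weight 1.0,
# more frequent classes are downweighted.
class_weights = (counts / counts.min()) ** (-1.0 / alpha)
print(dict(zip(classes.tolist(), np.round(class_weights, 2).tolist())))  # {0: 0.5, 1: 0.71, 2: 1.0}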
@@ -0,0 +1,268 @@
+ import numpy as np
+ from sklearn.neighbors import KernelDensity
+ from mlquantify.utils._constraints import Interval
+ from mlquantify.neighbors._base import BaseKDE
+ from mlquantify.neighbors._utils import (
+     gaussian_kernel,
+     negative_log_likelihood,
+     EPS,
+ )
+ from mlquantify.utils import check_random_state
+ from scipy.optimize import minimize
+
+
+ # ============================================================
+ # Auxiliary functions
+ # ============================================================
+
+ def _optimize_on_simplex(objective, n_classes, x0=None):
+     r"""Optimize an objective function over the probability simplex.
+
+     This function performs constrained optimization to find the mixture weights
+     :math:`\alpha` on the simplex :math:`\Delta^{n-1} = \{ \alpha \in \mathbb{R}^n : \alpha_i \geq 0, \sum_i \alpha_i = 1 \}`
+     that minimize the given objective function.
+
+     Parameters
+     ----------
+     objective : callable
+         Function from :math:`\mathbb{R}^n \to \mathbb{R}` to minimize.
+     n_classes : int
+         Dimensionality of the simplex (number of classes).
+     x0 : array-like, optional
+         Initial guess for the optimization, defaults to uniform vector.
+
+     Returns
+     -------
+     alpha_opt : ndarray of shape (n_classes,)
+         Optimized weights summing to 1.
+     min_loss : float
+         Objective function value at optimum.
+
+     Notes
+     -----
+     The optimization uses scipy's `minimize` with bounds and equality constraint.
+     """
+     if x0 is None:
+         x0 = np.ones(n_classes) / n_classes
+
+     constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
+     bounds = [(0, 1)] * n_classes
+
+     res = minimize(objective, x0, bounds=bounds, constraints=constraints)
+     alpha_opt = res.x / np.sum(res.x)
+     return alpha_opt, res.fun
+
+
+ # ============================================================
+ # KDEy-ML — Maximum Likelihood
+ # ============================================================
+
+ class KDEyML(BaseKDE):
+     r"""KDEy Maximum Likelihood quantifier.
+
+     Models class-conditional densities of posterior probabilities via Kernel Density
+     Estimation (KDE) and estimates class prevalences by maximizing the likelihood of
+     test data under a mixture model of these KDEs.
+
+     The mixture weights correspond to class prevalences, optimized under the simplex
+     constraint. The optimization minimizes the negative log-likelihood of the mixture
+     density evaluated at test posteriors.
+
+     This approach generalizes EM-based quantification methods by using KDE instead
+     of discrete histograms, allowing smooth multivariate density estimation over
+     the probability simplex.
+
+     References
+     ----------
+     The method is based on ideas presented by Moreo et al. (2023), extending KDE-based
+     approaches for distribution matching and maximum likelihood estimation.
+     """
+
+     def _precompute_training(self, train_predictions, train_y_values):
+         r"""
+         Fit KDE models on class-specific training posterior predictions.
+         """
+         super()._fit_kde_models(train_predictions, train_y_values)
+
+     def _solve_prevalences(self, predictions):
+         r"""
+         Estimate class prevalences by maximizing log-likelihood under KDE mixture.
+
+         Parameters
+         ----------
+         predictions : ndarray, shape (n_samples, n_features)
+             Posterior probabilities of test set instances.
+
+         Returns
+         -------
+         alpha_opt : ndarray, shape (n_classes,)
+             Estimated class prevalences.
+         min_loss : float
+             Minimum negative log-likelihood achieved.
+
+         Notes
+         -----
+         The optimization is solved over the probability simplex.
+         """
+         n_classes = len(self._class_kdes)
+         class_likelihoods = np.array([
+             np.exp(kde.score_samples(predictions)) + EPS for kde in self._class_kdes
+         ]) # (n_classes, n_samples)
+
+         def objective(alpha):
+             mixture = np.dot(alpha, class_likelihoods)
+             return negative_log_likelihood(mixture)
+
+         alpha_opt, min_loss = _optimize_on_simplex(objective, n_classes)
+
+         self.best_distance = min_loss
+
+         return alpha_opt, min_loss
+
+
+ # ============================================================
+ # KDEy-HD — Hellinger Distance Minimization
+ # ============================================================
+
+ class KDEyHD(BaseKDE):
+     r"""KDEy Hellinger Distance Minimization quantifier.
+
+     Estimates class prevalences by minimizing the Hellinger distance \( HD \) between
+     the KDE mixture of class-conditional densities and the KDE of test data, estimated
+     via Monte Carlo sampling and importance weighting.
+
+     This stochastic approximation enables practical optimization of complex divergence
+     measures otherwise lacking closed-form expressions for Gaussian Mixture Models.
+
+     Parameters
+     ----------
+     montecarlo_trials : int
+         Number of Monte Carlo samples used in approximation.
+     random_state : int or None
+         Seed or random state for reproducibility.
+
+     References
+     ----------
+     Builds on f-divergence Monte Carlo approximations for KDE mixtures as detailed
+     by Moreo et al. (2023) and importance sampling techniques.
+     """
+
+     _parameter_constraints = {
+         "montecarlo_trials": [Interval(1, None)],
+     }
+
+     def __init__(self, learner=None, bandwidth=0.1, kernel="gaussian", montecarlo_trials=1000, random_state=None):
+         super().__init__(learner, bandwidth, kernel)
+         self.montecarlo_trials = montecarlo_trials
+         self.random_state = random_state
+
+     def _precompute_training(self, train_predictions, train_y_values):
+         """
+         Precompute reference samples from class KDEs and their densities.
+         """
+         super()._fit_kde_models(train_predictions, train_y_values)
+         n_class = len(self._class_kdes)
+         trials = int(self.montecarlo_trials)
+         rng = check_random_state(self.random_state)
+         # Convert to integer seed for sklearn compatibility
+         seed = rng.integers(0, 2**31 - 1) if hasattr(rng, 'integers') else self.random_state
+
+         samples = np.vstack([
+             kde.sample(max(1, trials // n_class), random_state=seed)
+             for kde in self._class_kdes
+         ])
+
+         ref_classwise = np.array([np.exp(k.score_samples(samples)) + EPS for k in self._class_kdes])
+         ref_density = np.mean(ref_classwise, axis=0) + EPS
+
+         self._ref_samples = samples
+         self._ref_classwise = ref_classwise
+         self._ref_density = ref_density
+
+     def _solve_prevalences(self, predictions):
+         """
+         Minimize Hellinger distance between test KDE and mixture KDE via importance sampling.
+         """
+         test_kde = KernelDensity(bandwidth=self.bandwidth).fit(predictions)
+         qs = np.exp(test_kde.score_samples(self._ref_samples)) + EPS
+         iw = qs / self._ref_density
+         fracs = self._ref_classwise / qs
+
+         def objective(alpha):
+             alpha = np.clip(alpha, EPS, None)
+             alpha /= np.sum(alpha)
+             ps_div_qs = np.dot(alpha, fracs)
+             vals = (np.sqrt(ps_div_qs) - 1.0) ** 2 * iw
+             return np.mean(vals)
+
+         alpha_opt, min_loss = _optimize_on_simplex(objective, len(self._class_kdes))
+
+         self.best_distance = min_loss
+
+         return alpha_opt, min_loss
+
+
+ # ============================================================
+ # KDEy-CS — Cauchy–Schwarz Divergence
+ # ============================================================
+
+ class KDEyCS(BaseKDE):
+     r"""KDEy Cauchy-Schwarz Divergence quantifier.
+
+     Uses a closed-form solution for minimizing the Cauchy-Schwarz (CS) divergence between
+     Gaussian Mixture Models representing class-conditional densities fitted via KDE.
+
+     This mathematically efficient approach leverages precomputed kernel Gram matrices
+     of train-train, train-test, and test-test instances for fast divergence evaluation,
+     enabling scalable multiclass quantification.
+
+     References
+     ----------
+     Based on closed-form CS divergence derivations by Kampa et al. (2011) and KDEy
+     density representations, as discussed by Moreo et al. (2023).
+     """
+
+     def _precompute_training(self, train_predictions, train_y_values):
+         """
+         Precompute kernel sums and Gram matrices needed for CS divergence evaluation.
+         """
+         P = np.atleast_2d(train_predictions)
+         y = np.asarray(train_y_values)
+         centers = [P[y == c] for c in self.classes_]
+         counts = np.array([len(x) if len(x) > 0 else 1 for x in centers])
+         h_eff = np.sqrt(2) * self.bandwidth
+
+         B_bar = np.zeros((len(self.classes_), len(self.classes_)))
+         for i, Xi in enumerate(centers):
+             for j, Xj in enumerate(centers[i:], start=i):
+                 val = np.sum(gaussian_kernel(Xi, Xj, h_eff))
+                 B_bar[i, j] = B_bar[j, i] = val
+         self._centers = centers
+         self._counts = counts
+         self._B_bar = B_bar
+         self._h_eff = h_eff
+
+     def _solve_prevalences(self, predictions):
+         """
+         Minimize Cauchy-Schwarz divergence over class mixture weights on the probability simplex.
+         """
+         Pte = np.atleast_2d(predictions)
+         n = len(self.classes_)
+         a_bar = np.array([np.sum(gaussian_kernel(Xi, Pte, self._h_eff)) for Xi in self._centers])
+         counts = self._counts + EPS
+         B_bar = self._B_bar + EPS
+         t = 1.0 / max(1, Pte.shape[0])
+
+         def objective(alpha):
+             alpha = np.clip(alpha, EPS, None)
+             alpha /= np.sum(alpha)
+             rbar = alpha / counts
+             partA = -np.log(np.dot(rbar, a_bar) * t + EPS)
+             partB = 0.5 * np.log(rbar @ (B_bar @ rbar) + EPS)
+             return partA + partB
+
+         alpha_opt, min_loss = _optimize_on_simplex(objective, n)
+
+         self.best_distance = min_loss
+
+         return alpha_opt, min_loss
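
A self-contained sketch of the KDEy-ML idea implemented above: fit one KDE per class on training posteriors, then pick the mixture weights (the prevalence estimate) that maximize the likelihood of the test posteriors. It uses only NumPy, scikit-learn and SciPy; the toy data and variable names are illustrative and are not part of the mlquantify API.

import numpy as np
from sklearn.neighbors import KernelDensity
from scipy.optimize import minimize

rng = np.random.default_rng(0)
EPS = 1e-12

# Toy one-dimensional "posterior scores" for two classes.
train_scores = {0: rng.normal(0.3, 0.05, size=(200, 1)),
                1: rng.normal(0.7, 0.05, size=(200, 1))}
# Test sample drawn with true prevalences close to [0.3, 0.7].
test_scores = np.vstack([rng.normal(0.3, 0.05, size=(30, 1)),
                         rng.normal(0.7, 0.05, size=(70, 1))])

# One KDE per class, analogous to the class-conditional fit above.
kdes = [KernelDensity(bandwidth=0.1).fit(train_scores[c]) for c in (0, 1)]
# Per-class likelihoods of each test point, shape (n_classes, n_samples).
L = np.array([np.exp(k.score_samples(test_scores)) + EPS for k in kdes])

def neg_log_lik(alpha):
    # Negative log-likelihood of the mixture alpha[0]*p0 + alpha[1]*p1.
    return -np.sum(np.log(np.clip(alpha @ L, EPS, None)))

res = minimize(neg_log_lik, x0=np.array([0.5, 0.5]), method="SLSQP",
               bounds=[(0, 1), (0, 1)],
               constraints={"type": "eq", "fun": lambda a: a.sum() - 1})
print(np.round(res.x, 2))  # estimated prevalences, close to [0.3, 0.7]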
@@ -0,0 +1,131 @@
+ import numpy as np
+ from sklearn.metrics import pairwise_distances
+ from math import pi
+ from scipy.optimize import minimize
+
+
+ EPS = 1e-12
+
+ # ============================================================
+ # Utilities
+ # ============================================================
+
+ def gaussian_kernel(X, Y, bandwidth):
+     r"""Compute the Gaussian kernel matrix K(x, y) with specified bandwidth.
+
+     This kernel matrix represents the similarity between each pair of points in X and Y,
+     computed using the Gaussian (RBF) kernel function:
+
+     .. math::
+
+         K(x, y) = \frac{1}{(2 \pi)^{D/2} h^D} \exp\left(- \frac{\|x - y\|^2}{2 h^2}\right)
+
+     where :math:`h` is the bandwidth (smoothing parameter), and :math:`D` is the dimensionality
+     of the input feature space.
+
+     Parameters
+     ----------
+     X : array-like of shape (n_samples_X, n_features)
+         Input data points.
+     Y : array-like of shape (n_samples_Y, n_features) or None
+         Input data points for kernel computation. If None, defaults to X.
+     bandwidth : float
+         Kernel bandwidth parameter :math:`h`.
+
+     Returns
+     -------
+     K : ndarray of shape (n_samples_X, n_samples_Y)
+         Gaussian kernel matrix.
+     """
+     X = np.atleast_2d(X)
+     if Y is None:
+         Y = X
+     else:
+         Y = np.atleast_2d(Y)
+     sqd = pairwise_distances(X, Y, metric="euclidean") ** 2
+     D = X.shape[1]
+     norm = (bandwidth ** D) * ((2 * pi) ** (D / 2))
+     return np.exp(-sqd / (2 * (bandwidth ** 2))) / (norm + EPS)
+
+
+ def negative_log_likelihood(mixture_likelihoods):
+     r"""Compute the negative log-likelihood of given mixture likelihoods in a numerically stable way.
+
+     Given mixture likelihood values :math:`p_i` for samples, the negative log-likelihood is:
+
+     .. math::
+
+         - \sum_i \log(p_i)
+
+     Numerical stability is achieved by clipping likelihoods below a small epsilon.
+
+     Parameters
+     ----------
+     mixture_likelihoods : array-like
+         Likelihood values for the mixture distribution evaluated at samples.
+
+     Returns
+     -------
+     nll : float
+         Negative log-likelihood value.
+     """
+     mixture_likelihoods = np.clip(mixture_likelihoods, EPS, None)
+     return -np.sum(np.log(mixture_likelihoods))
+
+
+ def _simplex_constraints(n):
+     r"""Define constraints and bounds for optimization over the probability simplex.
+
+     The simplex is defined as all vectors :math:`\alpha \in \mathbb{R}^n` such that:
+
+     .. math::
+
+         \alpha_i \geq 0, \quad \sum_{i=1}^n \alpha_i = 1
+
+     Parameters
+     ----------
+     n : int
+         Dimensionality of the simplex (number of mixture components).
+
+     Returns
+     -------
+     constraints : list of dict
+         List containing equality constraint for sum of elements equaling 1.
+     bounds : list of tuple
+         Bounds for each element to lie between 0 and 1.
+     """
+     cons = [{"type": "eq", "fun": lambda a: np.sum(a) - 1.0}]
+     bounds = [(0.0, 1.0) for _ in range(n)]
+     return cons, bounds
+
+
+ def _optimize_on_simplex(objective, n, x0=None):
+     r"""Minimize an objective function over the probability simplex.
+
+     This function solves for mixture weights \( \boldsymbol{\alpha} \) that minimize the
+     objective function under the constraints \(\alpha_i \geq 0\) and \(\sum_i \alpha_i = 1\).
+
+     The optimization uses Sequential Least SQuares Programming (SLSQP).
+
+     Parameters
+     ----------
+     objective : callable
+         The objective function to minimize. It should accept a vector of length n and
+         return a scalar loss.
+     n : int
+         Number of mixture components (dimension of \( \boldsymbol{\alpha} \)).
+     x0 : array-like, optional
+         Initial guess for \( \boldsymbol{\alpha} \). If None, defaults to uniform.
+
+     Returns
+     -------
+     alpha_opt : ndarray of shape (n,)
+         Optimized mixture weights summing to one.
+     """
+     if x0 is None:
+         x0 = np.ones(n) / n
+     cons, bounds = _simplex_constraints(n)
+     res = minimize(objective, x0, method="SLSQP", constraints=cons, bounds=bounds)
+     x = np.clip(getattr(res, "x", x0), 0.0, None)
+     s = np.sum(x)
+     return x / s if s > 0 else np.ones(n) / n
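
A quick numerical check of the normalized Gaussian kernel defined in the new neighbors/_utils.py above: for one-dimensional inputs with x equal to y, the squared distance is zero and the kernel value reduces to 1 / (h * sqrt(2*pi)), the peak of a Gaussian density with standard deviation h. A sketch assuming mlquantify 0.1.10 is installed:

import numpy as np
from mlquantify.neighbors._utils import gaussian_kernel

h = 0.2
# K(x, x) for D = 1 should match the Gaussian density peak 1 / (h * sqrt(2*pi)).
val = gaussian_kernel([[0.5]], [[0.5]], bandwidth=h)[0, 0]
print(np.isclose(val, 1.0 / (h * np.sqrt(2 * np.pi))))  # expected: True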
@@ -0,0 +1 @@
+ # TODO
@@ -1,2 +1,47 @@
- from .general import *
- from .method import *
+ from mlquantify.utils._tags import (
+     Tags,
+     TargetInputTags,
+     get_tags
+ )
+ from mlquantify.utils._constraints import (
+     Interval,
+     Options,
+     CallableConstraint
+ )
+ from mlquantify.utils.prevalence import (
+     get_prev_from_labels,
+     normalize_prevalence
+ )
+ from mlquantify.utils._load import load_quantifier
+ from mlquantify.utils._artificial import make_prevs
+ from mlquantify.utils._context import validation_context, is_validation_skipped
+ from mlquantify.utils._decorators import _fit_context
+ from mlquantify.utils._exceptions import (
+     InputValidationError,
+     InvalidParameterError,
+     NotFittedError
+ )
+ from mlquantify.utils._get_scores import apply_cross_validation
+ from mlquantify.utils._parallel import resolve_n_jobs
+ from mlquantify.utils._random import check_random_state
+ from mlquantify.utils._sampling import (
+     simplex_uniform_kraemer,
+     simplex_grid_sampling,
+     simplex_uniform_sampling,
+     get_indexes_with_prevalence
+ )
+ from mlquantify.utils._validation import (
+     _validate_is_numpy_array,
+     _validate_2d_predictions,
+     _validate_1d_predictions,
+     validate_y,
+     validate_predictions,
+     validate_parameter_constraints,
+     validate_learner_contraints,
+     _is_fitted,
+     check_is_fitted,
+     _is_arraylike_not_scalar,
+     _is_arraylike,
+     validate_data,
+     check_classes_attribute
+ )
@@ -0,0 +1,27 @@
+ import numpy as np
+
+
+ def make_prevs(ndim:int) -> list:
+     """
+     Generate a list of n_dim values uniformly distributed between 0 and 1 that sum exactly to 1.
+
+     Parameters
+     ----------
+     ndim : int
+         Number of dimensions.
+
+     Returns
+     -------
+     list
+         List of n_dim values uniformly distributed between 0 and 1 that sum exactly to 1.
+     """
+     # Generate n_dim-1 random u_dist uniformly distributed between 0 and 1
+     u_dist = np.random.uniform(0, 1, ndim - 1)
+     # Add 0 and 1 to the u_dist
+     u_dist = np.append(u_dist, [0, 1])
+     # Sort the u_dist
+     u_dist.sort()
+     # Calculate the differences between consecutive u_dist
+     prevs = np.diff(u_dist)
+
+     return prevs
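
make_prevs draws a point uniformly at random on the probability simplex: the differences of sorted Uniform(0, 1) draws padded with 0 and 1 are non-negative and sum exactly to 1. A short usage sketch, assuming mlquantify 0.1.10 is installed (make_prevs is re-exported from mlquantify.utils, per the __init__.py hunk above):

import numpy as np
from mlquantify.utils import make_prevs

prevs = make_prevs(3)                  # e.g. array([0.21, 0.48, 0.31])
print(np.isclose(prevs.sum(), 1.0))    # True: the prevalences form a valid distribution
print(np.all(prevs >= 0))              # True: all entries are non-negative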