mlquantify 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mlquantify/__init__.py +10 -29
  2. mlquantify/adjust_counting/__init__.py +24 -0
  3. mlquantify/adjust_counting/_adjustment.py +648 -0
  4. mlquantify/adjust_counting/_base.py +245 -0
  5. mlquantify/adjust_counting/_counting.py +153 -0
  6. mlquantify/adjust_counting/_utils.py +109 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +329 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +147 -0
  13. mlquantify/likelihood/_classes.py +430 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +785 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +51 -36
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +147 -0
  22. mlquantify/mixture/_classes.py +458 -0
  23. mlquantify/mixture/_utils.py +163 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +168 -0
  31. mlquantify/neighbors/_classes.py +150 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +37 -62
  33. mlquantify/neighbors/_kde.py +268 -0
  34. mlquantify/neighbors/_utils.py +131 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +64 -0
  50. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.10.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -289
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.8.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,209 @@
+ from mlquantify.utils._tags import (
+     get_tags
+ )
+ from mlquantify.utils._validation import validate_parameter_constraints, validate_learner_contraints
+
+
+ class AggregationMixin:
+     """Mixin class for all aggregative quantifiers.
+
+     An aggregative quantifier is a quantifier that relies on an underlying
+     supervised learner to produce predictions on which the quantification
+     is then performed.
+
+     Inheriting from this mixin provides learner validation and parameter
+     setting that also reaches the learner (used by `GridSearchQ` and friends).
+
+     This mixin also sets the `has_estimator` and `requires_fit`
+     tags to `True`.
+
+     Notes
+     -----
+     - An aggregative quantifier must have a 'learner' attribute that is
+       a supervised learning estimator.
+     - Depending on the type of predictions required from the learner,
+       the quantifier can be further classified as a 'soft' or 'crisp'
+       aggregative quantifier.
+
+     See the :ref:`User Guide <rolling_your_own_aggregative_quantifiers>`
+     for more details.
+
+     Examples
+     --------
+     >>> from mlquantify.base import BaseQuantifier, AggregationMixin
+     >>> from sklearn.linear_model import LogisticRegression
+     >>> import numpy as np
+     >>> class MyAggregativeQuantifier(AggregationMixin, BaseQuantifier):
+     ...     def __init__(self, learner=None):
+     ...         self.learner = learner if learner is not None else LogisticRegression()
+     ...     def fit(self, X, y):
+     ...         self.learner.fit(X, y)
+     ...         self.classes_ = np.unique(y)
+     ...         return self
+     ...     def predict(self, X):
+     ...         preds = self.learner.predict(X)
+     ...         _, counts = np.unique(preds, return_counts=True)
+     ...         prevalence = counts / counts.sum()
+     ...         return prevalence
+     >>> quantifier = MyAggregativeQuantifier()
+     >>> X = np.random.rand(100, 10)
+     >>> y = np.random.randint(0, 2, size=100)
+     >>> quantifier.fit(X, y).predict(X)
+     [0.5 0.5]
+     """
+
+     def __mlquantify_tags__(self):
+         tags = super().__mlquantify_tags__()
+         tags.has_estimator = True
+         tags.requires_fit = True
+         return tags
+
+     def _validate_params(self):
+         """Validate the parameters of the quantifier instance,
+         including the learner's parameters.
+
+         The expected types and values must be defined in the `_parameter_constraints`
+         class attribute as a dictionary of the form `param_name: list of constraints`.
+         See the docstring of `validate_parameter_constraints` for more details.
+         """
+         validate_learner_contraints(self, self.learner)
+
+         validate_parameter_constraints(
+             self._parameter_constraints,
+             self.get_params(deep=False),
+             caller_name=self.__class__.__name__,
+         )
+
+     def set_params(self, **params):
+         # Model params
+         for key, value in params.items():
+             if hasattr(self, key):
+                 setattr(self, key, value)
+
+         # Learner params
+         if self.learner is not None:
+             learner_params = {k.replace('learner__', '', 1): v for k, v in params.items() if k.startswith('learner__')}
+             if learner_params:
+                 self.learner.set_params(**learner_params)
+
+         return self
+
+
+ class SoftLearnerQMixin:
+     """Soft-predictions mixin for aggregative quantifiers.
+
+     This mixin sets the following tags:
+     - `estimator_function`: "predict_proba"
+     - `estimator_type`: "soft"
+
+     Notes
+     -----
+     - This mixin should be used alongside `AggregationMixin`, placed to
+       the left of it in the inheritance order.
+
+     Examples
+     --------
+     >>> from mlquantify.base import BaseQuantifier, AggregationMixin, SoftLearnerQMixin
+     >>> from sklearn.linear_model import LogisticRegression
+     >>> import numpy as np
+     >>> class MySoftAggregativeQuantifier(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
+     ...     def __init__(self, learner=None):
+     ...         self.learner = learner if learner is not None else LogisticRegression()
+     ...     def fit(self, X, y):
+     ...         self.learner.fit(X, y)
+     ...         self.classes_ = np.unique(y)
+     ...         return self
+     ...     def predict(self, X):
+     ...         proba = self.learner.predict_proba(X)
+     ...         return proba.mean(axis=0)
+     >>> quantifier = MySoftAggregativeQuantifier()
+     >>> X = np.random.rand(100, 10)
+     >>> y = np.random.randint(0, 2, size=100)
+     >>> quantifier.fit(X, y).predict(X)
+     [0.5 0.5]
+     """
+
+     def __mlquantify_tags__(self):
+         tags = super().__mlquantify_tags__()
+         tags.estimator_function = "predict_proba"
+         tags.estimator_type = "soft"
+         return tags
+
+
+ class CrispLearnerQMixin:
+     """Crisp-predictions mixin for aggregative quantifiers.
+
+     This mixin sets the following tags:
+     - `estimator_function`: "predict"
+     - `estimator_type`: "crisp"
+
+     Notes
+     -----
+     - This mixin should be used alongside `AggregationMixin`, placed to
+       the left of it in the inheritance order.
+
+     Examples
+     --------
+     >>> from mlquantify.base import BaseQuantifier, AggregationMixin, CrispLearnerQMixin
+     >>> from sklearn.linear_model import LogisticRegression
+     >>> import numpy as np
+     >>> class MyCrispAggregativeQuantifier(CrispLearnerQMixin, AggregationMixin, BaseQuantifier):
+     ...     def __init__(self, learner=None):
+     ...         self.learner = learner if learner is not None else LogisticRegression()
+     ...     def fit(self, X, y):
+     ...         self.learner.fit(X, y)
+     ...         self.classes_ = np.unique(y)
+     ...         return self
+     ...     def predict(self, X):
+     ...         preds = self.learner.predict(X)
+     ...         _, counts = np.unique(preds, return_counts=True)
+     ...         prevalence = counts / counts.sum()
+     ...         return prevalence
+     >>> quantifier = MyCrispAggregativeQuantifier()
+     >>> X = np.random.rand(100, 10)
+     >>> y = np.random.randint(0, 2, size=100)
+     >>> quantifier.fit(X, y).predict(X)
+     [0.5 0.5]
+     """
+
+     def __mlquantify_tags__(self):
+         tags = super().__mlquantify_tags__()
+         tags.estimator_function = "predict"
+         tags.estimator_type = "crisp"
+         return tags
+
+
+ def uses_soft_predictions(quantifier):
+     """Check if the quantifier uses soft predictions."""
+     return get_tags(quantifier).estimator_type == "soft"
+
+ def uses_crisp_predictions(quantifier):
+     """Check if the quantifier uses crisp predictions."""
+     return get_tags(quantifier).estimator_type == "crisp"
+
+ def is_aggregative_quantifier(quantifier):
+     """Check if the quantifier is aggregative."""
+     return get_tags(quantifier).has_estimator
+
+ def get_aggregation_requirements(quantifier):
+     """Get the prediction requirements for the aggregative quantifier."""
+     tags = get_tags(quantifier)
+     return tags.prediction_requirements
+
+
+ def _get_learner_function(quantifier):
+     """Get the name of the learner method used by the aggregative quantifier."""
+     tags = get_tags(quantifier)
+     function_name = tags.estimator_function
+     if function_name is None:
+         raise ValueError(f"The quantifier {quantifier.__class__.__name__} does not specify an estimator function.")
+     if not hasattr(quantifier.learner, function_name):
+         raise AttributeError(f"The learner {quantifier.learner.__class__.__name__} does not have the method '{function_name}'.")
+     return function_name
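
To see how the pieces above compose, here is a minimal sketch (not part of the diff; `ProbabilisticAverager` is a hypothetical class written against the 0.1.10 API shown in this hunk) that builds a soft aggregative quantifier and checks it with the tag helpers:

import numpy as np
from sklearn.linear_model import LogisticRegression
from mlquantify.base import BaseQuantifier
from mlquantify.base_aggregative import (
    AggregationMixin,
    SoftLearnerQMixin,
    is_aggregative_quantifier,
    uses_soft_predictions,
    _get_learner_function,
)

class ProbabilisticAverager(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
    # PCC-style quantifier: averages the learner's posterior probabilities.
    def __init__(self, learner=None):
        self.learner = learner if learner is not None else LogisticRegression()

    def fit(self, X, y):
        self.learner.fit(X, y)
        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        fn = _get_learner_function(self)  # resolves to "predict_proba" via the tags
        return getattr(self.learner, fn)(X).mean(axis=0)

q = ProbabilisticAverager().fit(np.random.rand(100, 5), np.random.randint(0, 2, 100))
assert is_aggregative_quantifier(q)  # has_estimator tag set by AggregationMixin
assert uses_soft_predictions(q)      # estimator_type == "soft" set by SoftLearnerQMixin

The inheritance order matters here: `SoftLearnerQMixin` must sit to the left of `AggregationMixin` so that its `__mlquantify_tags__` assignments are applied after the base tags are built.
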
@@ -0,0 +1 @@
+ # TODO
@@ -0,0 +1,329 @@
+ import numpy as np
+ from scipy.stats import chi2
+
+
+ class BaseConfidenceRegion:
+     r"""
+     Base class for confidence regions of prevalence estimates.
+
+     This class defines the interface and core structure for constructing
+     confidence regions around class prevalence estimates obtained from
+     quantification models.
+
+     Confidence regions capture the uncertainty associated with prevalence
+     estimates, typically derived from bootstrap resampling as proposed in
+     [1]_.
+
+     Parameters
+     ----------
+     prev_estims : array-like of shape (m, n)
+         Collection of ``m`` bootstrap prevalence estimates for ``n`` classes.
+     confidence_level : float, default=0.95
+         Desired confidence level :math:`1 - \alpha` of the region.
+
+     Attributes
+     ----------
+     prev_estims : ndarray of shape (m, n)
+         Bootstrap prevalence estimates.
+     confidence_level : float
+         Confidence level associated with the region.
+
+     Notes
+     -----
+     The confidence region :math:`CR_{\alpha}` is defined such that
+
+     .. math::
+
+         \mathbb{P}\left(\pi^{\ast} \in CR_{\alpha}\right) = 1 - \alpha
+
+     where :math:`\pi^{\ast}` is the unknown true class-prevalence vector.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> class DummyRegion(BaseConfidenceRegion):
+     ...     def _compute_region(self):
+     ...         self.mean_ = np.mean(self.prev_estims, axis=0)
+     ...     def get_region(self):
+     ...         return self.mean_
+     ...     def get_point_estimate(self):
+     ...         return self.mean_
+     ...     def contains(self, point):
+     ...         return np.allclose(point, self.mean_, atol=0.1)
+     >>> X = np.random.dirichlet(np.ones(3), size=100)
+     >>> region = DummyRegion(X, confidence_level=0.9)
+     >>> region.get_point_estimate().round(3)
+     array([0.33, 0.33, 0.34])
+
+     References
+     ----------
+     .. [1] Moreo, A., & Salvati, N. (2025).
+        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.*
+        Istituto di Scienza e Tecnologie dell'Informazione, CNR, Pisa.
+     """
+
+     def __init__(self, prev_estims, confidence_level=0.95):
+         self.prev_estims = np.asarray(prev_estims)
+         self.confidence_level = confidence_level
+         self._compute_region()
+
+     def _compute_region(self):
+         raise NotImplementedError("Subclasses must implement _compute_region().")
+
+     def get_region(self):
+         """Return the parameters defining the confidence region."""
+         raise NotImplementedError
+
+     def get_point_estimate(self):
+         """Return the point estimate of prevalence (e.g., the mean of the bootstrap samples)."""
+         raise NotImplementedError
+
+     def contains(self, point):
+         """Check whether a prevalence vector lies within the region."""
+         raise NotImplementedError
+
+
+ # ==========================================================
+ # Confidence Intervals (via percentiles)
+ # ==========================================================
+
+ class ConfidenceInterval(BaseConfidenceRegion):
+     r"""Bootstrap confidence intervals for each class prevalence.
+
+     Constructs independent percentile-based confidence intervals
+     for each class dimension from bootstrap samples.
+
+     The confidence region is defined as:
+
+     .. math::
+
+         CI_\alpha(\pi) =
+         \begin{cases}
+             1 & \text{if } L_i \le \pi_i \le U_i, \; \forall i = 1, \dots, n \\
+             0 & \text{otherwise}
+         \end{cases}
+
+     where :math:`L_i` and :math:`U_i` are the empirical
+     :math:`\alpha/2` and :math:`1 - \alpha/2` quantiles for class :math:`i`.
+
+     Parameters
+     ----------
+     prev_estims : array-like of shape (m, n)
+         Bootstrap prevalence estimates.
+     confidence_level : float, default=0.95
+         Desired confidence level.
+
+     Attributes
+     ----------
+     I_low : ndarray of shape (n,)
+         Lower confidence bounds.
+     I_high : ndarray of shape (n,)
+         Upper confidence bounds.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> X = np.random.dirichlet(np.ones(3), size=200)
+     >>> ci = ConfidenceInterval(X, confidence_level=0.9)
+     >>> ci.get_region()
+     (array([0.05, 0.06, 0.05]), array([0.48, 0.50, 0.48]))
+     >>> ci.contains([0.3, 0.4, 0.3])
+     array([ True])
+
+     References
+     ----------
+     .. [1] Moreo, A., & Salvati, N. (2025).
+        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.*
+        Section 3.3, Equation (1).
+     """
+
+     def _compute_region(self):
+         alpha = 1 - self.confidence_level
+         low_perc = (alpha / 2.) * 100
+         high_perc = (1 - alpha / 2.) * 100
+         self.I_low, self.I_high = np.percentile(self.prev_estims, q=[low_perc, high_perc], axis=0)
+
+     def get_region(self):
+         return self.I_low, self.I_high
+
+     def get_point_estimate(self):
+         return np.mean(self.prev_estims, axis=0)
+
+     def contains(self, point):
+         point = np.asarray(point)
+         within = np.logical_and(self.I_low <= point, point <= self.I_high)
+         return np.all(within, axis=-1, keepdims=True)
+
+
+ # ==========================================================
+ # Confidence Ellipse in Simplex
+ # ==========================================================
+
+ class ConfidenceEllipseSimplex(BaseConfidenceRegion):
+     r"""Confidence ellipse for prevalence estimates in the simplex.
+
+     Defines a multivariate confidence region based on a chi-squared threshold:
+
+     .. math::
+
+         CE_\alpha(\pi) =
+         \begin{cases}
+             1 & \text{if } (\pi - \mu)^\top \Sigma^{-1} (\pi - \mu) \le \chi^2_{n-1}(1 - \alpha) \\
+             0 & \text{otherwise}
+         \end{cases}
+
+     Parameters
+     ----------
+     prev_estims : array-like of shape (m, n)
+         Bootstrap prevalence estimates.
+     confidence_level : float, default=0.95
+         Confidence level.
+
+     Attributes
+     ----------
+     mean_ : ndarray of shape (n,)
+         Mean prevalence estimate.
+     precision_matrix : ndarray of shape (n, n)
+         Inverse covariance matrix of the estimates.
+     chi2_val : float
+         Chi-squared cutoff threshold defining the ellipse.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> X = np.random.dirichlet(np.ones(3), size=200)
+     >>> ce = ConfidenceEllipseSimplex(X, confidence_level=0.95)
+     >>> ce.get_point_estimate().round(3)
+     array([0.33, 0.34, 0.33])
+     >>> ce.contains(np.array([0.4, 0.3, 0.3]))
+     True
+
+     References
+     ----------
+     .. [1] Moreo, A., & Salvati, N. (2025).
+        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.*
+        Section 3.3, Equation (2).
+     """
+
+     def _compute_region(self):
+         cov_ = np.cov(self.prev_estims, rowvar=False, ddof=1)
+         try:
+             self.precision_matrix = np.linalg.inv(cov_)
+         except np.linalg.LinAlgError:
+             self.precision_matrix = None
+
+         dim = self.prev_estims.shape[-1]
+         ddof = dim - 1
+         self.chi2_val = chi2.ppf(self.confidence_level, ddof)
+         self.mean_ = np.mean(self.prev_estims, axis=0)
+
+     def get_region(self):
+         return self.mean_, self.precision_matrix, self.chi2_val
+
+     def get_point_estimate(self):
+         return self.mean_
+
+     def contains(self, point):
+         if self.precision_matrix is None:
+             return False
+         diff = point - self.mean_
+         dist2 = diff.T @ self.precision_matrix @ diff
+         return bool(dist2 <= self.chi2_val)
+
+
+ # ==========================================================
+ # Confidence Ellipse in CLR (Centered Log-Ratio) Space
+ # ==========================================================
+
+ class ConfidenceEllipseCLR(ConfidenceEllipseSimplex):
+     r"""Confidence ellipse for prevalence estimates in CLR-transformed space.
+
+     Applies the Centered Log-Ratio (CLR) transformation:
+
+     .. math::
+
+         T(\pi) = \left[\log\frac{\pi_1}{g(\pi)}, \dots, \log\frac{\pi_n}{g(\pi)}\right],
+         \quad g(\pi) = \left(\prod_{i=1}^{n} \pi_i\right)^{1/n}
+
+     A confidence ellipse is then built in the transformed space:
+
+     .. math::
+
+         CT_\alpha(\pi) =
+         \begin{cases}
+             1 & \text{if } (T(\pi) - \mu_{CLR})^\top \Sigma^{-1} (T(\pi) - \mu_{CLR}) \le \chi^2_{n-1}(1 - \alpha) \\
+             0 & \text{otherwise}
+         \end{cases}
+
+     Parameters
+     ----------
+     prev_estims : array-like of shape (m, n)
+         Bootstrap prevalence estimates.
+     confidence_level : float, default=0.95
+         Confidence level.
+
+     Attributes
+     ----------
+     mean_ : ndarray of shape (n,)
+         Mean vector in CLR space.
+     precision_matrix : ndarray of shape (n, n)
+         Inverse covariance matrix in CLR space.
+     chi2_val : float
+         Chi-squared threshold.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> X = np.random.dirichlet(np.ones(3), size=200)
+     >>> clr = ConfidenceEllipseCLR(X, confidence_level=0.9)
+     >>> clr.get_point_estimate().round(3)
+     array([ 0.,  0., -0.])
+     >>> clr.contains(np.array([0.4, 0.4, 0.2]))
+     True
+
+     References
+     ----------
+     .. [1] Moreo, A., & Salvati, N. (2025).
+        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.*
+        Section 3.3, Equation (3).
+     """
+
+     def _compute_region(self, eps=1e-6):
+         x = self.prev_estims
+         # CLR transform: divide by the per-sample geometric mean, then take logs.
+         G = np.exp(np.mean(np.log(x + eps), axis=1, keepdims=True))
+         x_clr = np.log((x + eps) / (G + eps))
+         self.x_clr = x_clr
+         cov_ = np.cov(x_clr, rowvar=False, ddof=1)
+         try:
+             self.precision_matrix = np.linalg.inv(cov_)
+         except np.linalg.LinAlgError:
+             self.precision_matrix = None
+
+         dim = x_clr.shape[-1]
+         ddof = dim - 1
+         self.chi2_val = chi2.ppf(self.confidence_level, ddof)
+         self.mean_ = np.mean(x_clr, axis=0)
+
+     def get_point_estimate(self, eps=1e-6):
+         Gp = np.exp(np.mean(np.log(self.prev_estims + eps), axis=1, keepdims=True))
+         x_clr = np.log((self.prev_estims + eps) / (Gp + eps))
+         return np.mean(x_clr, axis=0)
+
+     def contains(self, point, eps=1e-6):
+         if self.precision_matrix is None:
+             return False
+         point = np.asarray(point)
+         Gp = np.exp(np.mean(np.log(point + eps)))
+         point_clr = np.log((point + eps) / (Gp + eps))
+         diff = point_clr - self.mean_
+         dist2 = diff.T @ self.precision_matrix @ diff
+         return bool(dist2 <= self.chi2_val)
+
+
+ # ==========================================================
+ # Factory Method for Confidence Regions
+ # ==========================================================
+
+ def construct_confidence_region(prev_estims, confidence_level=0.95, method="intervals"):
+     method = method.lower()
+     if method == "intervals":
+         return ConfidenceInterval(prev_estims, confidence_level)
+     elif method == "ellipse":
+         return ConfidenceEllipseSimplex(prev_estims, confidence_level)
+     elif method in ("elipse-clr", "ellipse-clr", "clr"):
+         return ConfidenceEllipseCLR(prev_estims, confidence_level)
+     else:
+         raise NotImplementedError(f"Unknown method '{method}'.")
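
As a usage sketch for the factory above (the bootstrap estimates are simulated here with a Dirichlet draw; in practice they would come from resampled quantifier predictions):

import numpy as np
from mlquantify.confidence import construct_confidence_region

rng = np.random.default_rng(0)
estims = rng.dirichlet(alpha=[30, 50, 20], size=500)  # 500 bootstrap estimates, 3 classes

ci = construct_confidence_region(estims, confidence_level=0.95, method="intervals")
low, high = ci.get_region()       # per-class percentile bounds
point = ci.get_point_estimate()   # mean of the bootstrap estimates
print(ci.contains(point))         # the point estimate lies inside its own intervals

ce = construct_confidence_region(estims, confidence_level=0.95, method="ellipse")
print(ce.contains(point))         # chi-squared ellipse membership test

Note the return-type difference: the ellipse variants return a single boolean, while `ConfidenceInterval.contains` returns an array, since the interval check is performed per class and then reduced.
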
@@ -0,0 +1,5 @@
+ from ._classes import (
+     EMQ,
+     MLPE,
+     CDE
+ )
@@ -0,0 +1,147 @@
+ import numpy as np
+ from abc import abstractmethod
+
+ from mlquantify.base import BaseQuantifier
+
+ from mlquantify.base_aggregative import (
+     AggregationMixin,
+     _get_learner_function
+ )
+ from mlquantify.adjust_counting import CC
+ from mlquantify.utils._decorators import _fit_context
+ from mlquantify.utils._validation import check_classes_attribute, validate_predictions, validate_y, validate_data, validate_prevalences
+
+
+ class BaseIterativeLikelihood(AggregationMixin, BaseQuantifier):
+     r"""Iterative likelihood-based quantification adjustment methods.
+
+     This base class encompasses quantification approaches that estimate class prevalences
+     by maximizing the likelihood of the observed data, adjusting prevalence estimates on
+     test sets under the assumption of prior probability shift.
+
+     These methods iteratively refine estimates of class prevalences by maximizing the
+     likelihood of the classifier outputs, usually the posterior probabilities provided by
+     a trained model, assuming that the class-conditional distributions remain fixed
+     between the training and test domains.
+
+     Mathematical formulation
+     ------------------------
+     Let:
+
+     - :math:`p_k^t` be the prior probability of class :math:`k` in the training set, satisfying :math:`\sum_k p_k^t = 1`,
+     - :math:`s_k(x)` be the posterior probability estimated by the classifier for class :math:`k` given instance :math:`x`,
+     - :math:`p_k` be the unknown prior probability of class :math:`k` in the test set,
+     - :math:`x_1, \dots, x_N` be the unlabeled test set instances.
+
+     The likelihood of the observed data is:
+
+     .. math::
+
+         L = \prod_{i=1}^N \sum_{k=1}^K s_k(x_i) \frac{p_k}{p_k^t}
+
+     Methods in this class seek a solution that maximizes this likelihood via iterative updates.
+
+     Notes
+     -----
+     - Applicable to binary and multiclass problems as long as the classifier provides calibrated posterior probabilities.
+     - Assumes changes only in the prior probabilities (prior probability shift).
+     - The algorithms converge to local maxima of the likelihood function.
+     - Includes methods such as Class Distribution Estimation (CDE), Maximum Likelihood Prevalence Estimation (MLPE), and Expectation-Maximization (EM) based quantification.
+
+     Parameters
+     ----------
+     learner : estimator, optional
+         Probabilistic classifier implementing the methods `fit(X, y)` and `predict_proba(X)`.
+     tol : float, default=1e-4
+         Convergence tolerance for the prevalence update criterion.
+     max_iter : int, default=100
+         Maximum allowed number of iterations.
+
+     Attributes
+     ----------
+     learner : estimator
+         Underlying classification model.
+     tol : float
+         Tolerance for the stopping criterion.
+     max_iter : int
+         Maximum number of iterations.
+     classes_ : ndarray of shape (n_classes,)
+         Unique classes observed during training.
+     priors : ndarray of shape (n_classes,)
+         Class distribution in the training set.
+     y_train : array-like
+         Training labels used to estimate the priors.
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> from sklearn.linear_model import LogisticRegression
+     >>> class MyQuantifier(BaseIterativeLikelihood):
+     ...     def __init__(self, learner=None, tol=1e-4, max_iter=100):
+     ...         super().__init__(learner=learner, tol=tol, max_iter=max_iter)
+     ...     def _iterate(self, predictions, priors):
+     ...         return priors  # placeholder for the iterative update logic
+     >>> X = np.random.randn(200, 8)
+     >>> y = np.random.randint(0, 3, size=(200,))
+     >>> q = MyQuantifier(learner=LogisticRegression(max_iter=200))
+     >>> q = q.fit(X, y)
+     >>> q.predict(X)
+     {0: 0.32, 1: 0.40, 2: 0.28}
+
+     References
+     ----------
+     .. [1] Saerens, M., Latinne, P., & Decaestecker, C. (2002). "Adjusting the Outputs of a Classifier to New a Priori Probabilities: A Simple Procedure." Neural Computation, 14(1), 21-41.
+
+     .. [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). "Learning to Quantify." The Information Retrieval Series 47, Springer. https://doi.org/10.1007/978-3-031-20467-8
+     """
+
+     @abstractmethod
+     def __init__(self,
+                  learner=None,
+                  tol=1e-4,
+                  max_iter=100):
+         self.learner = learner
+         self.tol = tol
+         self.max_iter = max_iter
+
+     def __mlquantify_tags__(self):
+         tags = super().__mlquantify_tags__()
+         tags.prediction_requirements.requires_train_proba = False
+         return tags
+
+     @_fit_context(prefer_skip_nested_validation=True)
+     def fit(self, X, y):
+         """Fit the quantifier using the provided data and learner."""
+         X, y = validate_data(self, X, y)
+         validate_y(self, y)
+         self.classes_ = np.unique(y)
+         self.learner.fit(X, y)
+         counts = np.array([np.count_nonzero(y == _class) for _class in self.classes_])
+         self.priors = counts / len(y)
+         self.y_train = y
+
+         return self
+
+     def predict(self, X):
+         """Predict class prevalences for the given data."""
+         estimator_function = _get_learner_function(self)
+         predictions = getattr(self.learner, estimator_function)(X)
+         prevalences = self.aggregate(predictions, self.y_train)
+         return prevalences
+
+     def aggregate(self, predictions, y_train):
+         predictions = validate_predictions(self, predictions)
+         self.classes_ = check_classes_attribute(self, np.unique(y_train))
+
+         if not hasattr(self, 'priors') or len(self.priors) != len(self.classes_):
+             counts = np.array([np.count_nonzero(y_train == _class) for _class in self.classes_])
+             self.priors = counts / len(y_train)
+
+         prevalences = self._iterate(predictions, self.priors)
+         prevalences = validate_prevalences(self, prevalences, self.classes_)
+         return prevalences
+
+     @abstractmethod
+     def _iterate(self, predictions, priors):
+         ...
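
The `_iterate` hook is where concrete subclasses such as `EMQ` plug in their update rule. The following is a hedged sketch of what an EM-style implementation could look like, following the Saerens et al. (2002) update cited in the docstring; `EMSketch` is illustrative and not the package's actual `EMQ` code:

import numpy as np
from mlquantify.likelihood._base import BaseIterativeLikelihood

class EMSketch(BaseIterativeLikelihood):
    # Hypothetical EM quantifier: alternates posterior reweighting (E-step)
    # with prevalence re-estimation (M-step) until convergence.
    def __init__(self, learner=None, tol=1e-4, max_iter=100):
        super().__init__(learner=learner, tol=tol, max_iter=max_iter)

    def _iterate(self, predictions, priors):
        # predictions: (N, K) posteriors s_k(x_i); priors: (K,) training priors p_k^t.
        p = np.asarray(priors, dtype=float)
        for _ in range(self.max_iter):
            # E-step: rescale each posterior by p_k / p_k^t and renormalize per instance.
            weighted = predictions * (p / priors)
            weighted /= weighted.sum(axis=1, keepdims=True)
            # M-step: the new prevalence estimate is the mean adjusted posterior.
            p_new = weighted.mean(axis=0)
            if np.max(np.abs(p_new - p)) < self.tol:
                return p_new
            p = p_new
        return p

Each pass cannot decrease the likelihood :math:`L` defined above, so the loop stops either at convergence (change below `tol`) or after `max_iter` iterations.
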