mlquantify 0.1.23__tar.gz → 0.1.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. {mlquantify-0.1.23/mlquantify.egg-info → mlquantify-0.1.25}/PKG-INFO +1 -1
  2. mlquantify-0.1.25/VERSION.txt +1 -0
  3. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/adjust_counting/_adjustment.py +71 -19
  4. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/adjust_counting/_base.py +32 -6
  5. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/adjust_counting/_counting.py +60 -19
  6. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/confidence.py +22 -19
  7. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/likelihood/_classes.py +40 -23
  8. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_validation.py +5 -11
  9. {mlquantify-0.1.23 → mlquantify-0.1.25/mlquantify.egg-info}/PKG-INFO +1 -1
  10. mlquantify-0.1.23/VERSION.txt +0 -1
  11. {mlquantify-0.1.23 → mlquantify-0.1.25}/LICENSE +0 -0
  12. {mlquantify-0.1.23 → mlquantify-0.1.25}/MANIFEST.in +0 -0
  13. {mlquantify-0.1.23 → mlquantify-0.1.25}/README.md +0 -0
  14. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/__init__.py +0 -0
  15. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/adjust_counting/__init__.py +0 -0
  16. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/adjust_counting/_utils.py +0 -0
  17. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/base.py +0 -0
  18. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/base_aggregative.py +0 -0
  19. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/calibration.py +0 -0
  20. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/likelihood/__init__.py +0 -0
  21. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/meta/__init__.py +0 -0
  22. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/meta/_classes.py +0 -0
  23. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/metrics/__init__.py +0 -0
  24. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/metrics/_oq.py +0 -0
  25. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/metrics/_rq.py +0 -0
  26. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/metrics/_slq.py +0 -0
  27. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/mixture/__init__.py +0 -0
  28. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/mixture/_base.py +0 -0
  29. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/mixture/_classes.py +0 -0
  30. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/mixture/_utils.py +0 -0
  31. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/model_selection/__init__.py +0 -0
  32. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/model_selection/_protocol.py +0 -0
  33. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/model_selection/_search.py +0 -0
  34. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/model_selection/_split.py +0 -0
  35. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/multiclass.py +0 -0
  36. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neighbors/__init__.py +0 -0
  37. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neighbors/_base.py +0 -0
  38. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neighbors/_classes.py +0 -0
  39. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neighbors/_classification.py +0 -0
  40. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neighbors/_kde.py +0 -0
  41. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neighbors/_utils.py +0 -0
  42. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neural/__init__.py +0 -0
  43. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neural/_base.py +0 -0
  44. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neural/_classes.py +0 -0
  45. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neural/_perm_invariant.py +0 -0
  46. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/neural/_utils.py +0 -0
  47. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/__init__.py +0 -0
  48. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_artificial.py +0 -0
  49. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_constraints.py +0 -0
  50. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_context.py +0 -0
  51. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_decorators.py +0 -0
  52. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_exceptions.py +0 -0
  53. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_get_scores.py +0 -0
  54. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_load.py +0 -0
  55. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_parallel.py +0 -0
  56. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_random.py +0 -0
  57. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_sampling.py +0 -0
  58. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/_tags.py +0 -0
  59. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify/utils/prevalence.py +0 -0
  60. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify.egg-info/SOURCES.txt +0 -0
  61. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify.egg-info/dependency_links.txt +0 -0
  62. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify.egg-info/requires.txt +0 -0
  63. {mlquantify-0.1.23 → mlquantify-0.1.25}/mlquantify.egg-info/top_level.txt +0 -0
  64. {mlquantify-0.1.23 → mlquantify-0.1.25}/setup.cfg +0 -0
  65. {mlquantify-0.1.23 → mlquantify-0.1.25}/setup.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mlquantify
- Version: 0.1.23
+ Version: 0.1.25
  Summary: Quantification Library
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
  Maintainer: Luiz Fernando Luth Junior
@@ -0,0 +1 @@
+ 0.1.25
@@ -208,24 +208,23 @@ class MatrixAdjustment(BaseAdjustCount):
  pp. 1-8.
  """

-
  _parameter_constraints = {"solver": Options(["optim", "linear"])}

  def __init__(self, learner=None, solver=None):
  super().__init__(learner=learner)
  self.solver = solver

- def _adjust(self, predictions, train_y_pred, y_train):
+ def _adjust(self, predictions, train_predictions, y_train):
  n_class = len(np.unique(y_train))
  self.CM = np.zeros((n_class, n_class))
-
  if self.solver == 'optim':
- priors = np.array(list(CC().aggregate(train_y_pred, y_train).values()))
- self.CM = self._compute_confusion_matrix(train_y_pred, y_train, priors)
+ class_counts = np.array([np.count_nonzero(y_train == _class) for _class in self.classes_])
+ priors = class_counts / len(y_train)
+ self.CM = self._compute_confusion_matrix(train_predictions, y_train, priors)
  prevs_estim = self._get_estimations(predictions > priors, y_train)
  prevalence = self._solve_optimization(prevs_estim, priors)
  else:
- self.CM = self._compute_confusion_matrix(train_y_pred, y_train)
+ self.CM = self._compute_confusion_matrix(train_predictions, y_train)
  prevs_estim = self._get_estimations(predictions, y_train)
  prevalence = self._solve_linear(prevs_estim)

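The `'linear'` branch of `_adjust` solves the system `CM @ prevalence = prevs_estim` directly. Below is a minimal, self-contained sketch of that kind of linear adjustment; `solve_linear_adjustment` and its fallback behavior are illustrative, not the package's internal code.

```python
import numpy as np

def solve_linear_adjustment(CM, prevs_estim):
    """Solve CM @ pi = prevs_estim for pi, then clip and renormalize.

    Illustrative helper, not mlquantify's implementation.
    """
    try:
        pi = np.linalg.solve(CM, prevs_estim)
    except np.linalg.LinAlgError:
        # Singular confusion matrix: fall back to least squares.
        pi, *_ = np.linalg.lstsq(CM, prevs_estim, rcond=None)
    pi = np.clip(pi, 0, None)
    return pi / pi.sum() if pi.sum() > 0 else np.full_like(pi, 1 / len(pi))

# Toy 2-class example: rows = predicted class, columns = true-class rates.
CM = np.array([[0.9, 0.2],
               [0.1, 0.8]])
observed = np.array([0.55, 0.45])   # classify-and-count output on test data
print(solve_linear_adjustment(CM, observed))  # -> [0.5 0.5]
```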
@@ -244,26 +243,31 @@ class MatrixAdjustment(BaseAdjustCount):
  return adjusted

  def _solve_optimization(self, prevs_estim, priors):
- r"""Solve the system linearly.
+ r"""Solve the system using constrained optimization.

- The solution is obtained by matrix inversion:
+ The solution is obtained by minimizing the discrepancy:

- .. math::
+ || C @ \hat{\pi}_F - p ||

- \hat{\pi}_F = \mathbf{C}^{-1} \mathbf{p}
+ subject to the constraints:

- where :math:`\mathbf{C}` is the confusion matrix and :math:`\mathbf{p}`
- is the observed prevalence vector.
+ \hat{\pi}_F \ge 0, \quad \sum_k \hat{\pi}_{F,k} = 1
+
+ where:
+ - C is the confusion matrix,
+ - p is the observed prevalence vector from the test set.

  Parameters
  ----------
- p : ndarray of shape (n_classes,)
- Observed prevalence vector from test set.
+ prevs_estim : ndarray of shape (n_classes,)
+ Observed prevalence vector from the test set.
+ priors : ndarray of shape (n_classes,)
+ Fallback class prior vector used if optimization fails.

  Returns
  -------
  ndarray of shape (n_classes,)
- Adjusted prevalence estimates :math:`\hat{\pi}_F`.
+ Adjusted prevalence estimates \hat{\pi}_F.
  """
  def objective(prevs_pred):
  return np.linalg.norm(self.CM @ prevs_pred - prevs_estim)
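The objective above is exactly a norm minimization over the probability simplex. A minimal sketch of how such a constrained solve could look with SciPy, assuming SLSQP and a fallback to `priors` on failure as the new docstring describes (function name and starting point are illustrative):

```python
import numpy as np
from scipy.optimize import minimize

def solve_optimization(CM, prevs_estim, priors):
    """Minimize ||CM @ pi - prevs_estim|| over the probability simplex.

    Sketch of the documented behavior, not the package's exact code.
    """
    n = CM.shape[1]
    objective = lambda pi: np.linalg.norm(CM @ pi - prevs_estim)
    constraints = ({"type": "eq", "fun": lambda pi: pi.sum() - 1.0},)
    bounds = [(0.0, 1.0)] * n
    result = minimize(objective, x0=np.full(n, 1.0 / n),
                      bounds=bounds, constraints=constraints, method="SLSQP")
    return result.x if result.success else priors

CM = np.array([[0.85, 0.25], [0.15, 0.75]])
print(solve_optimization(CM, np.array([0.6, 0.4]), priors=np.array([0.5, 0.5])))
```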
@@ -294,19 +298,27 @@ class CDE(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
  r"""CDE-Iterate for binary classification prevalence estimation.

  Threshold :math:`\tau` from false positive and false negative costs:
+
  .. math::
+
  \tau = \frac{c_{FP}}{c_{FP} + c_{FN}}

  Hard classification by thresholding posterior probability :math:`p(+|x)` at :math:`\tau`:
+
  .. math::
+
  \hat{y}(x) = \mathbf{1}_{p(+|x) > \tau}

  Prevalence estimation via classify-and-count:
+
  .. math::
+
  \hat{p}_U(+) = \frac{1}{N} \sum_{n=1}^N \hat{y}(x_n)

  False positive cost update:
+
  .. math::
+
  c_{FP}^{new} = \frac{p_L(+)}{p_L(-)} \times \frac{\hat{p}_U(-)}{\hat{p}_U(+)} \times c_{FN}

  Parameters
@@ -365,6 +377,31 @@ class CDE(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):


  def aggregate(self, predictions, y_train):
+ """Aggregate predictions and apply matrix- or rate-based bias correction.
+
+ Parameters
+ ----------
+ predictions : ndarray of shape (n_samples, n_classes)
+ Learner predictions on test data. Can be probabilities (n_samples, n_classes) or class labels (n_samples,).
+ y_train : ndarray of shape (n_samples,)
+ True class labels of the training data.
+
+ Returns
+ -------
+ ndarray of shape (n_classes,)
+ Class prevalence estimates.
+
+ Examples
+ --------
+ >>> from mlquantify.adjust_counting import CDE
+ >>> import numpy as np
+ >>> q = CDE()
+ >>> predictions = np.random.rand(200)
+ >>> y_train = np.random.randint(0, 2, 200)
+ >>> q.aggregate(predictions, y_train)
+ {0: 0.51, 1: 0.49}
+ """

  self.classes_ = check_classes_attribute(self, np.unique(y_train))
  predictions = validate_predictions(self, predictions)
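The four formulas in the CDE docstring above define one iteration: derive the threshold from the costs, classify and count, then update the false-positive cost. A minimal NumPy sketch of that loop, assuming symmetric starting costs and a simple convergence test (`cde_iterate`, `p_train_pos`, and the stopping rule are illustrative, not the library's code):

```python
import numpy as np

def cde_iterate(posteriors, p_train_pos, c_fn=1.0, max_iter=100, tol=1e-6):
    """CDE-Iterate sketch following the formulas above.

    posteriors  : p(+|x) for each test instance
    p_train_pos : training prevalence p_L(+)
    """
    c_fp = c_fn          # symmetric costs, i.e. tau starts at 0.5
    p_u_pos = 0.5
    for _ in range(max_iter):
        tau = c_fp / (c_fp + c_fn)               # threshold from costs
        new_p_u_pos = np.mean(posteriors > tau)  # classify and count
        new_p_u_pos = np.clip(new_p_u_pos, 1e-6, 1 - 1e-6)
        # Cost update: c_FP = (p_L(+)/p_L(-)) * (p_U(-)/p_U(+)) * c_FN
        c_fp = (p_train_pos / (1 - p_train_pos)) \
               * ((1 - new_p_u_pos) / new_p_u_pos) * c_fn
        if abs(new_p_u_pos - p_u_pos) < tol:
            break
        p_u_pos = new_p_u_pos
    return p_u_pos

rng = np.random.default_rng(0)
posteriors = rng.beta(2, 5, size=500)  # synthetic test posteriors
print(cde_iterate(posteriors, p_train_pos=0.3))
```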
@@ -465,7 +502,7 @@ class FM(SoftLearnerQMixin, MatrixAdjustment):
  \min_{\hat{\pi}_F} \| \mathbf{C} \hat{\pi}_F - \mathbf{p} \|^2

  subject to constraints:
-
+
  .. math::

  \hat{\pi}_F \geq 0, \quad \sum_k \hat{\pi}_{F,k} = 1
@@ -478,7 +515,7 @@ class FM(SoftLearnerQMixin, MatrixAdjustment):
  ----------
  learner : estimator, optional
  Base classifier with `fit` and `predict_proba` methods.
- If None, a default estimator will be used.
+ If None, it is expected that the user will call the `aggregate` method directly.

  Attributes
  ----------
@@ -505,13 +542,28 @@ class FM(SoftLearnerQMixin, MatrixAdjustment):
  def __init__(self, learner=None):
  super().__init__(learner=learner, solver='optim')

- def _compute_confusion_matrix(self, posteriors, y_true, priors):
+ def _compute_confusion_matrix(self, predictions, y_true, priors):
+ n_classes = len(self.classes_)
+ self.CM = np.zeros((n_classes, n_classes))
+
  for i, _class in enumerate(self.classes_):
  indices = (y_true == _class)
- self.CM[:, i] = self._get_estimations(posteriors[indices] > priors, y_true[indices])
+ preds_sub = predictions[indices]
+
+ mask = preds_sub > priors # (n_i, n_classes)
+ masked = np.where(mask, preds_sub, -np.inf)
+ best_classes = np.argmax(masked, axis=1)
+
+ hard_preds = np.zeros_like(preds_sub, dtype=bool)
+ rows = np.arange(preds_sub.shape[0])
+ hard_preds[rows, best_classes] = True
+
+ self.CM[:, i] = self._get_estimations(hard_preds, y_true[indices])
+
  return self.CM


+
  class AC(CrispLearnerQMixin, MatrixAdjustment):
  r"""Adjusted Count method.

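The new `_compute_confusion_matrix` in the FM hunk above turns posteriors into one-hot "hard" predictions by keeping only the entries that beat the training priors and taking the argmax per row. Replaying that step on toy data (standalone, with made-up inputs) makes the mechanics concrete:

```python
import numpy as np

# Toy posteriors for three instances of a 2-class problem.
preds_sub = np.array([[0.70, 0.30],
                      [0.40, 0.60],
                      [0.55, 0.45]])
priors = np.array([0.5, 0.5])

mask = preds_sub > priors                  # which posteriors beat the prior
masked = np.where(mask, preds_sub, -np.inf)
best_classes = np.argmax(masked, axis=1)   # winning class per row

hard_preds = np.zeros_like(preds_sub, dtype=bool)
hard_preds[np.arange(preds_sub.shape[0]), best_classes] = True
print(hard_preds)
# [[ True False]
#  [False  True]
#  [ True False]]
```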
@@ -114,7 +114,6 @@ class BaseCount(AggregationMixin, BaseQuantifier):

  @abstractmethod
  def aggregate(self, predictions):
- """Aggregate predictions into class prevalence estimates."""
  ...

@@ -156,7 +155,7 @@ class BaseAdjustCount(AggregationMixin, BaseQuantifier):
  Parameters
  ----------
  learner : object, optional
- Supervised learner implementing `fit`, `predict`, or `predict_proba`.
+ Supervised learner implementing `fit` and (`predict` or `predict_proba`) depending on the quantifier.

  Attributes
  ----------
@@ -164,7 +163,7 @@ class BaseAdjustCount(AggregationMixin, BaseQuantifier):
  Underlying classification model.
  train_predictions : ndarray of shape (n_samples_train, n_classes)
  Predictions on training data from cross-validation.
- train_y_values : ndarray of shape (n_samples_train,)
+ y_train : ndarray of shape (n_samples_train,)
  True labels corresponding to training predictions.
  classes : ndarray of shape (n_classes,)
  Unique class labels.
@@ -225,17 +224,44 @@ class BaseAdjustCount(AggregationMixin, BaseQuantifier):
  )

  self.train_predictions = train_predictions
- self.train_y_values = y_train_labels
+ self.y_train = y_train_labels
  return self

  def predict(self, X):
  """Predict class prevalences for the given data."""
+ X = validate_data(self, X)
  predictions = getattr(self.learner, _get_learner_function(self))(X)
- prevalences = self.aggregate(predictions, self.train_predictions, self.train_y_values)
+ prevalences = self.aggregate(predictions, self.train_predictions, self.y_train)
  return prevalences

  def aggregate(self, predictions, train_predictions, y_train):
- """Aggregate predictions and apply matrix- or rate-based bias correction."""
+ """Aggregate predictions and apply matrix- or rate-based bias correction.
+
+ Parameters
+ ----------
+ predictions : ndarray of shape (n_samples, n_classes)
+ Learner predictions on test data. Can be probabilities (n_samples, n_classes) or class labels (n_samples,).
+ train_predictions : ndarray of shape (n_samples, n_classes)
+ Learner predictions on training data. Can be probabilities (n_samples, n_classes) or class labels (n_samples,).
+ y_train : ndarray of shape (n_samples,)
+ True class labels of the training data.
+
+ Returns
+ -------
+ ndarray of shape (n_classes,)
+ Class prevalence estimates.
+
+ Examples
+ --------
+ >>> from mlquantify.adjust_counting import AC
+ >>> import numpy as np
+ >>> q = AC()
+ >>> predictions = np.random.rand(200)
+ >>> train_predictions = np.random.rand(200) # generated via cross-validation
+ >>> y_train = np.random.randint(0, 2, 200)
+ >>> q.aggregate(predictions, train_predictions, y_train)
+ {0: 0.51, 1: 0.49}
+ """
  self.classes_ = check_classes_attribute(self, np.unique(y_train))

  predictions = validate_predictions(self, predictions)
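The docstring's comment "generated via cross-validation" refers to out-of-fold training predictions. One plausible way to produce those inputs with scikit-learn before calling `aggregate` (the learner choice and wiring here are assumptions; the library's own `fit`/`predict` path does this internally):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

# Hypothetical setup: build the three arrays aggregate() expects.
rng = np.random.default_rng(42)
X_train = rng.normal(size=(200, 5))
y_train = rng.integers(0, 2, size=200)
X_test = rng.normal(size=(100, 5))

clf = LogisticRegression()
# Out-of-fold crisp predictions on the training set.
train_predictions = cross_val_predict(clf, X_train, y_train, cv=5)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
# These are the inputs for, e.g., AC().aggregate(predictions, train_predictions, y_train).
```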
@@ -15,7 +15,7 @@ class CC(CrispLearnerQMixin, BaseCount):
  r"""Classify and Count (CC) quantifier.

  Implements the Classify and Count method for quantification, described as a
- baseline approach in the literature [1][2].
+ baseline approach in the literature [1]_, [2]_.

  Parameters
  ----------
@@ -51,17 +51,15 @@ class CC(CrispLearnerQMixin, BaseCount):
  >>> q.fit(X, y)
  >>> q.predict(X)
  {0: 0.47, 1: 0.53}
- >>> q2 = CC()
- >>> predictions = np.random.rand(200)
- >>> q2.aggregate(predictions)
- {0: 0.51, 1: 0.49}

  References
  ----------
- .. [1] Forman, G. (2005). "Counting Positives Accurately Despite Inaccurate Classification",
- *ECML*, pp. 564-575.
- .. [2] Forman, G. (2008). "Quantifying Counts and Costs via Classification",
- *Data Mining and Knowledge Discovery*, 17(2), 164-206.
+ .. dropdown:: References
+
+ .. [1] Forman, G. (2005). "Counting Positives Accurately Despite Inaccurate Classification",
+ *ECML*, pp. 564-575.
+ .. [2] Forman, G. (2008). "Quantifying Counts and Costs via Classification",
+ *Data Mining and Knowledge Discovery*, 17(2), 164-206.
  """

  _parameters_constraints = {
@@ -76,6 +74,29 @@ class CC(CrispLearnerQMixin, BaseCount):
  self.threshold = threshold

  def aggregate(self, predictions, y_train=None):
+ """Aggregate predictions into class prevalence estimates.
+
+ Parameters
+ ----------
+ predictions : ndarray of shape (n_samples, n_classes)
+ Learner predictions on test data. Can be probabilities (n_samples, n_classes) or class labels (n_samples,).
+ y_train : ndarray of shape (n_samples,)
+ True class labels of the training data. None by default.
+
+ Returns
+ -------
+ ndarray of shape (n_classes,)
+ Class prevalence estimates.
+
+ Examples
+ --------
+ >>> from mlquantify.adjust_counting import CC
+ >>> import numpy as np
+ >>> q = CC()
+ >>> predictions = np.random.rand(200)
+ >>> q.aggregate(predictions)
+ {0: 0.51, 1: 0.49}
+ """
  predictions = validate_predictions(self, predictions, self.threshold, y_train)

  if y_train is None:
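Classify and Count reduces to thresholding scores and counting labels. The equivalent computation by hand, on made-up scores (this mirrors the docstring example, it is not the library's internals):

```python
import numpy as np

# Classify-and-count by hand: threshold scores, then count per class.
rng = np.random.default_rng(0)
scores = rng.random(200)              # scores for the positive class
crisp = (scores >= 0.5).astype(int)   # default threshold of 0.5

values, counts = np.unique(crisp, return_counts=True)
prevalences = dict(zip(values.tolist(), (counts / counts.sum()).round(2)))
print(prevalences)  # e.g. {0: 0.49, 1: 0.51}
```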
@@ -92,12 +113,8 @@ class CC(CrispLearnerQMixin, BaseCount):
  class PCC(SoftLearnerQMixin, BaseCount):
  r"""Probabilistic Classify and Count (PCC) quantifier.

- Implements the Probabilistic Classify and Count method for quantification as described in:
- [1] Forman, G. (2005). *Counting Positives Accurately Despite Inaccurate Classification.*
- ECML, pp. 564-575.
- [2] Forman, G. (2008). *Quantifying Counts and Costs via Classification.*
- Data Mining and Knowledge Discovery, 17(2), 164-206.
-
+ Implements the Probabilistic Classify and Count method for quantification as described in [1]_, [2]_.
+

  Parameters
  ----------
@@ -112,6 +129,11 @@ class PCC(SoftLearnerQMixin, BaseCount):
  Underlying classification model.
  classes : ndarray of shape (n_classes,)
  Unique class labels observed during training.
+
+ .. dropdown:: References
+
+ .. [1] Forman, G. (2005). *Counting Positives Accurately Despite Inaccurate Classification.* ECML, pp. 564-575.
+ .. [2] Forman, G. (2008). *Quantifying Counts and Costs via Classification.* Data Mining and Knowledge Discovery, 17(2), 164-206.


  Examples
@@ -125,16 +147,35 @@ class PCC(SoftLearnerQMixin, BaseCount):
  >>> q.fit(X, y)
  >>> q.predict(X)
  {0: 0.48, 1: 0.52}
- >>> q2 = PCC()
- >>> predictions = np.random.rand(200, 2)
- >>> q2.aggregate(predictions)
- {0: 0.50, 1: 0.50}
  """

  def __init__(self, learner=None):
  super().__init__(learner=learner)

  def aggregate(self, predictions, y_train=None):
+ """Aggregate predictions into class prevalence estimates.
+
+ Parameters
+ ----------
+ predictions : ndarray of shape (n_samples, n_classes)
+ Learner predictions on test data. Can be probabilities (n_samples, n_classes) or class labels (n_samples,).
+ y_train : ndarray of shape (n_samples,)
+ True class labels of the training data. None by default.
+
+ Returns
+ -------
+ ndarray of shape (n_classes,)
+ Class prevalence estimates.
+
+ Examples
+ --------
+ >>> from mlquantify.adjust_counting import PCC
+ >>> import numpy as np
+ >>> q = PCC()
+ >>> predictions = np.random.rand(200, 2)
+ >>> q.aggregate(predictions)
+ {0: 0.50, 1: 0.50}
+ """
  predictions = validate_predictions(self, predictions)

  # Handle categorical predictions (1D array with class labels)
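Where CC counts hard labels, PCC averages the posterior matrix column-wise. The core computation is a one-liner (shown on synthetic posteriors; not the library's internals):

```python
import numpy as np

rng = np.random.default_rng(0)
posteriors = rng.dirichlet([1, 1], size=200)  # shape (200, 2), rows sum to 1

# PCC estimate: column-wise mean of the posterior matrix.
prevalences = posteriors.mean(axis=0)
print(prevalences.round(2), prevalences.sum())  # sums to 1 by construction
```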
@@ -96,14 +96,15 @@ class ConfidenceInterval(BaseConfidenceRegion):
  The confidence region is defined as:

  .. math::
- CI_α(π) =
- \\begin{cases}
- 1 & \\text{if } L_i \\le π_i \\le U_i, \\forall i=1,...,n \\\\
- 0 & \\text{otherwise}
- \\end{cases}

- where :math:`L_i` and :math:`U_i` are the empirical
- α/2 and 1−α/2 quantiles for class i.
+ CI_{\alpha}(\pi) =
+ \begin{cases}
+ 1 & \text{if } L_i \le \pi_i \le U_i, \forall i=1,\dots,n \\
+ 0 & \text{otherwise}
+ \end{cases}
+
+ where :math:`L_i` and :math:`U_i` are the empirical
+ :math:`\alpha/2` and :math:`1-\alpha/2` quantiles for class :math:`i`.

  Parameters
  ----------
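The region above keeps the per-class empirical α/2 and 1−α/2 quantiles. A minimal sketch with `np.quantile` over bootstrap prevalence samples (the Dirichlet samples stand in for whatever resampling the estimator uses):

```python
import numpy as np

# Per-class percentile interval over bootstrap prevalence estimates.
rng = np.random.default_rng(0)
boot_prevs = rng.dirichlet([8, 2], size=1000)  # (n_bootstrap, n_classes)

alpha = 0.05
lower = np.quantile(boot_prevs, alpha / 2, axis=0)
upper = np.quantile(boot_prevs, 1 - alpha / 2, axis=0)
print(lower.round(3), upper.round(3))

# Membership test for a candidate prevalence vector pi:
pi = np.array([0.8, 0.2])
print(np.all((lower <= pi) & (pi <= upper)))
```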
@@ -163,11 +164,12 @@ class ConfidenceEllipseSimplex(BaseConfidenceRegion):
  Defines a multivariate confidence region based on a chi-squared threshold:

  .. math::
- CE_α(π) =
- \\begin{cases}
- 1 & \\text{if } (π - μ)^T Σ^{-1} (π - μ) \\le χ^2_{n-1}(1-α) \\\\
- 0 & \\text{otherwise}
- \\end{cases}
+
+ CE_{\alpha}(\pi) =
+ \begin{cases}
+ 1 & \text{if } (\pi - \mu)^T \Sigma^{-1} (\pi - \mu) \le \chi^2_{n-1}(1-\alpha) \\
+ 0 & \text{otherwise}
+ \end{cases}

  Parameters
  ----------
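The ellipse membership test is a squared Mahalanobis distance compared against a chi-squared quantile. A minimal sketch with SciPy; the pseudo-inverse is used because a covariance of prevalences (which sum to 1) is singular (function and toy numbers are illustrative):

```python
import numpy as np
from scipy.stats import chi2

def in_confidence_ellipse(pi, mu, cov, alpha=0.05):
    """Check (pi - mu)^T Sigma^{-1} (pi - mu) <= chi2_{n-1}(1 - alpha)."""
    diff = pi - mu
    # pinv: prevalence covariance is rank-deficient on the simplex.
    d2 = diff @ np.linalg.pinv(cov) @ diff
    return d2 <= chi2.ppf(1 - alpha, df=len(pi) - 1)

mu = np.array([0.7, 0.3])
cov = np.array([[0.002, -0.002], [-0.002, 0.002]])
print(in_confidence_ellipse(np.array([0.68, 0.32]), mu, cov))  # True
```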
@@ -237,17 +239,18 @@ class ConfidenceEllipseCLR(ConfidenceEllipseSimplex):
  Applies the Centered Log-Ratio (CLR) transformation:

  .. math::
- T(π) = [\log(π_1/g(π)), ..., \log(π_n/g(π))], \\
- g(π) = (\prod_i π_i)^{1/n}
+
+ T(\pi) = [\log(\pi_1/g(\pi)), ..., \log(\pi_n/g(\pi))], \quad g(\pi) = (\prod_i \pi_i)^{1/n}

  A confidence ellipse is then built in the transformed space:

  .. math::
- CT_α(π) =
- \\begin{cases}
- 1 & \\text{if } (T(π) - μ_{CLR})^T Σ^{-1} (T(π) - μ_{CLR}) \\le χ^2_{n-1}(1-α) \\\\
- 0 & \\text{otherwise}
- \\end{cases}
+
+ CT_{\alpha}(\pi) =
+ \begin{cases}
+ 1 & \text{if } (T(\pi) - \mu_{CLR})^T \Sigma^{-1} (T(\pi) - \mu_{CLR}) \le \chi^2_{n-1}(1-\alpha) \\
+ 0 & \text{otherwise}
+ \end{cases}

  Parameters
  ----------
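The CLR transform as defined above has a compact NumPy form: since g(π) is the geometric mean, dividing by it is the same as subtracting the mean of the logs. A minimal sketch (the epsilon guard is an assumption to avoid log(0)):

```python
import numpy as np

def clr(pi, eps=1e-12):
    """Centered log-ratio: log(pi_i / g(pi)) with g the geometric mean."""
    pi = np.clip(pi, eps, None)    # guard against log(0)
    log_pi = np.log(pi)
    return log_pi - log_pi.mean()  # subtracting mean log == dividing by g(pi)

pi = np.array([0.6, 0.3, 0.1])
z = clr(pi)
print(z.round(3), z.sum())  # CLR coordinates always sum to ~0
```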
@@ -16,36 +16,53 @@ class EMQ(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
  Estimates class prevalences under prior probability shift by alternating
  between expectation **(E)** and maximization **(M)** steps on posterior probabilities.

- E-step:
- .. math::
- p_i^{(s+1)}(x) = \frac{q_i^{(s)} p_i(x)}{\sum_j q_j^{(s)} p_j(x)}
+ .. dropdown:: Mathematical Formulation

- M-step:
- .. math::
- q_i^{(s+1)} = \frac{1}{N} \sum_{n=1}^N p_i^{(s+1)}(x_n)
+ E-step:

- where
- - :math:`p_i(x)` are posterior probabilities predicted by the classifier
- - :math:`q_i^{(s)}` are class prevalence estimates at iteration :math:`s`
- - :math:`N` is the number of test instances.
+ .. math::

- Calibrations supported on posterior probabilities before **EM** iteration:
+ p_i^{(s+1)}(x) = \frac{q_i^{(s)} p_i(x)}{\sum_j q_j^{(s)} p_j(x)}

- Temperature Scaling (TS):
- .. math::
- \hat{p} = \text{softmax}\left(\frac{\log(p)}{T}\right)
+ M-step:

- Bias-Corrected Temperature Scaling (BCTS):
- .. math::
- \hat{p} = \text{softmax}\left(\frac{\log(p)}{T} + b\right)
+ .. math::

- Vector Scaling (VS):
- .. math::
- \hat{p}_i = \text{softmax}(W_i \cdot \log(p_i) + b_i)
+ q_i^{(s+1)} = \frac{1}{N} \sum_{n=1}^N p_i^{(s+1)}(x_n)

- No-Bias Vector Scaling (NBVS):
- .. math::
- \hat{p}_i = \text{softmax}(W_i \cdot \log(p_i))
+ where:
+
+ - :math:`p_i(x)` are posterior probabilities predicted by the classifier
+
+ - :math:`q_i^{(s)}` are class prevalence estimates at iteration :math:`s`
+
+ - :math:`N` is the number of test instances.
+
+ Calibrations supported on posterior probabilities before **EM** iteration:
+
+ Temperature Scaling (TS):
+
+ .. math::
+
+ \hat{p} = \text{softmax}\left(\frac{\log(p)}{T}\right)
+
+ Bias-Corrected Temperature Scaling (BCTS):
+
+ .. math::
+
+ \hat{p} = \text{softmax}\left(\frac{\log(p)}{T} + b\right)
+
+ Vector Scaling (VS):
+
+ .. math::
+
+ \hat{p}_i = \text{softmax}(W_i \cdot \log(p_i) + b_i)
+
+ No-Bias Vector Scaling (NBVS):
+
+ .. math::
+
+ \hat{p}_i = \text{softmax}(W_i \cdot \log(p_i))

  Parameters
  ----------
@@ -94,12 +94,12 @@ def validate_y(quantifier: Any, y: np.ndarray) -> None:
  f"Predictions must be 1D or 2D array, got array with ndim={y.ndim} and shape={y.shape}."
  )

- def _get_valid_crisp_predictions(predictions, train_y_values=None, threshold=0.5):
+ def _get_valid_crisp_predictions(predictions, y_train=None, threshold=0.5):
  predictions = np.asarray(predictions)
  dimensions = predictions.ndim

- if train_y_values is not None:
- classes = np.unique(train_y_values)
+ if y_train is not None:
+ classes = np.unique(y_train)
  else:
  classes = None

@@ -138,7 +138,7 @@ def _get_valid_crisp_predictions(predictions, train_y_values=None, threshold=0.5
  return predictions


- def validate_predictions(quantifier: Any, predictions: np.ndarray, threshold: float = 0.5, train_y_values=None) -> np.ndarray:
+ def validate_predictions(quantifier: Any, predictions: np.ndarray, threshold: float = 0.5, y_train=None) -> np.ndarray:
  """
  Validate predictions using the quantifier's declared output tags.
  Raises InputValidationError if inconsistent with tags.
@@ -158,7 +158,7 @@ def validate_predictions(quantifier: Any, predictions: np.ndarray, threshold: fl
  f"Soft predictions for {quantifier.__class__.__name__} must be float, got dtype {predictions.dtype}."
  )
  elif estimator_type == "crisp" and np.issubdtype(predictions.dtype, np.floating):
- predictions = _get_valid_crisp_predictions(predictions, train_y_values, threshold)
+ predictions = _get_valid_crisp_predictions(predictions, y_train, threshold)
  return predictions


@@ -379,12 +379,6 @@ def validate_prevalences(quantifier, prevalences: np.ndarray | list | dict, clas
  f"prevalences must be a numpy array, list, or dict, got {type(prevalences).__name__}."
  )

- # Validate all classes are present
- if set(prev_dict.keys()) != set(classes):
- raise InputValidationError(
- f"prevalences keys must match classes. Got keys {set(prev_dict.keys())}, expected {set(classes)}."
- )
-
  # Normalize if requested
  if normalize:
  total = sum(prev_dict.values())
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mlquantify
- Version: 0.1.23
+ Version: 0.1.25
  Summary: Quantification Library
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
  Maintainer: Luiz Fernando Luth Junior
@@ -1 +0,0 @@
- 0.1.23