mlquantify 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +11 -1
- mlquantify/adjust_counting/__init__.py +11 -1
- mlquantify/adjust_counting/_adjustment.py +370 -87
- mlquantify/adjust_counting/_base.py +1 -3
- mlquantify/adjust_counting/_counting.py +27 -19
- mlquantify/adjust_counting/_utils.py +23 -28
- mlquantify/confidence.py +16 -22
- mlquantify/likelihood/_base.py +38 -52
- mlquantify/likelihood/_classes.py +88 -72
- mlquantify/meta/_classes.py +86 -62
- mlquantify/metrics/_oq.py +2 -2
- mlquantify/metrics/_rq.py +2 -2
- mlquantify/metrics/_slq.py +9 -9
- mlquantify/mixture/_base.py +13 -19
- mlquantify/mixture/_classes.py +68 -10
- mlquantify/mixture/_utils.py +62 -11
- mlquantify/model_selection/_protocol.py +6 -6
- mlquantify/model_selection/_search.py +1 -1
- mlquantify/neighbors/_base.py +35 -65
- mlquantify/neighbors/_classes.py +1 -10
- mlquantify/neighbors/_classification.py +5 -12
- mlquantify/neighbors/_kde.py +7 -9
- mlquantify/neighbors/_utils.py +17 -21
- mlquantify/utils/_validation.py +3 -3
- mlquantify/utils/prevalence.py +4 -1
- {mlquantify-0.1.9.dist-info → mlquantify-0.1.11.dist-info}/METADATA +10 -18
- mlquantify-0.1.11.dist-info/RECORD +53 -0
- mlquantify-0.1.9.dist-info/RECORD +0 -53
- {mlquantify-0.1.9.dist-info → mlquantify-0.1.11.dist-info}/WHEEL +0 -0
- {mlquantify-0.1.9.dist-info → mlquantify-0.1.11.dist-info}/top_level.txt +0 -0
|
@@ -12,33 +12,34 @@ from mlquantify.utils._constraints import Interval
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class CC(CrispLearnerQMixin, BaseCount):
|
|
15
|
-
"""Classify and Count (CC) quantifier.
|
|
16
|
-
|
|
17
|
-
Implements the Classify and Count method for quantification as
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
[2] Forman, G. (2008). *Quantifying Counts and Costs via Classification.*
|
|
21
|
-
Data Mining and Knowledge Discovery, 17(2), 164-206.
|
|
22
|
-
|
|
23
|
-
|
|
15
|
+
r"""Classify and Count (CC) quantifier.
|
|
16
|
+
|
|
17
|
+
Implements the Classify and Count method for quantification, described as a
|
|
18
|
+
baseline approach in the literature [1][2].
|
|
19
|
+
|
|
24
20
|
Parameters
|
|
25
21
|
----------
|
|
26
22
|
learner : estimator, optional
|
|
27
|
-
A supervised learning estimator with fit and predict methods.
|
|
28
|
-
If None, it is expected that
|
|
23
|
+
A supervised learning estimator with `fit` and `predict` methods.
|
|
24
|
+
If None, it is expected that the aggregate method is used directly.
|
|
29
25
|
threshold : float, default=0.5
|
|
30
26
|
Decision threshold for converting predicted probabilities into class labels.
|
|
31
27
|
Must be in the interval [0.0, 1.0].
|
|
32
|
-
|
|
33
|
-
|
|
28
|
+
|
|
34
29
|
Attributes
|
|
35
30
|
----------
|
|
36
31
|
learner : estimator
|
|
37
32
|
Underlying classification model.
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
33
|
+
|
|
34
|
+
Notes
|
|
35
|
+
-----
|
|
36
|
+
The Classify and Count approach performs quantification by classifying each instance
|
|
37
|
+
using the classifier's predicted labels at a given threshold, then counting the
|
|
38
|
+
prevalence of each class.
|
|
39
|
+
|
|
40
|
+
This method can be biased when class distributions differ between training and test sets,
|
|
41
|
+
motivating further adjustment methods.
|
|
42
|
+
|
|
42
43
|
Examples
|
|
43
44
|
--------
|
|
44
45
|
>>> from mlquantify.adjust_counting import CC
|
|
@@ -50,10 +51,17 @@ class CC(CrispLearnerQMixin, BaseCount):
|
|
|
50
51
|
>>> q.fit(X, y)
|
|
51
52
|
>>> q.predict(X)
|
|
52
53
|
{0: 0.47, 1: 0.53}
|
|
53
|
-
|
|
54
|
+
>>> q2 = CC()
|
|
54
55
|
>>> predictions = np.random.rand(200)
|
|
55
56
|
>>> q2.aggregate(predictions)
|
|
56
57
|
{0: 0.51, 1: 0.49}
|
|
58
|
+
|
|
59
|
+
References
|
|
60
|
+
----------
|
|
61
|
+
.. [1] Forman, G. (2005). "Counting Positives Accurately Despite Inaccurate Classification",
|
|
62
|
+
*ECML*, pp. 564-575.
|
|
63
|
+
.. [2] Forman, G. (2008). "Quantifying Counts and Costs via Classification",
|
|
64
|
+
*Data Mining and Knowledge Discovery*, 17(2), 164-206.
|
|
57
65
|
"""
|
|
58
66
|
|
|
59
67
|
_parameters_constraints = {
|
|
@@ -79,7 +87,7 @@ class CC(CrispLearnerQMixin, BaseCount):
|
|
|
79
87
|
|
|
80
88
|
|
|
81
89
|
class PCC(SoftLearnerQMixin, BaseCount):
|
|
82
|
-
"""Probabilistic Classify and Count (PCC) quantifier.
|
|
90
|
+
r"""Probabilistic Classify and Count (PCC) quantifier.
|
|
83
91
|
|
|
84
92
|
Implements the Probabilistic Classify and Count method for quantification as described in:
|
|
85
93
|
[1] Forman, G. (2005). *Counting Positives Accurately Despite Inaccurate Classification.*
|
|
@@ -2,10 +2,8 @@ import numpy as np
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
def compute_table(y, y_pred, classes):
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
r"""Compute the confusion matrix table for a binary classification task.
|
|
6
|
+
|
|
9
7
|
Parameters
|
|
10
8
|
----------
|
|
11
9
|
y : np.ndarray
|
|
@@ -14,12 +12,12 @@ def compute_table(y, y_pred, classes):
|
|
|
14
12
|
The predicted labels.
|
|
15
13
|
classes : np.ndarray
|
|
16
14
|
The unique classes in the dataset.
|
|
17
|
-
|
|
18
|
-
|
|
15
|
+
|
|
19
16
|
Returns
|
|
20
17
|
-------
|
|
21
18
|
tuple
|
|
22
|
-
A tuple containing the True Positives, False Positives,
|
|
19
|
+
A tuple containing the counts of True Positives, False Positives,
|
|
20
|
+
False Negatives, and True Negatives respectively.
|
|
23
21
|
"""
|
|
24
22
|
TP = np.logical_and(y == y_pred, y == classes[1]).sum()
|
|
25
23
|
FP = np.logical_and(y != y_pred, y == classes[0]).sum()
|
|
@@ -29,18 +27,15 @@ def compute_table(y, y_pred, classes):
|
|
|
29
27
|
|
|
30
28
|
|
|
31
29
|
def compute_tpr(TP, FN):
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
30
|
+
r"""Compute the True Positive Rate (Recall) for a binary classification task.
|
|
31
|
+
|
|
36
32
|
Parameters
|
|
37
33
|
----------
|
|
38
34
|
TP : int
|
|
39
35
|
The number of True Positives.
|
|
40
36
|
FN : int
|
|
41
37
|
The number of False Negatives.
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
|
|
44
39
|
Returns
|
|
45
40
|
-------
|
|
46
41
|
float
|
|
@@ -52,18 +47,15 @@ def compute_tpr(TP, FN):
|
|
|
52
47
|
|
|
53
48
|
|
|
54
49
|
def compute_fpr(FP, TN):
|
|
55
|
-
"""
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
50
|
+
r"""Compute the False Positive Rate for a binary classification task.
|
|
51
|
+
|
|
59
52
|
Parameters
|
|
60
53
|
----------
|
|
61
54
|
FP : int
|
|
62
55
|
The number of False Positives.
|
|
63
56
|
TN : int
|
|
64
57
|
The number of True Negatives.
|
|
65
|
-
|
|
66
|
-
|
|
58
|
+
|
|
67
59
|
Returns
|
|
68
60
|
-------
|
|
69
61
|
float
|
|
@@ -74,31 +66,34 @@ def compute_fpr(FP, TN):
|
|
|
74
66
|
return FP / (FP + TN)
|
|
75
67
|
|
|
76
68
|
|
|
77
|
-
def evaluate_thresholds (y, probabilities:np.ndarray
|
|
78
|
-
"""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
69
|
+
def evaluate_thresholds (y, probabilities:np.ndarray) -> tuple:
|
|
70
|
+
r"""Evaluate a range of classification thresholds to compute the corresponding
|
|
71
|
+
True Positive Rate (TPR) and False Positive Rate (FPR) for a binary quantification task.
|
|
72
|
+
|
|
82
73
|
Parameters
|
|
83
74
|
----------
|
|
84
75
|
y : np.ndarray
|
|
85
76
|
The true labels.
|
|
86
77
|
probabilities : np.ndarray
|
|
87
|
-
The predicted probabilities.
|
|
78
|
+
The predicted probabilities (scores) for the positive class.
|
|
88
79
|
classes : np.ndarray
|
|
89
80
|
The unique classes in the dataset.
|
|
90
|
-
|
|
91
|
-
|
|
81
|
+
|
|
92
82
|
Returns
|
|
93
83
|
-------
|
|
94
84
|
tuple
|
|
95
|
-
|
|
85
|
+
A tuple of (thresholds, tprs, fprs), where:
|
|
86
|
+
- thresholds is a numpy array of evaluated thresholds,
|
|
87
|
+
- tprs is a numpy array of corresponding True Positive Rates,
|
|
88
|
+
- fprs is a numpy array of corresponding False Positive Rates.
|
|
96
89
|
"""
|
|
97
90
|
unique_scores = np.linspace(0, 1, 101)
|
|
98
91
|
|
|
99
92
|
tprs = []
|
|
100
93
|
fprs = []
|
|
101
94
|
|
|
95
|
+
classes = np.unique(y)
|
|
96
|
+
|
|
102
97
|
for threshold in unique_scores:
|
|
103
98
|
y_pred = np.where(probabilities >= threshold, classes[1], classes[0])
|
|
104
99
|
|
mlquantify/confidence.py
CHANGED
|
@@ -2,31 +2,24 @@ import numpy as np
|
|
|
2
2
|
from scipy.stats import chi2
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
import numpy as np
|
|
6
|
-
from scipy.stats import chi2
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
import numpy as np
|
|
10
|
-
from scipy.stats import chi2
|
|
11
|
-
|
|
12
|
-
|
|
13
5
|
class BaseConfidenceRegion:
|
|
14
|
-
"""
|
|
6
|
+
r"""
|
|
7
|
+
Base class for confidence regions of prevalence estimates.
|
|
15
8
|
|
|
16
9
|
This class defines the interface and core structure for constructing
|
|
17
10
|
confidence regions around class prevalence estimates obtained from
|
|
18
11
|
quantification models.
|
|
19
12
|
|
|
20
13
|
Confidence regions capture the uncertainty associated with prevalence
|
|
21
|
-
estimates, typically derived from bootstrap resampling as proposed
|
|
22
|
-
|
|
14
|
+
estimates, typically derived from bootstrap resampling as proposed in
|
|
15
|
+
[1]_.
|
|
23
16
|
|
|
24
17
|
Parameters
|
|
25
18
|
----------
|
|
26
19
|
prev_estims : array-like of shape (m, n)
|
|
27
|
-
Collection of
|
|
20
|
+
Collection of ``m`` bootstrap prevalence estimates for ``n`` classes.
|
|
28
21
|
confidence_level : float, default=0.95
|
|
29
|
-
Desired confidence level
|
|
22
|
+
Desired confidence level :math:`1 - \alpha` of the region.
|
|
30
23
|
|
|
31
24
|
Attributes
|
|
32
25
|
----------
|
|
@@ -37,12 +30,13 @@ class BaseConfidenceRegion:
|
|
|
37
30
|
|
|
38
31
|
Notes
|
|
39
32
|
-----
|
|
40
|
-
The
|
|
41
|
-
|
|
33
|
+
The confidence region :math:`CR_{\alpha}` is defined such that
|
|
34
|
+
|
|
42
35
|
.. math::
|
|
43
|
-
P(π^* \\in CR_α) = 1 - α
|
|
44
36
|
|
|
45
|
-
|
|
37
|
+
\mathbb{P}\left(\pi^{\ast} \in CR_{\alpha}\right) = 1 - \alpha
|
|
38
|
+
|
|
39
|
+
where :math:`\pi^{\ast}` is the unknown true class-prevalence vector.
|
|
46
40
|
|
|
47
41
|
Examples
|
|
48
42
|
--------
|
|
@@ -63,9 +57,9 @@ class BaseConfidenceRegion:
|
|
|
63
57
|
|
|
64
58
|
References
|
|
65
59
|
----------
|
|
66
|
-
[1] Moreo, A., & Salvati, N. (2025).
|
|
67
|
-
|
|
68
|
-
|
|
60
|
+
.. [1] Moreo, A., & Salvati, N. (2025).
|
|
61
|
+
*An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.*
|
|
62
|
+
Istituto di Scienza e Tecnologie dell'Informazione, CNR, Pisa.
|
|
69
63
|
"""
|
|
70
64
|
|
|
71
65
|
def __init__(self, prev_estims, confidence_level=0.95):
|
|
@@ -94,7 +88,7 @@ class BaseConfidenceRegion:
|
|
|
94
88
|
# ==========================================================
|
|
95
89
|
|
|
96
90
|
class ConfidenceInterval(BaseConfidenceRegion):
|
|
97
|
-
"""Bootstrap confidence intervals for each class prevalence.
|
|
91
|
+
r"""Bootstrap confidence intervals for each class prevalence.
|
|
98
92
|
|
|
99
93
|
Constructs independent percentile-based confidence intervals
|
|
100
94
|
for each class dimension from bootstrap samples.
|
|
@@ -164,7 +158,7 @@ class ConfidenceInterval(BaseConfidenceRegion):
|
|
|
164
158
|
# ==========================================================
|
|
165
159
|
|
|
166
160
|
class ConfidenceEllipseSimplex(BaseConfidenceRegion):
|
|
167
|
-
"""Confidence ellipse for prevalence estimates in the simplex.
|
|
161
|
+
r"""Confidence ellipse for prevalence estimates in the simplex.
|
|
168
162
|
|
|
169
163
|
Defines a multivariate confidence region based on a chi-squared threshold:
|
|
170
164
|
|
mlquantify/likelihood/_base.py
CHANGED
|
@@ -14,100 +14,86 @@ from mlquantify.utils._validation import check_classes_attribute, validate_predi
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class BaseIterativeLikelihood(AggregationMixin, BaseQuantifier):
|
|
17
|
-
"""
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
by maximizing the likelihood of observed classifier outputs (posterior probabilities),
|
|
27
|
-
under the assumption that the within-class conditional distributions remain fixed
|
|
17
|
+
r"""Iterative likelihood-based quantification adjustment methods.
|
|
18
|
+
|
|
19
|
+
This base class encompasses quantification approaches that estimate class prevalences
|
|
20
|
+
by maximizing the likelihood of observed data, adjusting prevalence estimates on test
|
|
21
|
+
sets under the assumption of prior probability shift.
|
|
22
|
+
|
|
23
|
+
These methods iteratively refine estimates of class prevalences by maximizing the
|
|
24
|
+
likelihood of classifier outputs, usually the posterior probabilities provided by
|
|
25
|
+
a trained model, assuming that the class-conditional distributions remain fixed
|
|
28
26
|
between training and test domains.
|
|
29
|
-
|
|
27
|
+
|
|
30
28
|
Mathematical formulation
|
|
31
29
|
------------------------
|
|
32
30
|
Let:
|
|
33
|
-
- \( p_k^t \) denote the prior probability for class \( k \) in the training set (\( \sum_k p_k^t = 1 \)),
|
|
34
|
-
- \( s_k(x) \) be the classifier's posterior probability estimate (for class \( k \), given instance \( x \), fitted on training set),
|
|
35
|
-
- \( p_k \) be the (unknown) prior for the test set,
|
|
36
|
-
- \( x_1, \dots, x_N \) the unlabeled test set instances.
|
|
37
31
|
|
|
38
|
-
|
|
32
|
+
- :math:`p_k^t` be the prior probabilities for class :math:`k` in the training set, satisfying :math:`\sum_k p_k^t = 1`,
|
|
33
|
+
- :math:`s_k(x)` be the posterior probability estimate from the classifier for class :math:`k` given instance :math:`x`,
|
|
34
|
+
- :math:`p_k` be the unknown prior probabilities for class :math:`k` in the test set,
|
|
35
|
+
- :math:`x_1, \dots, x_N` be unlabeled test set instances.
|
|
39
36
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
The E-step updates soft memberships:
|
|
37
|
+
The likelihood of the observed data is:
|
|
38
|
+
|
|
39
|
+
.. math::
|
|
45
40
|
|
|
46
|
-
|
|
47
|
-
w_{ik}^{(t)} = \frac{s_k(x_i) \cdot (p_k^{(t-1)} / p_k^t)}{\sum_{j=1}^K s_j(x_i) \cdot (p_j^{(t-1)} / p_j^t)}
|
|
48
|
-
\]
|
|
49
|
-
and the M-step re-estimates prevalences:
|
|
41
|
+
L = \prod_{i=1}^N \sum_{k=1}^K s_k(x_i) \frac{p_k}{p_k^t}
|
|
50
42
|
|
|
51
|
-
|
|
52
|
-
p_k^{(t)} = \frac{1}{N} \sum_{i=1}^N w_{ik}^{(t)}
|
|
53
|
-
\]
|
|
54
|
-
See also [1].
|
|
43
|
+
Methods in this class seek a solution that maximizes this likelihood via iterative methods.
|
|
55
44
|
|
|
56
45
|
Notes
|
|
57
46
|
-----
|
|
58
|
-
-
|
|
59
|
-
- Assumes prior probability shift
|
|
60
|
-
-
|
|
61
|
-
-
|
|
62
|
-
- Closely related to the Expectation-Maximization (EM) algorithm for mixture models.
|
|
47
|
+
- Applicable to binary and multiclass problems as long as the classifier provides calibrated posterior probabilities.
|
|
48
|
+
- Assumes changes only in prior probabilities (prior probability shift).
|
|
49
|
+
- Algorithms converge to local maxima of the likelihood function.
|
|
50
|
+
- Includes methods such as Class Distribution Estimation (CDE), Maximum Likelihood Prevalence Estimation (MLPE), and Expectation-Maximization (EM) based quantification.
|
|
63
51
|
|
|
64
52
|
Parameters
|
|
65
53
|
----------
|
|
66
54
|
learner : estimator, optional
|
|
67
|
-
Probabilistic classifier
|
|
55
|
+
Probabilistic classifier implementing the methods `fit(X, y)` and `predict_proba(X)`.
|
|
68
56
|
tol : float, default=1e-4
|
|
69
|
-
Convergence tolerance for prevalence update.
|
|
57
|
+
Convergence tolerance for prevalence update criteria.
|
|
70
58
|
max_iter : int, default=100
|
|
71
|
-
Maximum number of
|
|
59
|
+
Maximum allowed number of iterations.
|
|
72
60
|
|
|
73
61
|
Attributes
|
|
74
62
|
----------
|
|
75
63
|
learner : estimator
|
|
76
|
-
Underlying
|
|
64
|
+
Underlying classification model.
|
|
77
65
|
tol : float
|
|
78
|
-
|
|
66
|
+
Tolerance for stopping criterion.
|
|
79
67
|
max_iter : int
|
|
80
|
-
Maximum
|
|
68
|
+
Maximum number of iterations.
|
|
81
69
|
classes : ndarray of shape (n_classes,)
|
|
82
|
-
Unique
|
|
70
|
+
Unique classes observed during training.
|
|
83
71
|
priors : ndarray of shape (n_classes,)
|
|
84
|
-
Class distribution
|
|
72
|
+
Class distribution in the training set.
|
|
85
73
|
y_train : array-like
|
|
86
|
-
Training labels
|
|
74
|
+
Training labels used to estimate priors.
|
|
87
75
|
|
|
88
76
|
Examples
|
|
89
77
|
--------
|
|
90
78
|
>>> import numpy as np
|
|
91
79
|
>>> from sklearn.linear_model import LogisticRegression
|
|
92
|
-
>>> class
|
|
80
|
+
>>> class MyQuantifier(BaseIterativeLikelihood):
|
|
93
81
|
... def _iterate(self, predictions, priors):
|
|
94
|
-
... #
|
|
82
|
+
... # Implementation of iterative update logic
|
|
95
83
|
... pass
|
|
96
84
|
>>> X = np.random.randn(200, 8)
|
|
97
85
|
>>> y = np.random.randint(0, 3, size=(200,))
|
|
98
|
-
>>> q =
|
|
86
|
+
>>> q = MyQuantifier(learner=LogisticRegression(max_iter=200))
|
|
99
87
|
>>> q.fit(X, y)
|
|
100
88
|
>>> q.predict(X)
|
|
101
89
|
{0: 0.32, 1: 0.40, 2: 0.28}
|
|
102
90
|
|
|
103
91
|
References
|
|
104
92
|
----------
|
|
105
|
-
[1] Saerens, M., Latinne, P., & Decaestecker, C. (2002).
|
|
106
|
-
|
|
107
|
-
[2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). *Learning to Quantify.* The Information Retrieval Series 47, Springer. https://doi.org/10.1007/978-3-031-20467-8
|
|
108
|
-
"""
|
|
109
|
-
|
|
93
|
+
.. [1] Saerens, M., Latinne, P., & Decaestecker, C. (2002). "Adjusting the Outputs of a Classifier to New a Priori Probabilities: A Simple Procedure." Neural Computation, 14(1), 21-41.
|
|
110
94
|
|
|
95
|
+
.. [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). "Learning to Quantify." The Information Retrieval Series 47, Springer. https://doi.org/10.1007/978-3-031-20467-8
|
|
96
|
+
"""
|
|
111
97
|
|
|
112
98
|
@abstractmethod
|
|
113
99
|
def __init__(self,
|
|
@@ -10,46 +10,66 @@ from mlquantify.utils._constraints import (
|
|
|
10
10
|
)
|
|
11
11
|
|
|
12
12
|
class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
13
|
-
"""Expectation-Maximization Quantifier.
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
13
|
+
r"""Expectation-Maximization Quantifier (EMQ).
|
|
14
|
+
|
|
15
|
+
Estimates class prevalences under prior probability shift by alternating
|
|
16
|
+
between expectation **(E)** and maximization **(M)** steps on posterior probabilities.
|
|
17
|
+
|
|
18
|
+
E-step:
|
|
19
|
+
.. math::
|
|
20
|
+
p_i^{(s+1)}(x) = \frac{q_i^{(s)} p_i(x)}{\sum_j q_j^{(s)} p_j(x)}
|
|
21
|
+
|
|
22
|
+
M-step:
|
|
23
|
+
.. math::
|
|
24
|
+
q_i^{(s+1)} = \frac{1}{N} \sum_{n=1}^N p_i^{(s+1)}(x_n)
|
|
25
|
+
|
|
26
|
+
where
|
|
27
|
+
- :math:`p_i(x)` are posterior probabilities predicted by the classifier
|
|
28
|
+
- :math:`q_i^{(s)}` are class prevalence estimates at iteration :math:`s`
|
|
29
|
+
- :math:`N` is the number of test instances.
|
|
30
|
+
|
|
31
|
+
Calibrations supported on posterior probabilities before **EM** iteration:
|
|
32
|
+
|
|
33
|
+
Temperature Scaling (TS):
|
|
34
|
+
.. math::
|
|
35
|
+
\hat{p} = \text{softmax}\left(\frac{\log(p)}{T}\right)
|
|
36
|
+
|
|
37
|
+
Bias-Corrected Temperature Scaling (BCTS):
|
|
38
|
+
.. math::
|
|
39
|
+
\hat{p} = \text{softmax}\left(\frac{\log(p)}{T} + b\right)
|
|
40
|
+
|
|
41
|
+
Vector Scaling (VS):
|
|
42
|
+
.. math::
|
|
43
|
+
\hat{p}_i = \text{softmax}(W_i \cdot \log(p_i) + b_i)
|
|
44
|
+
|
|
45
|
+
No-Bias Vector Scaling (NBVS):
|
|
46
|
+
.. math::
|
|
47
|
+
\hat{p}_i = \text{softmax}(W_i \cdot \log(p_i))
|
|
48
|
+
|
|
26
49
|
Parameters
|
|
27
50
|
----------
|
|
28
51
|
learner : estimator, optional
|
|
29
|
-
Probabilistic classifier
|
|
52
|
+
Probabilistic classifier supporting predict_proba.
|
|
30
53
|
tol : float, default=1e-4
|
|
31
|
-
Convergence threshold
|
|
54
|
+
Convergence threshold.
|
|
32
55
|
max_iter : int, default=100
|
|
33
|
-
Maximum
|
|
56
|
+
Maximum EM iterations.
|
|
34
57
|
calib_function : str or callable, optional
|
|
35
|
-
Calibration method
|
|
36
|
-
|
|
58
|
+
Calibration method:
|
|
59
|
+
- 'ts': Temperature Scaling
|
|
60
|
+
- 'bcts': Bias-Corrected Temperature Scaling
|
|
61
|
+
- 'vs': Vector Scaling
|
|
62
|
+
- 'nbvs': No-Bias Vector Scaling
|
|
63
|
+
- callable: custom calibration function
|
|
37
64
|
criteria : callable, default=MAE
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
Methods
|
|
41
|
-
-------
|
|
42
|
-
_iterate(predictions, priors)
|
|
43
|
-
Executes EM iterations to estimate prevalences from posterior probabilities.
|
|
44
|
-
EM(posteriors, priors, tolerance, max_iter, criteria)
|
|
45
|
-
Static method implementing the EM loop with E-step and M-step.
|
|
46
|
-
_apply_calibration(predictions)
|
|
47
|
-
Applies optional calibration method to posterior predictions.
|
|
48
|
-
|
|
65
|
+
Convergence metric.
|
|
66
|
+
|
|
49
67
|
References
|
|
50
68
|
----------
|
|
51
|
-
[1] Saerens
|
|
52
|
-
|
|
69
|
+
.. [1] Saerens, M., Latinne, P., & Decaestecker, C. (2002).
|
|
70
|
+
Adjusting the Outputs of a Classifier to New a Priori Probabilities.
|
|
71
|
+
Neural Computation, 14(1), 21-41.
|
|
72
|
+
.. [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
|
|
53
73
|
"""
|
|
54
74
|
|
|
55
75
|
_parameter_constraints = {
|
|
@@ -72,8 +92,7 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
|
72
92
|
self.criteria = criteria
|
|
73
93
|
|
|
74
94
|
def _iterate(self, predictions, priors):
|
|
75
|
-
"""
|
|
76
|
-
Perform EM quantification iteration.
|
|
95
|
+
r"""Perform EM quantification iteration.
|
|
77
96
|
|
|
78
97
|
Steps:
|
|
79
98
|
- Calibrate posterior predictions if calibration function specified.
|
|
@@ -104,8 +123,7 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
|
104
123
|
|
|
105
124
|
@classmethod
|
|
106
125
|
def EM(cls, posteriors, priors, tolerance=1e-6, max_iter=100, criteria=MAE):
|
|
107
|
-
"""
|
|
108
|
-
Static method implementing the EM algorithm for quantification.
|
|
126
|
+
r"""Class method implementing the EM algorithm for quantification.
|
|
109
127
|
|
|
110
128
|
Parameters
|
|
111
129
|
----------
|
|
@@ -162,8 +180,7 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
|
162
180
|
|
|
163
181
|
|
|
164
182
|
def _apply_calibration(self, predictions):
|
|
165
|
-
"""
|
|
166
|
-
Calibrate posterior predictions with specified calibration method.
|
|
183
|
+
r"""Calibrate posterior predictions with specified calibration method.
|
|
167
184
|
|
|
168
185
|
Parameters
|
|
169
186
|
----------
|
|
@@ -242,18 +259,18 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
|
242
259
|
|
|
243
260
|
|
|
244
261
|
class MLPE(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
245
|
-
"""
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
training priors as the estimated prevalences, effectively skipping iteration.
|
|
250
|
-
|
|
251
|
-
This method assumes no prior probability shift between training and test.
|
|
252
|
-
|
|
262
|
+
r"""Maximum Likelihood Prevalence Estimation (MLPE).
|
|
263
|
+
|
|
264
|
+
Returns training priors as prevalence estimates without adaptations.
|
|
265
|
+
|
|
253
266
|
Parameters
|
|
254
267
|
----------
|
|
255
268
|
learner : estimator, optional
|
|
256
|
-
Base classifier
|
|
269
|
+
Base classifier.
|
|
270
|
+
|
|
271
|
+
References
|
|
272
|
+
----------
|
|
273
|
+
.. [1] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
|
|
257
274
|
"""
|
|
258
275
|
|
|
259
276
|
def __init__(self, learner=None):
|
|
@@ -278,38 +295,38 @@ class MLPE(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
|
278
295
|
|
|
279
296
|
@define_binary
|
|
280
297
|
class CDE(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
281
|
-
"""
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
298
|
+
r"""CDE-Iterate for binary classification prevalence estimation.
|
|
299
|
+
|
|
300
|
+
Threshold :math:`\tau` from false positive and false negative costs:
|
|
301
|
+
.. math::
|
|
302
|
+
\tau = \frac{c_{FP}}{c_{FP} + c_{FN}}
|
|
303
|
+
|
|
304
|
+
Hard classification by thresholding posterior probability :math:`p(+|x)` at :math:`\tau`:
|
|
305
|
+
.. math::
|
|
306
|
+
\hat{y}(x) = \mathbf{1}_{p(+|x) > \tau}
|
|
307
|
+
|
|
308
|
+
Prevalence estimation via classify-and-count:
|
|
309
|
+
.. math::
|
|
310
|
+
\hat{p}_U(+) = \frac{1}{N} \sum_{n=1}^N \hat{y}(x_n)
|
|
311
|
+
|
|
312
|
+
False positive cost update:
|
|
313
|
+
.. math::
|
|
314
|
+
c_{FP}^{new} = \frac{p_L(+)}{p_L(-)} \times \frac{\hat{p}_U(-)}{\hat{p}_U(+)} \times c_{FN}
|
|
315
|
+
|
|
299
316
|
Parameters
|
|
300
317
|
----------
|
|
301
318
|
learner : estimator, optional
|
|
302
|
-
Wrapped classifier (unused
|
|
319
|
+
Wrapped classifier (unused).
|
|
303
320
|
tol : float, default=1e-4
|
|
304
|
-
|
|
321
|
+
Convergence tolerance.
|
|
305
322
|
max_iter : int, default=100
|
|
306
|
-
Max
|
|
323
|
+
Max iterations.
|
|
307
324
|
init_cfp : float, default=1.0
|
|
308
|
-
Initial false positive cost
|
|
309
|
-
|
|
325
|
+
Initial false positive cost.
|
|
326
|
+
|
|
310
327
|
References
|
|
311
328
|
----------
|
|
312
|
-
[
|
|
329
|
+
.. [1] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
|
|
313
330
|
"""
|
|
314
331
|
|
|
315
332
|
_parameter_constraints = {
|
|
@@ -323,8 +340,7 @@ class CDE(SoftLearnerQMixin, BaseIterativeLikelihood):
|
|
|
323
340
|
self.init_cfp = float(init_cfp)
|
|
324
341
|
|
|
325
342
|
def _iterate(self, predictions, priors):
|
|
326
|
-
"""
|
|
327
|
-
Iteratively estimate prevalences via cost-sensitive thresholding.
|
|
343
|
+
r"""Iteratively estimate prevalences via cost-sensitive thresholding.
|
|
328
344
|
|
|
329
345
|
Parameters
|
|
330
346
|
----------
|