mlquantify 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +0 -29
- mlquantify/adjust_counting/__init__.py +14 -0
- mlquantify/adjust_counting/_adjustment.py +365 -0
- mlquantify/adjust_counting/_base.py +247 -0
- mlquantify/adjust_counting/_counting.py +145 -0
- mlquantify/adjust_counting/_utils.py +114 -0
- mlquantify/base.py +117 -519
- mlquantify/base_aggregative.py +209 -0
- mlquantify/calibration.py +1 -0
- mlquantify/confidence.py +335 -0
- mlquantify/likelihood/__init__.py +5 -0
- mlquantify/likelihood/_base.py +161 -0
- mlquantify/likelihood/_classes.py +414 -0
- mlquantify/meta/__init__.py +1 -0
- mlquantify/meta/_classes.py +761 -0
- mlquantify/metrics/__init__.py +21 -0
- mlquantify/metrics/_oq.py +109 -0
- mlquantify/metrics/_rq.py +98 -0
- mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
- mlquantify/mixture/__init__.py +7 -0
- mlquantify/mixture/_base.py +153 -0
- mlquantify/mixture/_classes.py +400 -0
- mlquantify/mixture/_utils.py +112 -0
- mlquantify/model_selection/__init__.py +9 -0
- mlquantify/model_selection/_protocol.py +358 -0
- mlquantify/model_selection/_search.py +315 -0
- mlquantify/model_selection/_split.py +1 -0
- mlquantify/multiclass.py +350 -0
- mlquantify/neighbors/__init__.py +9 -0
- mlquantify/neighbors/_base.py +198 -0
- mlquantify/neighbors/_classes.py +159 -0
- mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
- mlquantify/neighbors/_kde.py +270 -0
- mlquantify/neighbors/_utils.py +135 -0
- mlquantify/neural/__init__.py +1 -0
- mlquantify/utils/__init__.py +47 -2
- mlquantify/utils/_artificial.py +27 -0
- mlquantify/utils/_constraints.py +219 -0
- mlquantify/utils/_context.py +21 -0
- mlquantify/utils/_decorators.py +36 -0
- mlquantify/utils/_exceptions.py +12 -0
- mlquantify/utils/_get_scores.py +159 -0
- mlquantify/utils/_load.py +18 -0
- mlquantify/utils/_parallel.py +6 -0
- mlquantify/utils/_random.py +36 -0
- mlquantify/utils/_sampling.py +273 -0
- mlquantify/utils/_tags.py +44 -0
- mlquantify/utils/_validation.py +447 -0
- mlquantify/utils/prevalence.py +61 -0
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
- mlquantify-0.1.9.dist-info/RECORD +53 -0
- mlquantify/classification/__init__.py +0 -1
- mlquantify/evaluation/__init__.py +0 -14
- mlquantify/evaluation/protocol.py +0 -291
- mlquantify/methods/__init__.py +0 -37
- mlquantify/methods/aggregative.py +0 -1159
- mlquantify/methods/meta.py +0 -472
- mlquantify/methods/mixture_models.py +0 -1003
- mlquantify/methods/non_aggregative.py +0 -136
- mlquantify/methods/threshold_optimization.py +0 -869
- mlquantify/model_selection.py +0 -377
- mlquantify/plots.py +0 -367
- mlquantify/utils/general.py +0 -371
- mlquantify/utils/method.py +0 -449
- mlquantify-0.1.7.dist-info/RECORD +0 -22
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
mlquantify/base_aggregative.py
ADDED
@@ -0,0 +1,209 @@
+from mlquantify.utils._tags import (
+    get_tags
+)
+from mlquantify.utils._validation import validate_parameter_constraints, validate_learner_contraints
+
+
+class AggregationMixin:
+    """Mixin class for all aggregative quantifiers.
+
+    An aggregative quantifier is a quantifier that relies on an underlying
+    supervised learner to produce predictions on which the quantification
+    is then performed.
+
+    Inheriting from this mixin provides learner validation and propagates
+    parameter setting to the learner (used by `GridSearchQ` and friends).
+
+    This mixin also sets the `has_estimator` and `requires_fit`
+    tags to `True`.
+
+
+    Notes
+    -----
+    - An aggregative quantifier must have a 'learner' attribute that is
+      a supervised learning estimator.
+    - Depending on the type of predictions required from the learner,
+      the quantifier can be further classified as a 'soft' or 'crisp'
+      aggregative quantifier.
+
+    Read more in the :ref:`User Guide <rolling_your_own_aggregative_quantifiers>`
+    for more details.
+
+
+    Examples
+    --------
+    >>> from mlquantify.base import BaseQuantifier, AggregationMixin
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> import numpy as np
+    >>> class MyAggregativeQuantifier(AggregationMixin, BaseQuantifier):
+    ...     def __init__(self, learner=None):
+    ...         self.learner = learner if learner is not None else LogisticRegression()
+    ...     def fit(self, X, y):
+    ...         self.learner.fit(X, y)
+    ...         self.classes_ = np.unique(y)
+    ...         return self
+    ...     def predict(self, X):
+    ...         preds = self.learner.predict(X)
+    ...         _, counts = np.unique(preds, return_counts=True)
+    ...         prevalence = counts / counts.sum()
+    ...         return prevalence
+    >>> quantifier = MyAggregativeQuantifier()
+    >>> X = np.random.rand(100, 10)
+    >>> y = np.random.randint(0, 2, size=100)
+    >>> quantifier.fit(X, y).predict(X)
+    [0.5 0.5]
+    """
+
+    def __mlquantify_tags__(self):
+        tags = super().__mlquantify_tags__()
+        tags.has_estimator = True
+        tags.requires_fit = True
+        return tags
+
+
+    def _validate_params(self):
+        """Validate the parameters of the quantifier instance,
+        including the learner's parameters.
+
+        The expected types and values must be defined in the `_parameter_constraints`
+        class attribute as a dictionary of `param_name: list of constraints`. See
+        the docstring of `validate_parameter_constraints` for more details.
+        """
+        validate_learner_contraints(self, self.learner)
+
+        validate_parameter_constraints(
+            self._parameter_constraints,
+            self.get_params(deep=False),
+            caller_name=self.__class__.__name__,
+        )
+
+    def set_params(self, **params):
+
+        # Model params
+        for key, value in params.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+
+        # Learner params (keys prefixed with 'learner__')
+        if self.learner is not None:
+            learner_params = {k.replace('learner__', ''): v for k, v in params.items() if 'learner__' in k}
+            if learner_params:
+                self.learner.set_params(**learner_params)
+
+        return self
+
+
+class SoftLearnerQMixin:
+    """Soft predictions mixin for aggregative quantifiers.
+
+    This mixin changes the following tags:
+    - `estimator_function`: "predict_proba"
+    - `estimator_type`: "soft"
+
+
+    Notes
+    -----
+    - This mixin should be used alongside the `AggregationMixin`, placed
+      to the left of it in the inheritance order.
+
+    Examples
+    --------
+    >>> from mlquantify.base import BaseQuantifier, AggregationMixin, SoftLearnerQMixin
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> import numpy as np
+    >>> class MySoftAggregativeQuantifier(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
+    ...     def __init__(self, learner=None):
+    ...         self.learner = learner if learner is not None else LogisticRegression()
+    ...     def fit(self, X, y):
+    ...         self.learner.fit(X, y)
+    ...         self.classes_ = np.unique(y)
+    ...         return self
+    ...     def predict(self, X):
+    ...         proba = self.learner.predict_proba(X)
+    ...         return proba.mean(axis=0)
+    >>> quantifier = MySoftAggregativeQuantifier()
+    >>> X = np.random.rand(100, 10)
+    >>> y = np.random.randint(0, 2, size=100)
+    >>> quantifier.fit(X, y).predict(X)
+    [0.5 0.5]
+    """
+
+    def __mlquantify_tags__(self):
+        tags = super().__mlquantify_tags__()
+        tags.estimator_function = "predict_proba"
+        tags.estimator_type = "soft"
+        return tags
+
+
+class CrispLearnerQMixin:
+    """Crisp predictions mixin for aggregative quantifiers.
+
+    This mixin changes the following tags:
+    - `estimator_function`: "predict"
+    - `estimator_type`: "crisp"
+
+
+    Notes
+    -----
+    - This mixin should be used alongside the `AggregationMixin`, placed
+      to the left of it in the inheritance order.
+
+
+    Examples
+    --------
+    >>> from mlquantify.base import BaseQuantifier, AggregationMixin, CrispLearnerQMixin
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> import numpy as np
+    >>> class MyCrispAggregativeQuantifier(CrispLearnerQMixin, AggregationMixin, BaseQuantifier):
+    ...     def __init__(self, learner=None):
+    ...         self.learner = learner if learner is not None else LogisticRegression()
+    ...     def fit(self, X, y):
+    ...         self.learner.fit(X, y)
+    ...         self.classes_ = np.unique(y)
+    ...         return self
+    ...     def predict(self, X):
+    ...         preds = self.learner.predict(X)
+    ...         _, counts = np.unique(preds, return_counts=True)
+    ...         prevalence = counts / counts.sum()
+    ...         return prevalence
+    >>> quantifier = MyCrispAggregativeQuantifier()
+    >>> X = np.random.rand(100, 10)
+    >>> y = np.random.randint(0, 2, size=100)
+    >>> quantifier.fit(X, y).predict(X)
+    [0.5 0.5]
+    """
+
+    def __mlquantify_tags__(self):
+        tags = super().__mlquantify_tags__()
+        tags.estimator_function = "predict"
+        tags.estimator_type = "crisp"
+        return tags
+
+
+def uses_soft_predictions(quantifier):
+    """Check if the quantifier uses soft predictions."""
+    return get_tags(quantifier).estimator_type == "soft"
+
+def uses_crisp_predictions(quantifier):
+    """Check if the quantifier uses crisp predictions."""
+    return get_tags(quantifier).estimator_type == "crisp"
+
+def is_aggregative_quantifier(quantifier):
+    """Check if the quantifier is aggregative."""
+    return get_tags(quantifier).has_estimator
+
+def get_aggregation_requirements(quantifier):
+    """Get the prediction requirements for the aggregative quantifier."""
+    tags = get_tags(quantifier)
+    return tags.prediction_requirements
+
+
+def _get_learner_function(quantifier):
+    """Get the learner function name used by the aggregative quantifier."""
+    tags = get_tags(quantifier)
+    function_name = tags.estimator_function
+    if function_name is None:
+        raise ValueError(f"The quantifier {quantifier.__class__.__name__} does not specify an estimator function.")
+    if not hasattr(quantifier.learner, function_name):
+        raise AttributeError(f"The learner {quantifier.learner.__class__.__name__} does not have the method '{function_name}'.")
+    return function_name
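For orientation, here is a minimal usage sketch of the new mixins and tag helpers. It is not part of the diff: the mixin import path follows the docstrings above, while importing the helpers from mlquantify.base_aggregative is an assumption based on the file name, as is the behavior of `get_tags` resolving `__mlquantify_tags__`.

# Hypothetical sketch -- not part of the published diff. Mixin imports follow
# the docstrings above; the helper import path is inferred from the file name.
import numpy as np
from sklearn.linear_model import LogisticRegression

from mlquantify.base import BaseQuantifier, AggregationMixin, SoftLearnerQMixin
from mlquantify.base_aggregative import uses_soft_predictions, is_aggregative_quantifier


class ProbAverageQuantifier(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
    """Toy soft quantifier: averages posterior probabilities over the sample."""

    def __init__(self, learner=None):
        self.learner = learner if learner is not None else LogisticRegression()

    def fit(self, X, y):
        self.learner.fit(X, y)
        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        return self.learner.predict_proba(X).mean(axis=0)


q = ProbAverageQuantifier()
q.set_params(learner__C=0.5)   # 'learner__'-prefixed params are forwarded to the learner
q.fit(np.random.rand(100, 5), np.random.randint(0, 2, size=100))
assert is_aggregative_quantifier(q)   # has_estimator tag set by AggregationMixin
assert uses_soft_predictions(q)       # estimator_type tag set by SoftLearnerQMixin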
mlquantify/calibration.py
ADDED
@@ -0,0 +1 @@
+# TODO
mlquantify/confidence.py
ADDED
@@ -0,0 +1,335 @@
+import numpy as np
+from scipy.stats import chi2
+
+
+class BaseConfidenceRegion:
+    """Base class for confidence regions of prevalence estimates.
+
+    This class defines the interface and core structure for constructing
+    confidence regions around class prevalence estimates obtained from
+    quantification models.
+
+    Confidence regions capture the uncertainty associated with prevalence
+    estimates, typically derived from bootstrap resampling as proposed by [1].
+    Subclasses define specific types of regions (e.g., intervals, ellipses).
+
+    Parameters
+    ----------
+    prev_estims : array-like of shape (m, n)
+        Collection of `m` bootstrap prevalence estimates for `n` classes.
+    confidence_level : float, default=0.95
+        Desired confidence level (1 - α) of the region.
+
+    Attributes
+    ----------
+    prev_estims : ndarray of shape (m, n)
+        Bootstrap prevalence estimates.
+    confidence_level : float
+        Confidence level associated with the region.
+
+    Notes
+    -----
+    The general goal is to construct a confidence region :math:`CR_α` such that:
+
+    .. math::
+        P(π^* \\in CR_α) = 1 - α
+
+    where :math:`π^*` is the true (unknown) class prevalence vector.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> class DummyRegion(BaseConfidenceRegion):
+    ...     def _compute_region(self):
+    ...         self.mean_ = np.mean(self.prev_estims, axis=0)
+    ...     def get_region(self):
+    ...         return self.mean_
+    ...     def get_point_estimate(self):
+    ...         return self.mean_
+    ...     def contains(self, point):
+    ...         return np.allclose(point, self.mean_, atol=0.1)
+    >>> X = np.random.dirichlet(np.ones(3), size=100)
+    >>> region = DummyRegion(X, confidence_level=0.9)
+    >>> region.get_point_estimate().round(3)
+    array([0.33, 0.33, 0.34])
+
+    References
+    ----------
+    [1] Moreo, A., & Salvati, N. (2025).
+        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification*.
+        Istituto di Scienza e Tecnologie dell’Informazione, CNR, Pisa.
+    """
+
+    def __init__(self, prev_estims, confidence_level=0.95):
+        self.prev_estims = np.asarray(prev_estims)
+        self.confidence_level = confidence_level
+        self._compute_region()
+
+    def _compute_region(self):
+        raise NotImplementedError("Subclasses must implement _compute_region().")
+
+    def get_region(self):
+        """Return the parameters defining the confidence region."""
+        raise NotImplementedError
+
+    def get_point_estimate(self):
+        """Return the point estimate of prevalence (e.g., mean of bootstrap samples)."""
+        raise NotImplementedError
+
+    def contains(self, point):
+        """Check whether a prevalence vector lies within the region."""
+        raise NotImplementedError
+
+
+# ==========================================================
+# Confidence Intervals (via percentiles)
+# ==========================================================
+
+class ConfidenceInterval(BaseConfidenceRegion):
+    """Bootstrap confidence intervals for each class prevalence.
+
+    Constructs independent percentile-based confidence intervals
+    for each class dimension from bootstrap samples.
+
+    The confidence region is defined as:
+
+    .. math::
+        CI_α(π) =
+        \\begin{cases}
+        1 & \\text{if } L_i \\le π_i \\le U_i, \\forall i=1,...,n \\\\
+        0 & \\text{otherwise}
+        \\end{cases}
+
+    where :math:`L_i` and :math:`U_i` are the empirical
+    α/2 and 1−α/2 quantiles for class i.
+
+    Parameters
+    ----------
+    prev_estims : array-like of shape (m, n)
+        Bootstrap prevalence estimates.
+    confidence_level : float, default=0.95
+        Desired confidence level.
+
+    Attributes
+    ----------
+    I_low : ndarray of shape (n,)
+        Lower confidence bounds.
+    I_high : ndarray of shape (n,)
+        Upper confidence bounds.
+
+    Examples
+    --------
+    >>> X = np.random.dirichlet(np.ones(3), size=200)
+    >>> ci = ConfidenceInterval(X, confidence_level=0.9)
+    >>> ci.get_region()
+    (array([0.05, 0.06, 0.05]), array([0.48, 0.50, 0.48]))
+    >>> ci.contains([0.3, 0.4, 0.3])
+    array([[ True]])
+
+    References
+    ----------
+    [1] Moreo, A., & Salvati, N. (2025).
+        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification*.
+        Section 3.3, Equation (1).
+    """
+
+    def _compute_region(self):
+        alpha = 1 - self.confidence_level
+        low_perc = (alpha / 2.) * 100
+        high_perc = (1 - alpha / 2.) * 100
+        self.I_low, self.I_high = np.percentile(self.prev_estims, q=[low_perc, high_perc], axis=0)
+
+    def get_region(self):
+        return self.I_low, self.I_high
+
+    def get_point_estimate(self):
+        return np.mean(self.prev_estims, axis=0)
+
+    def contains(self, point):
+        point = np.asarray(point)
+        within = np.logical_and(self.I_low <= point, point <= self.I_high)
+        return np.all(within, axis=-1, keepdims=True)
+
+
+# ==========================================================
+# Confidence Ellipse in Simplex
+# ==========================================================

+class ConfidenceEllipseSimplex(BaseConfidenceRegion):
+    """Confidence ellipse for prevalence estimates in the simplex.
+
+    Defines a multivariate confidence region based on a chi-squared threshold:
+
+    .. math::
+        CE_α(π) =
+        \\begin{cases}
+        1 & \\text{if } (π - μ)^T Σ^{-1} (π - μ) \\le χ^2_{n-1}(1-α) \\\\
+        0 & \\text{otherwise}
+        \\end{cases}
+
+    Parameters
+    ----------
+    prev_estims : array-like of shape (m, n)
+        Bootstrap prevalence estimates.
+    confidence_level : float, default=0.95
+        Confidence level.
+
+    Attributes
+    ----------
+    mean_ : ndarray of shape (n,)
+        Mean prevalence estimate.
+    precision_matrix : ndarray of shape (n, n)
+        Inverse covariance matrix of estimates.
+    chi2_val : float
+        Chi-squared cutoff threshold defining the ellipse.
+
+    Examples
+    --------
+    >>> X = np.random.dirichlet(np.ones(3), size=200)
+    >>> ce = ConfidenceEllipseSimplex(X, confidence_level=0.95)
+    >>> ce.get_point_estimate().round(3)
+    array([0.33, 0.34, 0.33])
+    >>> ce.contains(np.array([0.4, 0.3, 0.3]))
+    True
+
+    References
+    ----------
+    [1] Moreo, A., & Salvati, N. (2025).
+        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification*.
+        Section 3.3, Equation (2).
+    """
+
+    def _compute_region(self):
+        cov_ = np.cov(self.prev_estims, rowvar=False, ddof=1)
+        try:
+            self.precision_matrix = np.linalg.inv(cov_)
+        except np.linalg.LinAlgError:
+            self.precision_matrix = None
+
+        dim = self.prev_estims.shape[-1]
+        ddof = dim - 1
+        self.chi2_val = chi2.ppf(self.confidence_level, ddof)
+        self.mean_ = np.mean(self.prev_estims, axis=0)
+
+    def get_region(self):
+        return self.mean_, self.precision_matrix, self.chi2_val
+
+    def get_point_estimate(self):
+        return self.mean_
+
+    def contains(self, point):
+        if self.precision_matrix is None:
+            return False
+        diff = point - self.mean_
+        dist2 = diff.T @ self.precision_matrix @ diff
+        return bool(np.mean(dist2 <= self.chi2_val))
+
+
+# ==========================================================
+# Confidence Ellipse in CLR (Centered Log-Ratio) Space
+# ==========================================================
+
+class ConfidenceEllipseCLR(ConfidenceEllipseSimplex):
+    r"""Confidence ellipse for prevalence estimates in CLR-transformed space.
+
+    Applies the Centered Log-Ratio (CLR) transformation:
+
+    .. math::
+        T(π) = [\log(π_1/g(π)), ..., \log(π_n/g(π))], \\
+        g(π) = (\prod_i π_i)^{1/n}
+
+    A confidence ellipse is then built in the transformed space:
+
+    .. math::
+        CT_α(π) =
+        \begin{cases}
+        1 & \text{if } (T(π) - μ_{CLR})^T Σ^{-1} (T(π) - μ_{CLR}) \le χ^2_{n-1}(1-α) \\
+        0 & \text{otherwise}
+        \end{cases}
+
+    Parameters
+    ----------
+    prev_estims : array-like of shape (m, n)
+        Bootstrap prevalence estimates.
+    confidence_level : float, default=0.95
+        Confidence level.
+
+    Attributes
+    ----------
+    mean_ : ndarray of shape (n,)
+        Mean vector in CLR space.
+    precision_matrix : ndarray of shape (n, n)
+        Inverse covariance matrix in CLR space.
+    chi2_val : float
+        Chi-squared threshold.
+
+    Examples
+    --------
+    >>> X = np.random.dirichlet(np.ones(3), size=200)
+    >>> clr = ConfidenceEllipseCLR(X, confidence_level=0.9)
+    >>> clr.get_point_estimate().round(3)
+    array([ 0., 0., -0.])
+    >>> clr.contains(np.array([0.4, 0.4, 0.2]))
+    True
+
+    References
+    ----------
+    [1] Moreo, A., & Salvati, N. (2025).
+        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification*.
+        Section 3.3, Equation (3).
+    """
+
+    def _compute_region(self, eps=1e-6):
+        x = self.prev_estims
+        G = np.exp(np.mean(np.log(x + eps), axis=1, keepdims=True))  # per-sample geometric mean
+        x_clr = np.log((x + eps) / (G + eps))
+        self.x_clr = x_clr
+        cov_ = np.cov(x_clr, rowvar=False, ddof=1)
+        try:
+            self.precision_matrix = np.linalg.inv(cov_)
+        except np.linalg.LinAlgError:
+            self.precision_matrix = None
+
+        dim = x_clr.shape[-1]
+        ddof = dim - 1
+        self.chi2_val = chi2.ppf(self.confidence_level, ddof)
+        self.mean_ = np.mean(x_clr, axis=0)
+
+    def get_point_estimate(self):
+        Gp = np.exp(np.mean(np.log(self.prev_estims + 1e-6), axis=1, keepdims=True))
+        x_clr = np.log((self.prev_estims + 1e-6) / (Gp + 1e-6))
+        return np.mean(x_clr, axis=0)
+
+    def contains(self, point, eps=1e-6):
+        if self.precision_matrix is None:
+            return False
+        Gp = np.exp(np.mean(np.log(point + eps)))
+        point_clr = np.log((point + eps) / (Gp + eps))
+        diff = point_clr - self.mean_
+        dist2 = diff.T @ self.precision_matrix @ diff
+        return dist2 <= self.chi2_val
+
+
+# ==========================================================
+# Factory Method for Confidence Regions
+# ==========================================================
+
+def construct_confidence_region(prev_estims, confidence_level=0.95, method="intervals"):
+    method = method.lower()
+    if method == "intervals":
+        return ConfidenceInterval(prev_estims, confidence_level)
+    elif method == "ellipse":
+        return ConfidenceEllipseSimplex(prev_estims, confidence_level)
+    elif method in ("elipse-clr", "ellipse-clr", "clr"):
+        return ConfidenceEllipseCLR(prev_estims, confidence_level)
+    else:
+        raise NotImplementedError(f"Unknown method '{method}'.")
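For orientation, a brief usage sketch of the factory above. It is not part of the diff: the import path mlquantify.confidence is inferred from the file name, and the bootstrap estimates are simulated with a Dirichlet draw rather than produced by an actual quantifier.

# Hypothetical sketch -- not part of the published diff. In practice,
# prev_estims would come from bootstrap resampling of a quantifier's
# predictions rather than a simulated Dirichlet draw.
import numpy as np
from mlquantify.confidence import construct_confidence_region

rng = np.random.default_rng(0)
prev_estims = rng.dirichlet([30, 50, 20], size=500)   # 500 simulated bootstrap estimates, 3 classes

ci = construct_confidence_region(prev_estims, confidence_level=0.95, method="intervals")
low, high = ci.get_region()        # per-class percentile bounds
point = ci.get_point_estimate()    # mean of the bootstrap estimates

ce = construct_confidence_region(prev_estims, method="ellipse")
inside = ce.contains(point)        # chi-squared ellipse membership test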