mlquantify 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +0 -29
- mlquantify/adjust_counting/__init__.py +14 -0
- mlquantify/adjust_counting/_adjustment.py +365 -0
- mlquantify/adjust_counting/_base.py +247 -0
- mlquantify/adjust_counting/_counting.py +145 -0
- mlquantify/adjust_counting/_utils.py +114 -0
- mlquantify/base.py +117 -519
- mlquantify/base_aggregative.py +209 -0
- mlquantify/calibration.py +1 -0
- mlquantify/confidence.py +335 -0
- mlquantify/likelihood/__init__.py +5 -0
- mlquantify/likelihood/_base.py +161 -0
- mlquantify/likelihood/_classes.py +414 -0
- mlquantify/meta/__init__.py +1 -0
- mlquantify/meta/_classes.py +761 -0
- mlquantify/metrics/__init__.py +21 -0
- mlquantify/metrics/_oq.py +109 -0
- mlquantify/metrics/_rq.py +98 -0
- mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
- mlquantify/mixture/__init__.py +7 -0
- mlquantify/mixture/_base.py +153 -0
- mlquantify/mixture/_classes.py +400 -0
- mlquantify/mixture/_utils.py +112 -0
- mlquantify/model_selection/__init__.py +9 -0
- mlquantify/model_selection/_protocol.py +358 -0
- mlquantify/model_selection/_search.py +315 -0
- mlquantify/model_selection/_split.py +1 -0
- mlquantify/multiclass.py +350 -0
- mlquantify/neighbors/__init__.py +9 -0
- mlquantify/neighbors/_base.py +198 -0
- mlquantify/neighbors/_classes.py +159 -0
- mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
- mlquantify/neighbors/_kde.py +270 -0
- mlquantify/neighbors/_utils.py +135 -0
- mlquantify/neural/__init__.py +1 -0
- mlquantify/utils/__init__.py +47 -2
- mlquantify/utils/_artificial.py +27 -0
- mlquantify/utils/_constraints.py +219 -0
- mlquantify/utils/_context.py +21 -0
- mlquantify/utils/_decorators.py +36 -0
- mlquantify/utils/_exceptions.py +12 -0
- mlquantify/utils/_get_scores.py +159 -0
- mlquantify/utils/_load.py +18 -0
- mlquantify/utils/_parallel.py +6 -0
- mlquantify/utils/_random.py +36 -0
- mlquantify/utils/_sampling.py +273 -0
- mlquantify/utils/_tags.py +44 -0
- mlquantify/utils/_validation.py +447 -0
- mlquantify/utils/prevalence.py +61 -0
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
- mlquantify-0.1.9.dist-info/RECORD +53 -0
- mlquantify/classification/__init__.py +0 -1
- mlquantify/evaluation/__init__.py +0 -14
- mlquantify/evaluation/protocol.py +0 -291
- mlquantify/methods/__init__.py +0 -37
- mlquantify/methods/aggregative.py +0 -1159
- mlquantify/methods/meta.py +0 -472
- mlquantify/methods/mixture_models.py +0 -1003
- mlquantify/methods/non_aggregative.py +0 -136
- mlquantify/methods/threshold_optimization.py +0 -869
- mlquantify/model_selection.py +0 -377
- mlquantify/plots.py +0 -367
- mlquantify/utils/general.py +0 -371
- mlquantify/utils/method.py +0 -449
- mlquantify-0.1.7.dist-info/RECORD +0 -22
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,159 @@
+import numpy as np
+from mlquantify.utils._constraints import Interval, Options
+from mlquantify.neighbors._classification import PWKCLF
+from mlquantify.base_aggregative import AggregationMixin, CrispLearnerQMixin
+from mlquantify.base import BaseQuantifier
+from mlquantify.utils._decorators import _fit_context
+from mlquantify.adjust_counting import CC
+from mlquantify.utils import validate_y, validate_data
+from mlquantify.utils._validation import validate_prevalences
+
+
+class PWK(BaseQuantifier):
+    """
+    Probabilistic Weighted k-Nearest Neighbor (PWK) Quantifier.
+
+    This quantifier leverages the PWKCLF classifier to perform quantification by estimating
+    class prevalences through a probabilistically weighted k-nearest neighbor approach.
+
+    The method internally uses a weighted k-NN classifier where neighbors' contributions
+    are adjusted by class-specific weights designed to correct for class imbalance,
+    controlled by the hyperparameter alpha.
+
+    Parameters
+    ----------
+    alpha : float, default=1
+        Imbalance correction exponent for class weights. Higher values increase
+        the influence of minority classes.
+    n_neighbors : int, default=10
+        Number of nearest neighbors considered.
+    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
+        Algorithm used to compute nearest neighbors.
+    metric : str, default='euclidean'
+        Distance metric for nearest neighbor search.
+    leaf_size : int, default=30
+        Leaf size for tree-based neighbors algorithms.
+    p : int, default=2
+        Power parameter for the Minkowski metric (when metric='minkowski').
+    metric_params : dict or None, default=None
+        Additional parameters for the metric function.
+    n_jobs : int or None, default=None
+        Number of parallel jobs for neighbors search.
+
+    Attributes
+    ----------
+    cc : object
+        Internally used Classify & Count quantifier wrapping PWKCLF.
+    learner : PWKCLF
+        Underlying probabilistic weighted k-NN classifier.
+
+    Methods
+    -------
+    fit(X, y)
+        Fits the quantifier by training the internal PWKCLF and wrapping it with
+        Classify & Count quantification.
+    predict(X)
+        Predicts class prevalences for input data using the trained model.
+    classify(X)
+        Returns label predictions by applying the trained PWKCLF classifier.
+
+    Examples
+    --------
+    >>> q = PWK(alpha=1.5, n_neighbors=5)
+    >>> q.fit(X_train, y_train)
+    >>> prevalences = q.predict(X_test)
+    """
+
+    _parameter_constraints = {
+        "alpha": [Interval(1, None, inclusive_right=False)],
+        "n_neighbors": [Interval(1, None, inclusive_right=False)],
+        "algorithm": [Options(["auto", "ball_tree", "kd_tree", "brute"])],
+        "metric": [str],
+        "leaf_size": [Interval(1, None, inclusive_right=False)],
+        "p": [Interval(1, None, inclusive_right=False)],
+        "metric_params": [dict, type(None)],
+        "n_jobs": [Interval(1, None, inclusive_right=False), type(None)],
+    }
+
+    def __init__(self,
+                 alpha=1,
+                 n_neighbors=10,
+                 algorithm="auto",
+                 metric="euclidean",
+                 leaf_size=30,
+                 p=2,
+                 metric_params=None,
+                 n_jobs=None):
+        learner = PWKCLF(alpha=alpha,
+                         n_neighbors=n_neighbors,
+                         algorithm=algorithm,
+                         metric=metric,
+                         leaf_size=leaf_size,
+                         p=p,
+                         metric_params=metric_params,
+                         n_jobs=n_jobs)
+        self.algorithm = algorithm
+        self.alpha = alpha
+        self.n_neighbors = n_neighbors
+        self.metric = metric
+        self.leaf_size = leaf_size
+        self.p = p
+        self.metric_params = metric_params
+        self.n_jobs = n_jobs
+        self.learner = learner
+
+    @_fit_context(prefer_skip_nested_validation=True)
+    def fit(self, X, y):
+        """Fit the PWK quantifier to the training data.
+
+        Parameters
+        ----------
+        X_train : array-like of shape (n_samples, n_features)
+            Training features.
+
+        y_train : array-like of shape (n_samples,)
+            Training labels.
+
+        Returns
+        -------
+        self : object
+            The fitted instance.
+        """
+        X, y = validate_data(self, X, y, ensure_2d=True, ensure_min_samples=2)
+        validate_y(self, y)
+        self.classes_ = np.unique(y)
+        self.cc = CC(self.learner)
+        return self.cc.fit(X, y)
+
+    def predict(self, X):
+        """Predict prevalences for the given data.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Features for which to predict prevalences.
+
+        Returns
+        -------
+        prevalences : array of shape (n_classes,)
+            Predicted class prevalences.
+        """
+        prevalences = self.cc.predict(X)
+        prevalences = validate_prevalences(self, prevalences, self.classes_)
+        return prevalences
+
+    def classify(self, X):
+        """Classify samples using the underlying learner.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Features to classify.
+
+        Returns
+        -------
+        labels : array of shape (n_samples,)
+            Predicted class labels.
+        """
+        return self.learner.predict(X)
+
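For orientation, here is a minimal end-to-end sketch of how the new PWK quantifier shown above might be exercised. It reuses the breast-cancer setup from the docstring example that 0.1.9 removes; the import path mlquantify.neighbors and the true-prevalence check are assumptions for illustration, not code from the package.

# Illustrative sketch only; assumes PWK is exported from mlquantify.neighbors in 0.1.9.
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from mlquantify.neighbors import PWK  # assumed import path

features, target = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=32)

q = PWK(alpha=1.5, n_neighbors=5)
q.fit(X_train, y_train)

prevalences = q.predict(X_test)          # estimated class prevalences, summing to ~1
print("PWK:", prevalences)

_, counts = np.unique(y_test, return_counts=True)
print("Real:", counts / counts.sum())    # true prevalences, for comparison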
@@ -1,73 +1,61 @@
-from sklearn.neighbors import NearestNeighbors
-from sklearn.base import BaseEstimator
-import numpy as np
-import pandas as pd
-
-class PWKCLF(BaseEstimator):
-    """
-    Learner based on k-Nearest Neighbors (KNN) to use in the PWK method.
-
-    This classifier adjusts the influence of neighbors using class weights
-    derived from the `alpha` parameter. The `alpha` parameter controls the
-    influence of class imbalance.
-
-    Parameters
-    ----------
-    alpha : float, default=1
-        Controls the influence of class imbalance. Must be >= 1.
-
-    n_neighbors : int, default=10
-        Number of neighbors to use.
 
-
-
+import numpy as np
+from sklearn.neighbors import NearestNeighbors
 
-    metric : str, default='euclidean'
-        Distance metric to use.
 
-    leaf_size : int, default=30
-        Leaf size passed to the tree-based algorithms.
 
-
-
+class PWKCLF:
+    """
+    Probabilistic Weighted k-Nearest Neighbor Classifier (PWKCLF).
 
-
-
+    A weighted k-nearest neighbor classifier that assigns class probabilities to
+    instances based on neighbor counts weighted by class-specific inverse frequency
+    factors adjusted by a hyperparameter alpha controlling imbalance compensation.
 
-
-
+    Attributes
+    ----------
+    alpha : float
+        Exponent controlling the degree of imbalance compensation.
+    n_neighbors : int
+        Number of nearest neighbors considered.
+    nbrs : sklearn.neighbors.NearestNeighbors
+        The underlying k-NN structure used for neighbor queries.
+    classes_ : ndarray
+        Unique classes observed during training.
+    class_to_index : dict
+        Mapping from class label to index used in internal arrays.
+    class_weights : ndarray
+        Per-class weights computed based on class frequency and alpha.
+    y_train : ndarray
+        Labels of training samples.
+
+    Methods
+    -------
+    fit(X, y)
+        Fits the k-NN structure and computes class weights.
+    predict(X)
+        Predicts class labels by weighted voting among neighbors.
+
+    Notes
+    -----
+    The class weights are defined as:
+
+    \[
+    w_c = \left( \frac{N_c}{\min_{c'} N_{c'}} \right)^{-\frac{1}{\alpha}},
+    \]
+
+    where \( N_c \) is the count of class \( c \) in the training set.
+
+    This weighting scheme reduces bias towards majority classes by downweighting them
+    in the voting process.
 
     Examples
     --------
-    >>>
-    >>>
-    >>>
-    >>> from mlquantify.utils.general import get_real_prev
-    >>> from mlquantify.classification import PWKCLF
-    >>>
-    >>> # Load dataset
-    >>> features, target = load_breast_cancer(return_X_y=True)
-    >>>
-    >>> # Split into training and testing sets
-    >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=32)
-    >>>
-    >>> # Create and configure the PWKCLF learner
-    >>> learner = PWKCLF(alpha=1, n_neighbors=10)
-    >>>
-    >>> # Create the PWK quantifier
-    >>> model = PWK(learner=learner)
-    >>>
-    >>> # Train the model
-    >>> model.fit(X_train, y_train)
-    >>>
-    >>> # Predict prevalences
-    >>> y_pred = model.predict(X_test)
-    >>>
-    >>> # Display results
-    >>> print("Real:", get_real_prev(y_test))
-    >>> print("PWK:", y_pred)
+    >>> clf = PWKCLF(alpha=2.0, n_neighbors=7)
+    >>> clf.fit(X_train, y_train)
+    >>> labels = clf.predict(X_test)
     """
-
+
     def __init__(self,
                  alpha=1,
                  n_neighbors=10,
@@ -77,9 +65,6 @@ class PWKCLF(BaseEstimator):
                  p=2,
                  metric_params=None,
                  n_jobs=None):
-        if alpha < 1:
-            raise ValueError("alpha must not be smaller than 1")
-
         self.alpha = alpha
         self.n_neighbors = n_neighbors
 
@@ -119,9 +104,6 @@ class PWKCLF(BaseEstimator):
 
         self.y_train = y
 
-        if isinstance(y, pd.DataFrame):
-            self.y_train = y.reset_index(drop=True)
-
         unique_classes, class_counts = np.unique(y, return_counts=True)
         self.classes_ = unique_classes
         self.class_to_index = dict(zip(self.classes_, range(len(self.classes_))))
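The Notes block added to the PWKCLF docstring defines the class weights as \( w_c = (N_c / \min_{c'} N_{c'})^{-1/\alpha} \). Below is a small standalone sketch of that formula and of the weighted neighbor vote it feeds into; this is my own illustration of the documented behaviour, not code taken from the package.

# Illustration of the documented PWKCLF class-weight formula (not package code):
#   w_c = (N_c / min_c' N_c') ** (-1 / alpha)
import numpy as np

def pwk_class_weights(y, alpha=1.0):
    classes, counts = np.unique(y, return_counts=True)
    weights = (counts / counts.min()) ** (-1.0 / alpha)
    return dict(zip(classes, weights))

y = np.array([0] * 90 + [1] * 10)              # imbalanced toy labels
print(pwk_class_weights(y, alpha=1.0))          # majority class weighted ~0.111, minority 1.0
print(pwk_class_weights(y, alpha=2.0))          # milder correction: ~0.333 vs 1.0

def weighted_vote(neighbor_labels, weights):
    # each class scores weight * (number of neighbors carrying that label)
    classes = np.array(list(weights))
    scores = np.array([weights[c] * np.sum(neighbor_labels == c) for c in classes])
    return classes[np.argmax(scores)]

print(weighted_vote(np.array([0, 0, 0, 1, 1]), pwk_class_weights(y, alpha=1.0)))  # -> 1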
@@ -0,0 +1,270 @@
+import numpy as np
+from sklearn.neighbors import KernelDensity
+from mlquantify.utils._constraints import Interval
+from mlquantify.neighbors._base import BaseKDE
+from mlquantify.neighbors._utils import (
+    gaussian_kernel,
+    negative_log_likelihood,
+    EPS,
+)
+from mlquantify.utils import check_random_state
+from scipy.optimize import minimize
+
+
+# ============================================================
+# Auxiliary functions
+# ============================================================
+
+def _optimize_on_simplex(objective, n_classes, x0=None):
+    """
+    Optimize an objective function over the probability simplex.
+
+    This function performs constrained optimization to find the mixture weights
+    \( \alpha \) on the simplex \( \Delta^{n-1} = \{ \alpha \in \mathbb{R}^n : \alpha_i \geq 0, \sum_i \alpha_i = 1 \} \)
+    that minimize the given objective function.
+
+    Parameters
+    ----------
+    objective : callable
+        Function from \( \mathbb{R}^n \to \mathbb{R} \) to minimize.
+    n_classes : int
+        Dimensionality of the simplex (number of classes).
+    x0 : array-like, optional
+        Initial guess for the optimization, defaults to uniform vector.
+
+    Returns
+    -------
+    alpha_opt : ndarray of shape (n_classes,)
+        Optimized weights summing to 1.
+    min_loss : float
+        Objective function value at optimum.
+
+    Notes
+    -----
+    The optimization uses scipy's `minimize` with bounds and equality constraint.
+    """
+    if x0 is None:
+        x0 = np.ones(n_classes) / n_classes
+
+    constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
+    bounds = [(0, 1)] * n_classes
+
+    res = minimize(objective, x0, bounds=bounds, constraints=constraints)
+    alpha_opt = res.x / np.sum(res.x)
+    return alpha_opt, res.fun
+
+
+# ============================================================
+# KDEy-ML — Maximum Likelihood
+# ============================================================
+
+class KDEyML(BaseKDE):
+    """KDEy Maximum Likelihood quantifier.
+
+    Models class-conditional densities of posterior probabilities via Kernel Density
+    Estimation (KDE) and estimates class prevalences by maximizing the likelihood of
+    test data under a mixture model of these KDEs.
+
+    The mixture weights correspond to class prevalences, optimized under the simplex
+    constraint. The optimization minimizes the negative log-likelihood of the mixture
+    density evaluated at test posteriors.
+
+    This approach generalizes EM-based quantification methods by using KDE instead
+    of discrete histograms, allowing smooth multivariate density estimation over
+    the probability simplex.
+
+    References
+    ----------
+    The method is based on ideas presented by Moreo et al. (2023), extending KDE-based
+    approaches for distribution matching and maximum likelihood estimation.
+    """
+
+    def _precompute_training(self, train_predictions, train_y_values):
+        """
+        Fit KDE models on class-specific training posterior predictions.
+        """
+        super()._fit_kde_models(train_predictions, train_y_values)
+
+    def _solve_prevalences(self, predictions):
+        """
+        Estimate class prevalences by maximizing log-likelihood under KDE mixture.
+
+        Parameters
+        ----------
+        predictions : ndarray, shape (n_samples, n_features)
+            Posterior probabilities of test set instances.
+
+        Returns
+        -------
+        alpha_opt : ndarray, shape (n_classes,)
+            Estimated class prevalences.
+        min_loss : float
+            Minimum negative log-likelihood achieved.
+
+        Notes
+        -----
+        The optimization is solved over the probability simplex.
+        """
+        n_classes = len(self._class_kdes)
+        class_likelihoods = np.array([
+            np.exp(kde.score_samples(predictions)) + EPS for kde in self._class_kdes
+        ])  # (n_classes, n_samples)
+
+        def objective(alpha):
+            mixture = np.dot(alpha, class_likelihoods)
+            return negative_log_likelihood(mixture)
+
+        alpha_opt, min_loss = _optimize_on_simplex(objective, n_classes)
+
+        self.best_distance = min_loss
+
+        return alpha_opt, min_loss
+
+
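To make the KDEy-ML objective concrete, here is a self-contained sketch of the same idea on synthetic one-dimensional scores: fit one KDE per class, then minimize the negative log-likelihood of the mixture over the simplex with SLSQP. Only the objective mirrors the class above; the data and names are illustrative.

# Standalone illustration of the KDEy-ML objective (synthetic data, not package code).
import numpy as np
from scipy.optimize import minimize
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(0)
EPS = 1e-12

# Class-conditional "posterior" scores and an unlabeled test bag that is 30% / 70%.
train = {0: rng.normal(0.3, 0.1, size=(500, 1)), 1: rng.normal(0.7, 0.1, size=(500, 1))}
test = np.vstack([rng.normal(0.3, 0.1, size=(300, 1)), rng.normal(0.7, 0.1, size=(700, 1))])

kdes = [KernelDensity(bandwidth=0.1).fit(train[c]) for c in sorted(train)]
likelihoods = np.array([np.exp(k.score_samples(test)) + EPS for k in kdes])  # (n_classes, n_test)

def neg_log_likelihood(alpha):
    mixture = alpha @ likelihoods                      # mixture density at each test point
    return -np.sum(np.log(np.clip(mixture, EPS, None)))

n = len(kdes)
res = minimize(neg_log_likelihood, np.full(n, 1 / n), method="SLSQP",
               bounds=[(0, 1)] * n,
               constraints={"type": "eq", "fun": lambda a: np.sum(a) - 1})
print(res.x / np.sum(res.x))                           # expected to be close to [0.3, 0.7]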
+# ============================================================
+# KDEy-HD — Hellinger Distance Minimization
+# ============================================================
+
+class KDEyHD(BaseKDE):
+    r"""KDEy Hellinger Distance Minimization quantifier.
+
+    Estimates class prevalences by minimizing the Hellinger distance \( HD \) between
+    the KDE mixture of class-conditional densities and the KDE of test data, estimated
+    via Monte Carlo sampling and importance weighting.
+
+    This stochastic approximation enables practical optimization of complex divergence
+    measures otherwise lacking closed-form expressions for Gaussian Mixture Models.
+
+    Parameters
+    ----------
+    montecarlo_trials : int
+        Number of Monte Carlo samples used in approximation.
+    random_state : int or None
+        Seed or random state for reproducibility.
+
+    References
+    ----------
+    Builds on f-divergence Monte Carlo approximations for KDE mixtures as detailed
+    by Moreo et al. (2023) and importance sampling techniques.
+    """
+
+    _parameter_constraints = {
+        "montecarlo_trials": [Interval(1, None)],
+    }
+
+    def __init__(self, learner=None, bandwidth=0.1, kernel="gaussian", montecarlo_trials=1000, random_state=None):
+        super().__init__(learner, bandwidth, kernel)
+        self.montecarlo_trials = montecarlo_trials
+        self.random_state = random_state
+
+    def _precompute_training(self, train_predictions, train_y_values):
+        """
+        Precompute reference samples from class KDEs and their densities.
+        """
+        super()._fit_kde_models(train_predictions, train_y_values)
+        n_class = len(self._class_kdes)
+        trials = int(self.montecarlo_trials)
+        rng = check_random_state(self.random_state)
+        # Convert to integer seed for sklearn compatibility
+        seed = rng.integers(0, 2**31 - 1) if hasattr(rng, 'integers') else self.random_state
+
+        samples = np.vstack([
+            kde.sample(max(1, trials // n_class), random_state=seed)
+            for kde in self._class_kdes
+        ])
+
+        ref_classwise = np.array([np.exp(k.score_samples(samples)) + EPS for k in self._class_kdes])
+        ref_density = np.mean(ref_classwise, axis=0) + EPS
+
+        self._ref_samples = samples
+        self._ref_classwise = ref_classwise
+        self._ref_density = ref_density
+
+    def _solve_prevalences(self, predictions):
+        """
+        Minimize Hellinger distance between test KDE and mixture KDE via importance sampling.
+        """
+        test_kde = KernelDensity(bandwidth=self.bandwidth).fit(predictions)
+        qs = np.exp(test_kde.score_samples(self._ref_samples)) + EPS
+        iw = qs / self._ref_density
+        fracs = self._ref_classwise / qs
+
+        def objective(alpha):
+            alpha = np.clip(alpha, EPS, None)
+            alpha /= np.sum(alpha)
+            ps_div_qs = np.dot(alpha, fracs)
+            vals = (np.sqrt(ps_div_qs) - 1.0) ** 2 * iw
+            return np.mean(vals)
+
+        alpha_opt, min_loss = _optimize_on_simplex(objective, len(self._class_kdes))
+
+        self.best_distance = min_loss
+
+        return alpha_opt, min_loss
+
+
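A brief justification (my reading of the code above, not text from the package) for why the importance-weighted objective in KDEyHD._solve_prevalences estimates the squared Hellinger distance. Writing \( p_\alpha \) for the mixture of class KDEs, \( q \) for the test KDE, and \( r \) for the reference density the Monte Carlo samples \( x_m \) are drawn from,

\[
\left( \sqrt{p_\alpha} - \sqrt{q} \right)^2 = q \left( \sqrt{\tfrac{p_\alpha}{q}} - 1 \right)^2
\quad\Longrightarrow\quad
\int \left( \sqrt{p_\alpha(x)} - \sqrt{q(x)} \right)^2 dx
\;\approx\; \frac{1}{M} \sum_{m=1}^{M} \left( \sqrt{\tfrac{p_\alpha(x_m)}{q(x_m)}} - 1 \right)^2 \frac{q(x_m)}{r(x_m)},
\]

which is exactly `np.mean((np.sqrt(ps_div_qs) - 1.0) ** 2 * iw)` with `ps_div_qs` holding \( p_\alpha / q \) and `iw` holding \( q / r \); the conventional factor \( \tfrac{1}{2} \) is omitted, which does not change the minimizer.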
+# ============================================================
+# KDEy-CS — Cauchy–Schwarz Divergence
+# ============================================================
+
+class KDEyCS(BaseKDE):
+    """
+    KDEy Cauchy-Schwarz Divergence quantifier.
+
+    Uses a closed-form solution for minimizing the Cauchy-Schwarz (CS) divergence between
+    Gaussian Mixture Models representing class-conditional densities fitted via KDE.
+
+    This mathematically efficient approach leverages precomputed kernel Gram matrices
+    of train-train, train-test, and test-test instances for fast divergence evaluation,
+    enabling scalable multiclass quantification.
+
+    References
+    ----------
+    Based on closed-form CS divergence derivations by Kampa et al. (2011) and KDEy
+    density representations, as discussed by Moreo et al. (2023).
+    """
+
+    def _precompute_training(self, train_predictions, train_y_values):
+        """
+        Precompute kernel sums and Gram matrices needed for CS divergence evaluation.
+        """
+        P = np.atleast_2d(train_predictions)
+        y = np.asarray(train_y_values)
+        centers = [P[y == c] for c in self.classes_]
+        counts = np.array([len(x) if len(x) > 0 else 1 for x in centers])
+        h_eff = np.sqrt(2) * self.bandwidth
+
+        B_bar = np.zeros((len(self.classes_), len(self.classes_)))
+        for i, Xi in enumerate(centers):
+            for j, Xj in enumerate(centers[i:], start=i):
+                val = np.sum(gaussian_kernel(Xi, Xj, h_eff))
+                B_bar[i, j] = B_bar[j, i] = val
+        self._centers = centers
+        self._counts = counts
+        self._B_bar = B_bar
+        self._h_eff = h_eff
+
+    def _solve_prevalences(self, predictions):
+        """
+        Minimize Cauchy-Schwarz divergence over class mixture weights on the probability simplex.
+        """
+        Pte = np.atleast_2d(predictions)
+        n = len(self.classes_)
+        a_bar = np.array([np.sum(gaussian_kernel(Xi, Pte, self._h_eff)) for Xi in self._centers])
+        counts = self._counts + EPS
+        B_bar = self._B_bar + EPS
+        t = 1.0 / max(1, Pte.shape[0])
+
+        def objective(alpha):
+            alpha = np.clip(alpha, EPS, None)
+            alpha /= np.sum(alpha)
+            rbar = alpha / counts
+            partA = -np.log(np.dot(rbar, a_bar) * t + EPS)
+            partB = 0.5 * np.log(rbar @ (B_bar @ rbar) + EPS)
+            return partA + partB
+
+        alpha_opt, min_loss = _optimize_on_simplex(objective, n)
+
+        self.best_distance = min_loss
+
+        return alpha_opt, min_loss
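For reference, the closed form that KDEyCS exploits (again my reading of the code, consistent with the cited Kampa et al. (2011) derivation): the Cauchy-Schwarz divergence decomposes as

\[
D_{CS}(p_\alpha, q) = -\log \int p_\alpha \, q \, dx + \tfrac{1}{2} \log \int p_\alpha^2 \, dx + \tfrac{1}{2} \log \int q^2 \, dx ,
\]

where the last term does not depend on \( \alpha \) and is dropped. Because the product of two Gaussian kernels of bandwidth \( h \) integrates to a Gaussian kernel of bandwidth \( \sqrt{2}\,h \), the two remaining integrals reduce to sums of `gaussian_kernel` evaluations at `h_eff = np.sqrt(2) * self.bandwidth`, which is what the precomputed `a_bar` (train vs. test) and `B_bar` (train vs. train) terms hold in `partA` and `partB` above.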
@@ -0,0 +1,135 @@
+import numpy as np
+from sklearn.metrics import pairwise_distances
+from math import pi
+from scipy.optimize import minimize
+
+
+EPS = 1e-12
+
+# ============================================================
+# Utilities
+# ============================================================
+
+def gaussian_kernel(X, Y, bandwidth):
+    """
+    Compute the Gaussian kernel matrix K(x, y) with specified bandwidth.
+
+    This kernel matrix represents the similarity between each pair of points in X and Y,
+    computed using the Gaussian (RBF) kernel function:
+
+    \[
+    K(x, y) = \frac{1}{(2 \pi)^{D/2} h^D} \exp\left(- \frac{\|x - y\|^2}{2 h^2}\right)
+    \]
+
+    where \( h \) is the bandwidth (smoothing parameter), and \( D \) is the dimensionality
+    of the input feature space.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples_X, n_features)
+        Input data points.
+    Y : array-like of shape (n_samples_Y, n_features) or None
+        Input data points for kernel computation. If None, defaults to X.
+    bandwidth : float
+        Kernel bandwidth parameter \( h \).
+
+    Returns
+    -------
+    K : ndarray of shape (n_samples_X, n_samples_Y)
+        Gaussian kernel matrix.
+    """
+    X = np.atleast_2d(X)
+    if Y is None:
+        Y = X
+    else:
+        Y = np.atleast_2d(Y)
+    sqd = pairwise_distances(X, Y, metric="euclidean") ** 2
+    D = X.shape[1]
+    norm = (bandwidth ** D) * ((2 * pi) ** (D / 2))
+    return np.exp(-sqd / (2 * (bandwidth ** 2))) / (norm + EPS)
+
+
+def negative_log_likelihood(mixture_likelihoods):
+    """
+    Compute the negative log-likelihood of given mixture likelihoods in a numerically stable way.
+
+    Given mixture likelihood values \( p_i \) for samples, the negative log-likelihood is:
+
+    \[
+    - \sum_i \log(p_i)
+    \]
+
+    Numerical stability is achieved by clipping likelihoods below a small epsilon.
+
+    Parameters
+    ----------
+    mixture_likelihoods : array-like
+        Likelihood values for the mixture distribution evaluated at samples.
+
+    Returns
+    -------
+    nll : float
+        Negative log-likelihood value.
+    """
+    mixture_likelihoods = np.clip(mixture_likelihoods, EPS, None)
+    return -np.sum(np.log(mixture_likelihoods))
+
+
+def _simplex_constraints(n):
+    """
+    Define constraints and bounds for optimization over the probability simplex.
+
+    The simplex is defined as all vectors \( \alpha \in \mathbb{R}^n \) such that:
+
+    \[
+    \alpha_i \geq 0, \quad \sum_{i=1}^n \alpha_i = 1
+    \]
+
+    Parameters
+    ----------
+    n : int
+        Dimensionality of the simplex (number of mixture components).
+
+    Returns
+    -------
+    constraints : list of dict
+        List containing equality constraint for sum of elements equaling 1.
+    bounds : list of tuple
+        Bounds for each element to lie between 0 and 1.
+    """
+    cons = [{"type": "eq", "fun": lambda a: np.sum(a) - 1.0}]
+    bounds = [(0.0, 1.0) for _ in range(n)]
+    return cons, bounds
+
+
+def _optimize_on_simplex(objective, n, x0=None):
+    """
+    Minimize an objective function over the probability simplex.
+
+    This function solves for mixture weights \( \boldsymbol{\alpha} \) that minimize the
+    objective function under the constraints \(\alpha_i \geq 0\) and \(\sum_i \alpha_i = 1\).
+
+    The optimization uses Sequential Least SQuares Programming (SLSQP).
+
+    Parameters
+    ----------
+    objective : callable
+        The objective function to minimize. It should accept a vector of length n and
+        return a scalar loss.
+    n : int
+        Number of mixture components (dimension of \( \boldsymbol{\alpha} \)).
+    x0 : array-like, optional
+        Initial guess for \( \boldsymbol{\alpha} \). If None, defaults to uniform.
+
+    Returns
+    -------
+    alpha_opt : ndarray of shape (n,)
+        Optimized mixture weights summing to one.
+    """
+    if x0 is None:
+        x0 = np.ones(n) / n
+    cons, bounds = _simplex_constraints(n)
+    res = minimize(objective, x0, method="SLSQP", constraints=cons, bounds=bounds)
+    x = np.clip(getattr(res, "x", x0), 0.0, None)
+    s = np.sum(x)
+    return x / s if s > 0 else np.ones(n) / n