mlquantify 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +2 -1
- mlquantify/adjust_counting/__init__.py +6 -5
- mlquantify/adjust_counting/_adjustment.py +208 -37
- mlquantify/adjust_counting/_base.py +5 -6
- mlquantify/adjust_counting/_counting.py +10 -7
- mlquantify/likelihood/__init__.py +0 -2
- mlquantify/likelihood/_classes.py +45 -199
- mlquantify/meta/_classes.py +50 -42
- mlquantify/mixture/__init__.py +2 -1
- mlquantify/mixture/_classes.py +310 -15
- mlquantify/model_selection/_search.py +1 -1
- mlquantify/neighbors/_base.py +15 -15
- mlquantify/neighbors/_classes.py +2 -2
- mlquantify/neighbors/_kde.py +6 -6
- mlquantify/neural/__init__.py +1 -1
- mlquantify/neural/_base.py +0 -0
- mlquantify/neural/_classes.py +609 -0
- mlquantify/neural/_perm_invariant.py +0 -0
- mlquantify/neural/_utils.py +0 -0
- mlquantify/utils/__init__.py +2 -1
- mlquantify/utils/_constraints.py +2 -0
- mlquantify/utils/_validation.py +9 -0
- {mlquantify-0.1.19.dist-info → mlquantify-0.1.21.dist-info}/METADATA +13 -18
- {mlquantify-0.1.19.dist-info → mlquantify-0.1.21.dist-info}/RECORD +27 -23
- {mlquantify-0.1.19.dist-info → mlquantify-0.1.21.dist-info}/WHEEL +1 -1
- mlquantify-0.1.21.dist-info/licenses/LICENSE +28 -0
- mlquantify/likelihood/_base.py +0 -147
- {mlquantify-0.1.19.dist-info → mlquantify-0.1.21.dist-info}/top_level.txt +0 -0
mlquantify/likelihood/_classes.py CHANGED

@@ -1,15 +1,16 @@
+from mlquantify.base import BaseQuantifier
+from mlquantify.base_aggregative import AggregationMixin
 import numpy as np
 from mlquantify.base_aggregative import SoftLearnerQMixin
-from mlquantify.likelihood._base import BaseIterativeLikelihood
 from mlquantify.metrics._slq import MAE
-from mlquantify.
+from mlquantify.utils import _fit_context, validate_data, check_classes_attribute, validate_predictions, validate_prevalences
 from mlquantify.utils._constraints import (
     Interval,
     CallableConstraint,
     Options
 )
 
-class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
+class EMQ(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
     r"""Expectation-Maximization Quantifier (EMQ).
 
     Estimates class prevalences under prior probability shift by alternating
@@ -81,45 +82,63 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
         "criteria": [CallableConstraint()],
     }
 
+    def __mlquantify_tags__(self):
+        tags = super().__mlquantify_tags__()
+        tags.prediction_requirements.requires_train_proba = False
+        return tags
+
     def __init__(self,
                  learner=None,
                  tol=1e-4,
                  max_iter=100,
                  calib_function=None,
                  criteria=MAE):
-
+        self.learner = learner
+        self.tol = tol
+        self.max_iter = max_iter
         self.calib_function = calib_function
         self.criteria = criteria
+
+    @_fit_context(prefer_skip_nested_validation=True)
+    def fit(self, X, y):
+        """Fit the quantifier using the provided data and learner."""
+        X, y = validate_data(self, X, y)
+        self.classes_ = np.unique(y)
+        self.learner.fit(X, y)
+        counts = np.array([np.count_nonzero(y == _class) for _class in self.classes_])
+        self.priors = counts / len(y)
+        self.y_train = y
+
+        return self
+
+    def predict(self, X):
+        """Predict the prevalence of each class."""
+        X = validate_data(self, X)
+        estimator_function = _get_learner_function(self)
+        predictions = getattr(self.learner, estimator_function)(X)
+        prevalences = self.aggregate(predictions, self.y_train)
+        return prevalences
+
+    def aggregate(self, predictions, y_train):
+        predictions = validate_predictions(self, predictions)
+        self.classes_ = check_classes_attribute(self, np.unique(y_train))
 
-
-
-
-
-        - Calibrate posterior predictions if calibration function specified.
-        - Apply EM procedure to re-estimate prevalences, based on training priors and calibrated posteriors.
-
-        Parameters
-        ----------
-        predictions : ndarray of shape (n_samples, n_classes)
-            Posterior probabilities for each class on test data.
-        priors : ndarray of shape (n_classes,)
-            Training set class prevalences, serving as initial priors.
-
-        Returns
-        -------
-        prevalences : ndarray of shape (n_classes,)
-            Estimated class prevalences after EM iteration.
-        """
+        if not hasattr(self, 'priors') or len(self.priors) != len(self.classes_):
+            counts = np.array([np.count_nonzero(y_train == _class) for _class in self.classes_])
+            self.priors = counts / len(y_train)
+
         calibrated_predictions = self._apply_calibration(predictions)
         prevalences, _ = self.EM(
             posteriors=calibrated_predictions,
-            priors=priors,
+            priors=self.priors,
             tolerance=self.tol,
             max_iter=self.max_iter,
             criteria=self.criteria
         )
-        return prevalences
 
+        prevalences = validate_prevalences(self, prevalences, self.classes_)
+        return prevalences
+
 
     @classmethod
     def EM(cls, posteriors, priors, tolerance=1e-6, max_iter=100, criteria=MAE):
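
The hunk above gives EMQ its own fit/predict/aggregate workflow instead of inheriting it from BaseIterativeLikelihood. A minimal usage sketch of that new API, assuming EMQ is exposed from mlquantify.likelihood and using a scikit-learn classifier as the learner; the data and the import path are illustrative, not taken from this diff:

import numpy as np
from sklearn.linear_model import LogisticRegression
from mlquantify.likelihood import EMQ  # assumed public import path

rng = np.random.default_rng(0)
X_train = rng.normal(size=(500, 5))
y_train = (X_train[:, 0] + rng.normal(scale=0.5, size=500) > 0).astype(int)
X_test = rng.normal(loc=0.3, size=(200, 5))  # test sample under prior shift

emq = EMQ(learner=LogisticRegression(max_iter=1000), tol=1e-4, max_iter=100)
emq.fit(X_train, y_train)          # fits the learner and stores training priors
prevalences = emq.predict(X_test)  # EM-adjusted prevalence estimate per class
print(dict(zip(emq.classes_, prevalences)))
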
@@ -254,177 +273,4 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
         logits = np.log(preds)
         scaled = logits * W
         exp_scaled = np.exp(scaled - np.max(scaled, axis=1, keepdims=True))
-        return exp_scaled / np.sum(exp_scaled, axis=1, keepdims=True)
-
-
-
-class MLPE(SoftLearnerQMixin, BaseIterativeLikelihood):
-    r"""Maximum Likelihood Prevalence Estimation (MLPE).
-
-    Returns training priors as prevalence estimates without adaptations.
-
-    Parameters
-    ----------
-    learner : estimator, optional
-        Base classifier.
-
-    References
-    ----------
-    .. [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
-    """
-
-    def __init__(self, learner=None):
-        super().__init__(learner=learner, max_iter=1)
-
-    def _iterate(self, predictions, priors):
-        """Returns training priors without adjustment.
-
-        Parameters
-        ----------
-        predictions : array-like
-            Ignored in this implementation.
-        priors : array-like
-            Training priors, returned as is.
-
-        Returns
-        -------
-        prevalences : array-like
-            Equal to the training priors.
-        """
-        return priors
-
-@define_binary
-class CDE(SoftLearnerQMixin, BaseIterativeLikelihood):
-    r"""CDE-Iterate for binary classification prevalence estimation.
-
-    Threshold :math:`\tau` from false positive and false negative costs:
-    .. math::
-        \tau = \frac{c_{FP}}{c_{FP} + c_{FN}}
-
-    Hard classification by thresholding posterior probability :math:`p(+|x)` at :math:`\tau`:
-    .. math::
-        \hat{y}(x) = \mathbf{1}_{p(+|x) > \tau}
-
-    Prevalence estimation via classify-and-count:
-    .. math::
-        \hat{p}_U(+) = \frac{1}{N} \sum_{n=1}^N \hat{y}(x_n)
-
-    False positive cost update:
-    .. math::
-        c_{FP}^{new} = \frac{p_L(+)}{p_L(-)} \times \frac{\hat{p}_U(-)}{\hat{p}_U(+)} \times c_{FN}
-
-    Parameters
-    ----------
-    learner : estimator, optional
-        Wrapped classifier (unused).
-    tol : float, default=1e-4
-        Convergence tolerance.
-    max_iter : int, default=100
-        Max iterations.
-    init_cfp : float, default=1.0
-        Initial false positive cost.
-
-    References
-    ----------
-    .. [1] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
-    """
-
-    _parameter_constraints = {
-        "tol": [Interval(0, None, inclusive_left=False)],
-        "max_iter": [Interval(1, None, inclusive_left=True)],
-        "init_cfp": [Interval(0, None, inclusive_left=False)]
-    }
-
-    def __init__(self, learner=None, tol=1e-4, max_iter=100, init_cfp=1.0):
-        super().__init__(learner=learner, tol=tol, max_iter=max_iter)
-        self.init_cfp = float(init_cfp)
-
-    def _iterate(self, predictions, priors):
-        r"""Iteratively estimate prevalences via cost-sensitive thresholding.
-
-        Parameters
-        ----------
-        predictions : ndarray, shape (n_samples, 2)
-            Posterior probabilities for binary classes [neg, pos].
-        priors : ndarray, shape (2,)
-            Training priors [p(neg), p(pos)].
-
-        Returns
-        -------
-        prevalences : ndarray, shape (2,)
-            Estimated prevalences for classes [neg, pos].
-        """
-        P = np.asarray(predictions, dtype=np.float64)
-        Ptr = np.asarray(priors, dtype=np.float64)
-
-        # basic checks
-        if P.ndim != 2 or P.shape[1] != 2:
-            raise ValueError("CDE implementation here supports binary case only: predictions shape (n,2).")
-
-        # ensure no zeros
-        eps = 1e-12
-        P = np.clip(P, eps, 1.0)
-
-        # training priors pL(+), pL(-)
-        # assume Ptr order matches columns of P; if Ptr sums to 1 but order unknown, user must match.
-        pL_pos = Ptr[1]
-        pL_neg = Ptr[0]
-        if pL_pos <= 0 or pL_neg <= 0:
-            # keep them positive to avoid divisions by zero
-            pL_pos = max(pL_pos, eps)
-            pL_neg = max(pL_neg, eps)
-
-        # initialize costs
-        cFN = 1.0
-        cFP = float(self.init_cfp)
-
-        prev_prev_pos = None
-        s = 0
-
-        # iterate: compute threshold from costs, classify, estimate prevalences via CC,
-        # update cFP via eq. (4.27), repeat
-        while s < self.max_iter:
-            # decision threshold tau for positive class:
-            # Derivation:
-            # predict positive if cost_FP * p(-|x) < cost_FN * p(+|x)
-            # => predict positive if p(+|x) / p(-|x) > cost_FP / cost_FN
-            # since p(+|x) / p(-|x) = p(+|x) / (1 - p(+|x)):
-            # p(+|x) > cost_FP / (cost_FP + cost_FN)
-            tau = cFP / (cFP + cFN)
-
-            # hard predictions for positive class using threshold on posterior for positive (col 1)
-            pos_probs = P[:, 1]
-            hard_pos = (pos_probs > tau).astype(float)
-
-            # classify-and-count prevalence estimate on U
-            prev_pos = hard_pos.mean()
-            prev_neg = 1.0 - prev_pos
-
-            # update cFP according to Eq. 4.27:
-            # cFP_new = (pL_pos / pL_neg) * (pU_hat(neg) / pU_hat(pos)) * cFN
-            # guard against zero prev_pos / prev_neg
-            prev_pos_safe = max(prev_pos, eps)
-            prev_neg_safe = max(prev_neg, eps)
-
-            cFP_new = (pL_pos / pL_neg) * (prev_neg_safe / prev_pos_safe) * cFN
-
-            # check convergence on prevalences (absolute change)
-            if prev_prev_pos is not None and abs(prev_pos - prev_prev_pos) < self.tol:
-                break
-
-            # prepare next iter
-            cFP = cFP_new
-            prev_prev_pos = prev_pos
-            s += 1
-
-        # if didn't converge within max_iter we keep last estimate (book warns about lack of fisher consistency)
-        if s >= self.max_iter:
-            # optional: warning
-            # print('[warning] CDE-Iterate reached max_iter without converging')
-            pass
-
-        prevalences = np.array([prev_neg, prev_pos], dtype=np.float64)
-        # ensure sums to 1 (numerical safety)
-        prevalences = prevalences / prevalences.sum()
-
-        return prevalences
+        return exp_scaled / np.sum(exp_scaled, axis=1, keepdims=True)
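
For reference, the EM classmethod kept in context above implements the classic expectation-maximization prior correction (Saerens-Latinne-Decaestecker). A standalone sketch of that update, not the library's exact code, assuming row-normalized posteriors and using mean absolute change as the stopping rule:

import numpy as np

def em_prevalence(posteriors, priors, tolerance=1e-6, max_iter=100):
    # Standard EM prior adjustment: reweight posteriors by the ratio of the
    # current prevalence estimate to the training priors, renormalize per row,
    # and take the column mean as the next prevalence estimate.
    priors = np.asarray(priors, dtype=float)
    prev = priors.copy()
    for _ in range(max_iter):
        weighted = np.asarray(posteriors, dtype=float) * (prev / priors)
        weighted /= weighted.sum(axis=1, keepdims=True)
        new_prev = weighted.mean(axis=0)
        if np.mean(np.abs(new_prev - prev)) < tolerance:
            prev = new_prev
            break
        prev = new_prev
    return prev
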
mlquantify/meta/_classes.py CHANGED

@@ -518,15 +518,15 @@ class AggregativeBootstrap(MetaquantifierMixin, BaseQuantifier):
 
         if val_split is None:
             model.fit(X, y)
-
+            y_train = y
             train_predictions = getattr(model, learner_function)(X)
         else:
             X_fit, y_fit, X_val, y_val = train_test_split(X, y, test_size=val_split, random_state=self.random_state)
             model.fit(X_fit, y_fit)
-
+            y_train = y_val
             train_predictions = getattr(model, learner_function)(X_val)
         self.train_predictions = train_predictions
-        self.
+        self.y_train = y_train
 
         return self
 
@@ -549,10 +549,10 @@ class AggregativeBootstrap(MetaquantifierMixin, BaseQuantifier):
 
         predictions = getattr(model, learner_function)(X)
 
-        return self.aggregate(predictions, self.train_predictions, self.
+        return self.aggregate(predictions, self.train_predictions, self.y_train)
 
 
-    def aggregate(self, predictions, train_predictions,
+    def aggregate(self, predictions, train_predictions, y_train):
         r""" Aggregates the predictions using bootstrap resampling.
 
         Parameters
@@ -561,7 +561,7 @@ class AggregativeBootstrap(MetaquantifierMixin, BaseQuantifier):
             The input data.
         train_predictions : array-like of shape (n_samples, n_classes)
             The training predictions.
-
+        y_train : array-like of shape (n_samples,)
             The training target values.
 
         Returns
@@ -571,7 +571,7 @@ class AggregativeBootstrap(MetaquantifierMixin, BaseQuantifier):
         """
         prevalences = []
 
-        self.classes = np.unique(
+        self.classes = np.unique(y_train)
 
         for train_idx in bootstrap_sample_indices(
             n_samples=len(train_predictions),
@@ -580,7 +580,7 @@ class AggregativeBootstrap(MetaquantifierMixin, BaseQuantifier):
             random_state=self.random_state
         ):
             train_pred_boot = train_predictions[train_idx]
-            train_y_boot =
+            train_y_boot = y_train[train_idx]
 
             for test_idx in bootstrap_sample_indices(
                 n_samples=len(predictions),
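
The two nested loops above draw bootstrap resamples of the training predictions (with their labels) and of the test predictions, then aggregate each pair into one prevalence estimate. A plain-numpy sketch of that resampling pattern; bootstrap_sample_indices here is a simplified stand-in for the library helper of the same name, and aggregate is any callable with the (predictions, train_predictions, y_train) signature shown in the diff:

import numpy as np

def bootstrap_sample_indices(n_samples, n_rounds, rng):
    # Simplified stand-in: yield index arrays drawn with replacement.
    for _ in range(n_rounds):
        yield rng.integers(0, n_samples, size=n_samples)

def bootstrap_prevalences(aggregate, test_pred, train_pred, y_train,
                          n_train_boot=10, n_test_boot=10, random_state=0):
    # One prevalence estimate per (train resample, test resample) pair.
    rng = np.random.default_rng(random_state)
    estimates = []
    for tr in bootstrap_sample_indices(len(train_pred), n_train_boot, rng):
        for te in bootstrap_sample_indices(len(test_pred), n_test_boot, rng):
            estimates.append(aggregate(test_pred[te], train_pred[tr], y_train[tr]))
    return np.asarray(estimates)  # summarize with mean and percentiles for intervals
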
@@ -679,7 +679,7 @@ class QuaDapt(MetaquantifierMixin, BaseQuantifier):
             raise ValueError(f"The quantifier {self.quantifier.__class__.__name__} does not use training probabilities, which are required for QuaDapt.")
 
         self.quantifier.learner.fit(X, y)
-        self.
+        self.y_train = y
 
         return self
 
@@ -691,64 +691,72 @@ class QuaDapt(MetaquantifierMixin, BaseQuantifier):
 
         predictions = getattr(model, "predict_proba")(X)
 
-        return self.aggregate(predictions, self.
+        return self.aggregate(predictions, self.y_train)
 
 
-    def aggregate(self, predictions,
+    def aggregate(self, predictions, y_train):
 
-
-
+        prevalence, _, _ = self.best_mixture(predictions)
+        prevalences = np.asarray([1-prevalence, prevalence])
 
-        self.classes = self.classes if hasattr(self, 'classes') else np.unique(
-
-        moss_scores, moss_labels = self.MoSS(1000, 0.5, m)
-
-        prevalences = self.quantifier.aggregate(predictions,
-                                                moss_scores,
-                                                moss_labels)
+        self.classes = self.classes if hasattr(self, 'classes') else np.unique(y_train)
 
-        prevalences =
+        prevalences = validate_prevalences(self, prevalences, self.classes)
         return prevalences
 
 
-    def
+    def best_mixture(self, predictions):
+        predictions = predictions[:, 1]
 
         MF = np.atleast_1d(np.round(self.merging_factors, 2)).astype(float)
 
         distances = []
+        alphas = []
 
         for mf in MF:
-            scores, labels = self.MoSS(1000, 0.5, mf)
+            scores, labels = self.MoSS(n=1000, alpha=0.5, merging_factor=mf)
             pos_scores = scores[labels == 1][:, 1]
             neg_scores = scores[labels == 0][:, 1]
+
+            if self.measure in ["hellinger", "topsoe", "probsymm"]:
+                method = DyS(measure=self.measure)
+            elif self.measure == "sord":
+                method = SORD()
 
-
+            alpha, distance = method.best_mixture(predictions, pos_scores, neg_scores)
 
-            distances.append(
+            distances.append(distance)
+            alphas.append(alpha)
 
         best_m = MF[np.argmin(distances)]
-
+        best_alpha = alphas[np.argmin(distances)]
+        best_distance = np.min(distances)
+        return best_alpha, best_distance, best_m
 
-    def
-
-        if self.measure in ["hellinger", "topsoe", "probsymm"]:
-            method = DyS(measure=self.measure)
-        elif self.measure == "sord":
-            method = SORD()
+    def get_best_distance(self, predictions):
 
-
-
+        _, distance, _= self.get_best_merging_factor(predictions)
+
+        return distance
 
 
     @classmethod
-    def MoSS(cls, n, alpha,
+    def MoSS(cls, n, alpha, merging_factor):
         r"""Model for Score Simulation
 
-
-
-
-
-
+        Parameters
+        ----------
+        n : int
+            Number of observations.
+        alpha : float
+            Class proportion, which defines the prevalence of the positive class.
+        m : float
+            Merging factor, which controls the overlap between positive and negative score distributions.
+
+        Returns
+        -------
+        tuple
+            Tuple of score and label arrays.
 
         .. math::
 
@@ -776,9 +784,9 @@ class QuaDapt(MetaquantifierMixin, BaseQuantifier):
         n_neg = n - n_pos
 
         # Positive scores
-        p_score = np.random.uniform(size=n_pos) **
+        p_score = np.random.uniform(size=n_pos) ** merging_factor
         # Negative scores
-        n_score = 1 - (np.random.uniform(size=n_neg) **
+        n_score = 1 - (np.random.uniform(size=n_neg) ** merging_factor)
 
         # Build the feature arrays (two identical columns)
         moss = np.column_stack(