mlquantify 0.0.11.8__tar.gz → 0.0.11.9__tar.gz
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/PKG-INFO +1 -1
- mlquantify-0.0.11.9/mlquantify/__init__.py +27 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/base.py +57 -2
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/evaluation/protocol.py +11 -5
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/methods/aggregative.py +47 -48
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/methods/mixture_models.py +23 -20
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/methods/threshold_optimization.py +31 -25
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/utils/general.py +1 -1
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify.egg-info/PKG-INFO +1 -1
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/setup.py +1 -1
- mlquantify-0.0.11.8/mlquantify/__init__.py +0 -9
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/README.md +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/classification/__init__.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/classification/methods.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/evaluation/__init__.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/evaluation/measures.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/methods/__init__.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/methods/meta.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/methods/non_aggregative.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/model_selection.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/plots.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/utils/__init__.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify/utils/method.py +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify.egg-info/SOURCES.txt +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify.egg-info/dependency_links.txt +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify.egg-info/requires.txt +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/mlquantify.egg-info/top_level.txt +0 -0
- {mlquantify-0.0.11.8 → mlquantify-0.0.11.9}/setup.cfg +0 -0
mlquantify-0.0.11.9/mlquantify/__init__.py (new file)

@@ -0,0 +1,27 @@
+"mlquantify, a Python package for quantification"
+
+from . import base
+from . import model_selection
+from . import plots
+from . import classification
+from . import evaluation
+from . import methods
+from . import utils
+
+ARGUMENTS_SETTED = False
+
+arguments = {
+    "y_pred": None,
+    "posteriors": None,
+    "y_labels": None,
+    "y_train_pred": None,
+}
+
+def set_arguments(y_pred=None, posteriors=None, y_labels=None, y_train_pred=None):
+    global ARGUMENTS_SETTED
+    global arguments
+    arguments["y_pred"] = y_pred
+    arguments["posteriors"] = posteriors
+    arguments["y_labels"] = y_labels
+    arguments["y_train_pred"] = y_train_pred
+    ARGUMENTS_SETTED = True
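This new module is the hook the rest of the release builds on: registering precomputed predictions through `set_arguments` lets the aggregative quantifiers run without a scikit-learn learner. A minimal usage sketch, with hypothetical toy data and an import path inferred from the file layout (not part of the diff):

import numpy as np
import mlquantify as mq
from mlquantify.methods.aggregative import CC  # path assumed from this package layout

rng = np.random.default_rng(0)
X_train, y_train = rng.random((20, 3)), rng.integers(0, 2, 20)
X_test = rng.random((10, 3))

# Register predictions computed by any external classifier;
# this flips mq.ARGUMENTS_SETTED to True.
mq.set_arguments(y_pred=rng.integers(0, 2, 10))

cc = CC()                  # learner now defaults to None (see aggregative.py below)
cc.fit(X_train, y_train)   # fit_learner() short-circuits: y_pred is already registered
prev = cc.predict(X_test)  # predict_learner() falls back to mq.arguments["y_pred"]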
mlquantify/base.py

@@ -4,7 +4,7 @@ from copy import deepcopy
 import numpy as np
 import joblib
 
-
+import mlquantify as mq
 from .utils.general import parallel, normalize_prevalence
 
 class Quantifier(ABC, BaseEstimator):
@@ -271,6 +271,18 @@ class AggregativeQuantifier(Quantifier, ABC):
 
         ...
 
+    @property
+    def is_probabilistic(self) -> bool:
+        """Check if the learner is probabilistic or not.
+
+        Returns
+        -------
+        bool
+            True if the learner is probabilistic, False otherwise.
+        """
+        return False
+
+
     @property
     def learner(self):
         """Returns the learner_ object.
@@ -289,9 +301,52 @@ class AggregativeQuantifier(Quantifier, ABC):
         value : any
             The value to be assigned to the learner_ attribute.
         """
-
+        assert isinstance(value, BaseEstimator) or mq.ARGUMENTS_SETTED, "learner object is not an estimator, or you may change ARGUMENTS_SETTED to True"
         self.learner_ = value
+
+    def fit_learner(self, X, y):
+        """Fit the learner to the training data.
 
+        Parameters
+        ----------
+        X : array-like
+            Training features.
+        y : array-like
+            Training labels.
+        """
+        if mq.ARGUMENTS_SETTED:
+            if self.is_probabilistic and mq.arguments["posteriors"] is not None:
+                return
+            elif not self.is_probabilistic and mq.arguments["y_pred"] is not None:
+                return
+        else:
+            if not self.learner_fitted:
+                self.learner_.fit(X, y)
+
+    def predict_learner(self, X):
+        """Predict the class labels or probabilities for the given data.
+
+        Parameters
+        ----------
+        X : array-like
+            Test features.
+
+        Returns
+        -------
+        array-like
+            The predicted class labels or probabilities.
+        """
+        if self.learner is not None:
+            if self.is_probabilistic:
+                return self.learner_.predict_proba(X)
+            return self.learner_.predict(X)
+        else:
+            if mq.ARGUMENTS_SETTED:
+                if self.is_probabilistic:
+                    return mq.arguments["posteriors"]
+                return mq.arguments["y_pred"]
+            else:
+                raise ValueError("No learner object was set and no arguments were setted")
 
     def set_params(self, **params):
         """
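The two helpers above centralize how every quantifier in this diff obtains predictions, with `is_probabilistic` switching between `predict`/`y_pred` and `predict_proba`/`posteriors`. A hypothetical minimal subclass, modeled on the PCC implementation further down, sketching how the hooks compose (illustration only, not package code):

import numpy as np
from sklearn.base import BaseEstimator
from mlquantify.base import AggregativeQuantifier  # path assumed from this file

class MeanPosterior(AggregativeQuantifier):
    """Toy quantifier: prevalence = column-wise mean of posteriors (PCC-style)."""

    def __init__(self, learner: BaseEstimator = None):
        self.learner = learner

    @property
    def is_probabilistic(self) -> bool:
        return True  # predict_learner() will use predict_proba / global posteriors

    def _fit_method(self, X, y):
        self.fit_learner(X, y)  # no-op when global posteriors are registered
        return self

    def _predict_method(self, X) -> np.ndarray:
        posteriors = self.predict_learner(X)   # shape (n_samples, n_classes)
        return posteriors.mean(axis=0)         # one prevalence per class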
mlquantify/evaluation/protocol.py

@@ -12,6 +12,8 @@ from ..utils.method import *
 from . import MEASURES
 from ..base import Quantifier
 
+import mlquantify as mq
+
 class Protocol(ABC):
     """Base class for evaluation protocols.
 
@@ -198,7 +200,6 @@ class Protocol(ABC):
         if isinstance(models, list):
            if all(isinstance(model, Quantifier) for model in models):
                return models
-            assert learner is not None, "Learner is required for model methods."
            return [get_method(model)(learner) for model in models]
 
         if isinstance(models, Quantifier):
@@ -214,7 +215,6 @@ class Protocol(ABC):
 
         if models in model_dict:
             return [model(learner) if hasattr(model, "learner") else model() for model in model_dict[models]()]
-
         return [get_method(models)(learner)]
 
     def sout(self, msg):
@@ -240,12 +240,12 @@ class Protocol(ABC):
         self.sout("Fitting models")
 
         args = ((model, X_train, y_train) for model in self.models)
+
         wrapper = tqdm if self.verbose else lambda x, **kwargs: x
 
-        self.models = Parallel(n_jobs=self.n_jobs)( # Parallel processing of models
+        self.models = Parallel(n_jobs=self.n_jobs, backend='threading')( # Parallel processing of models
             delayed(self._delayed_fit)(*arg) for arg in wrapper(args, desc="Fitting models", total=len(self.models))
         )
-
         self.sout("Fit [Done]")
         return self
 
@@ -336,10 +336,14 @@ class Protocol(ABC):
         Quantifier
             Fitted quantification model
         """
+        model_name = model.__class__.__name__
+        if model_name == "Ensemble" and isinstance(model.base_quantifier, Quantifier):
+            model_name = f"{model.__class__.__name__}_{model.base_quantifier.__class__.__name__}_{model.size}"
+
         start = time()
         model = model.fit(X=X_train, y=y_train)
         duration = time() - start
-        print(f"\tFitted {
+        print(f"\tFitted {model_name} in {duration:.3f} seconds")
         return model
 
 
@@ -520,6 +524,8 @@ class APP(Protocol):
             Tuple containing the iteration, model name, prev, prev_pred, and batch size.
         """
         model_name = model.__class__.__name__
+        if model_name == "Ensemble" and isinstance(model.base_quantifier, Quantifier):
+            model_name = f"{model.__class__.__name__}_{model.base_quantifier.__class__.__name__}_{model.size}"
 
         if verbose:
             print(f'\t {model_name} with {batch_size} instances and prev {prev}')
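Both this hunk and the `parallel` helper at the end of the diff switch joblib to the threading backend. The rationale is not stated in the diff, but a plausible one: thread workers share memory with the caller, so fitted models come back as the same in-process objects rather than pickled copies from worker processes. A self-contained illustration of that property:

from joblib import Parallel, delayed

class Model:
    def fit(self):
        self.fitted = True
        return self

models = [Model() for _ in range(4)]

# backend="threading": no pickling, shared memory; the returned objects
# are the very instances that were submitted.
fitted = Parallel(n_jobs=2, backend="threading")(delayed(m.fit)() for m in models)
assert all(a is b for a, b in zip(models, fitted))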
mlquantify/methods/aggregative.py

@@ -7,6 +7,7 @@ from ..utils.method import *
 from sklearn.base import BaseEstimator
 from sklearn.metrics import confusion_matrix
 from sklearn.model_selection import train_test_split
+import mlquantify as mq
 
 
 
@@ -75,8 +76,7 @@ class CC(AggregativeQuantifier):
     {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None):
         self.learner = learner
 
     def _fit_method(self, X, y):
@@ -95,8 +95,7 @@ class CC(AggregativeQuantifier):
         self : CC
             The instance of the CC class.
         """
-
-        self.learner.fit(X, y)
+        self.fit_learner(X, y)
         return self
 
     def _predict_method(self, X) -> np.ndarray:
@@ -114,7 +113,7 @@ class CC(AggregativeQuantifier):
         array-like
             An array containing the prevalence of each class.
         """
-        predicted_labels = self.
+        predicted_labels = self.predict_learner(X)
 
         # Count occurrences of each class in the predictions
         class_counts = np.array([np.count_nonzero(predicted_labels == _class) for _class in self.classes])
@@ -147,13 +146,6 @@ class EMQ(AggregativeQuantifier):
     priors : array-like
         Prior probabilities of the classes, estimated from the training data.
 
-    Constants
-    ---------
-    MAX_ITER : int
-        The maximum number of iterations allowed for the EM algorithm (default: 1000).
-    EPSILON : float
-        Convergence threshold for the EM algorithm (default: 1e-6).
-
     References
     ----------
     SAERENS, Marco; LATINNE, Patrice; DECAESTECKER, Christine. Adjusting the outputs of a classifier
@@ -184,8 +176,11 @@ class EMQ(AggregativeQuantifier):
     MAX_ITER = 1000
     EPSILON = 1e-6
 
-
-
+    @property
+    def is_probabilistic(self) -> bool:
+        return True
+
+    def __init__(self, learner: BaseEstimator=None):
         self.learner = learner
         self.priors = None
 
@@ -205,9 +200,8 @@ class EMQ(AggregativeQuantifier):
         self : EMQ
             The fitted instance of EMQ.
         """
-
-
-
+        self.fit_learner(X, y)
+
         counts = np.array([np.count_nonzero(y == _class) for _class in self.classes])
         self.priors = counts / len(y)
 
@@ -227,7 +221,7 @@ class EMQ(AggregativeQuantifier):
         dict
             A dictionary with class labels as keys and their prevalence as values.
         """
-        posteriors = self.
+        posteriors = self.predict_learner(X)
         prevalences, _ = self.EM(self.priors, posteriors)
 
         return prevalences
@@ -250,7 +244,7 @@ class EMQ(AggregativeQuantifier):
         np.ndarray
             Adjusted posterior probabilities.
         """
-        posteriors = self.
+        posteriors = self.predict_learner(X)
         _, posteriors = self.EM(self.priors, posteriors, epsilon, max_iter)
         return posteriors
 
@@ -360,8 +354,13 @@ class FM(AggregativeQuantifier):
     >>> get_real_prev(y_test)
     {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
     """
-
-
+
+    @property
+    def is_probabilistic(self) -> bool:
+        return True
+
+
+    def __init__(self, learner: BaseEstimator=None):
         self.learner = learner
         self.CM = None
 
@@ -386,11 +385,14 @@ class FM(AggregativeQuantifier):
         The fitted instance of FM.
         """
         # Get predicted labels and probabilities using cross-validation
-        y_labels
+        if mq.arguments["y_labels"] is not None and mq.arguments["posteriors"] is not None:
+            y_labels = mq.arguments["y_labels"]
+            probabilities = mq.arguments["posteriors"]
+        else:
+            y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
 
         # Fit the learner if it hasn't been fitted already
-
-        self.learner.fit(X, y)
+        self.fit_learner(X, y)
 
         # Initialize the confusion matrix
         CM = np.zeros((self.n_class, self.n_class))
@@ -426,7 +428,7 @@ class FM(AggregativeQuantifier):
         dict
             A dictionary with class labels as keys and their prevalence as values.
         """
-        posteriors = self.
+        posteriors = self.predict_learner(X)
 
         # Calculate the estimated prevalences in the test set
         prevs_estim = np.sum(posteriors > self.priors, axis=0) / posteriors.shape[0]
@@ -518,8 +520,7 @@ class GAC(AggregativeQuantifier):
     """
 
 
-    def __init__(self, learner: BaseEstimator, train_size:float=0.6, random_state:int=None):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None, train_size:float=0.6, random_state:int=None):
         self.learner = learner
         self.cond_prob_matrix = None
         self.train_size = train_size
@@ -546,14 +547,14 @@ class GAC(AggregativeQuantifier):
         if isinstance(y, np.ndarray):
             y = pd.Series(y)
 
-        if self.learner_fitted:
-            y_pred = self.
+        if self.learner_fitted or self.learner is None:
+            y_pred = mq.arguments["y_train_pred"] if mq.arguments["y_train_pred"] is not None else self.predict_learner(X)
             y_label = y
         else:
             X_train, X_val, y_train, y_val = train_test_split(
                 X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
             )
-            self.
+            self.fit_learner(X_train, y_train)
             y_label = y_val
             y_pred = self.learner.predict(X_val)
 
@@ -574,7 +575,7 @@ class GAC(AggregativeQuantifier):
         dict
             Adjusted class prevalences.
         """
-        y_pred = self.
+        y_pred = self.predict_learner(X)
         _, counts = np.unique(y_pred, return_counts=True)
         predicted_prevalences = counts / counts.sum()
         adjusted_prevalences = self.solve_adjustment(self.cond_prob_matrix, predicted_prevalences)
@@ -702,8 +703,7 @@ class GPAC(AggregativeQuantifier):
     {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
     """
 
-    def __init__(self, learner: BaseEstimator, train_size: float = 0.6, random_state: int = None):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None, train_size: float = 0.6, random_state: int = None):
         self.learner = learner
         self.cond_prob_matrix = None
         self.train_size = train_size
@@ -730,16 +730,16 @@ class GPAC(AggregativeQuantifier):
         if isinstance(y, np.ndarray):
             y = pd.Series(y)
 
-        if self.learner_fitted:
-            y_pred = self.
+        if self.learner_fitted or self.learner is None:
+            y_pred = mq.arguments["y_train_pred"] if mq.arguments["y_train_pred"] is not None else self.predict_learner(X)
             y_labels = y
         else:
             X_train, X_val, y_train, y_val = train_test_split(
                 X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
             )
-            self.
+            self.fit_learner(X_train, y_train)
             y_labels = y_val
-            y_pred = self.
+            y_pred = self.predict_learner(X_val)
 
         # Compute the conditional probability matrix
         self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, y_labels, y_pred)
@@ -759,7 +759,7 @@ class GPAC(AggregativeQuantifier):
         dict
             Adjusted class prevalences.
         """
-        predictions = self.
+        predictions = self.predict_learner(X)
 
         # Compute the distribution of predictions
         predicted_prevalences = np.zeros(self.n_class)
@@ -851,9 +851,11 @@ class PCC(AggregativeQuantifier):
     >>> get_real_prev(y_test)
     {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
     """
+    @property
+    def is_probabilistic(self) -> bool:
+        return True
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None):
         self.learner = learner
 
     def _fit_method(self, X, y):
@@ -872,8 +874,7 @@ class PCC(AggregativeQuantifier):
         self : PCC
             Fitted quantifier object.
         """
-
-        self.learner.fit(X, y)
+        self.fit_learner(X, y)
         return self
 
     def _predict_method(self, X) -> np.ndarray:
@@ -896,7 +897,7 @@ class PCC(AggregativeQuantifier):
         # Calculate the prevalence for each class
         for class_index in range(self.n_class):
             # Get the predicted probabilities for the current class
-            class_probabilities = self.
+            class_probabilities = self.predict_learner(X)[:, class_index]
 
             # Compute the average probability (prevalence) for the current class
             mean_prev = np.mean(class_probabilities)
@@ -954,8 +955,7 @@ class PWK(AggregativeQuantifier):
     {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None):
         self.learner = learner
 
     def _fit_method(self, X, y):
@@ -974,8 +974,7 @@ class PWK(AggregativeQuantifier):
         self : PWK
             Fitted quantifier object.
         """
-
-        self.learner.fit(X, y)
+        self.fit_learner(X, y)
         return self
 
     def _predict_method(self, X) -> dict:
@@ -993,7 +992,7 @@ class PWK(AggregativeQuantifier):
             A dictionary mapping each class label to its estimated prevalence.
         """
         # Predict class labels for the given data
-        predicted_labels = self.
+        predicted_labels = self.predict_learner(X)
 
         # Compute the distribution of predicted labels
         unique_labels, label_counts = np.unique(predicted_labels, return_counts=True)
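FM, like the mixture models and threshold methods below, now reads `mq.arguments` for precomputed cross-validation scores before falling back to `get_scores`. Because the same global `posteriors` slot is what `predict_learner` returns at prediction time, one plausible workflow (inferred from the code above, not documented) re-registers the arguments between fit and predict:

import numpy as np
import mlquantify as mq
from mlquantify.methods.aggregative import FM  # import path assumed

rng = np.random.default_rng(0)
X_train, y_train = rng.random((30, 3)), rng.integers(0, 2, 30)
X_test = rng.random((10, 3))

def toy_posteriors(n):
    p = rng.random((n, 2))
    return p / p.sum(axis=1, keepdims=True)  # rows sum to 1

fm = FM()
mq.set_arguments(y_labels=y_train, posteriors=toy_posteriors(30))  # training scores
fm.fit(X_train, y_train)           # fit_learner() is a no-op: posteriors are set

mq.set_arguments(posteriors=toy_posteriors(10))  # swap in the test-set posteriors
prev = fm.predict(X_test)          # predict_learner() returns the registered posteriors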
mlquantify/methods/mixture_models.py

@@ -6,6 +6,7 @@ from ..base import AggregativeQuantifier
 
 from ..utils.general import get_real_prev
 from ..utils.method import *
+import mlquantify as mq
 
 
 
@@ -69,7 +70,7 @@ class MixtureModel(AggregativeQuantifier):
     {0: 0.37719298245614036, 1: 0.6228070175438597}
     """
 
-    def __init__(self, learner: BaseEstimator):
+    def __init__(self, learner: BaseEstimator=None):
         self.learner = learner
         self.pos_scores = None
         self.neg_scores = None
@@ -85,6 +86,10 @@ class MixtureModel(AggregativeQuantifier):
             Always returns False, as MixtureModel supports only binary classification.
         """
         return False
+
+    @property
+    def is_probabilistic(self) -> bool:
+        return True
 
     def _fit_method(self, X, y):
         """
@@ -102,11 +107,15 @@ class MixtureModel(AggregativeQuantifier):
         self : MixtureModel
             The fitted MixtureModel instance.
         """
-
+        if mq.arguments["y_labels"] is not None and mq.arguments["posteriors"] is not None:
+            y_labels = mq.arguments["y_labels"]
+            probabilities = mq.arguments["posteriors"]
+        else:
+            y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
 
         # Separate positive and negative scores based on labels
-        self.pos_scores = probabilities[
-        self.neg_scores = probabilities[
+        self.pos_scores = probabilities[y_labels == self.classes[1]][:, 1]
+        self.neg_scores = probabilities[y_labels == self.classes[0]][:, 1]
 
         return self
 
@@ -125,7 +134,7 @@ class MixtureModel(AggregativeQuantifier):
         An array containing the prevalence for each class.
         """
         # Get the predicted probabilities for the positive class
-        test_scores = self.
+        test_scores = self.predict_learner(X)[:, 1]
 
         # Compute the prevalence using the mixture model
         prevalence = np.clip(self._compute_prevalence(test_scores), 0, 1)
@@ -256,9 +265,8 @@ class DyS(MixtureModel):
     {0: 0.37719298245614036, 1: 0.6228070175438597}
     """
 
-    def __init__(self, learner: BaseEstimator, measure: str = "topsoe", bins_size: np.ndarray = None):
+    def __init__(self, learner: BaseEstimator=None, measure: str = "topsoe", bins_size: np.ndarray = None):
         assert measure in ["hellinger", "topsoe", "probsymm"], "Invalid measure."
-        assert isinstance(learner, BaseEstimator), "Learner must be a valid estimator."
         super().__init__(learner)
 
         # Set up bins_size
@@ -305,7 +313,7 @@ class DyS(MixtureModel):
         distance : float
             The minimum distance value.
         """
-        test_scores = self.
+        test_scores = self.predict_learner(X_test)
         prevs = self.GetMinDistancesDyS(test_scores)
 
         size = len(prevs)
@@ -455,9 +463,8 @@ class DySsyn(MixtureModel):
     """
 
 
-    def __init__(self, learner:BaseEstimator, measure:str="topsoe", merge_factor:np.ndarray=None, bins_size:np.ndarray=None, alpha_train:float=0.5, n:int=None):
+    def __init__(self, learner:BaseEstimator=None, measure:str="topsoe", merge_factor:np.ndarray=None, bins_size:np.ndarray=None, alpha_train:float=0.5, n:int=None):
         assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
         super().__init__(learner)
 
         # Set up bins_size
@@ -494,8 +501,7 @@ class DySsyn(MixtureModel):
         self : DySsyn
             The fitted DySsyn instance.
         """
-
-        self.learner.fit(X, y)
+        self.fit_learner(X, y)
 
         self.alpha_train = list(get_real_prev(y).values())[1]
 
@@ -538,7 +544,7 @@ class DySsyn(MixtureModel):
         distance : float
             Minimum distance value for the test data.
         """
-        test_scores = self.
+        test_scores = self.predict_learner(X_test)
 
         distances = self.GetMinDistancesDySsyn(test_scores)
 
@@ -679,8 +685,7 @@ class HDy(MixtureModel):
     {0: 0.37719298245614036, 1: 0.6228070175438597}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "Learner must be a valid estimator."
+    def __init__(self, learner: BaseEstimator=None):
         super().__init__(learner)
 
     def _compute_prevalence(self, test_scores: np.ndarray) -> float:
@@ -717,7 +722,7 @@ class HDy(MixtureModel):
         distance : float
             The minimum distance value.
         """
-        test_scores = self.
+        test_scores = self.predict_learner(X_test)
         _, distances = self.GetMinDistancesHDy(test_scores)
 
         size = len(distances)
@@ -833,8 +838,7 @@ class SMM(MixtureModel):
     {0: 0.37719298245614036, 1: 0.6228070175438597}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "Learner must be a valid estimator."
+    def __init__(self, learner: BaseEstimator=None):
         super().__init__(learner)
 
     def _compute_prevalence(self, test_scores: np.ndarray) -> float:
@@ -909,8 +913,7 @@ class SORD(MixtureModel):
     {0: 0.37719298245614036, 1: 0.6228070175438597}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "Learner must be a valid estimator."
+    def __init__(self, learner: BaseEstimator=None):
        super().__init__(learner)
 
        self.best_distance_index = None  # Stores the index of the best alpha value
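The completed score-splitting lines in `MixtureModel._fit_method` select the positive-class probability column for each label group. A standalone illustration of that indexing with toy values:

import numpy as np

y_labels = np.array([0, 1, 1, 0])
probabilities = np.array([[0.9, 0.1],
                          [0.2, 0.8],
                          [0.4, 0.6],
                          [0.7, 0.3]])
classes = [0, 1]

# Rows whose label is the positive class, then the positive-class column.
pos_scores = probabilities[y_labels == classes[1]][:, 1]  # array([0.8, 0.6])
neg_scores = probabilities[y_labels == classes[0]][:, 1]  # array([0.1, 0.3])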
mlquantify/methods/threshold_optimization.py

@@ -4,6 +4,7 @@ from sklearn.base import BaseEstimator
 
 from ..base import AggregativeQuantifier
 from ..utils.method import adjust_threshold, get_scores
+import mlquantify as mq
 
 
 
@@ -67,13 +68,28 @@ class ThresholdOptimization(AggregativeQuantifier):
     >>> y_pred = mtm.predict(X_test)
     """
 
-    def __init__(self, learner: BaseEstimator):
+    def __init__(self, learner: BaseEstimator=None):
         self.learner = learner
         self.threshold = None
         self.cc_output = None
         self.tpr = None
         self.fpr = None
 
+    @property
+    def is_probabilistic(self) -> bool:
+        """
+        Returns whether the method is probabilistic.
+
+        This method is used to determine whether the quantification method is probabilistic,
+        meaning it uses class-conditional probabilities to estimate class prevalences.
+
+        Returns
+        -------
+        bool
+            True, indicating that this method is probabilistic.
+        """
+        return True
+
     @property
     def is_multiclass(self) -> bool:
         """
@@ -106,7 +122,11 @@ class ThresholdOptimization(AggregativeQuantifier):
         The fitted quantifier object with the best threshold, TPR, and FPR.
         """
         # Get predicted labels and probabilities
-        y_labels
+        if mq.arguments["y_labels"] is not None and mq.arguments["posteriors"] is not None:
+            y_labels = mq.arguments["y_labels"]
+            probabilities = mq.arguments["posteriors"]
+        else:
+            y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
 
         # Adjust thresholds and compute true and false positive rates
         thresholds, tprs, fprs = adjust_threshold(y_labels, probabilities[:, 1], self.classes)
@@ -131,7 +151,7 @@ class ThresholdOptimization(AggregativeQuantifier):
         An array of predicted prevalences for the classes.
         """
         # Get predicted probabilities for the positive class
-        probabilities = self.
+        probabilities = self.predict_learner(X)[:, 1]
 
         # Compute the classification count output based on the threshold
         self.cc_output = len(probabilities[probabilities >= self.threshold]) / len(probabilities)
@@ -231,8 +251,7 @@ class ACC(ThresholdOptimization):
     {0: 0.3991228070175439, 1: 0.6008771929824561}
     """
 
-    def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
         super().__init__(learner)
         self.threshold = threshold
 
@@ -325,8 +344,7 @@ class MAX(ThresholdOptimization):
     {0: 0.3991228070175439, 1: 0.6008771929824561}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None):
         super().__init__(learner)
 
     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
@@ -428,8 +446,7 @@ class MS(ThresholdOptimization):
     {0: 0.3991228070175439, 1: 0.6008771929824561}
     """
 
-    def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
         super().__init__(learner)
         self.threshold = threshold
 
@@ -528,8 +545,7 @@ class MS2(ThresholdOptimization):
     {0: 0.3991228070175439, 1: 0.6008771929824561}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None):
         super().__init__(learner)
 
     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
@@ -639,8 +655,7 @@ class PACC(ThresholdOptimization):
     {0: 0.3991228070175439, 1: 0.6008771929824561}
     """
 
-    def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
         super().__init__(learner)
         self.threshold = threshold
 
@@ -675,7 +690,7 @@ class PACC(ThresholdOptimization):
         prevalences = {}
 
         # Calculate probabilities for the positive class
-        probabilities = self.
+        probabilities = self.predict_learner(X)[:, 1]
 
         # Compute the mean score for the positive class
         mean_scores = np.mean(probabilities)
@@ -731,13 +746,6 @@ class PACC(ThresholdOptimization):
 
 
 
-    def best_tprfpr(self, thresholds:np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-        tpr = tprs[thresholds == self.threshold][0]
-        fpr = fprs[thresholds == self.threshold][0]
-        return (self.threshold, tpr, fpr)
-
-
-
 
 
 
@@ -797,8 +805,7 @@ class T50(ThresholdOptimization):
     {0: 0.3991228070175439, 1: 0.6008771929824561}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None):
         super().__init__(learner)
 
     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
@@ -906,8 +913,7 @@ class X_method(ThresholdOptimization):
     {0: 0.3991228070175439, 1: 0.6008771929824561}
     """
 
-    def __init__(self, learner: BaseEstimator):
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+    def __init__(self, learner: BaseEstimator=None):
         super().__init__(learner)
 
     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
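For context on why `ThresholdOptimization` tracks `cc_output`, `tpr`, and `fpr`: threshold methods apply the standard adjusted-count correction to the raw classify-and-count output. The correction itself is untouched by this diff; a sketch of the textbook formula (context only, not package code):

def adjusted_prevalence(cc_output: float, tpr: float, fpr: float) -> float:
    """Adjusted classify-and-count: p = (cc_output - fpr) / (tpr - fpr)."""
    if tpr == fpr:
        return cc_output  # degenerate case: the correction is undefined
    return min(max((cc_output - fpr) / (tpr - fpr), 0.0), 1.0)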
mlquantify/utils/general.py

@@ -256,7 +256,7 @@ def parallel(func, elements, n_jobs: int = 1, *args):
    list
        List of results from running the function on each element.
    """
-    return Parallel(n_jobs=n_jobs)(
+    return Parallel(n_jobs=n_jobs, backend="threading")(
        delayed(func)(e, *args) for e in elements
    )
 