mlquantify 0.0.11.8__tar.gz → 0.0.11.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/PKG-INFO +1 -1
  2. mlquantify-0.0.11.10/mlquantify/__init__.py +30 -0
  3. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/base.py +57 -2
  4. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/evaluation/protocol.py +11 -5
  5. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/methods/aggregative.py +47 -48
  6. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/methods/mixture_models.py +23 -20
  7. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/methods/threshold_optimization.py +31 -25
  8. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/utils/general.py +1 -1
  9. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify.egg-info/PKG-INFO +1 -1
  10. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/setup.py +1 -1
  11. mlquantify-0.0.11.8/mlquantify/__init__.py +0 -9
  12. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/README.md +0 -0
  13. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/classification/__init__.py +0 -0
  14. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/classification/methods.py +0 -0
  15. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/evaluation/__init__.py +0 -0
  16. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/evaluation/measures.py +0 -0
  17. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/methods/__init__.py +0 -0
  18. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/methods/meta.py +0 -0
  19. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/methods/non_aggregative.py +0 -0
  20. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/model_selection.py +0 -0
  21. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/plots.py +0 -0
  22. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/utils/__init__.py +0 -0
  23. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify/utils/method.py +0 -0
  24. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify.egg-info/SOURCES.txt +0 -0
  25. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify.egg-info/dependency_links.txt +0 -0
  26. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify.egg-info/requires.txt +0 -0
  27. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/mlquantify.egg-info/top_level.txt +0 -0
  28. {mlquantify-0.0.11.8 → mlquantify-0.0.11.10}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mlquantify
3
- Version: 0.0.11.8
3
+ Version: 0.0.11.10
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -0,0 +1,30 @@
1
+ "mlquantify, a Python package for quantification"
2
+
3
+ from . import base
4
+ from . import model_selection
5
+ from . import plots
6
+ from . import classification
7
+ from . import evaluation
8
+ from . import methods
9
+ from . import utils
10
+
11
+ ARGUMENTS_SETTED = False
12
+
13
+ arguments = {
14
+ "y_pred": None,
15
+ "posteriors_train": None,
16
+ "posteriors_test": None,
17
+ "y_labels": None,
18
+ "y_pred_train": None,
19
+ }
20
+
21
+ def set_arguments(y_pred=None, posteriors_train=None, posteriors_test=None, y_labels=None, y_pred_train=None):
22
+ global ARGUMENTS_SETTED
23
+ global arguments
24
+ arguments["y_pred"] = y_pred
25
+ arguments["posteriors_train"] = posteriors_train
26
+ arguments["posteriors_test"] = posteriors_test
27
+ arguments["y_labels"] = y_labels
28
+ arguments["y_pred_train"] = y_pred_train
29
+
30
+ ARGUMENTS_SETTED = True
@@ -4,7 +4,7 @@ from copy import deepcopy
4
4
  import numpy as np
5
5
  import joblib
6
6
 
7
-
7
+ import mlquantify as mq
8
8
  from .utils.general import parallel, normalize_prevalence
9
9
 
10
10
  class Quantifier(ABC, BaseEstimator):
@@ -271,6 +271,18 @@ class AggregativeQuantifier(Quantifier, ABC):
271
271
 
272
272
  ...
273
273
 
274
+ @property
275
+ def is_probabilistic(self) -> bool:
276
+ """Check if the learner is probabilistic or not.
277
+
278
+ Returns
279
+ -------
280
+ bool
281
+ True if the learner is probabilistic, False otherwise.
282
+ """
283
+ return False
284
+
285
+
274
286
  @property
275
287
  def learner(self):
276
288
  """Returns the learner_ object.
@@ -289,9 +301,52 @@ class AggregativeQuantifier(Quantifier, ABC):
289
301
  value : any
290
302
  The value to be assigned to the learner_ attribute.
291
303
  """
292
-
304
+ assert isinstance(value, BaseEstimator) or mq.ARGUMENTS_SETTED, "learner object is not an estimator, or you may change ARGUMENTS_SETTED to True"
293
305
  self.learner_ = value
306
+
307
+ def fit_learner(self, X, y):
308
+ """Fit the learner to the training data.
294
309
 
310
+ Parameters
311
+ ----------
312
+ X : array-like
313
+ Training features.
314
+ y : array-like
315
+ Training labels.
316
+ """
317
+ if mq.ARGUMENTS_SETTED:
318
+ if self.is_probabilistic and mq.arguments["posteriors_test"] is not None:
319
+ return
320
+ elif not self.is_probabilistic and mq.arguments["y_pred"] is not None:
321
+ return
322
+ else:
323
+ if not self.learner_fitted:
324
+ self.learner_.fit(X, y)
325
+
326
+ def predict_learner(self, X):
327
+ """Predict the class labels or probabilities for the given data.
328
+
329
+ Parameters
330
+ ----------
331
+ X : array-like
332
+ Test features.
333
+
334
+ Returns
335
+ -------
336
+ array-like
337
+ The predicted class labels or probabilities.
338
+ """
339
+ if self.learner is not None:
340
+ if self.is_probabilistic:
341
+ return self.learner_.predict_proba(X)
342
+ return self.learner_.predict(X)
343
+ else:
344
+ if mq.ARGUMENTS_SETTED:
345
+ if self.is_probabilistic:
346
+ return mq.arguments["posteriors_test"]
347
+ return mq.arguments["y_pred"]
348
+ else:
349
+ raise ValueError("No learner object was set and no arguments were setted")
295
350
 
296
351
  def set_params(self, **params):
297
352
  """
@@ -12,6 +12,8 @@ from ..utils.method import *
12
12
  from . import MEASURES
13
13
  from ..base import Quantifier
14
14
 
15
+ import mlquantify as mq
16
+
15
17
  class Protocol(ABC):
16
18
  """Base class for evaluation protocols.
17
19
 
@@ -198,7 +200,6 @@ class Protocol(ABC):
198
200
  if isinstance(models, list):
199
201
  if all(isinstance(model, Quantifier) for model in models):
200
202
  return models
201
- assert learner is not None, "Learner is required for model methods."
202
203
  return [get_method(model)(learner) for model in models]
203
204
 
204
205
  if isinstance(models, Quantifier):
@@ -214,7 +215,6 @@ class Protocol(ABC):
214
215
 
215
216
  if models in model_dict:
216
217
  return [model(learner) if hasattr(model, "learner") else model() for model in model_dict[models]()]
217
-
218
218
  return [get_method(models)(learner)]
219
219
 
220
220
  def sout(self, msg):
@@ -240,12 +240,12 @@ class Protocol(ABC):
240
240
  self.sout("Fitting models")
241
241
 
242
242
  args = ((model, X_train, y_train) for model in self.models)
243
+
243
244
  wrapper = tqdm if self.verbose else lambda x, **kwargs: x
244
245
 
245
- self.models = Parallel(n_jobs=self.n_jobs)( # Parallel processing of models
246
+ self.models = Parallel(n_jobs=self.n_jobs, backend='threading')( # Parallel processing of models
246
247
  delayed(self._delayed_fit)(*arg) for arg in wrapper(args, desc="Fitting models", total=len(self.models))
247
248
  )
248
-
249
249
  self.sout("Fit [Done]")
250
250
  return self
251
251
 
@@ -336,10 +336,14 @@ class Protocol(ABC):
336
336
  Quantifier
337
337
  Fitted quantification model
338
338
  """
339
+ model_name = model.__class__.__name__
340
+ if model_name == "Ensemble" and isinstance(model.base_quantifier, Quantifier):
341
+ model_name = f"{model.__class__.__name__}_{model.base_quantifier.__class__.__name__}_{model.size}"
342
+
339
343
  start = time()
340
344
  model = model.fit(X=X_train, y=y_train)
341
345
  duration = time() - start
342
- print(f"\tFitted {model.__class__.__name__} in {duration:.3f} seconds")
346
+ print(f"\tFitted {model_name} in {duration:.3f} seconds")
343
347
  return model
344
348
 
345
349
 
@@ -520,6 +524,8 @@ class APP(Protocol):
520
524
  Tuple containing the iteration, model name, prev, prev_pred, and batch size.
521
525
  """
522
526
  model_name = model.__class__.__name__
527
+ if model_name == "Ensemble" and isinstance(model.base_quantifier, Quantifier):
528
+ model_name = f"{model.__class__.__name__}_{model.base_quantifier.__class__.__name__}_{model.size}"
523
529
 
524
530
  if verbose:
525
531
  print(f'\t {model_name} with {batch_size} instances and prev {prev}')
@@ -7,6 +7,7 @@ from ..utils.method import *
7
7
  from sklearn.base import BaseEstimator
8
8
  from sklearn.metrics import confusion_matrix
9
9
  from sklearn.model_selection import train_test_split
10
+ import mlquantify as mq
10
11
 
11
12
 
12
13
 
@@ -75,8 +76,7 @@ class CC(AggregativeQuantifier):
75
76
  {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
76
77
  """
77
78
 
78
- def __init__(self, learner: BaseEstimator):
79
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
79
+ def __init__(self, learner: BaseEstimator=None):
80
80
  self.learner = learner
81
81
 
82
82
  def _fit_method(self, X, y):
@@ -95,8 +95,7 @@ class CC(AggregativeQuantifier):
95
95
  self : CC
96
96
  The instance of the CC class.
97
97
  """
98
- if not self.learner_fitted:
99
- self.learner.fit(X, y)
98
+ self.fit_learner(X, y)
100
99
  return self
101
100
 
102
101
  def _predict_method(self, X) -> np.ndarray:
@@ -114,7 +113,7 @@ class CC(AggregativeQuantifier):
114
113
  array-like
115
114
  An array containing the prevalence of each class.
116
115
  """
117
- predicted_labels = self.learner.predict(X)
116
+ predicted_labels = self.predict_learner(X)
118
117
 
119
118
  # Count occurrences of each class in the predictions
120
119
  class_counts = np.array([np.count_nonzero(predicted_labels == _class) for _class in self.classes])
@@ -147,13 +146,6 @@ class EMQ(AggregativeQuantifier):
147
146
  priors : array-like
148
147
  Prior probabilities of the classes, estimated from the training data.
149
148
 
150
- Constants
151
- ---------
152
- MAX_ITER : int
153
- The maximum number of iterations allowed for the EM algorithm (default: 1000).
154
- EPSILON : float
155
- Convergence threshold for the EM algorithm (default: 1e-6).
156
-
157
149
  References
158
150
  ----------
159
151
  SAERENS, Marco; LATINNE, Patrice; DECAESTECKER, Christine. Adjusting the outputs of a classifier
@@ -184,8 +176,11 @@ class EMQ(AggregativeQuantifier):
184
176
  MAX_ITER = 1000
185
177
  EPSILON = 1e-6
186
178
 
187
- def __init__(self, learner: BaseEstimator):
188
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
179
+ @property
180
+ def is_probabilistic(self) -> bool:
181
+ return True
182
+
183
+ def __init__(self, learner: BaseEstimator=None):
189
184
  self.learner = learner
190
185
  self.priors = None
191
186
 
@@ -205,9 +200,8 @@ class EMQ(AggregativeQuantifier):
205
200
  self : EMQ
206
201
  The fitted instance of EMQ.
207
202
  """
208
- if not self.learner_fitted:
209
- self.learner.fit(X, y)
210
-
203
+ self.fit_learner(X, y)
204
+
211
205
  counts = np.array([np.count_nonzero(y == _class) for _class in self.classes])
212
206
  self.priors = counts / len(y)
213
207
 
@@ -227,7 +221,7 @@ class EMQ(AggregativeQuantifier):
227
221
  dict
228
222
  A dictionary with class labels as keys and their prevalence as values.
229
223
  """
230
- posteriors = self.learner.predict_proba(X)
224
+ posteriors = self.predict_learner(X)
231
225
  prevalences, _ = self.EM(self.priors, posteriors)
232
226
 
233
227
  return prevalences
@@ -250,7 +244,7 @@ class EMQ(AggregativeQuantifier):
250
244
  np.ndarray
251
245
  Adjusted posterior probabilities.
252
246
  """
253
- posteriors = self.learner.predict_proba(X)
247
+ posteriors = self.predict_learner(X)
254
248
  _, posteriors = self.EM(self.priors, posteriors, epsilon, max_iter)
255
249
  return posteriors
256
250
 
@@ -360,8 +354,13 @@ class FM(AggregativeQuantifier):
360
354
  >>> get_real_prev(y_test)
361
355
  {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
362
356
  """
363
- def __init__(self, learner: BaseEstimator):
364
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
357
+
358
+ @property
359
+ def is_probabilistic(self) -> bool:
360
+ return True
361
+
362
+
363
+ def __init__(self, learner: BaseEstimator=None):
365
364
  self.learner = learner
366
365
  self.CM = None
367
366
 
@@ -386,11 +385,14 @@ class FM(AggregativeQuantifier):
386
385
  The fitted instance of FM.
387
386
  """
388
387
  # Get predicted labels and probabilities using cross-validation
389
- y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
388
+ if mq.arguments["y_labels"] is not None and mq.arguments["posteriors_train"] is not None:
389
+ y_labels = mq.arguments["y_labels"]
390
+ probabilities = mq.arguments["posteriors_train"]
391
+ else:
392
+ y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
390
393
 
391
394
  # Fit the learner if it hasn't been fitted already
392
- if not self.learner_fitted:
393
- self.learner.fit(X, y)
395
+ self.fit_learner(X, y)
394
396
 
395
397
  # Initialize the confusion matrix
396
398
  CM = np.zeros((self.n_class, self.n_class))
@@ -426,7 +428,7 @@ class FM(AggregativeQuantifier):
426
428
  dict
427
429
  A dictionary with class labels as keys and their prevalence as values.
428
430
  """
429
- posteriors = self.learner.predict_proba(X)
431
+ posteriors = self.predict_learner(X)
430
432
 
431
433
  # Calculate the estimated prevalences in the test set
432
434
  prevs_estim = np.sum(posteriors > self.priors, axis=0) / posteriors.shape[0]
@@ -518,8 +520,7 @@ class GAC(AggregativeQuantifier):
518
520
  """
519
521
 
520
522
 
521
- def __init__(self, learner: BaseEstimator, train_size:float=0.6, random_state:int=None):
522
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
523
+ def __init__(self, learner: BaseEstimator=None, train_size:float=0.6, random_state:int=None):
523
524
  self.learner = learner
524
525
  self.cond_prob_matrix = None
525
526
  self.train_size = train_size
@@ -546,14 +547,14 @@ class GAC(AggregativeQuantifier):
546
547
  if isinstance(y, np.ndarray):
547
548
  y = pd.Series(y)
548
549
 
549
- if self.learner_fitted:
550
- y_pred = self.learner.predict(X)
550
+ if self.learner_fitted or self.learner is None:
551
+ y_pred = mq.arguments["y_pred_train"] if mq.arguments["y_pred_train"] is not None else self.predict_learner(X)
551
552
  y_label = y
552
553
  else:
553
554
  X_train, X_val, y_train, y_val = train_test_split(
554
555
  X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
555
556
  )
556
- self.learner.fit(X_train, y_train)
557
+ self.fit_learner(X_train, y_train)
557
558
  y_label = y_val
558
559
  y_pred = self.learner.predict(X_val)
559
560
 
@@ -574,7 +575,7 @@ class GAC(AggregativeQuantifier):
574
575
  dict
575
576
  Adjusted class prevalences.
576
577
  """
577
- y_pred = self.learner.predict(X)
578
+ y_pred = self.predict_learner(X)
578
579
  _, counts = np.unique(y_pred, return_counts=True)
579
580
  predicted_prevalences = counts / counts.sum()
580
581
  adjusted_prevalences = self.solve_adjustment(self.cond_prob_matrix, predicted_prevalences)
@@ -702,8 +703,7 @@ class GPAC(AggregativeQuantifier):
702
703
  {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
703
704
  """
704
705
 
705
- def __init__(self, learner: BaseEstimator, train_size: float = 0.6, random_state: int = None):
706
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
706
+ def __init__(self, learner: BaseEstimator=None, train_size: float = 0.6, random_state: int = None):
707
707
  self.learner = learner
708
708
  self.cond_prob_matrix = None
709
709
  self.train_size = train_size
@@ -730,16 +730,16 @@ class GPAC(AggregativeQuantifier):
730
730
  if isinstance(y, np.ndarray):
731
731
  y = pd.Series(y)
732
732
 
733
- if self.learner_fitted:
734
- y_pred = self.learner.predict(X)
733
+ if self.learner_fitted or self.learner is None:
734
+ y_pred = mq.arguments["y_pred_train"] if mq.arguments["y_pred_train"] is not None else self.predict_learner(X)
735
735
  y_labels = y
736
736
  else:
737
737
  X_train, X_val, y_train, y_val = train_test_split(
738
738
  X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
739
739
  )
740
- self.learner.fit(X_train, y_train)
740
+ self.fit_learner(X_train, y_train)
741
741
  y_labels = y_val
742
- y_pred = self.learner.predict(X_val)
742
+ y_pred = self.predict_learner(X_val)
743
743
 
744
744
  # Compute the conditional probability matrix
745
745
  self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, y_labels, y_pred)
@@ -759,7 +759,7 @@ class GPAC(AggregativeQuantifier):
759
759
  dict
760
760
  Adjusted class prevalences.
761
761
  """
762
- predictions = self.learner.predict(X)
762
+ predictions = self.predict_learner(X)
763
763
 
764
764
  # Compute the distribution of predictions
765
765
  predicted_prevalences = np.zeros(self.n_class)
@@ -851,9 +851,11 @@ class PCC(AggregativeQuantifier):
851
851
  >>> get_real_prev(y_test)
852
852
  {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
853
853
  """
854
+ @property
855
+ def is_probabilistic(self) -> bool:
856
+ return True
854
857
 
855
- def __init__(self, learner: BaseEstimator):
856
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
858
+ def __init__(self, learner: BaseEstimator=None):
857
859
  self.learner = learner
858
860
 
859
861
  def _fit_method(self, X, y):
@@ -872,8 +874,7 @@ class PCC(AggregativeQuantifier):
872
874
  self : PCC
873
875
  Fitted quantifier object.
874
876
  """
875
- if not self.learner_fitted:
876
- self.learner.fit(X, y)
877
+ self.fit_learner(X, y)
877
878
  return self
878
879
 
879
880
  def _predict_method(self, X) -> np.ndarray:
@@ -896,7 +897,7 @@ class PCC(AggregativeQuantifier):
896
897
  # Calculate the prevalence for each class
897
898
  for class_index in range(self.n_class):
898
899
  # Get the predicted probabilities for the current class
899
- class_probabilities = self.learner.predict_proba(X)[:, class_index]
900
+ class_probabilities = self.predict_learner(X)[:, class_index]
900
901
 
901
902
  # Compute the average probability (prevalence) for the current class
902
903
  mean_prev = np.mean(class_probabilities)
@@ -954,8 +955,7 @@ class PWK(AggregativeQuantifier):
954
955
  {0: 0.4166666666666667, 1: 0.3194444444444444, 2: 0.2638888888888889}
955
956
  """
956
957
 
957
- def __init__(self, learner: BaseEstimator):
958
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
958
+ def __init__(self, learner: BaseEstimator=None):
959
959
  self.learner = learner
960
960
 
961
961
  def _fit_method(self, X, y):
@@ -974,8 +974,7 @@ class PWK(AggregativeQuantifier):
974
974
  self : PWK
975
975
  Fitted quantifier object.
976
976
  """
977
- if not self.learner_fitted:
978
- self.learner.fit(X, y)
977
+ self.fit_learner(X, y)
979
978
  return self
980
979
 
981
980
  def _predict_method(self, X) -> dict:
@@ -993,7 +992,7 @@ class PWK(AggregativeQuantifier):
993
992
  A dictionary mapping each class label to its estimated prevalence.
994
993
  """
995
994
  # Predict class labels for the given data
996
- predicted_labels = self.learner.predict(X)
995
+ predicted_labels = self.predict_learner(X)
997
996
 
998
997
  # Compute the distribution of predicted labels
999
998
  unique_labels, label_counts = np.unique(predicted_labels, return_counts=True)
@@ -6,6 +6,7 @@ from ..base import AggregativeQuantifier
6
6
 
7
7
  from ..utils.general import get_real_prev
8
8
  from ..utils.method import *
9
+ import mlquantify as mq
9
10
 
10
11
 
11
12
 
@@ -69,7 +70,7 @@ class MixtureModel(AggregativeQuantifier):
69
70
  {0: 0.37719298245614036, 1: 0.6228070175438597}
70
71
  """
71
72
 
72
- def __init__(self, learner: BaseEstimator):
73
+ def __init__(self, learner: BaseEstimator=None):
73
74
  self.learner = learner
74
75
  self.pos_scores = None
75
76
  self.neg_scores = None
@@ -85,6 +86,10 @@ class MixtureModel(AggregativeQuantifier):
85
86
  Always returns False, as MixtureModel supports only binary classification.
86
87
  """
87
88
  return False
89
+
90
+ @property
91
+ def is_probabilistic(self) -> bool:
92
+ return True
88
93
 
89
94
  def _fit_method(self, X, y):
90
95
  """
@@ -102,11 +107,15 @@ class MixtureModel(AggregativeQuantifier):
102
107
  self : MixtureModel
103
108
  The fitted MixtureModel instance.
104
109
  """
105
- y_label, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
110
+ if mq.arguments["y_labels"] is not None and mq.arguments["posteriors_train"] is not None:
111
+ y_labels = mq.arguments["y_labels"]
112
+ probabilities = mq.arguments["posteriors_train"]
113
+ else:
114
+ y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
106
115
 
107
116
  # Separate positive and negative scores based on labels
108
- self.pos_scores = probabilities[y_label == self.classes[1]][:, 1]
109
- self.neg_scores = probabilities[y_label == self.classes[0]][:, 1]
117
+ self.pos_scores = probabilities[y_labels == self.classes[1]][:, 1]
118
+ self.neg_scores = probabilities[y_labels == self.classes[0]][:, 1]
110
119
 
111
120
  return self
112
121
 
@@ -125,7 +134,7 @@ class MixtureModel(AggregativeQuantifier):
125
134
  An array containing the prevalence for each class.
126
135
  """
127
136
  # Get the predicted probabilities for the positive class
128
- test_scores = self.learner.predict_proba(X)[:, 1]
137
+ test_scores = self.predict_learner(X)[:, 1]
129
138
 
130
139
  # Compute the prevalence using the mixture model
131
140
  prevalence = np.clip(self._compute_prevalence(test_scores), 0, 1)
@@ -256,9 +265,8 @@ class DyS(MixtureModel):
256
265
  {0: 0.37719298245614036, 1: 0.6228070175438597}
257
266
  """
258
267
 
259
- def __init__(self, learner: BaseEstimator, measure: str = "topsoe", bins_size: np.ndarray = None):
268
+ def __init__(self, learner: BaseEstimator=None, measure: str = "topsoe", bins_size: np.ndarray = None):
260
269
  assert measure in ["hellinger", "topsoe", "probsymm"], "Invalid measure."
261
- assert isinstance(learner, BaseEstimator), "Learner must be a valid estimator."
262
270
  super().__init__(learner)
263
271
 
264
272
  # Set up bins_size
@@ -305,7 +313,7 @@ class DyS(MixtureModel):
305
313
  distance : float
306
314
  The minimum distance value.
307
315
  """
308
- test_scores = self.learner.predict_proba(X_test)
316
+ test_scores = self.predict_learner(X_test)
309
317
  prevs = self.GetMinDistancesDyS(test_scores)
310
318
 
311
319
  size = len(prevs)
@@ -455,9 +463,8 @@ class DySsyn(MixtureModel):
455
463
  """
456
464
 
457
465
 
458
- def __init__(self, learner:BaseEstimator, measure:str="topsoe", merge_factor:np.ndarray=None, bins_size:np.ndarray=None, alpha_train:float=0.5, n:int=None):
466
+ def __init__(self, learner:BaseEstimator=None, measure:str="topsoe", merge_factor:np.ndarray=None, bins_size:np.ndarray=None, alpha_train:float=0.5, n:int=None):
459
467
  assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
460
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
461
468
  super().__init__(learner)
462
469
 
463
470
  # Set up bins_size
@@ -494,8 +501,7 @@ class DySsyn(MixtureModel):
494
501
  self : DySsyn
495
502
  The fitted DySsyn instance.
496
503
  """
497
- if not self.learner_fitted:
498
- self.learner.fit(X, y)
504
+ self.fit_learner(X, y)
499
505
 
500
506
  self.alpha_train = list(get_real_prev(y).values())[1]
501
507
 
@@ -538,7 +544,7 @@ class DySsyn(MixtureModel):
538
544
  distance : float
539
545
  Minimum distance value for the test data.
540
546
  """
541
- test_scores = self.learner.predict_proba(X_test)
547
+ test_scores = self.predict_learner(X_test)
542
548
 
543
549
  distances = self.GetMinDistancesDySsyn(test_scores)
544
550
 
@@ -679,8 +685,7 @@ class HDy(MixtureModel):
679
685
  {0: 0.37719298245614036, 1: 0.6228070175438597}
680
686
  """
681
687
 
682
- def __init__(self, learner: BaseEstimator):
683
- assert isinstance(learner, BaseEstimator), "Learner must be a valid estimator."
688
+ def __init__(self, learner: BaseEstimator=None):
684
689
  super().__init__(learner)
685
690
 
686
691
  def _compute_prevalence(self, test_scores: np.ndarray) -> float:
@@ -717,7 +722,7 @@ class HDy(MixtureModel):
717
722
  distance : float
718
723
  The minimum distance value.
719
724
  """
720
- test_scores = self.learner.predict_proba(X_test)
725
+ test_scores = self.predict_learner(X_test)
721
726
  _, distances = self.GetMinDistancesHDy(test_scores)
722
727
 
723
728
  size = len(distances)
@@ -833,8 +838,7 @@ class SMM(MixtureModel):
833
838
  {0: 0.37719298245614036, 1: 0.6228070175438597}
834
839
  """
835
840
 
836
- def __init__(self, learner: BaseEstimator):
837
- assert isinstance(learner, BaseEstimator), "Learner must be a valid estimator."
841
+ def __init__(self, learner: BaseEstimator=None):
838
842
  super().__init__(learner)
839
843
 
840
844
  def _compute_prevalence(self, test_scores: np.ndarray) -> float:
@@ -909,8 +913,7 @@ class SORD(MixtureModel):
909
913
  {0: 0.37719298245614036, 1: 0.6228070175438597}
910
914
  """
911
915
 
912
- def __init__(self, learner: BaseEstimator):
913
- assert isinstance(learner, BaseEstimator), "Learner must be a valid estimator."
916
+ def __init__(self, learner: BaseEstimator=None):
914
917
  super().__init__(learner)
915
918
 
916
919
  self.best_distance_index = None # Stores the index of the best alpha value
@@ -4,6 +4,7 @@ from sklearn.base import BaseEstimator
4
4
 
5
5
  from ..base import AggregativeQuantifier
6
6
  from ..utils.method import adjust_threshold, get_scores
7
+ import mlquantify as mq
7
8
 
8
9
 
9
10
 
@@ -67,13 +68,28 @@ class ThresholdOptimization(AggregativeQuantifier):
67
68
  >>> y_pred = mtm.predict(X_test)
68
69
  """
69
70
 
70
- def __init__(self, learner: BaseEstimator):
71
+ def __init__(self, learner: BaseEstimator=None):
71
72
  self.learner = learner
72
73
  self.threshold = None
73
74
  self.cc_output = None
74
75
  self.tpr = None
75
76
  self.fpr = None
76
77
 
78
+ @property
79
+ def is_probabilistic(self) -> bool:
80
+ """
81
+ Returns whether the method is probabilistic.
82
+
83
+ This method is used to determine whether the quantification method is probabilistic,
84
+ meaning it uses class-conditional probabilities to estimate class prevalences.
85
+
86
+ Returns
87
+ -------
88
+ bool
89
+ True, indicating that this method is probabilistic.
90
+ """
91
+ return True
92
+
77
93
  @property
78
94
  def is_multiclass(self) -> bool:
79
95
  """
@@ -106,7 +122,11 @@ class ThresholdOptimization(AggregativeQuantifier):
106
122
  The fitted quantifier object with the best threshold, TPR, and FPR.
107
123
  """
108
124
  # Get predicted labels and probabilities
109
- y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
125
+ if mq.arguments["y_labels"] is not None and mq.arguments["posteriors_train"] is not None:
126
+ y_labels = mq.arguments["y_labels"]
127
+ probabilities = mq.arguments["posteriors_train"]
128
+ else:
129
+ y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
110
130
 
111
131
  # Adjust thresholds and compute true and false positive rates
112
132
  thresholds, tprs, fprs = adjust_threshold(y_labels, probabilities[:, 1], self.classes)
@@ -131,7 +151,7 @@ class ThresholdOptimization(AggregativeQuantifier):
131
151
  An array of predicted prevalences for the classes.
132
152
  """
133
153
  # Get predicted probabilities for the positive class
134
- probabilities = self.learner.predict_proba(X)[:, 1]
154
+ probabilities = self.predict_learner(X)[:, 1]
135
155
 
136
156
  # Compute the classification count output based on the threshold
137
157
  self.cc_output = len(probabilities[probabilities >= self.threshold]) / len(probabilities)
@@ -231,8 +251,7 @@ class ACC(ThresholdOptimization):
231
251
  {0: 0.3991228070175439, 1: 0.6008771929824561}
232
252
  """
233
253
 
234
- def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
235
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
254
+ def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
236
255
  super().__init__(learner)
237
256
  self.threshold = threshold
238
257
 
@@ -325,8 +344,7 @@ class MAX(ThresholdOptimization):
325
344
  {0: 0.3991228070175439, 1: 0.6008771929824561}
326
345
  """
327
346
 
328
- def __init__(self, learner: BaseEstimator):
329
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
347
+ def __init__(self, learner: BaseEstimator=None):
330
348
  super().__init__(learner)
331
349
 
332
350
  def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
@@ -428,8 +446,7 @@ class MS(ThresholdOptimization):
428
446
  {0: 0.3991228070175439, 1: 0.6008771929824561}
429
447
  """
430
448
 
431
- def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
432
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
449
+ def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
433
450
  super().__init__(learner)
434
451
  self.threshold = threshold
435
452
 
@@ -528,8 +545,7 @@ class MS2(ThresholdOptimization):
528
545
  {0: 0.3991228070175439, 1: 0.6008771929824561}
529
546
  """
530
547
 
531
- def __init__(self, learner: BaseEstimator):
532
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
548
+ def __init__(self, learner: BaseEstimator=None):
533
549
  super().__init__(learner)
534
550
 
535
551
  def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
@@ -639,8 +655,7 @@ class PACC(ThresholdOptimization):
639
655
  {0: 0.3991228070175439, 1: 0.6008771929824561}
640
656
  """
641
657
 
642
- def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
643
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
658
+ def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
644
659
  super().__init__(learner)
645
660
  self.threshold = threshold
646
661
 
@@ -675,7 +690,7 @@ class PACC(ThresholdOptimization):
675
690
  prevalences = {}
676
691
 
677
692
  # Calculate probabilities for the positive class
678
- probabilities = self.learner.predict_proba(X)[:, 1]
693
+ probabilities = self.predict_learner(X)[:, 1]
679
694
 
680
695
  # Compute the mean score for the positive class
681
696
  mean_scores = np.mean(probabilities)
@@ -731,13 +746,6 @@ class PACC(ThresholdOptimization):
731
746
 
732
747
 
733
748
 
734
- def best_tprfpr(self, thresholds:np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
735
- tpr = tprs[thresholds == self.threshold][0]
736
- fpr = fprs[thresholds == self.threshold][0]
737
- return (self.threshold, tpr, fpr)
738
-
739
-
740
-
741
749
 
742
750
 
743
751
 
@@ -797,8 +805,7 @@ class T50(ThresholdOptimization):
797
805
  {0: 0.3991228070175439, 1: 0.6008771929824561}
798
806
  """
799
807
 
800
- def __init__(self, learner: BaseEstimator):
801
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
808
+ def __init__(self, learner: BaseEstimator=None):
802
809
  super().__init__(learner)
803
810
 
804
811
  def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
@@ -906,8 +913,7 @@ class X_method(ThresholdOptimization):
906
913
  {0: 0.3991228070175439, 1: 0.6008771929824561}
907
914
  """
908
915
 
909
- def __init__(self, learner: BaseEstimator):
910
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
916
+ def __init__(self, learner: BaseEstimator=None):
911
917
  super().__init__(learner)
912
918
 
913
919
  def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
@@ -256,7 +256,7 @@ def parallel(func, elements, n_jobs: int = 1, *args):
256
256
  list
257
257
  List of results from running the function on each element.
258
258
  """
259
- return Parallel(n_jobs=n_jobs)(
259
+ return Parallel(n_jobs=n_jobs, backend="threading")(
260
260
  delayed(func)(e, *args) for e in elements
261
261
  )
262
262
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mlquantify
3
- Version: 0.0.11.8
3
+ Version: 0.0.11.10
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -6,7 +6,7 @@ here = pathlib.Path(__file__).parent.resolve()
6
6
 
7
7
  long_description = (here / 'README.md').read_text(encoding='utf-8')
8
8
 
9
- VERSION = '0.0.11.8'
9
+ VERSION = '0.0.11.10'
10
10
  DESCRIPTION = 'Quantification Library'
11
11
 
12
12
  # Setting up
@@ -1,9 +0,0 @@
1
- "mlquantify, a Python package for quantification"
2
-
3
- from . import base
4
- from . import model_selection
5
- from . import plots
6
- from . import classification
7
- from . import evaluation
8
- from . import methods
9
- from . import utils
File without changes
File without changes