mlquantify 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -603,7 +603,12 @@ class MS(ThresholdAdjustment):
603
603
  for thr, tpr, fpr in zip(thresholds, tprs, fprs):
604
604
  cc_predictions = CC(thr).aggregate(predictions)
605
605
  cc_predictions = cc_predictions[1]
606
- prevalence = cc_predictions if tpr - fpr == 0 else (cc_predictions - fpr) / (tpr - fpr)
606
+
607
+ if tpr - fpr == 0:
608
+ prevalence = cc_predictions
609
+ else:
610
+ prevalence = np.clip((cc_predictions - fpr) / (tpr - fpr), 0, 1)
611
+
607
612
  prevs.append(prevalence)
608
613
  prevalence = np.median(prevs)
609
614
  return np.asarray([1 - prevalence, prevalence])
@@ -239,7 +239,10 @@ class BaseAdjustCount(AggregationMixin, BaseQuantifier):
239
239
  def aggregate(self, predictions, train_predictions, y_train_values):
240
240
  """Aggregate predictions and apply matrix- or rate-based bias correction."""
241
241
  self.classes_ = check_classes_attribute(self, np.unique(y_train_values))
242
- predictions = validate_predictions(self, train_predictions)
242
+
243
+ predictions = validate_predictions(self, predictions)
244
+ train_predictions = validate_predictions(self, train_predictions)
245
+
243
246
  prevalences = self._adjust(predictions, train_predictions, y_train_values)
244
247
  prevalences = validate_prevalences(self, prevalences, self.classes_)
245
248
  return prevalences
@@ -661,7 +661,7 @@ class QuaDapt(MetaquantifierMixin, BaseQuantifier):
661
661
  def __init__(self,
662
662
  quantifier,
663
663
  measure="topsoe",
664
- merging_factors=(0.1, 1.0, 0.2)):
664
+ merging_factors=np.arange(0.1, 1.0, 0.2)):
665
665
  self.quantifier = quantifier
666
666
  self.measure = measure
667
667
  self.merging_factors = merging_factors
@@ -701,10 +701,7 @@ class QuaDapt(MetaquantifierMixin, BaseQuantifier):
701
701
 
702
702
  self.classes = self.classes if hasattr(self, 'classes') else np.unique(train_y_values)
703
703
 
704
- moss = QuaDapt.MoSS(1000, 0.5, m)
705
-
706
- moss_scores = moss[:, :2]
707
- moss_labels = moss[:, 2]
704
+ moss_scores, moss_labels = self.MoSS(1000, 0.5, m)
708
705
 
709
706
  prevalences = self.quantifier.aggregate(predictions,
710
707
  moss_scores,
@@ -721,9 +718,9 @@ class QuaDapt(MetaquantifierMixin, BaseQuantifier):
721
718
  distances = []
722
719
 
723
720
  for mf in MF:
724
- scores = QuaDapt.MoSS(1000, 0.5, mf)
725
- pos_scores = scores[scores[:, 2] == 1][:, :2]
726
- neg_scores = scores[scores[:, 2] == 0][:, :2]
721
+ scores, labels = self.MoSS(1000, 0.5, mf)
722
+ pos_scores = scores[labels == 1][:, 1]
723
+ neg_scores = scores[labels == 0][:, 1]
727
724
 
728
725
  best_distance = self._get_best_distance(predictions, pos_scores, neg_scores)
729
726
 
@@ -772,14 +769,27 @@ class QuaDapt(MetaquantifierMixin, BaseQuantifier):
772
769
  .. [1] Maletzke, A., Reis, D. dos, Hassan, W., & Batista, G. (2021).
773
770
  Accurately Quantifying under Score Variability. 2021 IEEE International Conference on Data Mining (ICDM), 1228-1233. https://doi.org/10.1109/ICDM51629.2021.00149
774
771
  """
775
- p_score = np.random.uniform(size=int(n * alpha)) ** m
776
- n_score = 1 - (np.random.uniform(size=int(round(n * (1 - alpha), 0))) ** m)
777
- scores = np.column_stack(
778
- (np.concatenate((p_score, n_score)),
779
- np.concatenate((p_score, n_score)),
780
- np.concatenate((
781
- np.ones(len(p_score)),
782
- np.full(len(n_score), 0))))
772
+ if isinstance(alpha, list):
773
+ alpha = float(alpha[1])
774
+
775
+ n_pos = int(n * alpha)
776
+ n_neg = n - n_pos
777
+
778
+ # Positive-class scores
779
+ p_score = np.random.uniform(size=n_pos) ** merging_factor
780
+ # Negative-class scores
781
+ n_score = 1 - (np.random.uniform(size=n_neg) ** merging_factor)
782
+
783
+ # Build the feature array: two complementary score columns (1 - s, s) plus a label column
784
+ moss = np.column_stack(
785
+ (
786
+ 1 - np.concatenate((p_score, n_score)),
787
+ np.concatenate((p_score, n_score)),
788
+ np.int16(np.concatenate((np.ones(len(p_score)), np.full(len(n_score), 0))))
789
+ )
783
790
  )
784
- return scores
791
+
792
+ scores = moss[:, :2]
793
+ labels = moss[:, 2].astype(np.int16)
794
+ return scores, labels
785
795
 
@@ -308,8 +308,10 @@ class SMM(AggregativeMixture):
308
308
  mean_pos = np.mean(pos_scores)
309
309
  mean_neg = np.mean(neg_scores)
310
310
  mean_test = np.mean(predictions)
311
-
312
- alpha = (mean_test - mean_neg) / (mean_pos - mean_neg)
311
+ if mean_pos - mean_neg == 0:
312
+ alpha = mean_test
313
+ else:
314
+ alpha = np.clip((mean_test - mean_neg) / (mean_pos - mean_neg), 0, 1)
313
315
  return alpha, None
314
316
 
315
317
 
@@ -274,7 +274,7 @@ def validate_data(quantifier,
274
274
  else:
275
275
  out = X, y
276
276
  elif not no_val_X and no_val_y:
277
- out = check_array(X, input_name="X", **check_params)
277
+ out = check_array(X, input_name="X", dtype=None, **check_params)
278
278
  elif no_val_X and not no_val_y:
279
279
  out = _check_y(y, **check_params)
280
280
  else:
@@ -286,12 +286,12 @@ def validate_data(quantifier,
286
286
  check_X_params, check_y_params = validate_separately
287
287
  if "estimator" not in check_X_params:
288
288
  check_X_params = {**default_check_params, **check_X_params}
289
- X = check_array(X, input_name="X", **check_X_params)
289
+ X = check_array(X, input_name="X", dtype=None, **check_X_params)
290
290
  if "estimator" not in check_y_params:
291
291
  check_y_params = {**default_check_params, **check_y_params}
292
292
  y = check_array(y, input_name="y", **check_y_params)
293
293
  else:
294
- X, y = check_X_y(X, y, **check_params)
294
+ X, y = check_X_y(X, y, dtype=None, **check_params)
295
295
  out = X, y
296
296
 
297
297
  return out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mlquantify
3
- Version: 0.1.10
3
+ Version: 0.1.12
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -30,7 +30,7 @@ Dynamic: maintainer
30
30
  Dynamic: requires-dist
31
31
  Dynamic: summary
32
32
 
33
- <h1 align="center">MLQuantify</h1>
33
+ <img src="assets/logo_mlquantify-white.svg" alt="mlquantify logo">
34
34
  <h4 align="center">A Python Package for Quantification</h4>
35
35
 
36
36
  ___
@@ -41,7 +41,7 @@ ___
41
41
 
42
42
  ## Latest Release
43
43
 
44
- - **Version 0.1.3**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
44
+ - **Version 0.1.11**: Initial beta version. For a detailed list of changes, check the [changelog](#).
45
45
  - In case you need any help, refer to the [User Guide](https://luizfernandolj.github.io/mlquantify/user_guide.html).
46
46
  - Explore the [API documentation](https://luizfernandolj.github.io/mlquantify/api/index.html) for detailed developer information.
47
47
  - See also the library's page on PyPI: [pypi mlquantify](https://pypi.org/project/mlquantify/)
@@ -73,7 +73,6 @@ ___
73
73
  | **Model Selection** | Criteria and processes used to select the best model, such as grid-search for the case of quantification|
74
74
  | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance, (e.g., AE, MAE, NAE, SE, KLD, etc.). |
75
75
  | **Evaluation Protocols** | Evaluation protocols used, based on sampling generation (e.g., APP, NPP, etc.). |
76
- | **Plotting Results** | Tools and techniques used to visualize results, such as the protocol results.|
77
76
  | **Comprehensive Documentation** | Complete documentation of the project, including code, data, and results. |
78
77
 
79
78
  ___
@@ -84,8 +83,8 @@ This code first loads the breast cancer dataset from _sklearn_, which is then sp
84
83
 
85
84
  ```python
86
85
  from mlquantify.methods import EMQ
87
- from mlquantify.evaluation.measures import absolute_error, mean_absolute_error
88
- from mlquantify.utils import get_real_prev
86
+ from mlquantify.metrics import MAE, NRAE
87
+ from mlquantify.utils import get_prev_from_labels
89
88
 
90
89
  from sklearn.ensemble import RandomForestClassifier
91
90
  from sklearn.datasets import load_breast_cancer
@@ -103,14 +102,14 @@ model.fit(X_train, y_train)
103
102
 
104
103
  #Predict the class prevalence for X_test
105
104
  pred_prevalence = model.predict(X_test)
106
- real_prevalence = get_real_prev(y_test)
105
+ real_prevalence = get_prev_from_labels(y_test)
107
106
 
108
107
  #Get the error for the prediction
109
- ae = absolute_error(real_prevalence, pred_prevalence)
110
- mae = mean_absolute_error(real_prevalence, pred_prevalence)
108
+ mae = MAE(real_prevalence, pred_prevalence)
109
+ nrae = NRAE(real_prevalence, pred_prevalence)
111
110
 
112
- print(f"Absolute Error -> {ae}")
113
111
  print(f"Mean Absolute Error -> {mae}")
112
+ print(f"Normalized Relative Absolute Error -> {nrae}")
114
113
  ```
115
114
 
116
115
  ___
@@ -129,13 +128,6 @@ ___
129
128
 
130
129
  ## Documentation
131
130
 
132
- ##### API is avaliable [here](https://luizfernandolj.github.io/mlquantify/api/index.html)
133
-
134
- - [Methods](https://github.com/luizfernandolj/mlquantify/wiki/Methods)
135
- - [Model Selection](https://github.com/luizfernandolj/mlquantify/wiki/Model-Selection)
136
- - [Evaluation](https://github.com/luizfernandolj/mlquantify/wiki/Evaluation)
137
- - [Plotting](https://github.com/luizfernandolj/mlquantify/wiki/Plotting)
138
- - [Utilities](https://github.com/luizfernandolj/mlquantify/wiki/Utilities)
139
-
131
+ ##### API is available [here](https://luizfernandolj.github.io/mlquantify/api/)
140
132
 
141
133
  ___
@@ -5,22 +5,22 @@ mlquantify/calibration.py,sha256=chG3GNX2BBDTWIuSVfZUJ_YF_ZVBSoel2d_AN0OChS0,6
5
5
  mlquantify/confidence.py,sha256=QkEWr6s-Su3Nbinia_TRQbBeTM6ymDPe7Bv204XBKKA,10799
6
6
  mlquantify/multiclass.py,sha256=Jux0fvL5IBZA3DXLCuqUEE77JYYBGAcW6GaEH9srmu4,11747
7
7
  mlquantify/adjust_counting/__init__.py,sha256=AWio99zeaUULQq9vKggkFhnq-tqgXxasQt167NdcNVY,307
8
- mlquantify/adjust_counting/_adjustment.py,sha256=JYfPj-x0tw6aLt6m3YehsuKXF6FMUIIvjXImbiqXkGI,23130
9
- mlquantify/adjust_counting/_base.py,sha256=-nxH0seDXmEW9eGoqrH69JaIUCeNjFKBa8pDDH8u0Tg,9342
8
+ mlquantify/adjust_counting/_adjustment.py,sha256=x0i_jAWCw2UP9Gt20EteYxLmCr1Xh_AbISwFRbOVoI8,23234
9
+ mlquantify/adjust_counting/_base.py,sha256=tbYq2Efaxsub_vzXoMOR-J6SZlK6K8oRr5UvSSsjVvs,9428
10
10
  mlquantify/adjust_counting/_counting.py,sha256=7Ip7-XHQJcTWcWVDaLzEIM6WYcp8k5axsCIyD3QPWZE,5572
11
11
  mlquantify/adjust_counting/_utils.py,sha256=DEPNzvcr0KszCnfUJaRzBilwWzuNVMSdy5eV7aQ_JPE,2907
12
12
  mlquantify/likelihood/__init__.py,sha256=3dC5uregNmquUKz0r0-3aPspfjZjKGn3TRBoZPO1uFs,53
13
13
  mlquantify/likelihood/_base.py,sha256=seu_Vb58QttcGbFjHKAplMYGZcVbIHqkyTXEK2cax9A,5830
14
14
  mlquantify/likelihood/_classes.py,sha256=PZ31cAwO8q5X3O2_oSmQ1FM6bY4EsB8hWEcAgcEmWXQ,14731
15
15
  mlquantify/meta/__init__.py,sha256=GzdGw4ky_kmd5VNWiLBULy06IdN_MLCDAuJKbnMOx4s,62
16
- mlquantify/meta/_classes.py,sha256=3twKSrm4mF_AXZ1FP0V0hoOo-ceJulGKKgSEBvU8Vt0,30631
16
+ mlquantify/meta/_classes.py,sha256=JAnMS4bu2XHXI_sSZUfcW_uIXRanoA0NIS3uN6dWSv4,30956
17
17
  mlquantify/metrics/__init__.py,sha256=3bzzjSYTgrZIJsfAgJidQlB-bnjInwVYUvJ34bPhZxY,186
18
18
  mlquantify/metrics/_oq.py,sha256=koXDKeHWksl_vHpZuhc2pAps8wvu_MOgEztlSr04MmE,3544
19
19
  mlquantify/metrics/_rq.py,sha256=3yiEmGaRAGpzL29Et3tNqkJ3RMsLXwUX3uL9RoIgi40,3034
20
20
  mlquantify/metrics/_slq.py,sha256=JZceO2LR3mjbT_0zVcl9xI6jf8pn3tIcpP3vP3Luf9I,6817
21
21
  mlquantify/mixture/__init__.py,sha256=_KKhpFuvi3vYwxydm5nOy9MKwmIU4eyZDN9Pe00hqtk,70
22
22
  mlquantify/mixture/_base.py,sha256=1-yW64FPQXB_d9hH9KjSlDnmFtW9FY7S2hppXAd1DBg,5645
23
- mlquantify/mixture/_classes.py,sha256=uYtWh6oTx0M3rTG71gfO6RWt3QVXH6KN5F-J4YKN0TM,16329
23
+ mlquantify/mixture/_classes.py,sha256=Dx0KWS-RtVVmJwXvPKIVWitsJhgcYRRiypLYrgE66x4,16420
24
24
  mlquantify/mixture/_utils.py,sha256=CKlC081nrkJ8Pil7lrPZvNZC_xfpXV8SsuQq3M_LHgA,4037
25
25
  mlquantify/model_selection/__init__.py,sha256=98I0uf8k6lbWAjazGyGjbOdPOvzU8aMRLqC3I7D3jzk,113
26
26
  mlquantify/model_selection/_protocol.py,sha256=2k0M_7YwZf7YLoQ8ElR2xMvLySVgtE_EvWieMXTIzTA,12499
@@ -45,9 +45,9 @@ mlquantify/utils/_parallel.py,sha256=XotpX9nsj6nW-tNCmZ-ahTcRztgnn9oQKP2cl1rLdYM
45
45
  mlquantify/utils/_random.py,sha256=7F3nyy7Pa_kN8xP8P1L6MOM4WFu4BirE7bOfGTZ1Spk,1275
46
46
  mlquantify/utils/_sampling.py,sha256=QQxE2WKLdiCFUfPF6fKgzyrsOUIWYf74w_w8fbYVc2c,8409
47
47
  mlquantify/utils/_tags.py,sha256=Rz78TLpxgVxBKS0mKTlC9Qo_kn6HaEwVKNXh8pxFT7M,1095
48
- mlquantify/utils/_validation.py,sha256=dE7NYLy6C5UWf8tXIhQeWLTz2-rej_gr8-aAIwgJTPk,16762
48
+ mlquantify/utils/_validation.py,sha256=yR5zqh_c7OHPnuMFBgKbrdU1bG-oXL2thojFEzydzWs,16798
49
49
  mlquantify/utils/prevalence.py,sha256=FXLCJViQb2yDbyTXeGZt8WsPPnSZINhorQYZTKXOn14,1772
50
- mlquantify-0.1.10.dist-info/METADATA,sha256=qvy3E7u4daj9ZSZnrza7ZtNHcs46xx63wMWbeq4R3T8,5193
51
- mlquantify-0.1.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
- mlquantify-0.1.10.dist-info/top_level.txt,sha256=tGEkYkbbFElwULvqENjam3u1uXtyC1J9dRmibsq8_n0,11
53
- mlquantify-0.1.10.dist-info/RECORD,,
50
+ mlquantify-0.1.12.dist-info/METADATA,sha256=qMZWMClRDNjUuFjuiAGhC7aDA3r9hlECzSbyoSLlQ-4,4701
51
+ mlquantify-0.1.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
+ mlquantify-0.1.12.dist-info/top_level.txt,sha256=tGEkYkbbFElwULvqENjam3u1uXtyC1J9dRmibsq8_n0,11
53
+ mlquantify-0.1.12.dist-info/RECORD,,