mlquantify 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mlquantify/__init__.py +0 -29
  2. mlquantify/adjust_counting/__init__.py +14 -0
  3. mlquantify/adjust_counting/_adjustment.py +365 -0
  4. mlquantify/adjust_counting/_base.py +247 -0
  5. mlquantify/adjust_counting/_counting.py +145 -0
  6. mlquantify/adjust_counting/_utils.py +114 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +335 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +161 -0
  13. mlquantify/likelihood/_classes.py +414 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +761 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +153 -0
  22. mlquantify/mixture/_classes.py +400 -0
  23. mlquantify/mixture/_utils.py +112 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +198 -0
  31. mlquantify/neighbors/_classes.py +159 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
  33. mlquantify/neighbors/_kde.py +270 -0
  34. mlquantify/neighbors/_utils.py +135 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +61 -0
  50. {mlquantify-0.1.8.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.9.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -289
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.8.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.8.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.8.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
mlquantify/adjust_counting/_counting.py (new file)
@@ -0,0 +1,145 @@
+ import numpy as np
+
+ from mlquantify.base_aggregative import (
+     SoftLearnerQMixin,
+     CrispLearnerQMixin
+ )
+
+ from mlquantify.adjust_counting._base import BaseCount
+ from mlquantify.utils._validation import validate_predictions, validate_prevalences, check_classes_attribute
+ from mlquantify.utils._constraints import Interval
+
+
+
+ class CC(CrispLearnerQMixin, BaseCount):
+     """Classify and Count (CC) quantifier.
+
+     Implements the Classify and Count method for quantification as described in:
+     [1] Forman, G. (2005). *Counting Positives Accurately Despite Inaccurate Classification.*
+         ECML, pp. 564-575.
+     [2] Forman, G. (2008). *Quantifying Counts and Costs via Classification.*
+         Data Mining and Knowledge Discovery, 17(2), 164-206.
+
+
+     Parameters
+     ----------
+     learner : estimator, optional
+         A supervised learning estimator with fit and predict methods.
+         If None, the aggregate method is expected to be called directly on precomputed predictions.
+     threshold : float, default=0.5
+         Decision threshold for converting predicted probabilities into class labels.
+         Must be in the interval [0.0, 1.0].
+
+
+     Attributes
+     ----------
+     learner : estimator
+         Underlying classification model.
+     classes : ndarray of shape (n_classes,)
+         Unique class labels observed during training.
+
+
+     Examples
+     --------
+     >>> from mlquantify.adjust_counting import CC
+     >>> import numpy as np
+     >>> from sklearn.linear_model import LogisticRegression
+     >>> X = np.random.randn(100, 5)
+     >>> y = np.random.randint(0, 2, 100)
+     >>> q = CC(learner=LogisticRegression())
+     >>> q.fit(X, y)
+     >>> q.predict(X)
+     {0: 0.47, 1: 0.53}
+     >>> q2 = CC()
+     >>> predictions = np.random.rand(200)
+     >>> q2.aggregate(predictions)
+     {0: 0.51, 1: 0.49}
+     """
+
+     _parameters_constraints = {
+         "threshold": [
+             Interval(0.0, 1.0),
+             Interval(0, 1, discrete=True),
+         ],
+     }
+
+     def __init__(self, learner=None, threshold=0.5):
+         super().__init__(learner=learner)
+         self.threshold = threshold
+
+     def aggregate(self, predictions):
+         predictions = validate_predictions(self, predictions)
+
+         # Count how many instances the classifier assigned to each class and normalize.
+         self.classes_ = check_classes_attribute(self, np.unique(predictions))
+         class_counts = np.array([np.count_nonzero(predictions == _class) for _class in self.classes_])
+         prevalences = class_counts / len(predictions)
+
+         prevalences = validate_prevalences(self, prevalences, self.classes_)
+         return prevalences
+
+
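As background for readers of this diff: classify and count simply reports, for each class, the fraction of test instances the underlying classifier assigns to that class. The following is a minimal standalone sketch of that idea in plain numpy/scikit-learn, independent of the CC class above and of mlquantify's validation helpers:

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X_train, y_train = rng.normal(size=(200, 5)), rng.integers(0, 2, 200)
X_test = rng.normal(size=(100, 5))

clf = LogisticRegression().fit(X_train, y_train)
crisp = clf.predict(X_test)  # hard class labels
# CC estimate: fraction of test items assigned to each class
prevalences = {c: float(np.mean(crisp == c)) for c in np.unique(y_train)}
print(prevalences)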
+ class PCC(SoftLearnerQMixin, BaseCount):
+     """Probabilistic Classify and Count (PCC) quantifier.
+
+     Implements the Probabilistic Classify and Count method for quantification as described in:
+     [1] Forman, G. (2005). *Counting Positives Accurately Despite Inaccurate Classification.*
+         ECML, pp. 564-575.
+     [2] Forman, G. (2008). *Quantifying Counts and Costs via Classification.*
+         Data Mining and Knowledge Discovery, 17(2), 164-206.
+
+
+     Parameters
+     ----------
+     learner : estimator, optional
+         A supervised learning estimator with fit and predict_proba methods.
+         If None, the aggregate method is expected to be called directly on precomputed predictions.
+
+
+     Attributes
+     ----------
+     learner : estimator
+         Underlying classification model.
+     classes : ndarray of shape (n_classes,)
+         Unique class labels observed during training.
+
+
+     Examples
+     --------
+     >>> from mlquantify.adjust_counting import PCC
+     >>> import numpy as np
+     >>> from sklearn.linear_model import LogisticRegression
+     >>> X = np.random.randn(100, 5)
+     >>> y = np.random.randint(0, 2, 100)
+     >>> q = PCC(learner=LogisticRegression())
+     >>> q.fit(X, y)
+     >>> q.predict(X)
+     {0: 0.48, 1: 0.52}
+     >>> q2 = PCC()
+     >>> predictions = np.random.rand(200, 2)
+     >>> q2.aggregate(predictions)
+     {0: 0.50, 1: 0.50}
+     """
+
+     def __init__(self, learner=None):
+         super().__init__(learner=learner)
+
+     def aggregate(self, predictions):
+         predictions = validate_predictions(self, predictions)
+
+         # Handle categorical predictions (1D array with class labels)
+         if predictions.ndim == 1 and not np.issubdtype(predictions.dtype, np.number):
+             self.classes_ = check_classes_attribute(self, np.unique(predictions))
+             class_counts = np.array([np.count_nonzero(predictions == _class) for _class in self.classes_])
+             prevalences = class_counts / len(predictions)
+         else:
+             # Handle probability predictions (2D array or 1D positive-class probabilities)
+             if predictions.ndim == 2:
+                 self.classes_ = check_classes_attribute(self, np.arange(predictions.shape[1]))
+             else:
+                 self.classes_ = check_classes_attribute(self, np.arange(2))
+             prevalences = np.mean(predictions, axis=0) if predictions.ndim == 2 else predictions.mean()
+             if predictions.ndim == 1:
+                 prevalences = np.array([1 - prevalences, prevalences])
+
+         prevalences = validate_prevalences(self, prevalences, self.classes_)
+         return prevalences
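For contrast with CC above, PCC replaces hard counts with expected counts: each prevalence is the mean posterior probability the classifier assigns to that class over the test set. A minimal standalone sketch of the same idea, again independent of the class above:

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
X_train, y_train = rng.normal(size=(200, 5)), rng.integers(0, 2, 200)
X_test = rng.normal(size=(100, 5))

clf = LogisticRegression().fit(X_train, y_train)
proba = clf.predict_proba(X_test)  # shape (n_samples, n_classes)
# PCC estimate: average posterior probability per class
prevalences = dict(zip(clf.classes_, proba.mean(axis=0)))
print(prevalences)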
mlquantify/adjust_counting/_utils.py (new file)
@@ -0,0 +1,114 @@
+ import numpy as np
+
+
+ def compute_table(y, y_pred, classes):
+     """
+     Compute the confusion matrix table for a binary classification task.
+
+
+     Parameters
+     ----------
+     y : np.ndarray
+         The true labels.
+     y_pred : np.ndarray
+         The predicted labels.
+     classes : np.ndarray
+         The unique classes in the dataset; classes[1] is treated as the positive class.
+
+
+     Returns
+     -------
+     tuple
+         A tuple containing the True Positives, False Positives, False Negatives, and True Negatives.
+     """
+     TP = np.logical_and(y == y_pred, y == classes[1]).sum()
+     FP = np.logical_and(y != y_pred, y == classes[0]).sum()
+     FN = np.logical_and(y != y_pred, y == classes[1]).sum()
+     TN = np.logical_and(y == y_pred, y == classes[0]).sum()
+     return TP, FP, FN, TN
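A quick worked check of the counting logic above, assuming the module is importable from the private path introduced in this release:

import numpy as np
from mlquantify.adjust_counting._utils import compute_table

y      = np.array([1, 1, 1, 0, 0, 0])
y_pred = np.array([1, 1, 0, 0, 0, 1])
classes = np.array([0, 1])

TP, FP, FN, TN = compute_table(y, y_pred, classes)
print(TP, FP, FN, TN)  # 2 1 1 2: two hits and one miss for each class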
+
+
+ def compute_tpr(TP, FN):
+     """
+     Compute the True Positive Rate (Recall) for a binary classification task.
+
+
+     Parameters
+     ----------
+     TP : int
+         The number of True Positives.
+     FN : int
+         The number of False Negatives.
+
+
+     Returns
+     -------
+     float
+         The True Positive Rate (Recall), or 0 if there are no positive instances.
+     """
+     if TP + FN == 0:
+         return 0
+     return TP / (TP + FN)
+
+
+ def compute_fpr(FP, TN):
+     """
+     Compute the False Positive Rate for a binary classification task.
+
+
+     Parameters
+     ----------
+     FP : int
+         The number of False Positives.
+     TN : int
+         The number of True Negatives.
+
+
+     Returns
+     -------
+     float
+         The False Positive Rate, or 0 if there are no negative instances.
+     """
+     if FP + TN == 0:
+         return 0
+     return FP / (FP + TN)
+
+
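In quantification these two rates usually feed the classic adjusted-count correction (Forman's Adjusted Classify and Count, from the papers cited in the docstrings above), which rescales a raw CC estimate. The helper below is a hypothetical illustration of that formula only, not the implementation in _adjustment.py:

def adjusted_count(p_cc, tpr, fpr):
    # ACC correction for the positive-class prevalence:
    #   p_true ~ (p_cc - fpr) / (tpr - fpr), clipped to [0, 1]
    if tpr == fpr:
        return p_cc  # correction undefined; fall back to the raw estimate
    return min(1.0, max(0.0, (p_cc - fpr) / (tpr - fpr)))

# Raw CC says 40% positives; with tpr=0.8 and fpr=0.1 the corrected estimate is ~0.43.
print(adjusted_count(0.40, 0.8, 0.1))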
+ def evaluate_thresholds(y, probabilities: np.ndarray, classes) -> tuple:
+     """
+     Evaluate candidate decision thresholds for a binary quantification task, computing the True Positive Rate and False Positive Rate at each threshold.
+
+
+     Parameters
+     ----------
+     y : np.ndarray
+         The true labels.
+     probabilities : np.ndarray
+         The predicted probabilities for the positive class.
+     classes : np.ndarray
+         The unique classes in the dataset; classes[1] is treated as the positive class.
+
+
+     Returns
+     -------
+     tuple
+         The evaluated thresholds and the corresponding arrays of True Positive Rates and False Positive Rates.
+     """
+     # Sweep 101 evenly spaced candidate thresholds in [0, 1].
+     unique_scores = np.linspace(0, 1, 101)
+
+     tprs = []
+     fprs = []
+
+     for threshold in unique_scores:
+         y_pred = np.where(probabilities >= threshold, classes[1], classes[0])
+
+         TP, FP, FN, TN = compute_table(y, y_pred, classes)
+
+         tpr = compute_tpr(TP, FN)
+         fpr = compute_fpr(FP, TN)
+
+         tprs.append(tpr)
+         fprs.append(fpr)
+
+     #best_tpr, best_fpr = self.adjust_threshold(np.asarray(tprs), np.asarray(fprs))
+     return (unique_scores, np.asarray(tprs), np.asarray(fprs))
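One plausible way to consume the returned arrays is to pick a threshold by some policy, for example the largest tpr - fpr gap (a Forman-style MAX rule). The sketch below is illustrative only and assumes the private module path from this release; it is not necessarily how the package's own threshold-selection code uses these arrays:

import numpy as np
from sklearn.linear_model import LogisticRegression
from mlquantify.adjust_counting._utils import evaluate_thresholds

rng = np.random.default_rng(0)
X, y = rng.normal(size=(300, 5)), rng.integers(0, 2, 300)
probabilities = LogisticRegression().fit(X, y).predict_proba(X)[:, 1]

thresholds, tprs, fprs = evaluate_thresholds(y, probabilities, np.array([0, 1]))
best = np.argmax(tprs - fprs)  # MAX policy: widest tpr/fpr gap
print(thresholds[best], tprs[best], fprs[best])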