mlquantify 0.0.11.2__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. mlquantify/__init__.py +32 -6
  2. mlquantify/base.py +559 -257
  3. mlquantify/classification/__init__.py +1 -1
  4. mlquantify/classification/methods.py +160 -0
  5. mlquantify/evaluation/__init__.py +14 -2
  6. mlquantify/evaluation/measures.py +215 -0
  7. mlquantify/evaluation/protocol.py +647 -0
  8. mlquantify/methods/__init__.py +37 -40
  9. mlquantify/methods/aggregative.py +1030 -0
  10. mlquantify/methods/meta.py +472 -0
  11. mlquantify/methods/mixture_models.py +1003 -0
  12. mlquantify/methods/non_aggregative.py +136 -0
  13. mlquantify/methods/threshold_optimization.py +957 -0
  14. mlquantify/model_selection.py +377 -232
  15. mlquantify/plots.py +367 -0
  16. mlquantify/utils/__init__.py +2 -2
  17. mlquantify/utils/general.py +334 -0
  18. mlquantify/utils/method.py +449 -0
  19. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/METADATA +137 -122
  20. mlquantify-0.1.1.dist-info/RECORD +22 -0
  21. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/WHEEL +1 -1
  22. mlquantify/classification/pwkclf.py +0 -73
  23. mlquantify/evaluation/measures/__init__.py +0 -26
  24. mlquantify/evaluation/measures/ae.py +0 -11
  25. mlquantify/evaluation/measures/bias.py +0 -16
  26. mlquantify/evaluation/measures/kld.py +0 -8
  27. mlquantify/evaluation/measures/mse.py +0 -12
  28. mlquantify/evaluation/measures/nae.py +0 -16
  29. mlquantify/evaluation/measures/nkld.py +0 -13
  30. mlquantify/evaluation/measures/nrae.py +0 -16
  31. mlquantify/evaluation/measures/rae.py +0 -12
  32. mlquantify/evaluation/measures/se.py +0 -12
  33. mlquantify/evaluation/protocol/_Protocol.py +0 -202
  34. mlquantify/evaluation/protocol/__init__.py +0 -2
  35. mlquantify/evaluation/protocol/app.py +0 -146
  36. mlquantify/evaluation/protocol/npp.py +0 -34
  37. mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -62
  38. mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -7
  39. mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -27
  40. mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -23
  41. mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -21
  42. mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -25
  43. mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -41
  44. mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -21
  45. mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -23
  46. mlquantify/methods/aggregative/__init__.py +0 -9
  47. mlquantify/methods/aggregative/cc.py +0 -32
  48. mlquantify/methods/aggregative/emq.py +0 -86
  49. mlquantify/methods/aggregative/fm.py +0 -72
  50. mlquantify/methods/aggregative/gac.py +0 -96
  51. mlquantify/methods/aggregative/gpac.py +0 -87
  52. mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -81
  53. mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -5
  54. mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
  55. mlquantify/methods/aggregative/mixtureModels/dys_syn.py +0 -89
  56. mlquantify/methods/aggregative/mixtureModels/hdy.py +0 -46
  57. mlquantify/methods/aggregative/mixtureModels/smm.py +0 -27
  58. mlquantify/methods/aggregative/mixtureModels/sord.py +0 -77
  59. mlquantify/methods/aggregative/pcc.py +0 -33
  60. mlquantify/methods/aggregative/pwk.py +0 -38
  61. mlquantify/methods/meta/__init__.py +0 -1
  62. mlquantify/methods/meta/ensemble.py +0 -236
  63. mlquantify/methods/non_aggregative/__init__.py +0 -1
  64. mlquantify/methods/non_aggregative/hdx.py +0 -71
  65. mlquantify/plots/__init__.py +0 -2
  66. mlquantify/plots/distribution_plot.py +0 -109
  67. mlquantify/plots/protocol_plot.py +0 -193
  68. mlquantify/utils/general_purposes/__init__.py +0 -8
  69. mlquantify/utils/general_purposes/convert_col_to_array.py +0 -13
  70. mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -29
  71. mlquantify/utils/general_purposes/get_real_prev.py +0 -9
  72. mlquantify/utils/general_purposes/load_quantifier.py +0 -4
  73. mlquantify/utils/general_purposes/make_prevs.py +0 -23
  74. mlquantify/utils/general_purposes/normalize.py +0 -20
  75. mlquantify/utils/general_purposes/parallel.py +0 -10
  76. mlquantify/utils/general_purposes/round_protocol_df.py +0 -14
  77. mlquantify/utils/method_purposes/__init__.py +0 -6
  78. mlquantify/utils/method_purposes/distances.py +0 -21
  79. mlquantify/utils/method_purposes/getHist.py +0 -13
  80. mlquantify/utils/method_purposes/get_scores.py +0 -33
  81. mlquantify/utils/method_purposes/moss.py +0 -16
  82. mlquantify/utils/method_purposes/ternary_search.py +0 -14
  83. mlquantify/utils/method_purposes/tprfpr.py +0 -42
  84. mlquantify-0.0.11.2.dist-info/RECORD +0 -73
  85. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- from .pwkclf import PWKCLF
1
+ from .methods import *
@@ -0,0 +1,160 @@
1
+ from sklearn.neighbors import NearestNeighbors
2
+ from sklearn.base import BaseEstimator
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
class PWKCLF(BaseEstimator):
    """
    Learner based on k-Nearest Neighbors (KNN) to use in the PWK method.

    This classifier adjusts the influence of neighbors using class weights
    derived from the `alpha` parameter. The `alpha` parameter controls the
    influence of class imbalance.

    Parameters
    ----------
    alpha : float, default=1
        Controls the influence of class imbalance. Must be >= 1.

    n_neighbors : int, default=10
        Number of neighbors to use.

    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
        Algorithm to compute nearest neighbors.

    metric : str, default='euclidean'
        Distance metric to use.

    leaf_size : int, default=30
        Leaf size passed to the tree-based algorithms.

    p : int, default=2
        Power parameter for the Minkowski metric.

    metric_params : dict, optional
        Additional keyword arguments for the metric function.

    n_jobs : int, optional
        Number of parallel jobs to run for neighbors search.

    Examples
    --------
    >>> from sklearn.datasets import load_breast_cancer
    >>> from sklearn.model_selection import train_test_split
    >>> from mlquantify.methods.aggregative import PWK
    >>> from mlquantify.utils.general import get_real_prev
    >>> from mlquantify.classification import PWKCLF
    >>>
    >>> # Load dataset
    >>> features, target = load_breast_cancer(return_X_y=True)
    >>>
    >>> # Split into training and testing sets
    >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=32)
    >>>
    >>> # Create and configure the PWKCLF learner
    >>> learner = PWKCLF(alpha=1, n_neighbors=10)
    >>>
    >>> # Create the PWK quantifier
    >>> model = PWK(learner=learner)
    >>>
    >>> # Train the model
    >>> model.fit(X_train, y_train)
    >>>
    >>> # Predict prevalences
    >>> y_pred = model.predict(X_test)
    >>>
    >>> # Display results
    >>> print("Real:", get_real_prev(y_test))
    >>> print("PWK:", y_pred)
    """

    def __init__(self,
                 alpha=1,
                 n_neighbors=10,
                 algorithm="auto",
                 metric="euclidean",
                 leaf_size=30,
                 p=2,
                 metric_params=None,
                 n_jobs=None):
        if alpha < 1:
            raise ValueError("alpha must not be smaller than 1")

        self.alpha = alpha
        self.n_neighbors = n_neighbors

        self.nbrs = NearestNeighbors(n_neighbors=n_neighbors,
                                     algorithm=algorithm,
                                     leaf_size=leaf_size,
                                     metric=metric,
                                     p=p,
                                     metric_params=metric_params,
                                     n_jobs=n_jobs)

        # Populated by fit().
        self.classes_ = None          # sorted array of unique class labels
        self.class_to_index = None    # label -> column index in the vote matrix
        self.class_weights = None     # per-class weights derived from alpha
        self.y_train = None           # training labels as a flat ndarray

    def fit(self, X, y):
        """
        Fit the PWKCLF model to the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.

        y : array-like of shape (n_samples,)
            Training labels.

        Returns
        -------
        self : object
            The fitted instance.
        """
        n_samples = X.shape[0]
        if n_samples < self.n_neighbors:
            # Cannot request more neighbors than there are training points.
            self.nbrs.set_params(n_neighbors=n_samples)

        # Store labels as a flat NumPy array so predict() can index them
        # positionally.  Fixes a bug where a pandas Series with a non-default
        # index (or a DataFrame) was indexed by label instead of position.
        self.y_train = np.asarray(y).ravel()

        unique_classes, class_counts = np.unique(self.y_train, return_counts=True)
        self.classes_ = unique_classes
        self.class_to_index = {c: i for i, c in enumerate(unique_classes)}

        # Classes with more samples get smaller weights; alpha=1 fully
        # compensates imbalance, larger alpha weakens the correction.
        min_class_count = np.min(class_counts)
        self.class_weights = (class_counts / min_class_count) ** (-1.0 / self.alpha)
        self.nbrs.fit(X)
        return self

    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data to predict.

        Returns
        -------
        y_pred : array of shape (n_samples,)
            Predicted class labels.
        """
        n_samples = X.shape[0]
        nn_indices = self.nbrs.kneighbors(X, return_distance=False)

        # np.unique returned classes_ sorted, so searchsorted maps each
        # neighbor's label to its class index without a Python-level dict
        # lookup per neighbor.
        label_idx = np.searchsorted(self.classes_, self.y_train[nn_indices])

        # Vote matrix: CM[i, c] = number of neighbors of sample i in class c.
        # np.add.at accumulates duplicate (row, col) pairs correctly, which a
        # plain fancy-index assignment would not.
        CM = np.zeros((n_samples, len(self.classes_)))
        rows = np.repeat(np.arange(n_samples), nn_indices.shape[1])
        np.add.at(CM, (rows, label_idx.ravel()), 1)

        # Weight the votes, then pick the best class per sample.
        CM *= self.class_weights
        return self.classes_[np.argmax(CM, axis=1)]
@@ -1,2 +1,14 @@
1
- from .measures import *
2
- from .protocol import *
1
from . import measures


# Registry mapping short metric identifiers to their implementations in the
# `measures` module.  These string keys are the names accepted wherever an
# error measure can be selected by name (e.g. evaluation protocols).
MEASURES = {
    "ae": measures.absolute_error,
    "mae": measures.mean_absolute_error,
    "nae": measures.normalized_absolute_error,
    "kld": measures.kullback_leibler_divergence,
    "nkld": measures.normalized_kullback_leibler_divergence,
    "nrae": measures.normalized_relative_absolute_error,
    "rae": measures.relative_absolute_error,
    "se": measures.squared_error,
    "mse": measures.mean_squared_error
}
@@ -0,0 +1,215 @@
1
+ import numpy as np
2
+
3
def process_inputs(prev_real, prev_pred):
    """
    .. :noindex:

    Normalize the two prevalence inputs for internal use.

    A dict is converted to a NumPy array of its values (insertion order
    preserved); any other input is passed through unchanged.
    """
    def _as_array(prev):
        if isinstance(prev, dict):
            return np.asarray(list(prev.values()))
        return prev

    return _as_array(prev_real), _as_array(prev_pred)
14
+
15
+
16
def absolute_error(prev_real, prev_pred):
    """
    Compute the absolute error for each class or a dictionary of errors if input is a dictionary.

    Parameters
    ----------
    prev_real : array-like or dict
        True prevalence values for each class. If a dictionary, keys are class names, and values are prevalences.

    prev_pred : array-like or dict
        Predicted prevalence values for each class. If a dictionary, keys are class names, and values are prevalences.

    Returns
    -------
    error : array-like or dict
        Absolute error for each class. If input is a dictionary, returns a dictionary with errors for each class.
    """
    # Array-like input: return the per-class deviations directly.
    if not isinstance(prev_real, dict):
        real_arr, pred_arr = process_inputs(prev_real, prev_pred)
        return np.abs(pred_arr - real_arr)

    # Dict input: keep the class names and rebuild a dict of float errors.
    class_names = list(prev_real.keys())
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    deviations = np.abs(pred_arr - real_arr)
    return dict(zip(class_names, map(float, deviations)))
40
+
41
+
42
+
43
def mean_absolute_error(prev_real, prev_pred):
    """
    Compute the mean absolute error between the real and predicted prevalences.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Mean absolute error across all classes.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    # absolute_error receives arrays here, so it returns per-class values.
    per_class = absolute_error(real_arr, pred_arr)
    return np.mean(per_class)
62
+
63
+
64
def kullback_leibler_divergence(prev_real, prev_pred):
    """
    Compute the Kullback-Leibler divergence between the real and predicted prevalences.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    divergence : array-like of shape (n_classes,)
        Kullback-Leibler divergence for each class.

    Notes
    -----
    NOTE(review): each term is wrapped in ``abs()``; the classical KL
    divergence sums *signed* terms ``p * log(p/q)`` — confirm the absolute
    value is intentional.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    log_ratio = np.log(real_arr / pred_arr)
    return real_arr * np.abs(log_ratio)
83
+
84
+
85
def squared_error(prev_real, prev_pred):
    """
    Compute the squared error between the real and predicted prevalences,
    averaged over the last (class) axis.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Mean squared error across all classes (per row for batched input).
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    squared_dev = (pred_arr - real_arr) ** 2
    return np.mean(squared_dev, axis=-1)
104
+
105
+
106
def mean_squared_error(prev_real, prev_pred):
    """
    Compute the mean squared error across all classes.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    mse : float
        Mean squared error across all classes.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    # squared_error already averages over the class axis; for batched input
    # this then averages the per-row values into one scalar.
    return np.mean(squared_error(real_arr, pred_arr))
125
+
126
+
127
def normalized_absolute_error(prev_real, prev_pred):
    """
    Compute the normalized absolute error between the real and predicted prevalences.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Normalized absolute error across all classes.

    Notes
    -----
    NOTE(review): the numerator uses the *mean* absolute error while the
    normalizer ``2 * (1 - min(prev_real))`` matches the sum-based NAE from
    the quantification literature — confirm the intended scaling.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    numerator = mean_absolute_error(real_arr, pred_arr)
    normalizer = 2 * (1 - np.min(real_arr))
    return numerator / normalizer
148
+
149
+
150
def normalized_kullback_leibler_divergence(prev_real, prev_pred):
    """
    Compute the normalized Kullback-Leibler divergence between the real and predicted prevalences.

    Applies the logistic-style squashing ``2 * e^kld / (e^kld + 1) - 1`` so
    each per-class divergence lands in [0, 1).

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    divergence : float
        Normalized Kullback-Leibler divergence across all classes.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    divergence = kullback_leibler_divergence(real_arr, pred_arr)
    exp_div = np.exp(divergence)
    return 2 * (exp_div / (exp_div + 1)) - 1
171
+
172
+
173
def relative_absolute_error(prev_real, prev_pred):
    """
    Compute the relative absolute error between the real and predicted prevalences.

    Each class's absolute error is divided by the true prevalence of that
    same class, and the per-class ratios are averaged::

        RAE = mean_i( |prev_pred_i - prev_real_i| / prev_real_i )

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,) or dict
        True prevalence values for each class. Must be non-zero.

    prev_pred : array-like of shape (n_classes,) or dict
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Relative absolute error across all classes.

    Notes
    -----
    Fixes an earlier implementation that divided the single scalar MAE by
    every true prevalence; that variant mis-attributes error to rare classes
    whenever the per-class errors differ.
    """
    # Accept dicts as well as arrays, mirroring the other measures.
    if isinstance(prev_real, dict):
        prev_real = np.asarray(list(prev_real.values()))
    if isinstance(prev_pred, dict):
        prev_pred = np.asarray(list(prev_pred.values()))
    return np.mean(np.abs(prev_pred - prev_real) / prev_real, axis=-1)
192
+
193
+
194
def normalized_relative_absolute_error(prev_real, prev_pred):
    """
    Compute the normalized relative absolute error between the real and predicted prevalences.

    The RAE is divided by its maximum attainable value for the given true
    distribution, bringing the score onto a comparable scale.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Normalized relative absolute error across all classes.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    rae = relative_absolute_error(real_arr, pred_arr)
    n_classes = len(real_arr)
    min_prev = np.min(real_arr)
    # Normalizer: worst-case RAE for this true distribution.
    normalizer = (n_classes - 1 + ((1 - min_prev) / min_prev)) / n_classes
    return rae / normalizer
215
+