mlquantify 0.0.11.7__tar.gz → 0.0.11.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/PKG-INFO +1 -1
  2. mlquantify-0.0.11.8/mlquantify/__init__.py +9 -0
  3. mlquantify-0.0.11.8/mlquantify/base.py +504 -0
  4. mlquantify-0.0.11.8/mlquantify/classification/__init__.py +1 -0
  5. mlquantify-0.0.11.8/mlquantify/classification/methods.py +160 -0
  6. mlquantify-0.0.11.8/mlquantify/evaluation/__init__.py +14 -0
  7. mlquantify-0.0.11.8/mlquantify/evaluation/measures.py +215 -0
  8. mlquantify-0.0.11.8/mlquantify/evaluation/protocol.py +641 -0
  9. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/mlquantify/methods/__init__.py +4 -7
  10. mlquantify-0.0.11.8/mlquantify/methods/aggregative.py +1031 -0
  11. mlquantify-0.0.11.8/mlquantify/methods/meta.py +472 -0
  12. mlquantify-0.0.11.8/mlquantify/methods/mixture_models.py +1000 -0
  13. mlquantify-0.0.11.8/mlquantify/methods/non_aggregative.py +136 -0
  14. mlquantify-0.0.11.8/mlquantify/methods/threshold_optimization.py +953 -0
  15. mlquantify-0.0.11.8/mlquantify/model_selection.py +377 -0
  16. mlquantify-0.0.11.8/mlquantify/plots.py +367 -0
  17. mlquantify-0.0.11.8/mlquantify/utils/__init__.py +2 -0
  18. mlquantify-0.0.11.8/mlquantify/utils/general.py +334 -0
  19. mlquantify-0.0.11.8/mlquantify/utils/method.py +449 -0
  20. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/mlquantify.egg-info/PKG-INFO +1 -1
  21. mlquantify-0.0.11.8/mlquantify.egg-info/SOURCES.txt +25 -0
  22. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/setup.py +1 -1
  23. mlquantify-0.0.11.7/mlquantify/__init__.py +0 -6
  24. mlquantify-0.0.11.7/mlquantify/base.py +0 -257
  25. mlquantify-0.0.11.7/mlquantify/classification/__init__.py +0 -1
  26. mlquantify-0.0.11.7/mlquantify/classification/pwkclf.py +0 -73
  27. mlquantify-0.0.11.7/mlquantify/evaluation/__init__.py +0 -2
  28. mlquantify-0.0.11.7/mlquantify/evaluation/measures/__init__.py +0 -26
  29. mlquantify-0.0.11.7/mlquantify/evaluation/measures/ae.py +0 -11
  30. mlquantify-0.0.11.7/mlquantify/evaluation/measures/bias.py +0 -16
  31. mlquantify-0.0.11.7/mlquantify/evaluation/measures/kld.py +0 -8
  32. mlquantify-0.0.11.7/mlquantify/evaluation/measures/mse.py +0 -12
  33. mlquantify-0.0.11.7/mlquantify/evaluation/measures/nae.py +0 -16
  34. mlquantify-0.0.11.7/mlquantify/evaluation/measures/nkld.py +0 -13
  35. mlquantify-0.0.11.7/mlquantify/evaluation/measures/nrae.py +0 -16
  36. mlquantify-0.0.11.7/mlquantify/evaluation/measures/rae.py +0 -12
  37. mlquantify-0.0.11.7/mlquantify/evaluation/measures/se.py +0 -12
  38. mlquantify-0.0.11.7/mlquantify/evaluation/protocol/_Protocol.py +0 -202
  39. mlquantify-0.0.11.7/mlquantify/evaluation/protocol/__init__.py +0 -2
  40. mlquantify-0.0.11.7/mlquantify/evaluation/protocol/app.py +0 -146
  41. mlquantify-0.0.11.7/mlquantify/evaluation/protocol/npp.py +0 -34
  42. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -62
  43. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -7
  44. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -27
  45. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -23
  46. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -21
  47. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -25
  48. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -41
  49. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -21
  50. mlquantify-0.0.11.7/mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -23
  51. mlquantify-0.0.11.7/mlquantify/methods/aggregative/__init__.py +0 -9
  52. mlquantify-0.0.11.7/mlquantify/methods/aggregative/cc.py +0 -32
  53. mlquantify-0.0.11.7/mlquantify/methods/aggregative/emq.py +0 -86
  54. mlquantify-0.0.11.7/mlquantify/methods/aggregative/fm.py +0 -72
  55. mlquantify-0.0.11.7/mlquantify/methods/aggregative/gac.py +0 -89
  56. mlquantify-0.0.11.7/mlquantify/methods/aggregative/gpac.py +0 -78
  57. mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -80
  58. mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -5
  59. mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/dys.py +0 -107
  60. mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/dys_syn.py +0 -136
  61. mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/hdy.py +0 -83
  62. mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/smm.py +0 -27
  63. mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/sord.py +0 -77
  64. mlquantify-0.0.11.7/mlquantify/methods/aggregative/pcc.py +0 -33
  65. mlquantify-0.0.11.7/mlquantify/methods/aggregative/pwk.py +0 -38
  66. mlquantify-0.0.11.7/mlquantify/methods/meta/__init__.py +0 -1
  67. mlquantify-0.0.11.7/mlquantify/methods/meta/ensemble.py +0 -236
  68. mlquantify-0.0.11.7/mlquantify/methods/non_aggregative/__init__.py +0 -1
  69. mlquantify-0.0.11.7/mlquantify/methods/non_aggregative/hdx.py +0 -71
  70. mlquantify-0.0.11.7/mlquantify/model_selection.py +0 -232
  71. mlquantify-0.0.11.7/mlquantify/plots/__init__.py +0 -2
  72. mlquantify-0.0.11.7/mlquantify/plots/distribution_plot.py +0 -149
  73. mlquantify-0.0.11.7/mlquantify/plots/protocol_plot.py +0 -193
  74. mlquantify-0.0.11.7/mlquantify/utils/__init__.py +0 -2
  75. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/__init__.py +0 -8
  76. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/convert_col_to_array.py +0 -13
  77. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -29
  78. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/get_real_prev.py +0 -9
  79. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/load_quantifier.py +0 -4
  80. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/make_prevs.py +0 -23
  81. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/normalize.py +0 -20
  82. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/parallel.py +0 -10
  83. mlquantify-0.0.11.7/mlquantify/utils/general_purposes/round_protocol_df.py +0 -14
  84. mlquantify-0.0.11.7/mlquantify/utils/method_purposes/__init__.py +0 -6
  85. mlquantify-0.0.11.7/mlquantify/utils/method_purposes/distances.py +0 -21
  86. mlquantify-0.0.11.7/mlquantify/utils/method_purposes/getHist.py +0 -13
  87. mlquantify-0.0.11.7/mlquantify/utils/method_purposes/get_scores.py +0 -33
  88. mlquantify-0.0.11.7/mlquantify/utils/method_purposes/moss.py +0 -16
  89. mlquantify-0.0.11.7/mlquantify/utils/method_purposes/ternary_search.py +0 -14
  90. mlquantify-0.0.11.7/mlquantify/utils/method_purposes/tprfpr.py +0 -42
  91. mlquantify-0.0.11.7/mlquantify.egg-info/SOURCES.txt +0 -76
  92. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/README.md +0 -0
  93. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/mlquantify.egg-info/dependency_links.txt +0 -0
  94. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/mlquantify.egg-info/requires.txt +0 -0
  95. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/mlquantify.egg-info/top_level.txt +0 -0
  96. {mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/setup.cfg +0 -0
{mlquantify-0.0.11.7 → mlquantify-0.0.11.8}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mlquantify
-Version: 0.0.11.7
+Version: 0.0.11.8
 Summary: Quantification Library
 Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
 Maintainer: Luiz Fernando Luth Junior
mlquantify-0.0.11.8/mlquantify/__init__.py (new file)
@@ -0,0 +1,9 @@
+"mlquantify, a Python package for quantification"
+
+from . import base
+from . import model_selection
+from . import plots
+from . import classification
+from . import evaluation
+from . import methods
+from . import utils
mlquantify-0.0.11.8/mlquantify/base.py (new file)
@@ -0,0 +1,504 @@
+from abc import abstractmethod, ABC
+from sklearn.base import BaseEstimator
+from copy import deepcopy
+import numpy as np
+import joblib
+
+
+from .utils.general import parallel, normalize_prevalence
+
+class Quantifier(ABC, BaseEstimator):
+    """Base class for all quantifiers, defining the basic structure of a quantifier.
+
+    Warning: Inheriting from this class does not provide dynamic handling of multiclass or binary methods; that logic must be implemented in the quantifier itself. If you want this feature, inherit from AggregativeQuantifier or NonAggregativeQuantifier.
+
+    Inheriting from this class provides the following implementations:
+
+    - properties for classes, n_class, is_multiclass and binary_data.
+    - save_quantifier method to save the quantifier.
+
+    Read more in the :ref:`User Guide <creating_your_own_quantifier>`.
+
+
+    Notes
+    -----
+    It is recommended to inherit from AggregativeQuantifier or NonAggregativeQuantifier, as they provide more functionality and flexibility for quantifiers.
+    """
+
+    @abstractmethod
+    def fit(self, X, y) -> object: ...
+
+    @abstractmethod
+    def predict(self, X) -> dict: ...
+
+    @property
+    def classes(self) -> list:
+        return self._classes
+
+    @classes.setter
+    def classes(self, classes):
+        self._classes = sorted(list(classes))
+
+    @property
+    def n_class(self) -> int:
+        return len(self._classes)
+
+    @property
+    def is_multiclass(self) -> bool:
+        return True
+
+    @property
+    def binary_data(self) -> bool:
+        return len(self._classes) == 2
+
+
+    def save_quantifier(self, path: str = None) -> None:
+        if not path:
+            path = f"{self.__class__.__name__}.joblib"
+        joblib.dump(self, path)
+
+
+
+class AggregativeQuantifier(Quantifier, ABC):
+    """A base class for aggregative quantifiers.
+
+    This class provides the basic structure for aggregative quantifiers, which are quantifiers that wrap an underlying classifier or learner and aggregate its outputs to generate predictions.
+
+    Inheriting from this class provides dynamic prediction for multiclass and binary data, applying a one-vs-all strategy when a binary quantifier is used on multiclass data.
+
+    Read more in the :ref:`User Guide <creating_your_own_quantifier>`.
+
+
+    Notes
+    -----
+    All quantifiers should specify at least the learner attribute, which should inherit from scikit-learn's BaseEstimator.
+
+    All quantifiers can return a dictionary mapping class to prevalence, a list, or a numpy array.
+
+
+    Examples
+    --------
+    Example 1: Multiclass Quantifier
+    >>> from mlquantify.base import AggregativeQuantifier
+    >>> from mlquantify.utils.general import get_real_prev
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> from sklearn.model_selection import train_test_split
+    >>> import numpy as np
+    >>> class MyQuantifier(AggregativeQuantifier):
+    ...     def __init__(self, learner, *, param):
+    ...         self.learner = learner
+    ...         self.param = param
+    ...     def _fit_method(self, X, y):
+    ...         self.learner.fit(X, y)
+    ...         return self
+    ...     def _predict_method(self, X):
+    ...         predicted_labels = self.learner.predict(X)
+    ...         class_counts = np.array([np.count_nonzero(predicted_labels == _class) for _class in self.classes])
+    ...         return class_counts / len(predicted_labels)
+    >>> quantifier = MyQuantifier(learner=RandomForestClassifier(), param=1)
+    >>> quantifier.get_params(deep=False)
+    {'learner': RandomForestClassifier(), 'param': 1}
+    >>> # Sample data
+    >>> X = np.array([[0.1, 0.2], [0.2, 0.1], [0.3, 0.4], [0.4, 0.3],
+    ...               [0.5, 0.6], [0.6, 0.5], [0.7, 0.8], [0.8, 0.7],
+    ...               [0.9, 1.0], [1.0, 0.9]])
+    >>> y = np.array([0, 0, 0, 1, 0, 1, 0, 1, 0, 1])  # 40% positive (4 out of 10)
+    >>> # Split the data into training and testing sets
+    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+    >>> # Fit the quantifier
+    >>> quantifier.fit(X_train, y_train)
+    MyQuantifier(learner=RandomForestClassifier(), param=1)
+    >>> # Real prevalence in the training set
+    >>> get_real_prev(y_train)
+    {0: 0.5714285714285714, 1: 0.42857142857142855}
+    >>> # Predicted prevalence in the test set
+    >>> quantifier.predict(X_test)
+    {0: 0.6666666666666666, 1: 0.3333333333333333}
+
+    Example 2: Binary Quantifier
+    >>> from sklearn.svm import SVC
+    >>> class BinaryQuantifier(AggregativeQuantifier):
+    ...     @property
+    ...     def is_multiclass(self):
+    ...         return False
+    ...     def __init__(self, learner):
+    ...         self.learner = learner
+    ...     def _fit_method(self, X, y):
+    ...         self.learner.fit(X, y)
+    ...         return self
+    ...     def _predict_method(self, X):
+    ...         predicted_labels = self.learner.predict(X)
+    ...         class_counts = np.array([np.count_nonzero(predicted_labels == _class) for _class in self.classes])
+    ...         return class_counts / len(predicted_labels)
+    >>> binary_quantifier = BinaryQuantifier(learner=SVC(probability=True))
+    >>> # Sample data with three classes
+    >>> X = np.array([
+    ...     [0.1, 0.2], [0.2, 0.1], [0.3, 0.4], [0.4, 0.3],
+    ...     [0.5, 0.6], [0.6, 0.5], [0.7, 0.8], [0.8, 0.7],
+    ...     [0.9, 1.0], [1.0, 0.9], [1.1, 1.2], [1.2, 1.1],
+    ...     [1.3, 1.4], [1.4, 1.3], [1.5, 1.6], [1.6, 1.5],
+    ...     [1.7, 1.8], [1.8, 1.7], [1.9, 2.0], [2.0, 1.9]
+    ... ])
+    >>> # Labels including a third class
+    >>> y = np.array([0, 0, 0, 1, 0, 1, 0, 1, 2, 2, 0, 1, 0, 1, 0, 1, 2, 2, 0, 1])
+    >>> # Split the data into training and testing sets
+    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
+    >>> # Fit the binary quantifier
+    >>> binary_quantifier.fit(X_train, y_train)
+    BinaryQuantifier(learner=SVC(probability=True))
+    >>> # Real prevalence in the test set
+    >>> get_real_prev(y_test)
+    {0: 0.25, 1: 0.5, 2: 0.25}
+    >>> preds = binary_quantifier.predict(X_test)
+    >>> preds
+    {0: 1.0, 1: 0.0, 2: 0.0}
+    """
+
+
+    def __init__(self):
+        # Dictionary to hold binary quantifiers for each class.
+        self.binary_quantifiers = {}
+        self.learner_fitted = False
+        self.cv_folds = 10
+
+    def fit(self, X, y, learner_fitted=False, cv_folds: int = 10, n_jobs: int = 1):
+        """Fit the quantifier model.
+
+        Parameters
+        ----------
+        X : array-like
+            Training features.
+        y : array-like
+            Training labels.
+        learner_fitted : bool, default=False
+            Whether the learner is already fitted.
+        cv_folds : int, default=10
+            Number of cross-validation folds.
+        n_jobs : int, default=1
+            Number of parallel jobs to run.
+
+
+        Returns
+        -------
+        self : object
+            The fitted quantifier instance.
+
+
+        Notes
+        -----
+        The model dynamically determines whether to perform one-vs-all classification or
+        to directly fit the data, based on the type of the problem:
+        - If the data is binary or inherently multiclass, the model fits directly using
+          `_fit_method` without creating binary quantifiers.
+        - For other cases, the model creates one binary quantifier per class using the
+          one-vs-all approach, allowing for dynamic prediction based on the provided dataset.
+        """
+
+        self.n_jobs = n_jobs
+        self.learner_fitted = learner_fitted
+        self.cv_folds = cv_folds
+
+        self.classes = np.unique(y)
+
+        if self.binary_data or self.is_multiclass:
+            return self._fit_method(X, y)
+
+        # One-vs-all
+        self.binary_quantifiers = {class_: deepcopy(self) for class_ in self.classes}
+        parallel(self.delayed_fit, self.classes, self.n_jobs, X, y)
+
+        return self
+
+    def predict(self, X) -> dict:
+        """Predict class prevalences for the given data.
+
+        Parameters
+        ----------
+        X : array-like
+            Test features.
+
+        Returns
+        -------
+        dict
+            A dictionary where keys are class labels and values are their predicted prevalences.
+
+        Notes
+        -----
+        The prediction approach is dynamically chosen based on the data type:
+        - For binary or inherently multiclass data, the model uses `_predict_method` to directly
+          estimate class prevalences.
+        - For other cases, the model performs one-vs-all prediction, where each binary quantifier
+          estimates the prevalence of its respective class. The results are then normalized to
+          ensure they form valid proportions.
+        """
+
+        if self.binary_data or self.is_multiclass:
+            prevalences = self._predict_method(X)
+            return normalize_prevalence(prevalences, self.classes)
+
+        # One-vs-all
+        prevalences = np.asarray(parallel(self.delayed_predict, self.classes, self.n_jobs, X))
+        return normalize_prevalence(prevalences, self.classes)
+
+    @abstractmethod
+    def _fit_method(self, X, y):
+        """Abstract fit method that each aggregative quantification method must implement.
+
+        Parameters
+        ----------
+        X : array-like
+            Training features.
+        y : array-like
+            Training labels.
+        """
+        ...
+
+    @abstractmethod
+    def _predict_method(self, X) -> dict:
+        """Abstract predict method that each aggregative quantification method must implement.
+
+        Parameters
+        ----------
+        X : array-like
+            Test data to generate class prevalences.
+
+        Returns
+        -------
+        dict, list, or numpy array
+            The predicted prevalences, which can be a dictionary where keys are class labels
+            and values are their predicted prevalences, a list, or a numpy array.
+        """
+
+        ...
+
+    @property
+    def learner(self):
+        """Return the learner_ object.
+        Returns
+        -------
+        learner_ : object
+            The learner_ object stored in the class instance.
+        """
+        return self.learner_
+
+    @learner.setter
+    def learner(self, value):
+        """
+        Set the learner attribute.
+        Parameters:
+        value : any
+            The value to be assigned to the learner_ attribute.
+        """
+
+        self.learner_ = value
+
+
+    def set_params(self, **params):
+        """
+        Set the parameters of this estimator.
+        The method allows setting parameters for both the model and the learner.
+        Parameters that match the model's attributes will be set directly on the model.
+        Parameters prefixed with 'learner__' will be set on the learner if it exists.
+        Parameters
+        ----------
+        **params : dict
+            Dictionary of parameters to set. Keys can be model attribute names or
+            'learner__' prefixed names for learner parameters.
+        Returns
+        -------
+        self : Quantifier
+            The quantifier instance with updated parameters.
+        """
+
+
+        # Model params
+        for key, value in params.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+
+        # Learner params
+        if self.learner is not None:
+            learner_params = {k.replace('learner__', ''): v for k, v in params.items() if 'learner__' in k}
+            if learner_params:
+                self.learner.set_params(**learner_params)
+
+        return self
+
+
+    # MULTICLASS METHODS
+
+    def delayed_fit(self, class_, X, y):
+        """Delayed fit method for the one-vs-all strategy, with parallel execution.
+
+        Parameters
+        ----------
+        class_ : Any
+            The class for which the model is being fitted.
+        X : array-like
+            Training features.
+        y : array-like
+            Training labels.
+
+        Returns
+        -------
+        self : object
+            Fitted binary quantifier for the given class.
+        """
+
+        y_class = (y == class_).astype(int)
+        return self.binary_quantifiers[class_].fit(X, y_class)
+
+    def delayed_predict(self, class_, X):
+        """Delayed predict method for the one-vs-all strategy, with parallel execution.
+
+        Parameters
+        ----------
+        class_ : Any
+            The class for which the model is making predictions.
+        X : array-like
+            Test features.
+
+        Returns
+        -------
+        float
+            Predicted prevalence for the given class.
+        """
+
+        return self.binary_quantifiers[class_].predict(X)[1]
+
+
+class NonAggregativeQuantifier(Quantifier):
+    """Abstract base class for non-aggregative quantifiers.
+
+    Non-aggregative quantifiers differ from aggregative quantifiers in that they do not use
+    an underlying classifier or specific learner for their predictions.
+
+    This class defines the general structure and behavior for non-aggregative quantifiers,
+    including support for multiclass data and dynamic handling of binary and multiclass problems.
+
+    Notes
+    -----
+    This class requires implementing `_fit_method` and `_predict_method` in subclasses
+    to define how the quantification is performed. These methods handle the core logic for
+    fitting and predicting class prevalences.
+
+    Examples
+    --------
+    >>> from mlquantify.base import NonAggregativeQuantifier
+    >>> import numpy as np
+    >>> class MyNonAggregativeQuantifier(NonAggregativeQuantifier):
+    ...     def _fit_method(self, X, y):
+    ...         # Custom logic for fitting
+    ...         return self
+    ...     def _predict_method(self, X):
+    ...         # Custom logic for predicting
+    ...         return {0: 0.5, 1: 0.5}
+    >>> quantifier = MyNonAggregativeQuantifier()
+    >>> X = np.random.rand(10, 2)
+    >>> y = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    >>> quantifier.fit(X, y)
+    MyNonAggregativeQuantifier()
+    >>> quantifier.predict(X)
+    {0: 0.5, 1: 0.5}
+    """
+
+    def fit(self, X, y, n_jobs: int = 1):
+        """Fit the quantifier model to the training data.
+
+        Parameters
+        ----------
+        X : array-like
+            Training features.
+        y : array-like
+            Training labels.
+        n_jobs : int, default=1
+            Number of parallel jobs to run.
+
+        Returns
+        -------
+        self : NonAggregativeQuantifier
+            The fitted quantifier instance.
+
+        Notes
+        -----
+        - For binary or inherently multiclass data, the model directly calls `_fit_method`
+          to process the data.
+        - For other cases, it creates one quantifier per class using a one-vs-all strategy
+          and fits each quantifier independently in parallel.
+        """
+        self.n_jobs = n_jobs
+        self.classes = np.unique(y)
+        if self.binary_data or self.is_multiclass:
+            return self._fit_method(X, y)
+
+        # One-vs-all approach
+        self.binary_quantifiers = {class_: deepcopy(self) for class_ in self.classes}
+        parallel(self.delayed_fit, self.classes, self.n_jobs, X, y)
+        return self
+
+    def predict(self, X) -> dict:
+        """Predict class prevalences for the given data.
+
+        Parameters
+        ----------
+        X : array-like
+            Test features.
+
+        Returns
+        -------
+        dict
+            A dictionary where keys are class labels and values are their predicted prevalences.
+
+        Notes
+        -----
+        - For binary or inherently multiclass data, the model directly calls `_predict_method`.
+        - For other cases, it performs one-vs-all prediction, combining the results into a normalized
+          dictionary of class prevalences.
+        """
+        if self.binary_data or self.is_multiclass:
+            prevalences = self._predict_method(X)
+            return normalize_prevalence(prevalences, self.classes)
+
+        # One-vs-all approach
+        prevalences = np.asarray(parallel(self.delayed_predict, self.classes, self.n_jobs, X))
+        return normalize_prevalence(prevalences, self.classes)
+
+    @abstractmethod
+    def _fit_method(self, X, y):
+        """Abstract method for fitting the quantifier.
+
+        Parameters
+        ----------
+        X : array-like
+            Training features.
+        y : array-like
+            Training labels.
+
+        Notes
+        -----
+        This method must be implemented in subclasses to define the fitting logic for
+        the non-aggregative quantifier.
+        """
+        ...
+
+    @abstractmethod
+    def _predict_method(self, X) -> dict:
+        """Abstract method for predicting class prevalences.
+
+        Parameters
+        ----------
+        X : array-like
+            Test features.
+
+        Returns
+        -------
+        dict, list, or numpy array
+            The predicted prevalences, which can be a dictionary where keys are class labels
+            and values are their predicted prevalences, a list, or a numpy array.
+
+        Notes
+        -----
+        This method must be implemented in subclasses to define the prediction logic for
+        the non-aggregative quantifier.
+        """
+        ...
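The one-vs-all machinery above leans on two helpers imported from mlquantify/utils/general.py, a file whose contents are not part of this diff excerpt. As a reading aid, here is a minimal sketch of what those helpers plausibly do, inferred purely from the call sites in fit and predict; the actual implementations in the package may differ. Note also that NonAggregativeQuantifier's one-vs-all branch calls self.delayed_fit / self.delayed_predict, which only AggregativeQuantifier defines in this file.

    # Sketch only: inferred from base.py's call sites, not copied from the package.
    import numpy as np
    from joblib import Parallel, delayed

    def parallel(func, iterable, n_jobs, *args):
        # Applies func to each element of iterable, forwarding the extra positional
        # args, e.g. parallel(self.delayed_fit, self.classes, self.n_jobs, X, y).
        return Parallel(n_jobs=n_jobs)(delayed(func)(item, *args) for item in iterable)

    def normalize_prevalence(prevalences, classes):
        # Accepts a dict, list, or array (the three return types _predict_method
        # allows) and returns a class -> prevalence dict that sums to 1.
        if isinstance(prevalences, dict):
            prevalences = np.array([prevalences[c] for c in classes])
        prevalences = np.asarray(prevalences, dtype=float)
        total = prevalences.sum()
        if total > 0:
            prevalences = prevalences / total
        else:
            prevalences = np.full(len(classes), 1.0 / len(classes))
        return dict(zip(classes, prevalences))

That normalize_prevalence returns a dict keyed by class label is consistent with delayed_predict reading predict(X)[1], i.e. the prevalence of the positive class in each one-vs-all subproblem.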
mlquantify-0.0.11.8/mlquantify/classification/__init__.py (new file)
@@ -0,0 +1 @@
+from .methods import *
mlquantify-0.0.11.8/mlquantify/classification/methods.py (new file)
@@ -0,0 +1,160 @@
+from sklearn.neighbors import NearestNeighbors
+from sklearn.base import BaseEstimator
+import numpy as np
+import pandas as pd
+
+class PWKCLF(BaseEstimator):
+    """
+    Learner based on k-Nearest Neighbors (KNN) for use in the PWK method.
+
+    This classifier adjusts the influence of neighbors using class weights
+    derived from the `alpha` parameter. The `alpha` parameter controls the
+    influence of class imbalance.
+
+    Parameters
+    ----------
+    alpha : float, default=1
+        Controls the influence of class imbalance. Must be >= 1.
+
+    n_neighbors : int, default=10
+        Number of neighbors to use.
+
+    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
+        Algorithm to compute nearest neighbors.
+
+    metric : str, default='euclidean'
+        Distance metric to use.
+
+    leaf_size : int, default=30
+        Leaf size passed to the tree-based algorithms.
+
+    p : int, default=2
+        Power parameter for the Minkowski metric.
+
+    metric_params : dict, optional
+        Additional keyword arguments for the metric function.
+
+    n_jobs : int, optional
+        Number of parallel jobs to run for neighbors search.
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_breast_cancer
+    >>> from sklearn.model_selection import train_test_split
+    >>> from mlquantify.methods.aggregative import PWK
+    >>> from mlquantify.utils.general import get_real_prev
+    >>> from mlquantify.classification import PWKCLF
+    >>>
+    >>> # Load dataset
+    >>> features, target = load_breast_cancer(return_X_y=True)
+    >>>
+    >>> # Split into training and testing sets
+    >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=32)
+    >>>
+    >>> # Create and configure the PWKCLF learner
+    >>> learner = PWKCLF(alpha=1, n_neighbors=10)
+    >>>
+    >>> # Create the PWK quantifier
+    >>> model = PWK(learner=learner)
+    >>>
+    >>> # Train the model
+    >>> model.fit(X_train, y_train)
+    >>>
+    >>> # Predict prevalences
+    >>> y_pred = model.predict(X_test)
+    >>>
+    >>> # Display results
+    >>> print("Real:", get_real_prev(y_test))
+    >>> print("PWK:", y_pred)
+    """
+
+    def __init__(self,
+                 alpha=1,
+                 n_neighbors=10,
+                 algorithm="auto",
+                 metric="euclidean",
+                 leaf_size=30,
+                 p=2,
+                 metric_params=None,
+                 n_jobs=None):
+        if alpha < 1:
+            raise ValueError("alpha must not be smaller than 1")
+
+        self.alpha = alpha
+        self.n_neighbors = n_neighbors
+
+        self.nbrs = NearestNeighbors(n_neighbors=n_neighbors,
+                                     algorithm=algorithm,
+                                     leaf_size=leaf_size,
+                                     metric=metric,
+                                     p=p,
+                                     metric_params=metric_params,
+                                     n_jobs=n_jobs)
+
+        self.classes_ = None
+        self.class_to_index = None
+        self.class_weights = None
+        self.y_train = None
+
+    def fit(self, X, y):
+        """
+        Fit the PWKCLF model to the training data.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training features.
+
+        y : array-like of shape (n_samples,)
+            Training labels.
+
+        Returns
+        -------
+        self : object
+            The fitted instance.
+        """
+        n_samples = X.shape[0]
+        if n_samples < self.n_neighbors:
+            self.nbrs.set_params(n_neighbors=n_samples)
+
+        self.y_train = y
+
+        if isinstance(y, pd.DataFrame):
+            self.y_train = y.reset_index(drop=True)
+
+        unique_classes, class_counts = np.unique(y, return_counts=True)
+        self.classes_ = unique_classes
+        self.class_to_index = dict(zip(self.classes_, range(len(self.classes_))))
+
+        min_class_count = np.min(class_counts)
+        self.class_weights = (class_counts / min_class_count) ** (-1.0 / self.alpha)
+        self.nbrs.fit(X)
+        return self
+
+    def predict(self, X):
+        """
+        Predict class labels for samples in X.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data to predict.
+
+        Returns
+        -------
+        y_pred : array of shape (n_samples,)
+            Predicted class labels.
+        """
+        n_samples = X.shape[0]
+        nn_indices = self.nbrs.kneighbors(X, return_distance=False)
+
+        CM = np.zeros((n_samples, len(self.classes_)))
+
+        for i in range(n_samples):
+            for j in nn_indices[i]:
+                CM[i, self.class_to_index[self.y_train[j]]] += 1
+
+        CM = np.multiply(CM, self.class_weights)
+        predictions = np.apply_along_axis(np.argmax, axis=1, arr=CM)
+
+        return self.classes_[predictions]
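PWKCLF's only departure from plain k-NN voting is the weight vector computed in fit: each class c receives weight (n_c / min(n))^(-1/alpha), so neighbors from minority classes count for more. A small worked example of that formula, with made-up counts:

    # Worked example of PWKCLF's class weighting; the counts are hypothetical.
    import numpy as np

    class_counts = np.array([80, 20])  # imbalanced toy training set
    for alpha in (1, 2, 4):
        weights = (class_counts / class_counts.min()) ** (-1.0 / alpha)
        print(alpha, weights)
    # alpha=1 -> [0.25  1.]    full inverse-frequency correction
    # alpha=2 -> [0.5   1.]    softer correction
    # alpha=4 -> [0.707 1.]    weights approach 1 as alpha grows

So alpha=1 fully rebalances the classes, while larger values of alpha fade the correction toward unweighted k-NN.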
mlquantify-0.0.11.8/mlquantify/evaluation/__init__.py (new file)
@@ -0,0 +1,14 @@
+from . import measures
+
+
+MEASURES = {
+    "ae": measures.absolute_error,
+    "mae": measures.mean_absolute_error,
+    "nae": measures.normalized_absolute_error,
+    "kld": measures.kullback_leibler_divergence,
+    "nkld": measures.normalized_kullback_leibler_divergence,
+    "nrae": measures.normalized_relative_absolute_error,
+    "rae": measures.relative_absolute_error,
+    "se": measures.squared_error,
+    "mse": measures.mean_squared_error
+}
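MEASURES is a name-to-function registry, which makes it easy to select an error measure from a string (for example, a protocol or model-selection parameter). The measure signatures live in mlquantify/evaluation/measures.py, which is not reproduced in this excerpt; the sketch below assumes the usual quantification convention of passing the true and the predicted prevalence vectors:

    # Hypothetical usage of the MEASURES registry; the (prev_true, prev_pred)
    # signature is an assumption, since measures.py is not shown in this diff.
    import numpy as np
    from mlquantify.evaluation import MEASURES

    prev_true = np.array([0.7, 0.3])   # real class prevalences
    prev_pred = np.array([0.6, 0.4])   # prevalences estimated by a quantifier

    for name in ("ae", "se", "kld"):
        print(name, MEASURES[name](prev_true, prev_pred))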