mlquantify 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. mlquantify/__init__.py +0 -29
  2. mlquantify/adjust_counting/__init__.py +14 -0
  3. mlquantify/adjust_counting/_adjustment.py +365 -0
  4. mlquantify/adjust_counting/_base.py +247 -0
  5. mlquantify/adjust_counting/_counting.py +145 -0
  6. mlquantify/adjust_counting/_utils.py +114 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +335 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +161 -0
  13. mlquantify/likelihood/_classes.py +414 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +761 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +153 -0
  22. mlquantify/mixture/_classes.py +400 -0
  23. mlquantify/mixture/_utils.py +112 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +198 -0
  31. mlquantify/neighbors/_classes.py +159 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
  33. mlquantify/neighbors/_kde.py +270 -0
  34. mlquantify/neighbors/_utils.py +135 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +61 -0
  50. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.9.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -291
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.7.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
mlquantify/base.py CHANGED
@@ -1,559 +1,157 @@
1
- from abc import abstractmethod, ABC
1
+ from abc import ABC
2
2
  from sklearn.base import BaseEstimator
3
- from copy import deepcopy
4
- import numpy as np
5
- import joblib
6
3
 
7
- import mlquantify as mq
8
- from .utils.general import parallel, normalize_prevalence
4
+ from mlquantify.utils._tags import (
5
+ PredictionRequirements,
6
+ Tags,
7
+ TargetInputTags,
8
+ )
9
+ from mlquantify.utils._validation import validate_parameter_constraints
9
10
 
10
- class Quantifier(ABC, BaseEstimator):
11
- """Base class for all quantifiers, it defines the basic structure of a quantifier.
12
-
13
- Warning: Inheriting from this class does not provide dynamic use of multiclass or binary methods, it is necessary to implement the logic in the quantifier itself. If you want to use this feature, inherit from AggregativeQuantifier or NonAggregativeQuantifier.
14
-
15
- Inheriting from this class, it provides the following implementations:
16
-
17
- - properties for classes, n_class, is_multiclass and binary_data.
18
- - save_quantifier method to save the quantifier
19
-
20
- Read more in the :ref:`User Guide <creating_your_own_quantifier>`.
21
-
22
-
23
- Notes
24
- -----
25
- It's recommended to inherit from AggregativeQuantifier or NonAggregativeQuantifier, as they provide more functionality and flexibility for quantifiers.
26
- """
27
-
28
- @abstractmethod
29
- def fit(self, X, y) -> object: ...
30
-
31
- @abstractmethod
32
- def predict(self, X) -> dict: ...
33
-
34
- @property
35
- def classes(self) -> list:
36
- return self._classes
37
-
38
- @classes.setter
39
- def classes(self, classes):
40
- self._classes = sorted(list(classes))
41
-
42
- @property
43
- def n_class(self) -> list:
44
- return len(self._classes)
45
-
46
- @property
47
- def is_multiclass(self) -> bool:
48
- return True
49
11
 
50
- @property
51
- def binary_data(self) -> bool:
52
- return len(self._classes) == 2
53
-
54
-
55
- def save_quantifier(self, path: str=None) -> None:
56
- if not path:
57
- path = f"{self.__class__.__name__}.joblib"
58
- joblib.dump(self, path)
59
-
60
12
 
61
-
62
- class AggregativeQuantifier(Quantifier, ABC):
63
- """A base class for aggregative quantifiers.
13
+ class BaseQuantifier(ABC, BaseEstimator):
14
+ """Base class for all quantifiers in mlquantify.
64
15
 
65
- This class provides the basic structure for aggregative quantifiers, which are quantifiers that aggregates a classifier or learner inside to generate predictions.
16
+ Inheriting from this class provides default implementations for
66
17
 
67
- Inheriting from this class, it provides dynamic prediction for multiclass and binary data, making one-vs-all strategy for multiclass data with binary quantifiers.
18
+ - setting and getting parameters used by `GridSearchQ` and friends;
19
+ - saving/loading quantifier instances;
20
+ - parameter validation.
68
21
 
69
- Read more in the :ref:`User Guide <creating_your_own_quantifier>`.
22
+ Read more in :ref:`User Guide <rolling_your_own_quantifier>`.
70
23
 
71
24
 
72
25
  Notes
73
26
  -----
74
- All quantifiers should specify at least the learner attribute. Wich should inherit from BaseEstimator of scikit-learn.
27
+ All quantifiers should specify all the parameters that can be set
28
+ at the class level in their ``__init__`` as explicit keyword arguments.
29
+ (No `*args` or `**kwargs` allowed.)
75
30
 
76
- All quantifiers can return a dictionary with class:prevalence, a list or a numpy array.
77
-
78
31
 
79
32
  Examples
80
33
  --------
81
- Example 1: Multiclass Quantifier
82
- >>> from mlquantify.base import AggregativeQuantifier
83
- >>> from mlquantify.utils.general import get_real_prev
84
- >>> from sklearn.ensemble import RandomForestClassifier
85
- >>> from sklearn.model_selection import train_test_split
34
+ >>> from mlquantify.base import BaseQuantifier
86
35
  >>> import numpy as np
87
- >>> class MyQuantifier(AggregativeQuantifier):
88
- ... def __init__(self, learner, *, param):
89
- ... self.learner = learner
90
- ... self.param = param
91
- ... def _fit_method(self, X, y):
92
- ... self.learner.fit(X, y)
36
+ >>> class MyQuantifier(BaseQuantifier):
37
+ ... def __init__(self, param1=42, param2='default'):
38
+ ... self.param1 = param1
39
+ ... self.param2 = param2
40
+ ... def fit(self, X, y):
41
+ ... self.classes_ = np.unique(y)
93
42
  ... return self
94
- ... def _predict_method(self, X):
95
- ... predicted_labels = self.learner.predict(X)
96
- ... class_counts = np.array([np.count_nonzero(predicted_labels == _class) for _class in self.classes])
97
- ... return class_counts / len(predicted_labels)
98
- >>> quantifier = MyQuantifier(learner=RandomForestClassifier(), param=1)
99
- >>> quantifier.get_params(deep=False)
100
- {'learner': RandomForestClassifier(), 'param': 1}
101
- >>> # Sample data
102
- >>> X = np.array([[0.1, 0.2], [0.2, 0.1], [0.3, 0.4], [0.4, 0.3],
103
- ... [0.5, 0.6], [0.6, 0.5], [0.7, 0.8], [0.8, 0.7],
104
- ... [0.9, 1.0], [1.0, 0.9]])
105
- >>> y = np.array([0, 0, 0, 1, 0, 1, 0, 1, 0, 1]) # 40% positive (4 out of 10)
106
- >>> # Split the data into training and testing sets
107
- >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
108
- >>> # Fit the quantifier
109
- >>> quantifier.fit(X_train, y_train)
110
- None
111
- >>> # Real prevalence in the training set
112
- >>> get_real_prev(y_train)
113
- {0: 0.5714285714285714, 1: 0.42857142857142855}
114
- >>> # Predicted prevalence in the test set
115
- >>> quantifier.predict(X_test)
116
- {0: 0.6666666666666666, 1: 0.3333333333333333}
117
-
118
- Example 2: Binary Quantifier
119
- >>> from sklearn.svm import SVC
120
- >>> class BinaryQuantifier(AggregativeQuantifier):
121
- ... @property
122
- ... def is_multiclass(self):
123
- ... return False
124
- ... def __init__(self, learner):
125
- ... self.learner = learner
126
- ... def _fit_method(self, X, y):
127
- ... self.learner.fit(X, y)
128
- ... return self
129
- ... def _predict_method(self, X):
130
- ... predicted_labels = self.learner.predict(X)
131
- ... class_counts = np.array([np.count_nonzero(predicted_labels == _class) for _class in self.classes])
132
- ... return class_counts / len(predicted_labels)
133
- >>> binary_quantifier = BinaryQuantifier(learner=SVC(probability=True))
134
- >>> # Sample multiclass data
135
- >>> X = np.array([
136
- ... [0.1, 0.2], [0.2, 0.1], [0.3, 0.4], [0.4, 0.3],
137
- ... [0.5, 0.6], [0.6, 0.5], [0.7, 0.8], [0.8, 0.7],
138
- ... [0.9, 1.0], [1.0, 0.9], [1.1, 1.2], [1.2, 1.1],
139
- ... [1.3, 1.4], [1.4, 1.3], [1.5, 1.6], [1.6, 1.5],
140
- ... [1.7, 1.8], [1.8, 1.7], [1.9, 2.0], [2.0, 1.9]
141
- ... ])
142
- >>> # Update the labels to include a third class
143
- >>> y = np.array([0, 0, 0, 1, 0, 1, 0, 1, 2, 2, 0, 1, 0, 1, 0, 1, 2, 2, 0, 1])
144
- >>> # Split the data into training and testing sets
145
- >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
146
- >>> # Fit the binary quantifier
147
- >>> binary_quantifier.fit(X_train, y_train)
148
- None
149
- >>> # Real prevalence in the training set
150
- >>> get_real_prev(y_test)
151
- {0: 0.25, 1: 0.5, 2: 0.25}
152
- >>> preds = binary_quantifier.predict(X_test)
153
- >>> preds
154
- {0: 1.0, 1: 0.0, 2: 0.0}
43
+ ... def predict(self, X):
44
+ ... _, counts = np.unique(self.classes_, return_counts=True)
45
+ ... prevalence = counts / counts.sum()
46
+ ... return prevalence
47
+ >>> quantifier = MyQuantifier(param1=10, param2='custom')
48
+ >>> quantifier.get_params()
49
+ {'param1': 10, 'param2': 'custom'}
50
+ >>> X = np.random.rand(100, 10)
51
+ >>> y = np.random.randint(0, 2, size=100)
52
+ >>> quantifier.fit(X, y).predict(X)
53
+ [0.5 0.5]
155
54
  """
55
+
156
56
 
157
-
158
- def __init__(self):
159
- # Dictionary to hold binary quantifiers for each class.
160
- self.binary_quantifiers = {}
161
- self.learner_fitted = False
162
- self.cv_folds = 10
163
-
164
- def fit(self, X, y, learner_fitted=False, cv_folds: int = 10, n_jobs:int=1):
165
- """Fit the quantifier model.
57
+ _parameter_constraints: dict[str, list] = {}
58
+ skip_validation: bool = False
166
59
 
167
- Parameters
168
- ----------
169
- X : array-like
170
- Training features.
171
- y : array-like
172
- Training labels.
173
- learner_fitted : bool, default=False
174
- Whether the learner is already fitted.
175
- cv_folds : int, default=10
176
- Number of cross-validation folds.
177
- n_jobs : int, default=1
178
- Number of parallel jobs to run.
179
-
180
-
181
- Returns
182
- -------
183
- self : object
184
- The fitted quantifier instance.
185
-
186
-
187
- Notes
188
- -----
189
- The model dynamically determines whether to perform one-vs-all classification or
190
- to directly fit the data based on the type of the problem:
191
- - If the data is binary or inherently multiclass, the model fits directly using
192
- `_fit_method` without creating binary quantifiers.
193
- - For other cases, the model creates one binary quantifier per class using the
194
- one-vs-all approach, allowing for dynamic prediction based on the provided dataset.
195
- """
196
-
197
- self.n_jobs = n_jobs
198
- self.learner_fitted = learner_fitted
199
- self.cv_folds = cv_folds
200
-
201
- self.classes = np.unique(y)
202
-
203
- if self.binary_data or self.is_multiclass:
204
- return self._fit_method(X, y)
60
+ def _validate_params(self):
61
+ """Validate the parameters of the quantifier instance.
205
62
 
206
- # Making one vs all
207
- self.binary_quantifiers = {class_: deepcopy(self) for class_ in self.classes}
208
- parallel(self.delayed_fit, self.classes, self.n_jobs, X, y)
63
+ The expected types and values must be defined in the `_parameter_constraints`
64
+ class attribute as a dictionary. `param_name: list of constraints`. See
65
+ the docstring of `validate_parameter_constraints` for more details.
209
66
 
210
- return self
211
-
212
- def predict(self, X) -> dict:
213
- """Predict class prevalences for the given data.
214
-
215
- Parameters
216
- ----------
217
- X : array-like
218
- Test features.
219
-
220
- Returns
221
- -------
222
- dict
223
- A dictionary where keys are class labels and values are their predicted prevalences.
224
-
225
- Notes
226
- -----
227
- The prediction approach is dynamically chosen based on the data type:
228
- - For binary or inherently multiclass data, the model uses `_predict_method` to directly
229
- estimate class prevalences.
230
- - For other cases, the model performs one-vs-all prediction, where each binary quantifier
231
- estimates the prevalence of its respective class. The results are then normalized to
232
- ensure they form valid proportions.
233
67
  """
68
+ validate_parameter_constraints(
69
+ self._parameter_constraints,
70
+ self.get_params(deep=False),
71
+ caller_name=self.__class__.__name__,
72
+ )
234
73
 
235
- if self.binary_data or self.is_multiclass:
236
- prevalences = self._predict_method(X)
237
- return normalize_prevalence(prevalences, self.classes)
238
74
 
239
- # Making one vs all
240
- prevalences = np.asarray(parallel(self.delayed_predict, self.classes, self.n_jobs, X))
241
- return normalize_prevalence(prevalences, self.classes)
242
-
243
- @abstractmethod
244
- def _fit_method(self, X, y):
245
- """Abstract fit method that each aggregative quantification method must implement.
246
-
247
- Parameters
248
- ----------
249
- X : array-like
250
- Training features.
251
- y : array-like
252
- Training labels.
253
- """
254
- ...
255
-
256
- @abstractmethod
257
- def _predict_method(self, X) -> dict:
258
- """Abstract predict method that each aggregative quantification method must implement.
259
-
260
- Parameters
261
- ----------
262
- X : array-like
263
- Test data to generate class prevalences.
264
-
265
- Returns
266
- -------
267
- dict, list, or numpy array
268
- The predicted prevalences, which can be a dictionary where keys are class labels
269
- and values are their predicted prevalences, a list, or a numpy array.
270
- """
271
-
272
- ...
273
-
274
- @property
275
- def is_probabilistic(self) -> bool:
276
- """Check if the learner is probabilistic or not.
75
+ def __mlquantify_tags__(self):
76
+ return Tags(
77
+ has_estimator=None,
78
+ estimation_type=None,
79
+ estimator_function=None,
80
+ estimator_type=None,
81
+ aggregation_type=None,
82
+ target_input_tags=TargetInputTags(),
83
+ prediction_requirements=PredictionRequirements(),
84
+ requires_fit= True
85
+ )
277
86
 
278
- Returns
279
- -------
280
- bool
281
- True if the learner is probabilistic, False otherwise.
282
- """
283
- return False
284
-
285
-
286
- @property
287
- def learner(self):
288
- """Returns the learner_ object.
289
- Returns
290
- -------
291
- learner_ : object
292
- The learner_ object stored in the class instance.
293
- """
294
- return self.learner_
295
-
296
- @learner.setter
297
- def learner(self, value):
298
- """
299
- Sets the learner attribute.
300
- Parameters:
301
- value : any
302
- The value to be assigned to the learner_ attribute.
303
- """
304
- assert isinstance(value, BaseEstimator) or mq.ARGUMENTS_SETTED, "learner object is not an estimator, or you may change ARGUMENTS_SETTED to True"
305
- self.learner_ = value
306
-
307
- def fit_learner(self, X, y):
308
- """Fit the learner to the training data.
309
-
310
- Parameters
311
- ----------
312
- X : array-like
313
- Training features.
314
- y : array-like
315
- Training labels.
316
- """
317
- if self.learner is not None:
318
- if not self.learner_fitted:
319
- self.learner_.fit(X, y)
320
- elif mq.ARGUMENTS_SETTED:
321
- if self.is_probabilistic and mq.arguments["posteriors_test"] is not None:
322
- return
323
- elif not self.is_probabilistic and mq.arguments["y_pred"] is not None:
324
- return
87
+ def save_quantifier(self, path: str=None) -> None:
88
+ """Save the quantifier instance to a file."""
89
+ if not path:
90
+ path = f"{self.__class__.__name__}.joblib"
91
+ import joblib
92
+ joblib.dump(self, path)
325
93
 
326
- def predict_learner(self, X):
327
- """Predict the class labels or probabilities for the given data.
328
-
329
- Parameters
330
- ----------
331
- X : array-like
332
- Test features.
333
-
334
- Returns
335
- -------
336
- array-like
337
- The predicted class labels or probabilities.
338
- """
339
- if self.learner is not None:
340
- if self.is_probabilistic:
341
- return self.learner_.predict_proba(X)
342
- return self.learner_.predict(X)
343
- else:
344
- if mq.ARGUMENTS_SETTED:
345
- if self.is_probabilistic:
346
- return mq.arguments["posteriors_test"]
347
- return mq.arguments["y_pred"]
348
- else:
349
- raise ValueError("No learner object was set and no arguments were setted")
350
94
 
351
- def set_params(self, **params):
352
- """
353
- Set the parameters of this estimator.
354
- The method allows setting parameters for both the model and the learner.
355
- Parameters that match the model's attributes will be set directly on the model.
356
- Parameters prefixed with 'learner__' will be set on the learner if it exists.
357
- Parameters:
358
- -----------
359
- **params : dict
360
- Dictionary of parameters to set. Keys can be model attribute names or
361
- 'learner__' prefixed names for learner parameters.
362
- Returns:
363
- --------
364
- self : Quantifier
365
- Returns the instance of the quantifier with updated parameters itself.
366
- """
367
-
368
-
369
- # Model Params
370
- for key, value in params.items():
371
- if hasattr(self, key):
372
- setattr(self, key, value)
373
95
 
374
- # Learner Params
375
- if self.learner is not None:
376
- learner_params = {k.replace('learner__', ''): v for k, v in params.items() if 'learner__' in k}
377
- if learner_params:
378
- self.learner.set_params(**learner_params)
379
-
380
- return self
96
+ # ==================================================== #
97
+ # ====================== Mixins ====================== #
98
+ # ==================================================== #
381
99
 
382
-
383
- # MULTICLASS METHODS
384
-
385
- def delayed_fit(self, class_, X, y):
386
- """Delayed fit method for one-vs-all strategy, with parallel execution.
387
-
388
- Parameters
389
- ----------
390
- class_ : Any
391
- The class for which the model is being fitted.
392
- X : array-like
393
- Training features.
394
- y : array-like
395
- Training labels.
396
-
397
- Returns
398
- -------
399
- self : object
400
- Fitted binary quantifier for the given class.
401
- """
402
100
 
403
- y_class = (y == class_).astype(int)
404
- return self.binary_quantifiers[class_].fit(X, y_class)
101
+ class MetaquantifierMixin:
102
+ """Mixin class for meta-quantifiers.
405
103
 
406
- def delayed_predict(self, class_, X):
407
- """Delayed predict method for one-vs-all strategy, with parallel execution.
408
-
409
- Parameters
410
- ----------
411
- class_ : Any
412
- The class for which the model is making predictions.
413
- X : array-like
414
- Test features.
415
-
416
- Returns
417
- -------
418
- float
419
- Predicted prevalence for the given class.
420
- """
421
-
422
- return self.binary_quantifiers[class_].predict(X)[1]
423
-
424
-
425
- class NonAggregativeQuantifier(Quantifier):
426
- """Abstract base class for non-aggregative quantifiers.
104
+ This mixin is empty, and only exists to indicate that the quantifier is
105
+ a meta-quantifier.
427
106
 
428
- Non-aggregative quantifiers differ from aggregative quantifiers as they do not use
429
- an underlying classifier or specific learner for their predictions.
107
+ Examples
108
+ --------
109
+ >>> from mlquantify.base import BaseQuantifier, MetaquantifierMixin
110
+ >>> from mlquantify.adjust_counting import CC
111
+ >>> class MyMetaQuantifier(MetaquantifierMixin, BaseQuantifier):
112
+ ... def __init__(self, quantifier=None):
113
+ ... self.quantifier = quantifier
114
+ ... def fit(self, X, y):
115
+ ... if self.quantifier is not None:
116
+ ... self.quantifier.fit(X, y)
117
+ ... else:
118
+ ... self.quantifier = CC()
119
+ ... return self
120
+ >>> X = np.random.rand(100, 10)
121
+ >>> y = np.random.randint(0, 2, size=100)
122
+ >>> meta_qtf = MyMetaQuantifier().fit(X, y)
123
+ >>> meta_qtf.quantifier
124
+ CC()
125
+ """
126
+ ...
430
127
 
431
- This class defines the general structure and behavior for non-aggregative quantifiers,
432
- including support for multiclass data and dynamic handling of binary and multiclass problems.
433
128
 
434
- Notes
435
- -----
436
- This class requires implementing the `_fit_method` and `_predict_method` in subclasses
437
- to define how the quantification is performed. These methods handle the core logic for
438
- fitting and predicting class prevalences.
129
+ class ProtocolMixin:
130
+ """Mixin class for protocol-based quantifiers.
131
+
132
+ This mixin indicates that the quantifier follows a specific protocol,
133
+ by setting the estimation_type tag to "sample" and requires_fit to False.
439
134
 
440
135
  Examples
441
136
  --------
442
- >>> from myquantify.base import NonAggregativeQuantifier
443
- >>> import numpy as np
444
- >>> class MyNonAggregativeQuantifier(NonAggregativeQuantifier):
445
- ... def _fit_method(self, X, y):
446
- ... # Custom logic for fitting
447
- ... pass
448
- ... def _predict_method(self, X):
449
- ... # Custom logic for predicting
450
- ... return {0: 0.5, 1: 0.5}
451
- >>> quantifier = MyNonAggregativeQuantifier()
452
- >>> X = np.random.rand(10, 2)
453
- >>> y = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
454
- >>> quantifier.fit(X, y)
455
- <MyNonAggregativeQuantifier>
456
- >>> quantifier.predict(X)
457
- {0: 0.5, 1: 0.5}
137
+ >>> from mlquantify.base import BaseQuantifier, ProtocolMixin
138
+ >>> class MyProtocolQuantifier(ProtocolMixin, BaseQuantifier):
139
+ ... def __init__(self, param=None):
140
+ ... self.param = param
141
+ ... def sample_method(self, X):
142
+ ... indexes = np.random.choice(len(X), size=10, replace=False)
143
+ ... X_sample = X[indexes]
144
+ ... return X_sample
145
+ >>> X = np.random.rand(100, 10)
146
+ >>> protocol_qtf = MyProtocolQuantifier(param=5)
147
+ >>> X_sample = protocol_qtf.sample_method(X)
148
+ >>> X_sample.shape
149
+ (10, 10)
458
150
  """
459
-
460
- def fit(self, X, y, n_jobs: int = 1):
461
- """Fit the quantifier model to the training data.
462
-
463
- Parameters
464
- ----------
465
- X : array-like
466
- Training features.
467
- y : array-like
468
- Training labels.
469
- n_jobs : int, default=1
470
- Number of parallel jobs to run.
471
-
472
- Returns
473
- -------
474
- self : NonAggregativeQuantifier
475
- The fitted quantifier instance.
476
-
477
- Notes
478
- -----
479
- - For binary or inherently multiclass data, the model directly calls `_fit_method`
480
- to process the data.
481
- - For other cases, it creates one quantifier per class using a one-vs-all strategy
482
- and fits each quantifier independently in parallel.
483
- """
484
- self.n_jobs = n_jobs
485
- self.classes = np.unique(y)
486
- if self.binary_data or self.is_multiclass:
487
- return self._fit_method(X, y)
488
-
489
- # One-vs-all approach
490
- self.binary_quantifiers = {class_: deepcopy(self) for class_ in self.classes}
491
- parallel(self.delayed_fit, self.classes, self.n_jobs, X, y)
492
- return self
493
-
494
- def predict(self, X) -> dict:
495
- """Predict class prevalences for the given data.
496
-
497
- Parameters
498
- ----------
499
- X : array-like
500
- Test features.
501
-
502
- Returns
503
- -------
504
- dict
505
- A dictionary where keys are class labels and values are their predicted prevalences.
506
-
507
- Notes
508
- -----
509
- - For binary or inherently multiclass data, the model directly calls `_predict_method`.
510
- - For other cases, it performs one-vs-all prediction, combining the results into a normalized
511
- dictionary of class prevalences.
512
- """
513
- if self.binary_data or self.is_multiclass:
514
- prevalences = self._predict_method(X)
515
- return normalize_prevalence(prevalences, self.classes)
516
-
517
- # One-vs-all approach
518
- prevalences = np.asarray(parallel(self.delayed_predict, self.classes, self.n_jobs, X))
519
- return normalize_prevalence(prevalences, self.classes)
520
-
521
- @abstractmethod
522
- def _fit_method(self, X, y):
523
- """Abstract method for fitting the quantifier.
524
-
525
- Parameters
526
- ----------
527
- X : array-like
528
- Training features.
529
- y : array-like
530
- Training labels.
531
-
532
- Notes
533
- -----
534
- This method must be implemented in subclasses to define the fitting logic for
535
- the non-aggregative quantifier.
536
- """
537
- ...
538
-
539
- @abstractmethod
540
- def _predict_method(self, X) -> dict:
541
- """Abstract method for predicting class prevalences.
542
-
543
- Parameters
544
- ----------
545
- X : array-like
546
- Test features.
547
-
548
- Returns
549
- -------
550
- dict, list, or numpy array
551
- The predicted prevalences, which can be a dictionary where keys are class labels
552
- and values are their predicted prevalences, a list, or a numpy array.
553
-
554
- Notes
555
- -----
556
- This method must be implemented in subclasses to define the prediction logic for
557
- the non-aggregative quantifier.
558
- """
559
- ...
151
+
152
+ def __mlquantify_tags__(self):
153
+ tags = super().__mlquantify_tags__()
154
+ tags.estimation_type = "sample"
155
+ tags.requires_fit = False
156
+ return tags
157
+