mlquantify 0.1.19__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. mlquantify-0.1.21/LICENSE +28 -0
  2. {mlquantify-0.1.19/mlquantify.egg-info → mlquantify-0.1.21}/PKG-INFO +13 -18
  3. {mlquantify-0.1.19 → mlquantify-0.1.21}/README.md +11 -18
  4. mlquantify-0.1.21/VERSION.txt +1 -0
  5. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/__init__.py +2 -1
  6. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/adjust_counting/__init__.py +6 -5
  7. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/adjust_counting/_adjustment.py +208 -37
  8. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/adjust_counting/_base.py +5 -6
  9. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/adjust_counting/_counting.py +10 -7
  10. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/likelihood/__init__.py +0 -2
  11. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/likelihood/_classes.py +45 -199
  12. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/meta/_classes.py +50 -42
  13. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/mixture/__init__.py +2 -1
  14. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/mixture/_classes.py +310 -15
  15. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/model_selection/_search.py +1 -1
  16. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/neighbors/_base.py +15 -15
  17. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/neighbors/_classes.py +2 -2
  18. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/neighbors/_kde.py +6 -6
  19. mlquantify-0.1.21/mlquantify/neural/__init__.py +1 -0
  20. mlquantify-0.1.21/mlquantify/neural/_base.py +0 -0
  21. mlquantify-0.1.21/mlquantify/neural/_classes.py +609 -0
  22. mlquantify-0.1.21/mlquantify/neural/_perm_invariant.py +0 -0
  23. mlquantify-0.1.21/mlquantify/neural/_utils.py +0 -0
  24. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/__init__.py +2 -1
  25. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_constraints.py +2 -0
  26. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_validation.py +9 -0
  27. {mlquantify-0.1.19 → mlquantify-0.1.21/mlquantify.egg-info}/PKG-INFO +13 -18
  28. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify.egg-info/SOURCES.txt +5 -1
  29. mlquantify-0.1.19/VERSION.txt +0 -1
  30. mlquantify-0.1.19/mlquantify/likelihood/_base.py +0 -147
  31. mlquantify-0.1.19/mlquantify/neural/__init__.py +0 -1
  32. {mlquantify-0.1.19 → mlquantify-0.1.21}/MANIFEST.in +0 -0
  33. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/adjust_counting/_utils.py +0 -0
  34. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/base.py +0 -0
  35. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/base_aggregative.py +0 -0
  36. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/calibration.py +0 -0
  37. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/confidence.py +0 -0
  38. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/meta/__init__.py +0 -0
  39. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/metrics/__init__.py +0 -0
  40. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/metrics/_oq.py +0 -0
  41. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/metrics/_rq.py +0 -0
  42. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/metrics/_slq.py +0 -0
  43. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/mixture/_base.py +0 -0
  44. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/mixture/_utils.py +0 -0
  45. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/model_selection/__init__.py +0 -0
  46. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/model_selection/_protocol.py +0 -0
  47. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/model_selection/_split.py +0 -0
  48. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/multiclass.py +0 -0
  49. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/neighbors/__init__.py +0 -0
  50. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/neighbors/_classification.py +0 -0
  51. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/neighbors/_utils.py +0 -0
  52. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_artificial.py +0 -0
  53. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_context.py +0 -0
  54. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_decorators.py +0 -0
  55. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_exceptions.py +0 -0
  56. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_get_scores.py +0 -0
  57. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_load.py +0 -0
  58. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_parallel.py +0 -0
  59. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_random.py +0 -0
  60. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_sampling.py +0 -0
  61. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/_tags.py +0 -0
  62. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify/utils/prevalence.py +0 -0
  63. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify.egg-info/dependency_links.txt +0 -0
  64. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify.egg-info/requires.txt +0 -0
  65. {mlquantify-0.1.19 → mlquantify-0.1.21}/mlquantify.egg-info/top_level.txt +0 -0
  66. {mlquantify-0.1.19 → mlquantify-0.1.21}/setup.cfg +0 -0
  67. {mlquantify-0.1.19 → mlquantify-0.1.21}/setup.py +0 -0
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2025, Luiz Fernando Luth Junior and Andre Gustavo Maletzke
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mlquantify
3
- Version: 0.1.19
3
+ Version: 0.1.21
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -12,6 +12,7 @@ Classifier: Operating System :: Unix
12
12
  Classifier: Operating System :: MacOS :: MacOS X
13
13
  Classifier: Operating System :: Microsoft :: Windows
14
14
  Description-Content-Type: text/markdown
15
+ License-File: LICENSE
15
16
  Requires-Dist: scikit-learn
16
17
  Requires-Dist: numpy
17
18
  Requires-Dist: scipy
@@ -26,25 +27,23 @@ Dynamic: description
26
27
  Dynamic: description-content-type
27
28
  Dynamic: home-page
28
29
  Dynamic: keywords
30
+ Dynamic: license-file
29
31
  Dynamic: maintainer
30
32
  Dynamic: requires-dist
31
33
  Dynamic: summary
32
34
 
33
- <img src="assets/logo_mlquantify-white.svg" alt="mlquantify logo">
35
+ ![PyPI - Version](https://img.shields.io/pypi/v/mlquantify)
36
+ [![docs](https://img.shields.io/badge/docs-sphinx-blue)](https://github.com/luizfernandolj/mlquantify/)
37
+
38
+
39
+ <a href="https://luizfernandolj.github.io/mlquantify/"><img src="assets/logo_mlquantify-white.svg" alt="mlquantify logo"></a>
34
40
  <h4 align="center">A Python Package for Quantification</h4>
35
41
 
36
42
  ___
37
43
 
38
44
  **mlquantify** is a Python library for quantification, also known as supervised prevalence estimation, designed to estimate the distribution of classes within datasets. It offers a range of tools for various quantification methods, model selection tailored for quantification tasks, evaluation metrics, and protocols to assess quantification performance. Additionally, mlquantify includes popular datasets and visualization tools to help analyze and interpret results.
39
45
 
40
- ___
41
-
42
- ## Latest Release
43
-
44
- - **Version 0.1.11**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
45
- - In case you need any help, refer to the [User Guide](https://luizfernandolj.github.io/mlquantify/user_guide.html).
46
- - Explore the [API documentation](https://luizfernandolj.github.io/mlquantify/api/index.html) for detailed developer information.
47
- - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
46
+ Website: https://luizfernandolj.github.io/mlquantify/
48
47
 
49
48
  ___
50
49
 
@@ -112,6 +111,10 @@ print(f"Mean Absolute Error -> {mae}")
112
111
  print(f"Normalized Relative Absolute Error -> {nrae}")
113
112
  ```
114
113
 
114
+ - In case you need any help, refer to the [User Guide](https://luizfernandolj.github.io/mlquantify/user_guide.html).
115
+ - Explore the [API documentation](https://luizfernandolj.github.io/mlquantify/api/index.html) for detailed developer information.
116
+ - See also the package page on PyPI: [pypi mlquantify](https://pypi.org/project/mlquantify/)
117
+
115
118
  ___
116
119
 
117
120
  ## Requirements
@@ -123,11 +126,3 @@ ___
123
126
  - tqdm
124
127
  - matplotlib
125
128
  - xlrd
126
-
127
- ___
128
-
129
- ## Documentation
130
-
131
- ##### API is avaliable [here](https://luizfernandolj.github.io/mlquantify/api/)
132
-
133
- ___
@@ -1,18 +1,15 @@
1
- <img src="assets/logo_mlquantify-white.svg" alt="mlquantify logo">
1
+ ![PyPI - Version](https://img.shields.io/pypi/v/mlquantify)
2
+ [![docs](https://img.shields.io/badge/docs-sphinx-blue)](https://github.com/luizfernandolj/mlquantify/)
3
+
4
+
5
+ <a href="https://luizfernandolj.github.io/mlquantify/"><img src="assets/logo_mlquantify-white.svg" alt="mlquantify logo"></a>
2
6
  <h4 align="center">A Python Package for Quantification</h4>
3
7
 
4
8
  ___
5
9
 
6
10
  **mlquantify** is a Python library for quantification, also known as supervised prevalence estimation, designed to estimate the distribution of classes within datasets. It offers a range of tools for various quantification methods, model selection tailored for quantification tasks, evaluation metrics, and protocols to assess quantification performance. Additionally, mlquantify includes popular datasets and visualization tools to help analyze and interpret results.
7
11
 
8
- ___
9
-
10
- ## Latest Release
11
-
12
- - **Version 0.1.11**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
13
- - In case you need any help, refer to the [User Guide](https://luizfernandolj.github.io/mlquantify/user_guide.html).
14
- - Explore the [API documentation](https://luizfernandolj.github.io/mlquantify/api/index.html) for detailed developer information.
15
- - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
12
+ Website: https://luizfernandolj.github.io/mlquantify/
16
13
 
17
14
  ___
18
15
 
@@ -80,6 +77,10 @@ print(f"Mean Absolute Error -> {mae}")
80
77
  print(f"Normalized Relative Absolute Error -> {nrae}")
81
78
  ```
82
79
 
80
+ - In case you need any help, refer to the [User Guide](https://luizfernandolj.github.io/mlquantify/user_guide.html).
81
+ - Explore the [API documentation](https://luizfernandolj.github.io/mlquantify/api/index.html) for detailed developer information.
82
+ - See also the package page on PyPI: [pypi mlquantify](https://pypi.org/project/mlquantify/)
83
+
83
84
  ___
84
85
 
85
86
  ## Requirements
@@ -90,12 +91,4 @@ ___
90
91
  - joblib
91
92
  - tqdm
92
93
  - matplotlib
93
- - xlrd
94
-
95
- ___
96
-
97
- ## Documentation
98
-
99
- ##### API is avaliable [here](https://luizfernandolj.github.io/mlquantify/api/)
100
-
101
- ___
94
+ - xlrd
@@ -0,0 +1 @@
1
+ 0.1.21
@@ -10,4 +10,5 @@ from . import base_aggregative
10
10
  from . import base
11
11
  from . import calibration
12
12
  from . import confidence
13
- from . import multiclass
13
+ from . import multiclass
14
+ from . import neural
@@ -6,14 +6,15 @@ from ._adjustment import (
6
6
  ThresholdAdjustment,
7
7
  MatrixAdjustment,
8
8
  FM,
9
- GAC,
10
- GPAC,
11
- ACC,
12
- X_method,
13
- MAX,
9
+ AC,
10
+ PAC,
11
+ TAC,
12
+ TX,
13
+ TMAX,
14
14
  T50,
15
15
  MS,
16
16
  MS2,
17
+ CDE,
17
18
  )
18
19
 
19
20
  from ._utils import (
@@ -1,15 +1,30 @@
1
+ from mlquantify.utils._validation import validate_prevalences
2
+ from mlquantify.base import BaseQuantifier
1
3
  import numpy as np
2
4
  from abc import abstractmethod
3
5
  from scipy.optimize import minimize
4
6
  import warnings
5
7
  from sklearn.metrics import confusion_matrix
6
8
 
9
+ from mlquantify.utils._tags import (
10
+ PredictionRequirements,
11
+ Tags,
12
+ )
7
13
  from mlquantify.adjust_counting._base import BaseAdjustCount
8
14
  from mlquantify.adjust_counting._counting import CC, PCC
15
+ from mlquantify.utils import (
16
+ _fit_context,
17
+ validate_data,
18
+ validate_prevalences,
19
+ validate_predictions,
20
+ check_classes_attribute
21
+ )
9
22
  from mlquantify.base_aggregative import (
10
23
  CrispLearnerQMixin,
11
24
  SoftLearnerQMixin,
12
- uses_soft_predictions,
25
+ AggregationMixin,
26
+ uses_soft_predictions,
27
+ _get_learner_function
13
28
  )
14
29
  from mlquantify.multiclass import define_binary
15
30
  from mlquantify.adjust_counting._utils import evaluate_thresholds
@@ -98,14 +113,14 @@ class ThresholdAdjustment(SoftLearnerQMixin, BaseAdjustCount):
98
113
  self.threshold = threshold
99
114
  self.strategy = strategy
100
115
 
101
- def _adjust(self, predictions, train_y_scores, train_y_values):
116
+ def _adjust(self, predictions, train_y_scores, y_train):
102
117
  """Internal adjustment computation based on selected ROC threshold."""
103
118
  positive_scores = train_y_scores[:, 1]
104
119
 
105
- thresholds, tprs, fprs = evaluate_thresholds(train_y_values, positive_scores)
120
+ thresholds, tprs, fprs = evaluate_thresholds(y_train, positive_scores)
106
121
  threshold, tpr, fpr = self.get_best_threshold(thresholds, tprs, fprs)
107
122
 
108
- cc_predictions = CC(threshold=threshold).aggregate(predictions, train_y_values)
123
+ cc_predictions = CC(threshold=threshold).aggregate(predictions, y_train)
109
124
  cc_predictions = list(cc_predictions.values())[1]
110
125
 
111
126
  if tpr - fpr == 0:
@@ -200,18 +215,18 @@ class MatrixAdjustment(BaseAdjustCount):
200
215
  super().__init__(learner=learner)
201
216
  self.solver = solver
202
217
 
203
- def _adjust(self, predictions, train_y_pred, train_y_values):
204
- n_class = len(np.unique(train_y_values))
218
+ def _adjust(self, predictions, train_y_pred, y_train):
219
+ n_class = len(np.unique(y_train))
205
220
  self.CM = np.zeros((n_class, n_class))
206
221
 
207
222
  if self.solver == 'optim':
208
- priors = np.array(list(CC().aggregate(train_y_pred, train_y_values).values()))
209
- self.CM = self._compute_confusion_matrix(train_y_pred, train_y_values, priors)
210
- prevs_estim = self._get_estimations(predictions > priors, train_y_values)
223
+ priors = np.array(list(CC().aggregate(train_y_pred, y_train).values()))
224
+ self.CM = self._compute_confusion_matrix(train_y_pred, y_train, priors)
225
+ prevs_estim = self._get_estimations(predictions > priors, y_train)
211
226
  prevalence = self._solve_optimization(prevs_estim, priors)
212
227
  else:
213
- self.CM = self._compute_confusion_matrix(train_y_pred, train_y_values)
214
- prevs_estim = self._get_estimations(predictions, train_y_values)
228
+ self.CM = self._compute_confusion_matrix(train_y_pred, y_train)
229
+ prevs_estim = self._get_estimations(predictions, y_train)
215
230
  prevalence = self._solve_linear(prevs_estim)
216
231
 
217
232
  return prevalence
@@ -262,17 +277,173 @@ class MatrixAdjustment(BaseAdjustCount):
262
277
  result = minimize(objective, init, constraints=constraints, bounds=bounds)
263
278
  return result.x if result.success else priors
264
279
 
265
- def _get_estimations(self, predictions, train_y_values):
280
+ def _get_estimations(self, predictions, y_train):
266
281
  """Return prevalence estimates using CC (crisp) or PCC (probabilistic)."""
267
282
  if uses_soft_predictions(self):
268
283
  return np.array(list(PCC().aggregate(predictions).values()))
269
- return np.array(list(CC().aggregate(predictions, train_y_values).values()))
284
+ return np.array(list(CC().aggregate(predictions, y_train).values()))
270
285
 
271
286
  @abstractmethod
272
287
  def _compute_confusion_matrix(self, predictions, *args):
273
288
  ...
274
289
 
275
290
 
291
+
292
+ @define_binary
293
+ class CDE(SoftLearnerQMixin, AggregationMixin, BaseQuantifier):
294
+ r"""CDE-Iterate for binary classification prevalence estimation.
295
+
296
+ Threshold :math:`\tau` from false positive and false negative costs:
297
+ .. math::
298
+ \tau = \frac{c_{FP}}{c_{FP} + c_{FN}}
299
+
300
+ Hard classification by thresholding posterior probability :math:`p(+|x)` at :math:`\tau`:
301
+ .. math::
302
+ \hat{y}(x) = \mathbf{1}_{p(+|x) > \tau}
303
+
304
+ Prevalence estimation via classify-and-count:
305
+ .. math::
306
+ \hat{p}_U(+) = \frac{1}{N} \sum_{n=1}^N \hat{y}(x_n)
307
+
308
+ False positive cost update:
309
+ .. math::
310
+ c_{FP}^{new} = \frac{p_L(+)}{p_L(-)} \times \frac{\hat{p}_U(-)}{\hat{p}_U(+)} \times c_{FN}
311
+
312
+ Parameters
313
+ ----------
314
+ learner : estimator, optional
315
+ Wrapped classifier (unused).
316
+ tol : float, default=1e-4
317
+ Convergence tolerance.
318
+ max_iter : int, default=100
319
+ Max iterations.
320
+ init_cfp : float, default=1.0
321
+ Initial false positive cost.
322
+
323
+ References
324
+ ----------
325
+ .. [1] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
326
+ """
327
+
328
+ _parameter_constraints = {
329
+ "tol": [Interval(0, None, inclusive_left=False)],
330
+ "max_iter": [Interval(1, None, inclusive_left=True)],
331
+ "init_cfp": [Interval(0, None, inclusive_left=False)]
332
+ }
333
+
334
+ def __mlquantify_tags__(self):
335
+ tags = super().__mlquantify_tags__()
336
+ tags.prediction_requirements.requires_train_proba = False
337
+ return tags
338
+
339
+
340
+ def __init__(self, learner=None, tol=1e-4, max_iter=100, init_cfp=1.0, strategy="ovr"):
341
+ self.learner = learner
342
+ self.tol = float(tol)
343
+ self.max_iter = int(max_iter)
344
+ self.init_cfp = float(init_cfp)
345
+ self.strategy = strategy
346
+
347
+ @_fit_context(prefer_skip_nested_validation=True)
348
+ def fit(self, X, y):
349
+ """Fit the quantifier using the provided data and learner."""
350
+ X, y = validate_data(self, X, y)
351
+ self.classes_ = np.unique(y)
352
+ self.learner.fit(X, y)
353
+ counts = np.array([np.count_nonzero(y == _class) for _class in self.classes_])
354
+ self.priors = counts / len(y)
355
+ self.y_train = y
356
+
357
+ return self
358
+
359
+
360
+ def predict(self, X):
361
+ """Predict class prevalences for the given data."""
362
+ predictions = getattr(self.learner, _get_learner_function(self))(X)
363
+ prevalences = self.aggregate(predictions, self.y_train)
364
+ return prevalences
365
+
366
+
367
+ def aggregate(self, predictions, y_train):
368
+
369
+ self.classes_ = check_classes_attribute(self, np.unique(y_train))
370
+ predictions = validate_predictions(self, predictions)
371
+
372
+ if hasattr(self, 'priors'):
373
+ Ptr = np.asarray(self.priors, dtype=np.float64)
374
+ else:
375
+ counts = np.array([np.count_nonzero(y_train == _class) for _class in self.classes_])
376
+ Ptr = counts / len(y_train)
377
+
378
+ P = np.asarray(predictions, dtype=np.float64)
379
+
380
+ # ensure no zeros
381
+ eps = 1e-12
382
+ P = np.clip(P, eps, 1.0)
383
+
384
+ # training priors pL(+), pL(-)
385
+ # assume Ptr order matches columns of P; if Ptr sums to 1 but order unknown, user must match.
386
+ pL_pos = Ptr[1]
387
+ pL_neg = Ptr[0]
388
+ if pL_pos <= 0 or pL_neg <= 0:
389
+ # keep them positive to avoid divisions by zero
390
+ pL_pos = max(pL_pos, eps)
391
+ pL_neg = max(pL_neg, eps)
392
+
393
+ # initialize costs
394
+ cFN = 1.0
395
+ cFP = float(self.init_cfp)
396
+
397
+ prev_prev_pos = None
398
+ s = 0
399
+
400
+ # iterate: compute threshold from costs, classify, estimate prevalences via CC,
401
+ # update cFP, repeat
402
+ while s < self.max_iter:
403
+ # decision threshold tau for positive class:
404
+ # Derivation:
405
+ # predict positive if cost_FP * p(-|x) < cost_FN * p(+|x)
406
+ # => predict positive if p(+|x) / p(-|x) > cost_FP / cost_FN
407
+ # since p(+|x) / p(-|x) = p(+|x) / (1 - p(+|x)):
408
+ # p(+|x) > cost_FP / (cost_FP + cost_FN)
409
+ tau = cFP / (cFP + cFN)
410
+
411
+ # hard predictions for positive class using threshold on posterior for positive (col 1)
412
+ pos_probs = P[:, 1]
413
+ hard_pos = (pos_probs > tau).astype(float)
414
+
415
+ # classify-and-count prevalence estimate on U
416
+ prev_pos = hard_pos.mean()
417
+ prev_neg = 1.0 - prev_pos
418
+
419
+ # update cFP according to:
420
+ # cFP_new = (pL_pos / pL_neg) * (pU_hat(neg) / pU_hat(pos)) * cFN
421
+ # guard against zero prev_pos / prev_neg
422
+ prev_pos_safe = max(prev_pos, eps)
423
+ prev_neg_safe = max(prev_neg, eps)
424
+
425
+ cFP_new = (pL_pos / pL_neg) * (prev_neg_safe / prev_pos_safe) * cFN
426
+
427
+ # check convergence on prevalences (absolute change)
428
+ if prev_prev_pos is not None and abs(prev_pos - prev_prev_pos) < self.tol:
429
+ break
430
+
431
+ # prepare next iter
432
+ cFP = cFP_new
433
+ prev_prev_pos = prev_pos
434
+ s += 1
435
+
436
+ # if didn't converge within max_iter we keep last estimate (lack of fisher consistency)
437
+ if s >= self.max_iter:
438
+ # optional: warning
439
+ # print('[warning] CDE-Iterate reached max_iter without converging')
440
+ pass
441
+
442
+ prevalences = np.array([prev_neg, prev_pos], dtype=np.float64)
443
+ prevalences = validate_prevalences(self, prevalences, self.classes_)
444
+ return prevalences
445
+
446
+
276
447
  class FM(SoftLearnerQMixin, MatrixAdjustment):
277
448
  r"""Friedman Method for quantification adjustment.
278
449
 
@@ -337,14 +508,14 @@ class FM(SoftLearnerQMixin, MatrixAdjustment):
337
508
  def _compute_confusion_matrix(self, posteriors, y_true, priors):
338
509
  for i, _class in enumerate(self.classes_):
339
510
  indices = (y_true == _class)
340
- self.CM[:, i] = self._get_estimations(posteriors[indices] > priors)
511
+ self.CM[:, i] = self._get_estimations(posteriors[indices] > priors, y_true[indices])
341
512
  return self.CM
342
513
 
343
514
 
344
- class GAC(CrispLearnerQMixin, MatrixAdjustment):
345
- r"""Generalized Adjusted Count method.
515
+ class AC(CrispLearnerQMixin, MatrixAdjustment):
516
+ r"""Adjusted Count method.
346
517
 
347
- This class implements the Generalized Adjusted Count (GAC) algorithm for
518
+ This class implements the Adjusted Count (AC) algorithm for
348
519
  quantification adjustment as described in Firat (2016) [1]_. The method
349
520
  adjusts the estimated class prevalences by normalizing the confusion matrix
350
521
  based on prevalence estimates, providing a correction for bias caused by
@@ -374,12 +545,12 @@ class GAC(CrispLearnerQMixin, MatrixAdjustment):
374
545
  Examples
375
546
  --------
376
547
  >>> from sklearn.linear_model import LogisticRegression
377
- >>> from mlquantify.adjust_counting import GAC
548
+ >>> from mlquantify.adjust_counting import AC
378
549
  >>> import numpy as np
379
- >>> gac = GAC(learner=LogisticRegression())
550
+ >>> ac = AC(learner=LogisticRegression())
380
551
  >>> X = np.random.randn(50, 4)
381
552
  >>> y = np.random.randint(0, 2, 50)
382
- >>> gac.fit(X, y)
553
+ >>> ac.fit(X, y)
383
554
  >>> ac.predict(X)
384
555
  {0: 0.5, 1: 0.5}
385
556
 
@@ -404,11 +575,11 @@ class GAC(CrispLearnerQMixin, MatrixAdjustment):
404
575
  return self.CM
405
576
 
406
577
 
407
- class GPAC(SoftLearnerQMixin, MatrixAdjustment):
408
- r"""Probabilistic Generalized Adjusted Count (GPAC) method.
578
+ class PAC(SoftLearnerQMixin, MatrixAdjustment):
579
+ r"""Probabilistic Adjusted Count (PAC) method.
409
580
 
410
- This class implements the probabilistic extension of the Generalized Adjusted Count method
411
- as presented in Firat (2016) [1]_. The GPAC method normalizes the confusion matrix by
581
+ This class implements the probabilistic extension of the Adjusted Count method
582
+ as presented in Firat (2016) [1]_. The PAC method normalizes the confusion matrix by
412
583
  the estimated prevalences from posterior probabilities, enabling a probabilistic correction
413
584
  of class prevalences.
414
585
 
@@ -436,13 +607,13 @@ class GPAC(SoftLearnerQMixin, MatrixAdjustment):
436
607
  Examples
437
608
  --------
438
609
  >>> from sklearn.linear_model import LogisticRegression
439
- >>> from mlquantify.adjust_counting import GPAC
610
+ >>> from mlquantify.adjust_counting import PAC
440
611
  >>> import numpy as np
441
- >>> gpac = GPAC(learner=LogisticRegression())
612
+ >>> pac = PAC(learner=LogisticRegression())
442
613
  >>> X = np.random.randn(50, 4)
443
614
  >>> y = np.random.randint(0, 2, 50)
444
- >>> gpac.fit(X, y)
445
- >>> gpac.predict(X)
615
+ >>> pac.fit(X, y)
616
+ >>> pac.predict(X)
446
617
  {0: 0.5, 1: 0.5}
447
618
 
448
619
  References
@@ -466,8 +637,8 @@ class GPAC(SoftLearnerQMixin, MatrixAdjustment):
466
637
  return self.CM
467
638
 
468
639
 
469
- class ACC(ThresholdAdjustment):
470
- r"""Adjusted Count (ACC) — baseline threshold correction.
640
+ class TAC(ThresholdAdjustment):
641
+ r"""Threshold Adjusted Count (TAC) — baseline threshold correction.
471
642
 
472
643
  This method corrects the bias in class prevalence estimates caused by imperfect
473
644
  classification accuracy, by adjusting the observed positive count using estimates
@@ -501,8 +672,8 @@ class ACC(ThresholdAdjustment):
501
672
  return (self.threshold, tpr, fpr)
502
673
 
503
674
 
504
- class X_method(ThresholdAdjustment):
505
- r"""X method — threshold where :math:`\text{TPR} + \text{FPR} = 1`.
675
+ class TX(ThresholdAdjustment):
676
+ r"""Threshold X method — threshold where :math:`\text{TPR} + \text{FPR} = 1`.
506
677
 
507
678
  This method selects the classification threshold at which the sum of the true positive
508
679
  rate (TPR) and false positive rate (FPR) equals one. This threshold choice balances
@@ -526,8 +697,8 @@ class X_method(ThresholdAdjustment):
526
697
  return thresholds[idx], tprs[idx], fprs[idx]
527
698
 
528
699
 
529
- class MAX(ThresholdAdjustment):
530
- r"""MAX method — threshold maximizing :math:`\text{TPR} - \text{FPR}`.
700
+ class TMAX(ThresholdAdjustment):
701
+ r"""Threshold MAX method — threshold maximizing :math:`\text{TPR} - \text{FPR}`.
531
702
 
532
703
  This method selects the threshold that maximizes the difference between the true positive
533
704
  rate (TPR) and the false positive rate (FPR), effectively optimizing classification
@@ -601,15 +772,15 @@ class MS(ThresholdAdjustment):
601
772
  .. [1] Forman, G. (2008). "Quantifying Counts and Costs via Classification",
602
773
  *Data Mining and Knowledge Discovery*, 17(2), 164-206.
603
774
  """
604
- def _adjust(self, predictions, train_y_scores, train_y_values):
775
+ def _adjust(self, predictions, train_y_scores, y_train):
605
776
  positive_scores = train_y_scores[:, 1]
606
777
 
607
- thresholds, tprs, fprs = evaluate_thresholds(train_y_values, positive_scores)
778
+ thresholds, tprs, fprs = evaluate_thresholds(y_train, positive_scores)
608
779
  thresholds, tprs, fprs = self.get_best_threshold(thresholds, tprs, fprs)
609
780
 
610
781
  prevs = []
611
782
  for thr, tpr, fpr in zip(thresholds, tprs, fprs):
612
- cc_predictions = CC(threshold=thr).aggregate(predictions, train_y_values)
783
+ cc_predictions = CC(threshold=thr).aggregate(predictions, y_train)
613
784
  cc_predictions = list(cc_predictions.values())[1]
614
785
 
615
786
  if tpr - fpr == 0:
@@ -93,14 +93,13 @@ class BaseCount(AggregationMixin, BaseQuantifier):
93
93
  def __mlquantify_tags__(self):
94
94
  tags = super().__mlquantify_tags__()
95
95
  tags.prediction_requirements.requires_train_proba = False
96
- tags.prediction_requirements.requires_train_labels = False
96
+ tags.prediction_requirements.requires_train_labels = True
97
97
  return tags
98
98
 
99
99
  @_fit_context(prefer_skip_nested_validation=True)
100
100
  def fit(self, X, y, learner_fitted=False, *args, **kwargs):
101
101
  """Fit the quantifier using the provided data and learner."""
102
102
  X, y = validate_data(self, X, y)
103
- validate_y(self, y)
104
103
  self.classes_ = np.unique(y)
105
104
  if not learner_fitted:
106
105
  self.learner.fit(X, y, *args, **kwargs)
@@ -207,7 +206,6 @@ class BaseAdjustCount(AggregationMixin, BaseQuantifier):
207
206
  def fit(self, X, y, learner_fitted=False, cv=10, stratified=True, random_state=None, shuffle=True):
208
207
  """Fit the quantifier using the provided data and learner."""
209
208
  X, y = validate_data(self, X, y)
210
- validate_y(self, y)
211
209
  self.classes_ = np.unique(y)
212
210
  learner_function = _get_learner_function(self)
213
211
 
@@ -236,12 +234,13 @@ class BaseAdjustCount(AggregationMixin, BaseQuantifier):
236
234
  prevalences = self.aggregate(predictions, self.train_predictions, self.train_y_values)
237
235
  return prevalences
238
236
 
239
- def aggregate(self, predictions, train_predictions, y_train_values):
237
+ def aggregate(self, predictions, train_predictions, y_train):
240
238
  """Aggregate predictions and apply matrix- or rate-based bias correction."""
241
- self.classes_ = check_classes_attribute(self, np.unique(y_train_values))
239
+ self.classes_ = check_classes_attribute(self, np.unique(y_train))
242
240
 
243
241
  predictions = validate_predictions(self, predictions)
242
+ train_predictions = validate_predictions(self, train_predictions)
244
243
 
245
- prevalences = self._adjust(predictions, train_predictions, y_train_values)
244
+ prevalences = self._adjust(predictions, train_predictions, y_train)
246
245
  prevalences = validate_prevalences(self, prevalences, self.classes_)
247
246
  return prevalences
@@ -75,13 +75,13 @@ class CC(CrispLearnerQMixin, BaseCount):
75
75
  super().__init__(learner=learner)
76
76
  self.threshold = threshold
77
77
 
78
- def aggregate(self, predictions, train_y_values=None):
79
- predictions = validate_predictions(self, predictions, self.threshold, train_y_values)
78
+ def aggregate(self, predictions, y_train=None):
79
+ predictions = validate_predictions(self, predictions, self.threshold, y_train)
80
80
 
81
- if train_y_values is None:
82
- train_y_values = np.unique(predictions)
81
+ if y_train is None:
82
+ y_train = np.unique(predictions)
83
83
 
84
- self.classes_ = check_classes_attribute(self, np.unique(train_y_values))
84
+ self.classes_ = check_classes_attribute(self, np.unique(y_train))
85
85
  class_counts = np.array([np.count_nonzero(predictions == _class) for _class in self.classes_])
86
86
  prevalences = class_counts / len(predictions)
87
87
 
@@ -134,12 +134,15 @@ class PCC(SoftLearnerQMixin, BaseCount):
134
134
  def __init__(self, learner=None):
135
135
  super().__init__(learner=learner)
136
136
 
137
- def aggregate(self, predictions):
137
+ def aggregate(self, predictions, y_train=None):
138
138
  predictions = validate_predictions(self, predictions)
139
139
 
140
140
  # Handle categorical predictions (1D array with class labels)
141
141
  if predictions.ndim == 1 and not np.issubdtype(predictions.dtype, (np.floating, np.integer)):
142
- self.classes_ = check_classes_attribute(self, np.unique(predictions))
142
+ if y_train is None:
143
+ y_values = np.unique(predictions)
144
+
145
+ self.classes_ = check_classes_attribute(self, np.unique(y_values))
143
146
  class_counts = np.array([np.count_nonzero(predictions == _class) for _class in self.classes_])
144
147
  prevalences = class_counts / len(predictions)
145
148
  else:
@@ -1,5 +1,3 @@
1
1
  from ._classes import (
2
2
  EMQ,
3
- MLPE,
4
- CDE
5
3
  )