PyPI - mlquantify - Versions diffs - 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl - Mend

mlquantify 0.1.9py3-none-any.whl → 0.1.11py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

mlquantify/__init__.py +11 -1
mlquantify/adjust_counting/__init__.py +11 -1
mlquantify/adjust_counting/_adjustment.py +370 -87
mlquantify/adjust_counting/_base.py +1 -3
mlquantify/adjust_counting/_counting.py +27 -19
mlquantify/adjust_counting/_utils.py +23 -28
mlquantify/confidence.py +16 -22
mlquantify/likelihood/_base.py +38 -52
mlquantify/likelihood/_classes.py +88 -72
mlquantify/meta/_classes.py +86 -62
mlquantify/metrics/_oq.py +2 -2
mlquantify/metrics/_rq.py +2 -2
mlquantify/metrics/_slq.py +9 -9
mlquantify/mixture/_base.py +13 -19
mlquantify/mixture/_classes.py +68 -10
mlquantify/mixture/_utils.py +62 -11
mlquantify/model_selection/_protocol.py +6 -6
mlquantify/model_selection/_search.py +1 -1
mlquantify/neighbors/_base.py +35 -65
mlquantify/neighbors/_classes.py +1 -10
mlquantify/neighbors/_classification.py +5 -12
mlquantify/neighbors/_kde.py +7 -9
mlquantify/neighbors/_utils.py +17 -21
mlquantify/utils/_validation.py +3 -3
mlquantify/utils/prevalence.py +4 -1
{mlquantify-0.1.9.dist-info → mlquantify-0.1.11.dist-info}/METADATA +10 -18
mlquantify-0.1.11.dist-info/RECORD +53 -0
mlquantify-0.1.9.dist-info/RECORD +0 -53
{mlquantify-0.1.9.dist-info → mlquantify-0.1.11.dist-info}/WHEEL +0 -0
{mlquantify-0.1.9.dist-info → mlquantify-0.1.11.dist-info}/top_level.txt +0 -0

mlquantify/adjust_counting/_counting.py CHANGED Viewed

@@ -12,33 +12,34 @@ from mlquantify.utils._constraints import Interval
 class CC(CrispLearnerQMixin, BaseCount):
-    """Classify and Count (CC) quantifier.
-    Implements the Classify and Count method for quantification as described in:
-    [1] Forman, G. (2005). *Counting Positives Accurately Despite Inaccurate Classification.*
-        ECML, pp. 564-575.
-    [2] Forman, G. (2008). *Quantifying Counts and Costs via Classification.*
-        Data Mining and Knowledge Discovery, 17(2), 164-206.
+    r"""Classify and Count (CC) quantifier.
+    Implements the Classify and Count method for quantification, described as a
+    baseline approach in the literature [1]_ [2]_.
     Parameters
     ----------
     learner : estimator, optional
-        A supervised learning estimator with fit and predict methods.
-        If None, it is expected that will be used the aggregate method directly.
+        A supervised learning estimator with `fit` and `predict` methods.
+        If None, it is expected that the aggregate method is used directly.
     threshold : float, default=0.5
         Decision threshold for converting predicted probabilities into class labels.
         Must be in the interval [0.0, 1.0].
     Attributes
     ----------
     learner : estimator
         Underlying classification model.
-    classes : ndarray of shape (n_classes,)
-        Unique class labels observed during training.
+    Notes
+    -----
+    The Classify and Count approach performs quantification by classifying each instance
+    using the classifier's predicted labels at a given threshold, then counting the
+    prevalence of each class.
+    This method can be biased when class distributions differ between training and test sets,
+    motivating further adjustment methods.
     Examples
     --------
     >>> from mlquantify.adjust_counting import CC
@@ -50,10 +51,17 @@ class CC(CrispLearnerQMixin, BaseCount):
     >>> q.fit(X, y)
     >>> q.predict(X)
     {0: 0.47, 1: 0.53}
-    >> q2 = CC()
+    >>> q2 = CC()
     >>> predictions = np.random.rand(200)
     >>> q2.aggregate(predictions)
     {0: 0.51, 1: 0.49}
+    References
+    ----------
+    .. [1] Forman, G. (2005). "Counting Positives Accurately Despite Inaccurate Classification",
+           *ECML*, pp. 564-575.
+    .. [2] Forman, G. (2008). "Quantifying Counts and Costs via Classification",
+           *Data Mining and Knowledge Discovery*, 17(2), 164-206.
     """
     _parameters_constraints = {
@@ -79,7 +87,7 @@ class CC(CrispLearnerQMixin, BaseCount):
 class PCC(SoftLearnerQMixin, BaseCount):
-    """Probabilistic Classify and Count (PCC) quantifier.
+    r"""Probabilistic Classify and Count (PCC) quantifier.
     Implements the Probabilistic Classify and Count method for quantification as described in:
     [1] Forman, G. (2005). *Counting Positives Accurately Despite Inaccurate Classification.*

mlquantify/adjust_counting/_utils.py CHANGED Viewed

@@ -2,10 +2,8 @@ import numpy as np
 def compute_table(y, y_pred, classes):
-    """
-    Compute the confusion matrix table for a binary classification task.
+    r"""Compute the confusion matrix table for a binary classification task.
     Parameters
     ----------
     y : np.ndarray
@@ -14,12 +12,12 @@ def compute_table(y, y_pred, classes):
         The predicted labels.
     classes : np.ndarray
         The unique classes in the dataset.
     Returns
     -------
     tuple
-        A tuple containing the True Positives, False Positives, False Negatives, and True Negatives.
+        A tuple containing the counts of True Positives, False Positives,
+        False Negatives, and True Negatives respectively.
     """
     TP = np.logical_and(y == y_pred, y == classes[1]).sum()
     FP = np.logical_and(y != y_pred, y == classes[0]).sum()
@@ -29,18 +27,15 @@ def compute_table(y, y_pred, classes):
 def compute_tpr(TP, FN):
-    """
-    Compute the True Positive Rate (Recall) for a binary classification task.
+    r"""Compute the True Positive Rate (Recall) for a binary classification task.
     Parameters
     ----------
     TP : int
         The number of True Positives.
     FN : int
         The number of False Negatives.
     Returns
     -------
     float
@@ -52,18 +47,15 @@ def compute_tpr(TP, FN):
 def compute_fpr(FP, TN):
-    """
-    Compute the False Positive Rate for a binary classification task.
+    r"""Compute the False Positive Rate for a binary classification task.
     Parameters
     ----------
     FP : int
         The number of False Positives.
     TN : int
         The number of True Negatives.
     Returns
     -------
     float
@@ -74,31 +66,34 @@ def compute_fpr(FP, TN):
     return FP / (FP + TN)
-def evaluate_thresholds (y, probabilities:np.ndarray, classes) -> tuple:
-    """
-    Adjust the threshold for a binary quantification task to maximize the True Positive Rate.
+def evaluate_thresholds (y, probabilities:np.ndarray) -> tuple:
+    r"""Evaluate a range of classification thresholds to compute the corresponding
+    True Positive Rate (TPR) and False Positive Rate (FPR) for a binary quantification task.
     Parameters
     ----------
     y : np.ndarray
         The true labels.
     probabilities : np.ndarray
-        The predicted probabilities.
+        The predicted probabilities (scores) for the positive class.
     classes : np.ndarray
         The unique classes in the dataset.
     Returns
     -------
     tuple
-        The best True Positive Rate and False Positive Rate.
+        A tuple of (thresholds, tprs, fprs), where:
+        - thresholds is a numpy array of evaluated thresholds,
+        - tprs is a numpy array of corresponding True Positive Rates,
+        - fprs is a numpy array of corresponding False Positive Rates.
     """
     unique_scores = np.linspace(0, 1, 101)
     tprs = []
     fprs = []
+    classes = np.unique(y)
     for threshold in unique_scores:
         y_pred = np.where(probabilities >= threshold, classes[1], classes[0])

mlquantify/confidence.py CHANGED Viewed

@@ -2,31 +2,24 @@ import numpy as np
 from scipy.stats import chi2
-import numpy as np
-from scipy.stats import chi2
-import numpy as np
-from scipy.stats import chi2
 class BaseConfidenceRegion:
-    """Base class for confidence regions of prevalence estimates.
+    r"""
+    Base class for confidence regions of prevalence estimates.
     This class defines the interface and core structure for constructing
     confidence regions around class prevalence estimates obtained from
     quantification models.
     Confidence regions capture the uncertainty associated with prevalence
-    estimates, typically derived from bootstrap resampling as proposed by [1].
-    Subclasses define specific types of regions (e.g., intervals, ellipses).
+    estimates, typically derived from bootstrap resampling as proposed in
+    [1]_.
     Parameters
     ----------
     prev_estims : array-like of shape (m, n)
-        Collection of `m` bootstrap prevalence estimates for `n` classes.
+        Collection of ``m`` bootstrap prevalence estimates for ``n`` classes.
     confidence_level : float, default=0.95
-        Desired confidence level (1 - α) of the region.
+        Desired confidence level :math:`1 - \alpha` of the region.
     Attributes
     ----------
@@ -37,12 +30,13 @@ class BaseConfidenceRegion:
     Notes
     -----
-    The general goal is to construct a confidence region :math:`CR_α` such that:
+    The confidence region :math:`CR_{\alpha}` is defined such that
     .. math::
-        P(π^* \\in CR_α) = 1 - α
-    where :math:`π^*` is the true (unknown) class prevalence vector.
+        \mathbb{P}\left(\pi^{\ast} \in CR_{\alpha}\right) = 1 - \alpha
+    where :math:`\pi^{\ast}` is the unknown true class-prevalence vector.
     Examples
     --------
@@ -63,9 +57,9 @@ class BaseConfidenceRegion:
     References
     ----------
-    [1] Moreo, A., & Salvati, N. (2025).
-        *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification*.
-        Istituto di Scienza e Tecnologie dell’Informazione, CNR, Pisa.
+    .. [1] Moreo, A., & Salvati, N. (2025).
+       *An Efficient Method for Deriving Confidence Intervals in Aggregative Quantification.*
+       Istituto di Scienza e Tecnologie dell'Informazione, CNR, Pisa.
     """
     def __init__(self, prev_estims, confidence_level=0.95):
@@ -94,7 +88,7 @@ class BaseConfidenceRegion:
 # ==========================================================
 class ConfidenceInterval(BaseConfidenceRegion):
-    """Bootstrap confidence intervals for each class prevalence.
+    r"""Bootstrap confidence intervals for each class prevalence.
     Constructs independent percentile-based confidence intervals
     for each class dimension from bootstrap samples.
@@ -164,7 +158,7 @@ class ConfidenceInterval(BaseConfidenceRegion):
 # ==========================================================
 class ConfidenceEllipseSimplex(BaseConfidenceRegion):
-    """Confidence ellipse for prevalence estimates in the simplex.
+    r"""Confidence ellipse for prevalence estimates in the simplex.
     Defines a multivariate confidence region based on a chi-squared threshold:

mlquantify/likelihood/_base.py CHANGED Viewed

@@ -14,100 +14,86 @@ from mlquantify.utils._validation import check_classes_attribute, validate_predi
 class BaseIterativeLikelihood(AggregationMixin, BaseQuantifier):
-    """
-    Iterative, likelihood-based quantification via EM adjustment.
-    This is the base class for quantification methods that estimate class prevalences
-    by solving the maximum likelihood problem under prior probability shift, using
-    iterative procedures such as the EM (Expectation-Maximization) algorithm
-    [1], [2].
-    These methods repeatedly adjust the estimated class prevalences for a test set
-    by maximizing the likelihood of observed classifier outputs (posterior probabilities),
-    under the assumption that the within-class conditional distributions remain fixed
+    r"""Iterative likelihood-based quantification adjustment methods.
+    This base class encompasses quantification approaches that estimate class prevalences
+    by maximizing the likelihood of observed data, adjusting prevalence estimates on test
+    sets under the assumption of prior probability shift.
+    These methods iteratively refine estimates of class prevalences by maximizing the
+    likelihood of classifier outputs, usually the posterior probabilities provided by
+    a trained model, assuming that the class-conditional distributions remain fixed
     between training and test domains.
     Mathematical formulation
     ------------------------
     Let:
-    - \( p_k^t \) denote the prior probability for class \( k \) in the training set (\( \sum_k p_k^t = 1 \)),
-    - \( s_k(x) \) be the classifier's posterior probability estimate (for class \( k \), given instance \( x \), fitted on training set),
-    - \( p_k \) be the (unknown) prior for the test set,
-    - \( x_1, \dots, x_N \) the unlabeled test set instances.
-    The procedure iteratively estimates \( p_k \) by maximizing the observed data likelihood
+    - :math:`p_k^t` be the prior probability of class :math:`k` in the training set, satisfying :math:`\sum_k p_k^t = 1`,
+    - :math:`s_k(x)` be the posterior probability estimate from the classifier for class :math:`k` given instance :math:`x`,
+    - :math:`p_k` be the unknown prior probability of class :math:`k` in the test set,
+    - :math:`x_1, \dots, x_N` be the unlabeled test set instances.
-    \[
-    L = \prod_{i=1}^N \sum_{k=1}^K s_k(x_i) \frac{p_k}{p_k^t}
-    \]
-    The E-step updates soft memberships:
+    The likelihood of the observed data is:
+    .. math::
-    \[
-    w_{ik}^{(t)} = \frac{s_k(x_i) \cdot (p_k^{(t-1)} / p_k^t)}{\sum_{j=1}^K s_j(x_i) \cdot (p_j^{(t-1)} / p_j^t)}
-    \]
-    and the M-step re-estimates prevalences:
+        L = \prod_{i=1}^N \sum_{k=1}^K s_k(x_i) \frac{p_k}{p_k^t}
-    \[
-    p_k^{(t)} = \frac{1}{N} \sum_{i=1}^N w_{ik}^{(t)}
-    \]
-    See also [1].
+    Methods in this class seek a solution that maximizes this likelihood via iterative methods.
     Notes
     -----
-    - Defined for multiclass and binary quantification (single-label), as long as the classifier provides well-calibrated posterior probabilities.
-    - Assumes prior probability shift only.
-    - Converges to a (local) maximum of the data likelihood.
-    - The algorithm is Fisher-consistent under prior probability shift [2].
-    - Closely related to the Expectation-Maximization (EM) algorithm for mixture models.
+    - Applicable to binary and multiclass problems as long as the classifier provides calibrated posterior probabilities.
+    - Assumes changes only in prior probabilities (prior probability shift).
+    - Algorithms converge to local maxima of the likelihood function.
+    - Includes methods such as Class Distribution Estimation (CDE), Maximum Likelihood Prevalence Estimation (MLPE), and Expectation-Maximization (EM) based quantification.
     Parameters
     ----------
     learner : estimator, optional
-        Probabilistic classifier instance with `fit(X, y)` and `predict_proba(X)`.
+        Probabilistic classifier implementing the methods `fit(X, y)` and `predict_proba(X)`.
     tol : float, default=1e-4
-        Convergence tolerance for prevalence update.
+        Convergence tolerance for prevalence update criteria.
     max_iter : int, default=100
-        Maximum number of EM update iterations.
+        Maximum allowed number of iterations.
     Attributes
     ----------
     learner : estimator
-        Underlying classifier instance.
+        Underlying classification model.
     tol : float
-        Stopping tolerance for EM prevalence estimation.
+        Tolerance for stopping criterion.
     max_iter : int
-        Maximum updates performed.
+        Maximum number of iterations.
     classes : ndarray of shape (n_classes,)
-        Unique class labels seen in training.
+        Unique classes observed during training.
     priors : ndarray of shape (n_classes,)
-        Class distribution of the training set.
+        Class distribution in the training set.
     y_train : array-like
-        Training labels (used for estimating priors and confusion matrix if needed).
+        Training labels used to estimate priors.
     Examples
     --------
     >>> import numpy as np
     >>> from sklearn.linear_model import LogisticRegression
-    >>> class MyEM(BaseIterativeLikelihood):
+    >>> class MyQuantifier(BaseIterativeLikelihood):
     ...     def _iterate(self, predictions, priors):
-    ...         # EM iteration logic here
+    ...         # Implementation of iterative update logic
     ...         pass
     >>> X = np.random.randn(200, 8)
     >>> y = np.random.randint(0, 3, size=(200,))
-    >>> q = MyEM(learner=LogisticRegression(max_iter=200))
+    >>> q = MyQuantifier(learner=LogisticRegression(max_iter=200))
     >>> q.fit(X, y)
     >>> q.predict(X)
     {0: 0.32, 1: 0.40, 2: 0.28}
     References
     ----------
-    [1] Saerens, M., Latinne, P., & Decaestecker, C. (2002). *Adjusting the Outputs of a Classifier to New a Priori Probabilities: A Simple Procedure.* Neural Computation, 14(1), 2141-2156.
-    [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). *Learning to Quantify.* The Information Retrieval Series 47, Springer. https://doi.org/10.1007/978-3-031-20467-8
-    """
+    .. [1] Saerens, M., Latinne, P., & Decaestecker, C. (2002). "Adjusting the Outputs of a Classifier to New a Priori Probabilities: A Simple Procedure." Neural Computation, 14(1), 21-41.
+    .. [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). "Learning to Quantify." The Information Retrieval Series 47, Springer. https://doi.org/10.1007/978-3-031-20467-8
+    """
     @abstractmethod
     def __init__(self,

mlquantify/likelihood/_classes.py CHANGED Viewed

@@ -10,46 +10,66 @@ from mlquantify.utils._constraints import (
 )
 class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
-    """Expectation-Maximization Quantifier.
-    Implements iterative quantification using an EM algorithm to adjust class
-    prevalences under prior probability shift, assimilating posterior probabilities
-    (soft predictions) from a probabilistic classifier.
-    The EM procedure alternates between estimating posterior memberships of test
-    instances (E-step) and re-estimating class prevalences (M-step), iterating until
-    convergence (tolerance or max iterations) on prevalence change measured by a
-    user-defined criteria (default: Mean Absolute Error, MAE).
-    Supports optional calibration of predicted posteriors before iteration.
+    r"""Expectation-Maximization Quantifier (EMQ).
+    Estimates class prevalences under prior probability shift by alternating
+    between expectation **(E)** and maximization **(M)** steps on posterior probabilities.
+    E-step:
+    .. math::
+        p_i^{(s+1)}(x) = \frac{q_i^{(s)} p_i(x)}{\sum_j q_j^{(s)} p_j(x)}
+    M-step:
+    .. math::
+        q_i^{(s+1)} = \frac{1}{N} \sum_{n=1}^N p_i^{(s+1)}(x_n)
+    where
+    - :math:`p_i(x)` are posterior probabilities predicted by the classifier
+    - :math:`q_i^{(s)}` are class prevalence estimates at iteration :math:`s`
+    - :math:`N` is the number of test instances.
+    Calibrations supported on posterior probabilities before **EM** iteration:
+    Temperature Scaling (TS):
+    .. math::
+        \hat{p} = \text{softmax}\left(\frac{\log(p)}{T}\right)
+    Bias-Corrected Temperature Scaling (BCTS):
+    .. math::
+        \hat{p} = \text{softmax}\left(\frac{\log(p)}{T} + b\right)
+    Vector Scaling (VS):
+    .. math::
+        \hat{p}_i = \text{softmax}(W_i \cdot \log(p_i) + b_i)
+    No-Bias Vector Scaling (NBVS):
+    .. math::
+        \hat{p}_i = \text{softmax}(W_i \cdot \log(p_i))
     Parameters
     ----------
     learner : estimator, optional
-        Probabilistic classifier fit on training data with `predict_proba`.
+        Probabilistic classifier supporting predict_proba.
     tol : float, default=1e-4
-        Convergence threshold for EM iterative updates.
+        Convergence threshold.
     max_iter : int, default=100
-        Maximum number of EM iterations.
+        Maximum EM iterations.
     calib_function : str or callable, optional
-        Calibration method applied to posterior probabilities.
-        Supported strings: 'bcts', 'ts', 'vs', 'nbvs'.
+        Calibration method:
+        - 'ts': Temperature Scaling
+        - 'bcts': Bias-Corrected Temperature Scaling
+        - 'vs': Vector Scaling
+        - 'nbvs': No-Bias Vector Scaling
+        - callable: custom calibration function
     criteria : callable, default=MAE
-        Function to measure convergence between prevalence estimates.
-    Methods
-    -------
-    _iterate(predictions, priors)
-        Executes EM iterations to estimate prevalences from posterior probabilities.
-    EM(posteriors, priors, tolerance, max_iter, criteria)
-        Static method implementing the EM loop with E-step and M-step.
-    _apply_calibration(predictions)
-        Applies optional calibration method to posterior predictions.
+        Convergence metric.
     References
     ----------
-    [1] Saerens et al. (2002). Adjusting the Outputs of a Classifier to New a Priori Probabilities. Neural Computation, 14(1), 2141-2156.
-    [2] Esuli et al. (2023). Learning to Quantify. Springer.
+    .. [1] Saerens, M., Latinne, P., & Decaestecker, C. (2002).
+        Adjusting the Outputs of a Classifier to New a Priori Probabilities.
+        Neural Computation, 14(1), 21-41.
+    .. [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
     """
     _parameter_constraints = {
@@ -72,8 +92,7 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
         self.criteria = criteria
     def _iterate(self, predictions, priors):
-        """
-        Perform EM quantification iteration.
+        r"""Perform EM quantification iteration.
         Steps:
         - Calibrate posterior predictions if calibration function specified.
@@ -104,8 +123,7 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
     @classmethod
     def EM(cls, posteriors, priors, tolerance=1e-6, max_iter=100, criteria=MAE):
-        """
-        Static method implementing the EM algorithm for quantification.
+        r"""Class method implementing the EM algorithm for quantification.
         Parameters
         ----------
@@ -162,8 +180,7 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
     def _apply_calibration(self, predictions):
-        """
-        Calibrate posterior predictions with specified calibration method.
+        r"""Calibrate posterior predictions with specified calibration method.
         Parameters
         ----------
@@ -242,18 +259,18 @@ class EMQ(SoftLearnerQMixin, BaseIterativeLikelihood):
 class MLPE(SoftLearnerQMixin, BaseIterativeLikelihood):
-    """
-    Maximum Likelihood Prevalence Estimation (MLPE) quantifier.
-    A simple iterative likelihood quantification method that returns the
-    training priors as the estimated prevalences, effectively skipping iteration.
-    This method assumes no prior probability shift between training and test.
+    r"""Maximum Likelihood Prevalence Estimation (MLPE).
+    Returns training priors as prevalence estimates without adaptations.
     Parameters
     ----------
     learner : estimator, optional
-        Base classifier for possible extension/fitting.
+        Base classifier.
+    References
+    ----------
+    .. [1] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
     """
     def __init__(self, learner=None):
@@ -278,38 +295,38 @@ class MLPE(SoftLearnerQMixin, BaseIterativeLikelihood):
 @define_binary
 class CDE(SoftLearnerQMixin, BaseIterativeLikelihood):
-    """
-    CDE-Iterate (Class Distribution Estimation Iterate) for binary classification.
-    This method iteratively estimates class prevalences under prior probability shift
-    by updating the false positive cost in a cost-sensitive classification framework,
-    using a thresholding strategy based on posterior probabilities.
-    The procedure:
-    - Calculates a threshold from false-positive (cFP) and false-negative (cFN) costs.
-    - Assigns hard positive predictions where posterior probability exceeds threshold.
-    - Estimates the prevalence via classify-and-count on thresholded predictions.
-    - Updates false positive cost according to prevalence estimates and training priors.
-    - Iterates until prevalence estimates converge or max iterations reached.
-    This implementation adopts the transductive thresholding variant described in
-    Esuli et al. (2023), rather than retraining a cost-sensitive classifier as in
-    Xue & Weiss (2009).
+    r"""CDE-Iterate for binary classification prevalence estimation.
+    Threshold :math:`\tau` from false positive and false negative costs:
+    .. math::
+        \tau = \frac{c_{FP}}{c_{FP} + c_{FN}}
+    Hard classification by thresholding posterior probability :math:`p(+|x)` at :math:`\tau`:
+    .. math::
+        \hat{y}(x) = \mathbf{1}_{p(+|x) > \tau}
+    Prevalence estimation via classify-and-count:
+    .. math::
+        \hat{p}_U(+) = \frac{1}{N} \sum_{n=1}^N \hat{y}(x_n)
+    False positive cost update:
+    .. math::
+        c_{FP}^{new} = \frac{p_L(+)}{p_L(-)} \times \frac{\hat{p}_U(-)}{\hat{p}_U(+)} \times c_{FN}
     Parameters
     ----------
     learner : estimator, optional
-        Wrapped classifier (unused here but part of base interface).
+        Wrapped classifier (unused).
     tol : float, default=1e-4
-        Absolute tolerance for convergence of estimated prevalences.
+        Convergence tolerance.
     max_iter : int, default=100
-        Max number of iterations allowed.
+        Max iterations.
     init_cfp : float, default=1.0
-        Initial false positive cost coefficient.
+        Initial false positive cost.
     References
     ----------
-    [2] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
+    .. [1] Esuli, A., Moreo, A., & Sebastiani, F. (2023). Learning to Quantify. Springer.
     """
     _parameter_constraints = {
@@ -323,8 +340,7 @@ class CDE(SoftLearnerQMixin, BaseIterativeLikelihood):
         self.init_cfp = float(init_cfp)
     def _iterate(self, predictions, priors):
-        """
-        Iteratively estimate prevalences via cost-sensitive thresholding.
+        r"""Iteratively estimate prevalences via cost-sensitive thresholding.
         Parameters
         ----------

mlquantify 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

mlquantify 0.1.9py3-none-any.whl → 0.1.11py3-none-any.whl