PyPI - mlquantify - Versions diffs - 0.1.1__tar.gz → 0.1.3__tar.gz - Mend

mlquantify 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

mlquantify-0.1.3/MANIFEST.in ADDED Viewed

	@@ -0,0 +1 @@
1	+ include VERSION.txt

{mlquantify-0.1.1 → mlquantify-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mlquantify
-Version: 0.1.1
+Version: 0.1.3
 Summary: Quantification Library
 Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
 Maintainer: Luiz Fernando Luth Junior

mlquantify-0.1.3/VERSION.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.1.3

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/aggregative.py RENAMED Viewed

@@ -907,10 +907,140 @@ class PCC(AggregativeQuantifier):
+class PACC(AggregativeQuantifier):
+    """
+    Probabilistic Adjusted Classify and Count (PACC).
+    This method extends the Adjusted Classify and Count (AC) approach
+    by leveraging the average class-conditional confidences obtained
+    from a probabilistic classifier instead of relying solely on true
+    positive and false positive rates.
+    Parameters
+    ----------
+    learner : BaseEstimator
+        A scikit-learn compatible classifier to be used for quantification.
+    threshold : float, optional
+        The decision threshold for classification. Default is 0.5.
+    Attributes
+    ----------
+    learner : BaseEstimator
+        A scikit-learn compatible classifier.
+    threshold : float
+        Decision threshold for classification. Default is 0.5.
+    tpr : float
+        True positive rate computed during the fitting process.
+    fpr : float
+        False positive rate computed during the fitting process.
+    See Also
+    --------
+    ThresholdOptimization : Base class for threshold-based quantification methods.
+    ACC : Adjusted Classify and Count quantification method.
+    CC : Classify and Count quantification method.
+    References
+    ----------
+    A. Bella, C. Ferri, J. Hernández-Orallo and M. J. Ramírez-Quintana, "Quantification via Probability Estimators," 2010 IEEE International Conference on Data Mining, Sydney, NSW, Australia, 2010, pp. 737-742, doi: 10.1109/ICDM.2010.75. Available at: https://ieeexplore.ieee.org/abstract/document/5694031
+    Examples
+    --------
+    >>> from mlquantify.methods.aggregative import PACC
+    >>> from mlquantify.utils.general import get_real_prev
+    >>> from sklearn.datasets import load_breast_cancer
+    >>> from sklearn.svm import SVC
+    >>> from sklearn.model_selection import train_test_split
+    >>>
+    >>> features, target = load_breast_cancer(return_X_y=True)
+    >>>
+    >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
+    >>>
+    >>> pacc = PACC(learner=SVC(probability=True))
+    >>> pacc.fit(X_train, y_train)
+    >>> y_pred = pacc.predict(X_test)
+    >>> y_pred
+    {0: 0.4664886119311328, 1: 0.5335113880688672}
+    >>> get_real_prev(y_test)
+    {0: 0.3991228070175439, 1: 0.6008771929824561}
+    """
+    def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
+        self.learner = learner
+        self.threshold = threshold
+        self.mean_pos = None
+        self.mean_neg = None
+    @property
+    def is_probabilistic(self) -> bool:
+        return True
+    @property
+    def is_multiclass(self) -> bool:
+        return False
+    def _fit_method(self, X, y):
+        # Get predicted labels and probabilities
+        if mq.arguments["y_labels"] is not None and mq.arguments["posteriors_train"] is not None:
+            y_labels = mq.arguments["y_labels"]
+            probabilities = mq.arguments["posteriors_train"]
+        else:
+            y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
+        # Adjust thresholds and compute true and false positive rates
+        self.mean_pos = np.mean(probabilities[y_labels == self.classes[1], 1])
+        self.mean_neg = np.mean(probabilities[y_labels != self.classes[1], 1])
+        return self
+    def _predict_method(self, X):
+        """
+        Predicts the class prevalence using the mean class-conditional
+        probabilities from a probabilistic classifier.
+        Parameters
+        ----------
+        X : array-like or sparse matrix of shape (n_samples, n_features)
+            The input data for prediction.
+        Returns
+        -------
+        dict
+            A dictionary with class labels as keys and their respective
+            prevalence estimates as values.
+        Notes
+        -----
+        The prevalence is adjusted using the formula:
+            prevalence = |mean_score - FPR| / (TPR - FPR),
+        where mean_score is the average probability for the positive class.
+        Raises
+        ------
+        ZeroDivisionError
+            If `TPR - FPR` equals zero, indicating that the classifier's
+            performance does not vary across the threshold range.
+        """
+        prevalences = {}
+        # Calculate probabilities for the positive class
+        probabilities = self.predict_learner(X)[:, 1]
+        # Compute the mean score for the positive class
+        mean_scores = np.mean(probabilities)
+        # Adjust prevalence based on TPR and FPR
+        if self.mean_pos - self.mean_neg == 0:
+            prevalence = mean_scores
+        else:
+            prevalence = np.clip(abs(mean_scores - self.mean_neg) / (self.mean_pos - self.mean_neg), 0, 1)
+        # Map the computed prevalence to the class labels
+        prevalences[self.classes[0]] = 1 - prevalence
+        prevalences[self.classes[1]] = prevalence
+        return prevalences
 class PWK(AggregativeQuantifier):
@@ -1012,7 +1142,6 @@ class PWK(AggregativeQuantifier):
 from . import threshold_optimization
 ACC = threshold_optimization.ACC
-PACC = threshold_optimization.PACC
 T50 = threshold_optimization.T50
 MAX = threshold_optimization.MAX
 X_method  = threshold_optimization.X_method

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/threshold_optimization.py RENAMED Viewed

@@ -447,9 +447,8 @@ class MS(ThresholdOptimization):
     {0: 0.3991228070175439, 1: 0.6008771929824561}
     """
-    def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
+    def __init__(self, learner: BaseEstimator=None):
         super().__init__(learner)
-        self.threshold = threshold
     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
         """
@@ -482,11 +481,42 @@ class MS(ThresholdOptimization):
         ValueError
             If `thresholds`, `tprs`, or `fprs` are empty or have mismatched lengths.
         """
-        # Compute median TPR and FPR
-        tpr = np.median(tprs)
-        fpr = np.median(fprs)
-        return (self.threshold, tpr, fpr)
+        return (thresholds, tprs, fprs)
+    def _predict_method(self, X) -> dict:
+        """
+        Predicts class prevalences using the adjusted threshold.
+        Parameters
+        ----------
+        X : pd.DataFrame or np.ndarray
+            The input features for prediction.
+        Returns
+        -------
+        np.ndarray
+            An array of predicted prevalences for the classes.
+        """
+        # Get predicted probabilities for the positive class
+        probabilities = self.predict_learner(X)[:, 1]
+        prevs = []
+        for thr, tpr, fpr in zip(self.threshold, self.tpr, self.fpr):
+            cc_output = len(probabilities[probabilities >= thr]) / len(probabilities)
+            if tpr - fpr == 0:
+                prevalence = cc_output
+            else:
+                prev = np.clip((cc_output - fpr) / (tpr - fpr), 0, 1)
+                prevs.append(prev)
+        prevalence = np.median(prevs)
+        prevalences = [1 - prevalence, prevalence]
+        return np.asarray(prevalences)
@@ -586,166 +616,48 @@ class MS2(ThresholdOptimization):
         # Identify indices where the condition is satisfied
         indices = np.where(np.abs(tprs - fprs) > 0.25)[0]
         if len(indices) == 0:
-            raise ValueError("No cases meet the condition |TPR - FPR| > 0.25.")
-        # Compute medians for the selected cases
-        threshold = np.median(thresholds[indices])
-        tpr = np.median(tprs[indices])
-        fpr = np.median(fprs[indices])
-        return (threshold, tpr, fpr)
-class PACC(ThresholdOptimization):
-    """
-    Probabilistic Adjusted Classify and Count (PACC).
-    This method extends the Adjusted Classify and Count (AC) approach
-    by leveraging the average class-conditional confidences obtained
-    from a probabilistic classifier instead of relying solely on true
-    positive and false positive rates.
-    Parameters
-    ----------
-    learner : BaseEstimator
-        A scikit-learn compatible classifier to be used for quantification.
-    threshold : float, optional
-        The decision threshold for classification. Default is 0.5.
-    Attributes
-    ----------
-    learner : BaseEstimator
-        A scikit-learn compatible classifier.
-    threshold : float
-        Decision threshold for classification. Default is 0.5.
-    tpr : float
-        True positive rate computed during the fitting process.
-    fpr : float
-        False positive rate computed during the fitting process.
-    See Also
-    --------
-    ThresholdOptimization : Base class for threshold-based quantification methods.
-    ACC : Adjusted Classify and Count quantification method.
-    CC : Classify and Count quantification method.
-    References
-    ----------
-    A. Bella, C. Ferri, J. Hernández-Orallo and M. J. Ramírez-Quintana, "Quantification via Probability Estimators," 2010 IEEE International Conference on Data Mining, Sydney, NSW, Australia, 2010, pp. 737-742, doi: 10.1109/ICDM.2010.75. Available at: https://ieeexplore.ieee.org/abstract/document/5694031
-    Examples
-    --------
-    >>> from mlquantify.methods.aggregative import PACC
-    >>> from mlquantify.utils.general import get_real_prev
-    >>> from sklearn.datasets import load_breast_cancer
-    >>> from sklearn.svm import SVC
-    >>> from sklearn.model_selection import train_test_split
-    >>>
-    >>> features, target = load_breast_cancer(return_X_y=True)
-    >>>
-    >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
-    >>>
-    >>> pacc = PACC(learner=SVC(probability=True))
-    >>> pacc.fit(X_train, y_train)
-    >>> y_pred = pacc.predict(X_test)
-    >>> y_pred
-    {0: 0.4664886119311328, 1: 0.5335113880688672}
-    >>> get_real_prev(y_test)
-    {0: 0.3991228070175439, 1: 0.6008771929824561}
-    """
+            warnings.warn("No cases satisfy the condition |TPR - FPR| > 0.25.")
+            indices = np.where(np.abs(tprs - fprs) >= 0)[0]
+        thresholds_ = thresholds[indices]
+        tprs_ = tprs[indices]
+        fprs_ = fprs[indices]
-    def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
-        super().__init__(learner)
-        self.threshold = threshold
+        return (thresholds_, tprs_, fprs_)
-    def _predict_method(self, X):
+    def _predict_method(self, X) -> dict:
         """
-        Predicts the class prevalence using the mean class-conditional
-        probabilities from a probabilistic classifier.
+        Predicts class prevalences using the adjusted threshold.
         Parameters
         ----------
-        X : array-like or sparse matrix of shape (n_samples, n_features)
-            The input data for prediction.
+        X : pd.DataFrame or np.ndarray
+            The input features for prediction.
         Returns
         -------
-        dict
-            A dictionary with class labels as keys and their respective
-            prevalence estimates as values.
-        Notes
-        -----
-        The prevalence is adjusted using the formula:
-            prevalence = |mean_score - FPR| / (TPR - FPR),
-        where mean_score is the average probability for the positive class.
-        Raises
-        ------
-        ZeroDivisionError
-            If `TPR - FPR` equals zero, indicating that the classifier's
-            performance does not vary across the threshold range.
+        np.ndarray
+            An array of predicted prevalences for the classes.
         """
-        prevalences = {}
-        # Calculate probabilities for the positive class
+        # Get predicted probabilities for the positive class
         probabilities = self.predict_learner(X)[:, 1]
+        prevs = []
+        for thr, tpr, fpr in zip(self.threshold, self.tpr, self.fpr):
+            cc_output = len(probabilities[probabilities >= thr]) / len(probabilities)
+            if tpr - fpr == 0:
+                prevalence = cc_output
+            else:
+                prev = np.clip((cc_output - fpr) / (tpr - fpr), 0, 1)
+                prevs.append(prev)
+        prevalence = np.median(prevs)
+        prevalences = [1 - prevalence, prevalence]
-        # Compute the mean score for the positive class
-        mean_scores = np.mean(probabilities)
-        # Adjust prevalence based on TPR and FPR
-        if self.tpr - self.fpr == 0:
-            prevalence = mean_scores
-        else:
-            prevalence = np.clip(abs(mean_scores - self.fpr) / (self.tpr - self.fpr), 0, 1)
-        # Map the computed prevalence to the class labels
-        prevalences[self.classes[0]] = 1 - prevalence
-        prevalences[self.classes[1]] = prevalence
-        return prevalences
-    def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-        """
-        Finds the true positive rate (TPR) and false positive rate (FPR)
-        corresponding to the specified decision threshold.
-        Parameters
-        ----------
-        thresholds : np.ndarray
-            An array of threshold values.
-        tprs : np.ndarray
-            An array of true positive rates corresponding to the thresholds.
-        fprs : np.ndarray
-            An array of false positive rates corresponding to the thresholds.
-        Returns
-        -------
-        tuple
-            A tuple containing the specified threshold, TPR, and FPR.
-        Raises
-        ------
-        IndexError
-            If the specified threshold is not found in the `thresholds` array.
-        """
-        # Locate TPR and FPR for the specified threshold
-        tpr = tprs[thresholds == self.threshold][0]
-        fpr = fprs[thresholds == self.threshold][0]
-        return (self.threshold, tpr, fpr)
-    def best_tprfpr(self, thresholds:np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-        tpr = tprs[thresholds == self.threshold][0]
-        fpr = fprs[thresholds == self.threshold][0]
-        return (self.threshold, tpr, fpr)
+        return np.asarray(prevalences)

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mlquantify
-Version: 0.1.1
+Version: 0.1.3
 Summary: Quantification Library
 Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
 Maintainer: Luiz Fernando Luth Junior

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,4 +1,6 @@
+MANIFEST.in
 README.md
+VERSION.txt
 setup.py
 mlquantify/__init__.py
 mlquantify/base.py

{mlquantify-0.1.1 → mlquantify-0.1.3}/setup.py RENAMED Viewed

@@ -1,20 +1,20 @@
 from setuptools import setup, find_packages
 import pathlib
 here = pathlib.Path(__file__).parent.resolve()
 long_description = (here / 'README.md').read_text(encoding='utf-8')
-VERSION = '0.1.1'
+# Lê a versão do arquivo VERSION.txt
+version_file = here / 'VERSION.txt'
+VERSION = version_file.read_text(encoding='utf-8').strip()
 DESCRIPTION = 'Quantification Library'
-# Setting up
 setup(
     name="mlquantify",
     version=VERSION,
     url="https://github.com/luizfernandolj/QuantifyML/tree/master",
-    maintainer="Luiz Fernando Luth Junior",
+    maintainer="Luiz Fernando Luth Junior",
     description=DESCRIPTION,
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -30,4 +30,4 @@ setup(
         "Operating System :: MacOS :: MacOS X",
         "Operating System :: Microsoft :: Windows",
     ]
-)
+)

{mlquantify-0.1.1 → mlquantify-0.1.3}/README.md RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/__init__.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/base.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/classification/__init__.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/classification/methods.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/evaluation/__init__.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/evaluation/measures.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/evaluation/protocol.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/__init__.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/meta.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/mixture_models.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/non_aggregative.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/model_selection.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/plots.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/utils/__init__.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/utils/general.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/utils/method.py RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/requires.txt RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/top_level.txt RENAMED Viewed

File without changes

{mlquantify-0.1.1 → mlquantify-0.1.3}/setup.cfg RENAMED Viewed

File without changes

mlquantify 0.1.1__tar.gz → 0.1.3__tar.gz

mlquantify 0.1.1__tar.gz → 0.1.3__tar.gz