mlquantify 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. mlquantify/__init__.py +6 -0
  2. mlquantify/base.py +256 -0
  3. mlquantify/classification/__init__.py +1 -0
  4. mlquantify/classification/pwkclf.py +73 -0
  5. mlquantify/evaluation/__init__.py +2 -0
  6. mlquantify/evaluation/measures/__init__.py +26 -0
  7. mlquantify/evaluation/measures/ae.py +11 -0
  8. mlquantify/evaluation/measures/bias.py +16 -0
  9. mlquantify/evaluation/measures/kld.py +8 -0
  10. mlquantify/evaluation/measures/mse.py +12 -0
  11. mlquantify/evaluation/measures/nae.py +16 -0
  12. mlquantify/evaluation/measures/nkld.py +13 -0
  13. mlquantify/evaluation/measures/nrae.py +16 -0
  14. mlquantify/evaluation/measures/rae.py +12 -0
  15. mlquantify/evaluation/measures/se.py +12 -0
  16. mlquantify/evaluation/protocol/_Protocol.py +202 -0
  17. mlquantify/evaluation/protocol/__init__.py +2 -0
  18. mlquantify/evaluation/protocol/app.py +146 -0
  19. mlquantify/evaluation/protocol/npp.py +34 -0
  20. mlquantify/methods/__init__.py +40 -0
  21. mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +62 -0
  22. mlquantify/methods/aggregative/ThreholdOptm/__init__.py +7 -0
  23. mlquantify/methods/aggregative/ThreholdOptm/acc.py +27 -0
  24. mlquantify/methods/aggregative/ThreholdOptm/max.py +23 -0
  25. mlquantify/methods/aggregative/ThreholdOptm/ms.py +21 -0
  26. mlquantify/methods/aggregative/ThreholdOptm/ms2.py +25 -0
  27. mlquantify/methods/aggregative/ThreholdOptm/pacc.py +41 -0
  28. mlquantify/methods/aggregative/ThreholdOptm/t50.py +21 -0
  29. mlquantify/methods/aggregative/ThreholdOptm/x.py +23 -0
  30. mlquantify/methods/aggregative/__init__.py +9 -0
  31. mlquantify/methods/aggregative/cc.py +32 -0
  32. mlquantify/methods/aggregative/emq.py +86 -0
  33. mlquantify/methods/aggregative/fm.py +72 -0
  34. mlquantify/methods/aggregative/gac.py +96 -0
  35. mlquantify/methods/aggregative/gpac.py +87 -0
  36. mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +81 -0
  37. mlquantify/methods/aggregative/mixtureModels/__init__.py +5 -0
  38. mlquantify/methods/aggregative/mixtureModels/dys.py +55 -0
  39. mlquantify/methods/aggregative/mixtureModels/dys_syn.py +89 -0
  40. mlquantify/methods/aggregative/mixtureModels/hdy.py +46 -0
  41. mlquantify/methods/aggregative/mixtureModels/smm.py +27 -0
  42. mlquantify/methods/aggregative/mixtureModels/sord.py +77 -0
  43. mlquantify/methods/aggregative/pcc.py +33 -0
  44. mlquantify/methods/aggregative/pwk.py +38 -0
  45. mlquantify/methods/meta/__init__.py +1 -0
  46. mlquantify/methods/meta/ensemble.py +236 -0
  47. mlquantify/methods/non_aggregative/__init__.py +1 -0
  48. mlquantify/methods/non_aggregative/hdx.py +71 -0
  49. mlquantify/model_selection.py +232 -0
  50. mlquantify/plots/__init__.py +2 -0
  51. mlquantify/plots/distribution_plot.py +109 -0
  52. mlquantify/plots/protocol_plot.py +157 -0
  53. mlquantify/utils/__init__.py +2 -0
  54. mlquantify/utils/general_purposes/__init__.py +8 -0
  55. mlquantify/utils/general_purposes/convert_col_to_array.py +13 -0
  56. mlquantify/utils/general_purposes/generate_artificial_indexes.py +29 -0
  57. mlquantify/utils/general_purposes/get_real_prev.py +9 -0
  58. mlquantify/utils/general_purposes/load_quantifier.py +4 -0
  59. mlquantify/utils/general_purposes/make_prevs.py +23 -0
  60. mlquantify/utils/general_purposes/normalize.py +20 -0
  61. mlquantify/utils/general_purposes/parallel.py +10 -0
  62. mlquantify/utils/general_purposes/round_protocol_df.py +14 -0
  63. mlquantify/utils/method_purposes/__init__.py +6 -0
  64. mlquantify/utils/method_purposes/distances.py +21 -0
  65. mlquantify/utils/method_purposes/getHist.py +13 -0
  66. mlquantify/utils/method_purposes/get_scores.py +33 -0
  67. mlquantify/utils/method_purposes/moss.py +16 -0
  68. mlquantify/utils/method_purposes/ternary_search.py +14 -0
  69. mlquantify/utils/method_purposes/tprfpr.py +42 -0
  70. mlquantify-0.0.1.dist-info/METADATA +23 -0
  71. mlquantify-0.0.1.dist-info/RECORD +73 -0
  72. mlquantify-0.0.1.dist-info/WHEEL +5 -0
  73. mlquantify-0.0.1.dist-info/top_level.txt +1 -0
mlquantify/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from .classification import *
+ from .evaluation import *
+ from .methods import *
+ from .utils import *
+ from .plots import *
+ from .model_selection import GridSearchQ
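
The wildcard imports above flatten the subpackage APIs at the package root; GridSearchQ is the only name re-exported explicitly, from model_selection. A minimal import sketch, assuming the wheel is installed under the name mlquantify (only names visible in this diff are used):

import mlquantify as mq               # classification, evaluation, methods, utils and plots at the top level
from mlquantify import GridSearchQ    # re-exported from mlquantify.model_selection
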
mlquantify/base.py ADDED
@@ -0,0 +1,256 @@
+ from abc import abstractmethod, ABC
+ from sklearn.base import BaseEstimator
+ from copy import deepcopy
+ import numpy as np
+ import joblib
+
+
+ from .utils import parallel, normalize_prevalence
+
+ class Quantifier(ABC, BaseEstimator):
+     """Abstract base class for quantifiers."""
+
+     @abstractmethod
+     def fit(self, X, y) -> object: ...
+
+     @abstractmethod
+     def predict(self, X) -> dict: ...
+
+     @property
+     def classes(self) -> list:
+         return self._classes
+
+     @classes.setter
+     def classes(self, classes):
+         self._classes = sorted(list(classes))
+
+     @property
+     def n_class(self) -> int:
+         return len(self._classes)
+
+     @property
+     def multiclass_method(self) -> bool:
+         return True
+
+     @property
+     def binary_data(self) -> bool:
+         return len(self._classes) == 2
+
+
+     def save_quantifier(self, path: str = None) -> None:
+         if not path:
+             path = f"{self.__class__.__name__}.joblib"
+         joblib.dump(self, path)
+
+
+
+ class AggregativeQuantifier(Quantifier, ABC):
+     """Abstract base class for aggregative quantifiers, i.e. quantifiers that use a
+     learner (typically a classifier) to generate the predictions they aggregate.
+     It also detects whether the problem is binary or multiclass, and applies a
+     One-vs-All decomposition when the data are multiclass but the method is binary-only.
+     """
+
+
+     def __init__(self):
+         # Dictionary to hold binary quantifiers for each class.
+         self.binary_quantifiers = {}
+         self.learner_fitted = False
+         self.cv_folds = 10
+
+     def fit(self, X, y, learner_fitted=False, cv_folds: int = 10, n_jobs: int = 1):
+         """Fit the quantifier model.
+
+         Args:
+             X (array-like): Training features.
+             y (array-like): Training labels.
+             learner_fitted (bool, optional): Whether the learner is already fitted. Defaults to False.
+             cv_folds (int, optional): Number of cross-validation folds. Defaults to 10.
+             n_jobs (int, optional): Number of parallel jobs. Defaults to 1.
+
+         Returns:
+             self: Fitted quantifier.
+         """
+         self.n_jobs = n_jobs
+         self.learner_fitted = learner_fitted
+         self.cv_folds = cv_folds
+
+         self.classes = np.unique(y)
+         if self.binary_data or self.multiclass_method:
+             return self._fit_method(X, y)
+
+         # One-vs-All decomposition
+         self.binary_quantifiers = {class_: deepcopy(self) for class_ in self.classes}
+         parallel(self.delayed_fit, self.classes, self.n_jobs, X, y)
+
+         return self
+
+     def predict(self, X) -> dict:
+         """Predict class prevalences for the given data.
+
+         Args:
+             X (array-like): Test features.
+
+         Returns:
+             dict: Dictionary with class prevalences.
+         """
+         if self.binary_data or self.multiclass_method:
+             prevalences = self._predict_method(X)
+             return normalize_prevalence(prevalences, self.classes)
+
+         # One-vs-All decomposition
+         prevalences = np.asarray(parallel(self.delayed_predict, self.classes, self.n_jobs, X))
+         return normalize_prevalence(prevalences, self.classes)
+
+     @abstractmethod
+     def _fit_method(self, X, y):
+         """Abstract fit method that each quantification method must implement.
+
+         Args:
+             X (array-like): Training features.
+             y (array-like): Training labels.
+         """
+         ...
+
+     @abstractmethod
+     def _predict_method(self, X) -> dict:
+         """Abstract predict method that each quantification method must implement.
+
+         Args:
+             X (array-like): Test data to generate class prevalences.
+
+         Returns:
+             dict: Dictionary with class:prevalence for each class.
+         """
+         ...
+
+     @property
+     def learner(self):
+         return self.learner_
+
+     @learner.setter
+     def learner(self, value):
+         self.learner_ = value
+
+
+     def get_params(self, deep=True):
+         return self.learner.get_params()
+
+     def set_params(self, **params):
+         # Model parameters
+         for key, value in params.items():
+             if hasattr(self, key):
+                 setattr(self, key, value)
+
+         # Learner parameters
+         if self.learner:
+             learner_params = {k.replace('learner__', ''): v for k, v in params.items() if 'learner__' in k}
+             if learner_params:
+                 self.learner.set_params(**learner_params)
+
+         return self
+
+
+     # MULTICLASS METHODS
+
+     def delayed_fit(self, class_, X, y):
+         """Delayed fit method for the One-vs-All strategy, run in parallel.
+
+         Args:
+             class_ (Any): The class for which the model is being fitted.
+             X (array-like): Training features.
+             y (array-like): Training labels.
+
+         Returns:
+             self: Fitted binary quantifier for the given class.
+         """
+         y_class = (y == class_).astype(int)
+         return self.binary_quantifiers[class_].fit(X, y_class)
+
+     def delayed_predict(self, class_, X):
+         """Delayed predict method for the One-vs-All strategy, run in parallel.
+
+         Args:
+             class_ (Any): The class for which the model is making predictions.
+             X (array-like): Test features.
+
+         Returns:
+             float: Predicted prevalence for the given class.
+         """
+         return self.binary_quantifiers[class_].predict(X)[1]
+
+
+ class NonAggregativeQuantifier(Quantifier):
+     """Abstract base class for non-aggregative quantifiers, i.e. methods that do not
+     rely on a classifier or any specific learner to produce their
+     predictions.
+     """
+
+
+     def fit(self, X, y, n_jobs: int = 1):
+         """Fit the quantifier model.
+
+         Args:
+             X (array-like): Training features.
+             y (array-like): Training labels.
+             n_jobs (int, optional): Number of parallel jobs. Defaults to 1.
+
+         Returns:
+             self: Fitted quantifier.
+         """
+         self.n_jobs = n_jobs
+         self.classes = np.unique(y)
+         if self.binary_data or self.multiclass_method:
+             return self._fit_method(X, y)
+
+         # One-vs-All decomposition
+         self.binary_quantifiers = {class_: deepcopy(self) for class_ in self.classes}
+         parallel(self.delayed_fit, self.classes, self.n_jobs, X, y)
+
+         return self
+
+     def predict(self, X) -> dict:
+         """Predict class prevalences for the given data.
+
+         Args:
+             X (array-like): Test features.
+
+         Returns:
+             dict: Dictionary with class prevalences.
+         """
+         if self.binary_data or self.multiclass_method:
+             prevalences = self._predict_method(X)
+             return normalize_prevalence(prevalences, self.classes)
+
+         # One-vs-All decomposition
+         prevalences = np.asarray(parallel(self.delayed_predict, self.classes, self.n_jobs, X))
+         return normalize_prevalence(prevalences, self.classes)
+
+
+     @abstractmethod
+     def _fit_method(self, X, y):
+         """Abstract fit method that each quantification method must implement.
+
+         Args:
+             X (array-like): Training features.
+             y (array-like): Training labels.
+         """
+         ...
+
+     @abstractmethod
+     def _predict_method(self, X) -> dict:
+         """Abstract predict method that each quantification method must implement.
+
+         Args:
+             X (array-like): Test data to generate class prevalences.
+
+         Returns:
+             dict: Dictionary with class:prevalence for each class.
+         """
+         ...
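
A concrete aggregative method only has to supply the two abstract hooks _fit_method and _predict_method; the base-class fit and predict take care of class detection, the One-vs-All decomposition, and prevalence normalization. Below is a minimal, hypothetical sketch (CountBasedQuantifier is illustrative only and is not part of the package; it assumes a scikit-learn style learner such as LogisticRegression):

import numpy as np
from sklearn.linear_model import LogisticRegression
from mlquantify.base import AggregativeQuantifier

class CountBasedQuantifier(AggregativeQuantifier):
    """Illustrative classify-and-count style quantifier (not shipped with mlquantify)."""

    def __init__(self, learner=None):
        super().__init__()
        # Any estimator with fit/predict would do; LogisticRegression is only an example.
        self.learner = learner if learner is not None else LogisticRegression()

    def _fit_method(self, X, y):
        # learner_fitted is set by AggregativeQuantifier.fit before this hook runs
        if not self.learner_fitted:
            self.learner.fit(X, y)
        return self

    def _predict_method(self, X) -> dict:
        # Predicted prevalence of each class = fraction of test items assigned to it
        labels = self.learner.predict(X)
        return {c: float(np.mean(labels == c)) for c in self.classes}

Used as quantifier = CountBasedQuantifier(); quantifier.fit(X_train, y_train); quantifier.predict(X_test), this yields a dictionary mapping each class to an estimated prevalence, post-processed by normalize_prevalence.
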
mlquantify/classification/__init__.py ADDED
@@ -0,0 +1 @@
+ from .pwkclf import PWKCLF
mlquantify/classification/pwkclf.py ADDED
@@ -0,0 +1,73 @@
+ from sklearn.neighbors import NearestNeighbors
+ from sklearn.base import BaseEstimator
+ import numpy as np
+ import pandas as pd
+
+ class PWKCLF(BaseEstimator):
+     """Learner based on k-Nearest Neighbors (KNN), intended for use with the PWK
+     quantification method, which is itself KNN-based.
+     """
+
+
+     def __init__(self,
+                  alpha=1,
+                  n_neighbors=10,
+                  algorithm="auto",
+                  metric="euclidean",
+                  leaf_size=30,
+                  p=2,
+                  metric_params=None,
+                  n_jobs=None):
+
+         if alpha < 1:
+             raise ValueError("alpha must not be smaller than 1")
+
+         self.alpha = alpha
+         self.n_neighbors = n_neighbors
+
+         self.nbrs = NearestNeighbors(n_neighbors=n_neighbors,
+                                      algorithm=algorithm,
+                                      leaf_size=leaf_size,
+                                      metric=metric,
+                                      p=p,
+                                      metric_params=metric_params,
+                                      n_jobs=n_jobs)
+
+         self.Y = None
+         self.Y_map = None
+         self.w = None
+         self.y = None
+
+     def fit(self, X, y):
+         n_samples = X.shape[0]
+         if n_samples < self.n_neighbors:
+             self.nbrs.set_params(n_neighbors=n_samples)
+
+         self.y = y
+
+         if isinstance(y, pd.DataFrame):
+             self.y = y.reset_index(drop=True)
+
+         Y_cts = np.unique(y, return_counts=True)
+         self.Y = Y_cts[0]
+         self.Y_map = dict(zip(self.Y, range(len(self.Y))))
+
+         min_class_count = np.min(Y_cts[1])
+         self.w = (Y_cts[1] / min_class_count) ** (-1.0 / self.alpha)
+         self.nbrs.fit(X)
+         return self
+
+     def predict(self, X):
+         n_samples = X.shape[0]
+         nn_indices = self.nbrs.kneighbors(X, return_distance=False)
+
+         CM = np.zeros((n_samples, len(self.Y)))
+
+         for i in range(n_samples):
+             for j in nn_indices[i]:
+                 CM[i, self.Y_map[self.y[j]]] += 1
+
+         CM = np.multiply(CM, self.w)
+         predictions = np.apply_along_axis(np.argmax, axis=1, arr=CM)
+
+         return self.Y[predictions]
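
PWKCLF follows the scikit-learn estimator API, so it is fitted and queried like any other classifier; alpha (which must be at least 1) controls how strongly the class-frequency weights w down-weight neighbors from majority classes, with larger alpha giving milder re-weighting. A small usage sketch on synthetic data (shapes and values are illustrative only):

import numpy as np
from mlquantify.classification import PWKCLF

rng = np.random.default_rng(0)
X_train = rng.normal(size=(100, 5))
y_train = rng.integers(0, 2, size=100)    # binary labels 0/1
X_test = rng.normal(size=(20, 5))

clf = PWKCLF(alpha=2, n_neighbors=10)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)              # array of predicted class labels
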
mlquantify/evaluation/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .measures import *
+ from .protocol import *
mlquantify/evaluation/measures/__init__.py ADDED
@@ -0,0 +1,26 @@
+ from .ae import absolute_error
+ from .kld import kullback_leibler_divergence
+ from .nkld import normalized_kullback_leibler_divergence
+ from .rae import relative_absolute_error
+ from .nae import normalized_absolute_error
+ from .bias import bias
+ from .nrae import normalized_relative_absolute_error
+ from .se import squared_error
+ from .mse import mean_squared_error
+
+
+
+ MEASURES = {
+     "ae": absolute_error,
+     "nae": normalized_absolute_error,
+     "kld": kullback_leibler_divergence,
+     "nkld": normalized_kullback_leibler_divergence,
+     "nrae": normalized_relative_absolute_error,
+     "rae": relative_absolute_error,
+     "se": squared_error,
+     "mse": mean_squared_error
+ }
+
+
+ def get_measure(measure: str):
+     return MEASURES.get(measure)
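
get_measure resolves a measure by its short key in MEASURES and returns None for unknown keys; note that bias is imported but not registered in the dictionary, so it can only be called directly. A short usage sketch (prevalence values are illustrative):

from mlquantify.evaluation.measures import get_measure

real = {"neg": 0.7, "pos": 0.3}
pred = {"neg": 0.6, "pos": 0.4}   # dict values are read positionally, so keep the same class order

ae = get_measure("ae")            # the absolute_error function
print(ae(real, pred))             # ~0.1, the mean absolute per-class difference
print(get_measure("unknown"))     # None
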
mlquantify/evaluation/measures/ae.py ADDED
@@ -0,0 +1,11 @@
+ import numpy as np
+
+ def absolute_error(prev_real: np.any, prev_pred: np.any):
+     if isinstance(prev_real, dict):
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+
+     abs_error = abs(prev_pred - prev_real).mean(axis=-1)
+
+     return abs_error
mlquantify/evaluation/measures/bias.py ADDED
@@ -0,0 +1,16 @@
+ import numpy as np
+
+ def bias(prev_real: np.any, prev_pred: np.any):
+     classes = None
+     if isinstance(prev_real, dict):
+         classes = prev_real.keys()
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+
+     abs_errors = abs(prev_pred - prev_real)
+
+     if classes:
+         return {class_: abs_error for class_, abs_error in zip(classes, abs_errors)}
+
+     return abs_errors
mlquantify/evaluation/measures/kld.py ADDED
@@ -0,0 +1,8 @@
+ import numpy as np
+
+ def kullback_leibler_divergence(prev_real: np.any, prev_pred: np.any):
+     if isinstance(prev_real, dict):
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+     return prev_real * abs(np.log((prev_real / prev_pred)))
mlquantify/evaluation/measures/mse.py ADDED
@@ -0,0 +1,12 @@
+ import numpy as np
+ from .se import squared_error
+
+ def mean_squared_error(prev_real: np.any, prev_pred: np.any):
+     if isinstance(prev_real, dict):
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+
+     mean_sq_error = squared_error(prev_real, prev_pred).mean()
+
+     return mean_sq_error
mlquantify/evaluation/measures/nae.py ADDED
@@ -0,0 +1,16 @@
+ import numpy as np
+ from .ae import absolute_error
+
+ def normalized_absolute_error(prev_real: np.any, prev_pred: np.any):
+     if isinstance(prev_real, dict):
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+
+     abs_error = absolute_error(prev_real, prev_pred)
+
+     z_abs_error = (2 * (1 - min(prev_real)))
+
+     normalized = abs_error / z_abs_error
+
+     return normalized
mlquantify/evaluation/measures/nkld.py ADDED
@@ -0,0 +1,13 @@
+ import numpy as np
+ from .kld import kullback_leibler_divergence
+
+ def normalized_kullback_leibler_divergence(prev_real: np.any, prev_pred: np.any):
+     if isinstance(prev_real, dict):
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+
+     euler = np.exp(kullback_leibler_divergence(prev_real, prev_pred))
+     normalized = 2 * (euler / (euler + 1)) - 1
+
+     return normalized
mlquantify/evaluation/measures/nrae.py ADDED
@@ -0,0 +1,16 @@
+ import numpy as np
+ from .rae import relative_absolute_error
+
+ def normalized_relative_absolute_error(prev_real: np.any, prev_pred: np.any):
+     if isinstance(prev_real, dict):
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+
+     relative = relative_absolute_error(prev_real, prev_pred)
+
+     z_relative = (len(prev_real) - 1 + ((1 - min(prev_real)) / min(prev_real))) / len(prev_real)
+
+     normalized = relative / z_relative
+
+     return normalized
mlquantify/evaluation/measures/rae.py ADDED
@@ -0,0 +1,12 @@
+ import numpy as np
+ from .ae import absolute_error
+
+ def relative_absolute_error(prev_real: np.any, prev_pred: np.any):
+     if isinstance(prev_real, dict):
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+
+     relative = (absolute_error(prev_real, prev_pred) / prev_real).mean(axis=-1)
+
+     return relative
mlquantify/evaluation/measures/se.py ADDED
@@ -0,0 +1,12 @@
+ import numpy as np
+ from .ae import absolute_error
+
+ def squared_error(prev_real: np.any, prev_pred: np.any):
+     if isinstance(prev_real, dict):
+         prev_real = np.asarray(list(prev_real.values()))
+     if isinstance(prev_pred, dict):
+         prev_pred = np.asarray(list(prev_pred.values()))
+
+     sq_abs_error = ((prev_pred - prev_real) ** 2).mean(axis=-1)
+
+     return sq_abs_error
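
squared_error averages the squared per-class differences along the last axis, and mean_squared_error (mse.py above) averages the squared_error output, so the two coincide on a single pair of prevalence vectors and differ only when a batch of vectors is evaluated at once. A short sketch (values are illustrative):

import numpy as np
from mlquantify.evaluation.measures import squared_error, mean_squared_error

real = {"neg": 0.7, "pos": 0.3}
pred = {"neg": 0.6, "pos": 0.4}
print(squared_error(real, pred))          # ~0.01
print(mean_squared_error(real, pred))     # ~0.01, identical for a single vector pair

real_batch = np.array([[0.7, 0.3], [0.5, 0.5]])   # one true prevalence vector per row
pred_batch = np.array([[0.6, 0.4], [0.4, 0.6]])
print(squared_error(real_batch, pred_batch))      # ~[0.01, 0.01], one value per row
print(mean_squared_error(real_batch, pred_batch)) # ~0.01, averaged over rows
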