mlquantify 0.0.11.2__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. mlquantify/__init__.py +32 -6
  2. mlquantify/base.py +559 -257
  3. mlquantify/classification/__init__.py +1 -1
  4. mlquantify/classification/methods.py +160 -0
  5. mlquantify/evaluation/__init__.py +14 -2
  6. mlquantify/evaluation/measures.py +215 -0
  7. mlquantify/evaluation/protocol.py +647 -0
  8. mlquantify/methods/__init__.py +37 -40
  9. mlquantify/methods/aggregative.py +1030 -0
  10. mlquantify/methods/meta.py +472 -0
  11. mlquantify/methods/mixture_models.py +1003 -0
  12. mlquantify/methods/non_aggregative.py +136 -0
  13. mlquantify/methods/threshold_optimization.py +957 -0
  14. mlquantify/model_selection.py +377 -232
  15. mlquantify/plots.py +367 -0
  16. mlquantify/utils/__init__.py +2 -2
  17. mlquantify/utils/general.py +334 -0
  18. mlquantify/utils/method.py +449 -0
  19. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/METADATA +137 -122
  20. mlquantify-0.1.1.dist-info/RECORD +22 -0
  21. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/WHEEL +1 -1
  22. mlquantify/classification/pwkclf.py +0 -73
  23. mlquantify/evaluation/measures/__init__.py +0 -26
  24. mlquantify/evaluation/measures/ae.py +0 -11
  25. mlquantify/evaluation/measures/bias.py +0 -16
  26. mlquantify/evaluation/measures/kld.py +0 -8
  27. mlquantify/evaluation/measures/mse.py +0 -12
  28. mlquantify/evaluation/measures/nae.py +0 -16
  29. mlquantify/evaluation/measures/nkld.py +0 -13
  30. mlquantify/evaluation/measures/nrae.py +0 -16
  31. mlquantify/evaluation/measures/rae.py +0 -12
  32. mlquantify/evaluation/measures/se.py +0 -12
  33. mlquantify/evaluation/protocol/_Protocol.py +0 -202
  34. mlquantify/evaluation/protocol/__init__.py +0 -2
  35. mlquantify/evaluation/protocol/app.py +0 -146
  36. mlquantify/evaluation/protocol/npp.py +0 -34
  37. mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -62
  38. mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -7
  39. mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -27
  40. mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -23
  41. mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -21
  42. mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -25
  43. mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -41
  44. mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -21
  45. mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -23
  46. mlquantify/methods/aggregative/__init__.py +0 -9
  47. mlquantify/methods/aggregative/cc.py +0 -32
  48. mlquantify/methods/aggregative/emq.py +0 -86
  49. mlquantify/methods/aggregative/fm.py +0 -72
  50. mlquantify/methods/aggregative/gac.py +0 -96
  51. mlquantify/methods/aggregative/gpac.py +0 -87
  52. mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -81
  53. mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -5
  54. mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
  55. mlquantify/methods/aggregative/mixtureModels/dys_syn.py +0 -89
  56. mlquantify/methods/aggregative/mixtureModels/hdy.py +0 -46
  57. mlquantify/methods/aggregative/mixtureModels/smm.py +0 -27
  58. mlquantify/methods/aggregative/mixtureModels/sord.py +0 -77
  59. mlquantify/methods/aggregative/pcc.py +0 -33
  60. mlquantify/methods/aggregative/pwk.py +0 -38
  61. mlquantify/methods/meta/__init__.py +0 -1
  62. mlquantify/methods/meta/ensemble.py +0 -236
  63. mlquantify/methods/non_aggregative/__init__.py +0 -1
  64. mlquantify/methods/non_aggregative/hdx.py +0 -71
  65. mlquantify/plots/__init__.py +0 -2
  66. mlquantify/plots/distribution_plot.py +0 -109
  67. mlquantify/plots/protocol_plot.py +0 -193
  68. mlquantify/utils/general_purposes/__init__.py +0 -8
  69. mlquantify/utils/general_purposes/convert_col_to_array.py +0 -13
  70. mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -29
  71. mlquantify/utils/general_purposes/get_real_prev.py +0 -9
  72. mlquantify/utils/general_purposes/load_quantifier.py +0 -4
  73. mlquantify/utils/general_purposes/make_prevs.py +0 -23
  74. mlquantify/utils/general_purposes/normalize.py +0 -20
  75. mlquantify/utils/general_purposes/parallel.py +0 -10
  76. mlquantify/utils/general_purposes/round_protocol_df.py +0 -14
  77. mlquantify/utils/method_purposes/__init__.py +0 -6
  78. mlquantify/utils/method_purposes/distances.py +0 -21
  79. mlquantify/utils/method_purposes/getHist.py +0 -13
  80. mlquantify/utils/method_purposes/get_scores.py +0 -33
  81. mlquantify/utils/method_purposes/moss.py +0 -16
  82. mlquantify/utils/method_purposes/ternary_search.py +0 -14
  83. mlquantify/utils/method_purposes/tprfpr.py +0 -42
  84. mlquantify-0.0.11.2.dist-info/RECORD +0 -73
  85. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- from .pwkclf import PWKCLF
1
+ from .methods import *
@@ -0,0 +1,160 @@
1
+ from sklearn.neighbors import NearestNeighbors
2
+ from sklearn.base import BaseEstimator
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
class PWKCLF(BaseEstimator):
    """
    Learner based on k-Nearest Neighbors (KNN) to use in the PWK method.

    This classifier adjusts the influence of neighbors using class weights
    derived from the `alpha` parameter. The `alpha` parameter controls the
    influence of class imbalance.

    Parameters
    ----------
    alpha : float, default=1
        Controls the influence of class imbalance. Must be >= 1.

    n_neighbors : int, default=10
        Number of neighbors to use.

    algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
        Algorithm to compute nearest neighbors.

    metric : str, default='euclidean'
        Distance metric to use.

    leaf_size : int, default=30
        Leaf size passed to the tree-based algorithms.

    p : int, default=2
        Power parameter for the Minkowski metric.

    metric_params : dict, optional
        Additional keyword arguments for the metric function.

    n_jobs : int, optional
        Number of parallel jobs to run for neighbors search.

    Examples
    --------
    >>> from sklearn.datasets import load_breast_cancer
    >>> from sklearn.model_selection import train_test_split
    >>> from mlquantify.methods.aggregative import PWK
    >>> from mlquantify.utils.general import get_real_prev
    >>> from mlquantify.classification import PWKCLF
    >>>
    >>> # Load dataset
    >>> features, target = load_breast_cancer(return_X_y=True)
    >>>
    >>> # Split into training and testing sets
    >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=32)
    >>>
    >>> # Create and configure the PWKCLF learner
    >>> learner = PWKCLF(alpha=1, n_neighbors=10)
    >>>
    >>> # Create the PWK quantifier
    >>> model = PWK(learner=learner)
    >>>
    >>> # Train the model
    >>> model.fit(X_train, y_train)
    >>>
    >>> # Predict prevalences
    >>> y_pred = model.predict(X_test)
    >>>
    >>> # Display results
    >>> print("Real:", get_real_prev(y_test))
    >>> print("PWK:", y_pred)
    """

    def __init__(self,
                 alpha=1,
                 n_neighbors=10,
                 algorithm="auto",
                 metric="euclidean",
                 leaf_size=30,
                 p=2,
                 metric_params=None,
                 n_jobs=None):
        if alpha < 1:
            raise ValueError("alpha must not be smaller than 1")

        self.alpha = alpha
        self.n_neighbors = n_neighbors

        self.nbrs = NearestNeighbors(n_neighbors=n_neighbors,
                                     algorithm=algorithm,
                                     leaf_size=leaf_size,
                                     metric=metric,
                                     p=p,
                                     metric_params=metric_params,
                                     n_jobs=n_jobs)

        # Populated by fit().
        self.classes_ = None          # sorted array of unique class labels
        self.class_to_index = None    # label -> column index in the vote matrix
        self.class_weights = None     # per-class weights derived from alpha
        self.y_train = None           # training labels as a flat ndarray

    def fit(self, X, y):
        """
        Fit the PWKCLF model to the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.

        y : array-like of shape (n_samples,)
            Training labels.

        Returns
        -------
        self : object
            The fitted instance.
        """
        n_samples = X.shape[0]
        if n_samples < self.n_neighbors:
            # Cannot request more neighbors than there are training points.
            self.nbrs.set_params(n_neighbors=n_samples)

        # Store labels as a flat NumPy array so predict() can index them
        # positionally.  Fixes a bug where a pandas Series with a non-default
        # index (or a DataFrame) was indexed by label instead of position.
        self.y_train = np.asarray(y).ravel()

        unique_classes, class_counts = np.unique(self.y_train, return_counts=True)
        self.classes_ = unique_classes
        self.class_to_index = {c: i for i, c in enumerate(unique_classes)}

        # Classes with more samples get smaller weights; alpha=1 fully
        # compensates imbalance, larger alpha weakens the correction.
        min_class_count = np.min(class_counts)
        self.class_weights = (class_counts / min_class_count) ** (-1.0 / self.alpha)
        self.nbrs.fit(X)
        return self

    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data to predict.

        Returns
        -------
        y_pred : array of shape (n_samples,)
            Predicted class labels.
        """
        n_samples = X.shape[0]
        nn_indices = self.nbrs.kneighbors(X, return_distance=False)

        # np.unique returned classes_ sorted, so searchsorted maps each
        # neighbor's label to its class index without a Python-level dict
        # lookup per neighbor.
        label_idx = np.searchsorted(self.classes_, self.y_train[nn_indices])

        # Vote matrix: CM[i, c] = number of neighbors of sample i in class c.
        # np.add.at accumulates duplicate (row, col) pairs correctly, which a
        # plain fancy-index assignment would not.
        CM = np.zeros((n_samples, len(self.classes_)))
        rows = np.repeat(np.arange(n_samples), nn_indices.shape[1])
        np.add.at(CM, (rows, label_idx.ravel()), 1)

        # Weight the votes, then pick the best class per sample.
        CM *= self.class_weights
        return self.classes_[np.argmax(CM, axis=1)]
@@ -1,2 +1,14 @@
1
- from .measures import *
2
- from .protocol import *
1
from . import measures


# Registry mapping short metric identifiers to their implementations in the
# `measures` module.  These string keys are the names accepted wherever an
# error measure can be selected by name (e.g. evaluation protocols).
MEASURES = {
    "ae": measures.absolute_error,
    "mae": measures.mean_absolute_error,
    "nae": measures.normalized_absolute_error,
    "kld": measures.kullback_leibler_divergence,
    "nkld": measures.normalized_kullback_leibler_divergence,
    "nrae": measures.normalized_relative_absolute_error,
    "rae": measures.relative_absolute_error,
    "se": measures.squared_error,
    "mse": measures.mean_squared_error
}
@@ -0,0 +1,215 @@
1
+ import numpy as np
2
+
3
def process_inputs(prev_real, prev_pred):
    """
    .. :noindex:

    Normalize the two prevalence inputs for internal use.

    A dict is converted to a NumPy array of its values (insertion order
    preserved); any other input is passed through unchanged.
    """
    def _as_array(prev):
        if isinstance(prev, dict):
            return np.asarray(list(prev.values()))
        return prev

    return _as_array(prev_real), _as_array(prev_pred)
14
+
15
+
16
def absolute_error(prev_real, prev_pred):
    """
    Compute the absolute error for each class or a dictionary of errors if input is a dictionary.

    Parameters
    ----------
    prev_real : array-like or dict
        True prevalence values for each class. If a dictionary, keys are class names, and values are prevalences.

    prev_pred : array-like or dict
        Predicted prevalence values for each class. If a dictionary, keys are class names, and values are prevalences.

    Returns
    -------
    error : array-like or dict
        Absolute error for each class. If input is a dictionary, returns a dictionary with errors for each class.
    """
    # Array-like input: return the per-class deviations directly.
    if not isinstance(prev_real, dict):
        real_arr, pred_arr = process_inputs(prev_real, prev_pred)
        return np.abs(pred_arr - real_arr)

    # Dict input: keep the class names and rebuild a dict of float errors.
    class_names = list(prev_real.keys())
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    deviations = np.abs(pred_arr - real_arr)
    return dict(zip(class_names, map(float, deviations)))
40
+
41
+
42
+
43
def mean_absolute_error(prev_real, prev_pred):
    """
    Compute the mean absolute error between the real and predicted prevalences.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Mean absolute error across all classes.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    # absolute_error receives arrays here, so it returns per-class values.
    per_class = absolute_error(real_arr, pred_arr)
    return np.mean(per_class)
62
+
63
+
64
def kullback_leibler_divergence(prev_real, prev_pred):
    """
    Compute the Kullback-Leibler divergence between the real and predicted prevalences.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    divergence : array-like of shape (n_classes,)
        Kullback-Leibler divergence for each class.

    Notes
    -----
    NOTE(review): each term is wrapped in ``abs()``; the classical KL
    divergence sums *signed* terms ``p * log(p/q)`` — confirm the absolute
    value is intentional.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    log_ratio = np.log(real_arr / pred_arr)
    return real_arr * np.abs(log_ratio)
83
+
84
+
85
def squared_error(prev_real, prev_pred):
    """
    Compute the squared error between the real and predicted prevalences,
    averaged over the last (class) axis.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Mean squared error across all classes (per row for batched input).
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    squared_dev = (pred_arr - real_arr) ** 2
    return np.mean(squared_dev, axis=-1)
104
+
105
+
106
def mean_squared_error(prev_real, prev_pred):
    """
    Compute the mean squared error across all classes.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    mse : float
        Mean squared error across all classes.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    # squared_error already averages over the class axis; for batched input
    # this then averages the per-row values into one scalar.
    return np.mean(squared_error(real_arr, pred_arr))
125
+
126
+
127
def normalized_absolute_error(prev_real, prev_pred):
    """
    Compute the normalized absolute error between the real and predicted prevalences.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Normalized absolute error across all classes.

    Notes
    -----
    NOTE(review): the numerator uses the *mean* absolute error while the
    normalizer ``2 * (1 - min(prev_real))`` matches the sum-based NAE from
    the quantification literature — confirm the intended scaling.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    numerator = mean_absolute_error(real_arr, pred_arr)
    normalizer = 2 * (1 - np.min(real_arr))
    return numerator / normalizer
148
+
149
+
150
def normalized_kullback_leibler_divergence(prev_real, prev_pred):
    """
    Compute the normalized Kullback-Leibler divergence between the real and predicted prevalences.

    Applies the logistic-style squashing ``2 * e^kld / (e^kld + 1) - 1`` so
    each per-class divergence lands in [0, 1).

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    divergence : float
        Normalized Kullback-Leibler divergence across all classes.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    divergence = kullback_leibler_divergence(real_arr, pred_arr)
    exp_div = np.exp(divergence)
    return 2 * (exp_div / (exp_div + 1)) - 1
171
+
172
+
173
def relative_absolute_error(prev_real, prev_pred):
    """
    Compute the relative absolute error between the real and predicted prevalences.

    Each class's absolute error is divided by the true prevalence of that
    same class, and the per-class ratios are averaged::

        RAE = mean_i( |prev_pred_i - prev_real_i| / prev_real_i )

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,) or dict
        True prevalence values for each class. Must be non-zero.

    prev_pred : array-like of shape (n_classes,) or dict
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Relative absolute error across all classes.

    Notes
    -----
    Fixes an earlier implementation that divided the single scalar MAE by
    every true prevalence; that variant mis-attributes error to rare classes
    whenever the per-class errors differ.
    """
    # Accept dicts as well as arrays, mirroring the other measures.
    if isinstance(prev_real, dict):
        prev_real = np.asarray(list(prev_real.values()))
    if isinstance(prev_pred, dict):
        prev_pred = np.asarray(list(prev_pred.values()))
    return np.mean(np.abs(prev_pred - prev_real) / prev_real, axis=-1)
192
+
193
+
194
def normalized_relative_absolute_error(prev_real, prev_pred):
    """
    Compute the normalized relative absolute error between the real and predicted prevalences.

    The RAE is divided by its maximum attainable value for the given true
    distribution, bringing the score onto a comparable scale.

    Parameters
    ----------
    prev_real : array-like of shape (n_classes,)
        True prevalence values for each class.

    prev_pred : array-like of shape (n_classes,)
        Predicted prevalence values for each class.

    Returns
    -------
    error : float
        Normalized relative absolute error across all classes.
    """
    real_arr, pred_arr = process_inputs(prev_real, prev_pred)
    rae = relative_absolute_error(real_arr, pred_arr)
    n_classes = len(real_arr)
    min_prev = np.min(real_arr)
    # Normalizer: worst-case RAE for this true distribution.
    normalizer = (n_classes - 1 + ((1 - min_prev) / min_prev)) / n_classes
    return rae / normalizer
215
+