dataeval 0.61.0__py3-none-any.whl → 0.63.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. dataeval/__init__.py +1 -1
  2. dataeval/_internal/detectors/clusterer.py +44 -16
  3. dataeval/_internal/detectors/drift/base.py +14 -12
  4. dataeval/_internal/detectors/drift/cvm.py +11 -8
  5. dataeval/_internal/detectors/drift/ks.py +6 -3
  6. dataeval/_internal/detectors/drift/mmd.py +14 -12
  7. dataeval/_internal/detectors/drift/uncertainty.py +7 -5
  8. dataeval/_internal/detectors/duplicates.py +35 -12
  9. dataeval/_internal/detectors/linter.py +85 -16
  10. dataeval/_internal/detectors/ood/ae.py +6 -5
  11. dataeval/_internal/detectors/ood/aegmm.py +5 -5
  12. dataeval/_internal/detectors/ood/base.py +14 -13
  13. dataeval/_internal/detectors/ood/llr.py +6 -4
  14. dataeval/_internal/detectors/ood/vae.py +5 -4
  15. dataeval/_internal/detectors/ood/vaegmm.py +5 -4
  16. dataeval/_internal/functional/__init__.py +0 -0
  17. dataeval/_internal/functional/ber.py +63 -0
  18. dataeval/_internal/functional/coverage.py +75 -0
  19. dataeval/_internal/functional/divergence.py +16 -0
  20. dataeval/_internal/{metrics → functional}/hash.py +1 -1
  21. dataeval/_internal/functional/metadata.py +136 -0
  22. dataeval/_internal/functional/metadataparity.py +190 -0
  23. dataeval/_internal/functional/uap.py +6 -0
  24. dataeval/_internal/interop.py +52 -0
  25. dataeval/_internal/maite/__init__.py +0 -0
  26. dataeval/_internal/maite/utils.py +30 -0
  27. dataeval/_internal/metrics/base.py +2 -2
  28. dataeval/_internal/metrics/ber.py +16 -66
  29. dataeval/_internal/metrics/coverage.py +51 -35
  30. dataeval/_internal/metrics/divergence.py +50 -42
  31. dataeval/_internal/metrics/metadata.py +610 -0
  32. dataeval/_internal/metrics/metadataparity.py +67 -0
  33. dataeval/_internal/metrics/parity.py +40 -56
  34. dataeval/_internal/metrics/stats.py +46 -35
  35. dataeval/_internal/metrics/uap.py +14 -17
  36. dataeval/_internal/workflows/__init__.py +0 -0
  37. dataeval/metrics/__init__.py +2 -1
  38. {dataeval-0.61.0.dist-info → dataeval-0.63.0.dist-info}/METADATA +1 -2
  39. dataeval-0.63.0.dist-info/RECORD +68 -0
  40. dataeval-0.61.0.dist-info/RECORD +0 -55
  41. dataeval/_internal/{metrics → functional}/utils.py +0 -0
  42. {dataeval-0.61.0.dist-info → dataeval-0.63.0.dist-info}/LICENSE.txt +0 -0
  43. {dataeval-0.61.0.dist-info → dataeval-0.63.0.dist-info}/WHEEL +0 -0
dataeval/_internal/metrics/divergence.py

@@ -7,46 +7,23 @@ from typing import Any, Callable, Dict, Literal
 
 import numpy as np
 
+from dataeval._internal.functional.divergence import divergence_fnn, divergence_mst
+from dataeval._internal.interop import ArrayLike, to_numpy
 from dataeval._internal.metrics.base import EvaluateMixin, MethodsMixin
 
-from .utils import compute_neighbors, minimum_spanning_tree
-
-
-def _mst(data: np.ndarray, labels: np.ndarray) -> int:
-    mst = minimum_spanning_tree(data).toarray()
-    edgelist = np.transpose(np.nonzero(mst))
-    errors = np.sum(labels[edgelist[:, 0]] != labels[edgelist[:, 1]])
-    return errors
-
-
-def _fnn(data: np.ndarray, labels: np.ndarray) -> int:
-    nn_indices = compute_neighbors(data, data)
-    errors = np.sum(np.abs(labels[nn_indices] - labels))
-    return errors
-
-
 _METHODS = Literal["MST", "FNN"]
 _FUNCTION = Callable[[np.ndarray, np.ndarray], int]
 
 
 class Divergence(EvaluateMixin, MethodsMixin[_METHODS, _FUNCTION]):
     """
-    Calculates the estimated divergence between two datasets
+    Calculates the estimated HP divergence between two datasets
 
     Parameters
     ----------
-    data_a : np.ndarray
-        Array of images or image embeddings to compare
-    data_b : np.ndarray
-        Array of images or image embeddings to compare
     method : Literal["MST, "FNN"], default "MST"
         Method used to estimate dataset divergence
 
-    See Also
-    --------
-    For more information about this divergence, its formal definition,
-    and its associated estimators see https://arxiv.org/abs/1412.6534.
-
     Warning
     -------
     MST is very slow in this implementation, this is unlike matlab where
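The two helpers deleted above are not dropped: the new import pulls divergence_fnn and divergence_mst from dataeval/_internal/functional/divergence.py (file 19 in the list, +16 lines), and utils.py moves into the same functional package unchanged (file 41). A plausible reconstruction of the new module, assuming it is a straight relocation and rename of the deleted code:

import numpy as np

# Assumes the relocated utils module keeps its original public names (file 41 shows a
# pure move with +0 -0 changes).
from dataeval._internal.functional.utils import compute_neighbors, minimum_spanning_tree


def divergence_mst(data: np.ndarray, labels: np.ndarray) -> int:
    # Count minimum-spanning-tree edges whose endpoints come from different datasets
    mst = minimum_spanning_tree(data).toarray()
    edgelist = np.transpose(np.nonzero(mst))
    errors = np.sum(labels[edgelist[:, 0]] != labels[edgelist[:, 1]])
    return errors


def divergence_fnn(data: np.ndarray, labels: np.ndarray) -> int:
    # Count nearest-neighbor pairs whose dataset labels disagree
    nn_indices = compute_neighbors(data, data)
    errors = np.sum(np.abs(labels[nn_indices] - labels))
    return errors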
@@ -55,38 +32,69 @@ class Divergence(EvaluateMixin, MethodsMixin[_METHODS, _FUNCTION]):
     Source of slowdown:
     conversion to and from CSR format adds ~10% of the time diff between
     1nn and scipy mst function the remaining 90%
+
+    References
+    ----------
+    For more information about this divergence, its formal definition,
+    and its associated estimators see https://arxiv.org/abs/1412.6534.
+
+    Examples
+    --------
+    Initialize the Divergence class:
+
+    >>> divert = Divergence()
+
+    Specify the method:
+
+    >>> divert = Divergence(method="FNN")
     """
 
-    def __init__(
-        self,
-        data_a: np.ndarray,
-        data_b: np.ndarray,
-        method: _METHODS = "MST",
-    ) -> None:
-        self.data_a = data_a
-        self.data_b = data_b
+    def __init__(self, method: _METHODS = "MST") -> None:
         self._set_method(method)
 
     @classmethod
     def _methods(cls) -> Dict[str, _FUNCTION]:
-        return {"FNN": _fnn, "MST": _mst}
+        return {"FNN": divergence_fnn, "MST": divergence_mst}
 
-    def evaluate(self) -> Dict[str, Any]:
+    def evaluate(self, data_a: ArrayLike, data_b: ArrayLike) -> Dict[str, Any]:
         """
         Calculates the divergence and any errors between the datasets
 
+        Parameters
+        ----------
+        data_a : ArrayLike, shape - (N, P)
+            A dataset in an ArrayLike format to compare.
+            Function expects the data to have 2 dimensions, N number of observations in a P-dimesionial space.
+        data_b : ArrayLike, shape - (N, P)
+            A dataset in an ArrayLike format to compare.
+            Function expects the data to have 2 dimensions, N number of observations in a P-dimesionial space.
+
         Returns
         -------
         Dict[str, Any]
-            dp : float
+            divergence : float
                 divergence value between 0.0 and 1.0
-            errors : int
-                the number of differing edges
+            error : int
+                the number of differing edges between the datasets
+
+        Notes
+        -----
+        The divergence value indicates how similar the 2 datasets are
+        with 0 indicating approximately identical data distributions.
+
+        Examples
+        --------
+        Evaluate the datasets:
+
+        >>> divert.evaluate(datasetA, datasetB)
+        {'divergence': 0.28, 'error': 36.0}
         """
-        N = self.data_a.shape[0]
-        M = self.data_b.shape[0]
+        a = to_numpy(data_a)
+        b = to_numpy(data_b)
+        N = a.shape[0]
+        M = b.shape[0]
 
-        stacked_data = np.vstack((self.data_a, self.data_b))
+        stacked_data = np.vstack((a, b))
         labels = np.vstack([np.zeros([N, 1]), np.ones([M, 1])])
 
         errors = self._method(stacked_data, labels)
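The net effect of this hunk is a breaking API change: the datasets move out of the constructor and into evaluate(), which now accepts any ArrayLike and converts it with the new interop module's to_numpy. A minimal migration sketch, importing from the internal module path shown in this diff (a public alias may also exist via dataeval/metrics/__init__.py) and using hypothetical random embeddings, so the printed values will differ:

import numpy as np

from dataeval._internal.metrics.divergence import Divergence

rng = np.random.default_rng(0)
emb_a = rng.normal(size=(128, 16))  # hypothetical: N=128 observations in a P=16 dimensional space
emb_b = rng.normal(size=(128, 16))

# 0.61.0: data was bound at construction time
#   div = Divergence(emb_a, emb_b, method="MST")
#   results = div.evaluate()

# 0.63.0: construct the estimator once, pass data per call
div = Divergence(method="MST")
results = div.evaluate(emb_a, emb_b)
print(results["divergence"], results["error"])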