dataeval 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. dataeval/__init__.py +4 -4
  2. dataeval/detectors/__init__.py +4 -3
  3. dataeval/detectors/drift/__init__.py +10 -11
  4. dataeval/{_internal/detectors → detectors}/drift/base.py +51 -102
  5. dataeval/{_internal/detectors → detectors}/drift/cvm.py +9 -8
  6. dataeval/{_internal/detectors → detectors}/drift/ks.py +11 -10
  7. dataeval/{_internal/detectors → detectors}/drift/mmd.py +33 -34
  8. dataeval/{_internal/detectors → detectors}/drift/torch.py +15 -13
  9. dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +12 -9
  10. dataeval/detectors/drift/updates.py +61 -0
  11. dataeval/detectors/linters/__init__.py +3 -3
  12. dataeval/{_internal/detectors → detectors/linters}/clusterer.py +47 -45
  13. dataeval/{_internal/detectors → detectors/linters}/duplicates.py +20 -10
  14. dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
  15. dataeval/{_internal/detectors → detectors/linters}/outliers.py +19 -26
  16. dataeval/detectors/ood/__init__.py +8 -16
  17. dataeval/{_internal/detectors → detectors}/ood/ae.py +9 -9
  18. dataeval/{_internal/detectors → detectors}/ood/aegmm.py +10 -30
  19. dataeval/{_internal/detectors → detectors}/ood/base.py +27 -21
  20. dataeval/{_internal/detectors → detectors}/ood/llr.py +27 -23
  21. dataeval/detectors/ood/metadata_ks_compare.py +99 -0
  22. dataeval/detectors/ood/metadata_least_likely.py +119 -0
  23. dataeval/detectors/ood/metadata_ood_mi.py +92 -0
  24. dataeval/{_internal/detectors → detectors}/ood/vae.py +11 -13
  25. dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
  26. dataeval/{_internal/interop.py → interop.py} +12 -7
  27. dataeval/metrics/__init__.py +1 -1
  28. dataeval/metrics/bias/__init__.py +4 -4
  29. dataeval/{_internal/metrics → metrics/bias}/balance.py +70 -4
  30. dataeval/{_internal/metrics → metrics/bias}/coverage.py +10 -8
  31. dataeval/{_internal/metrics → metrics/bias}/diversity.py +54 -20
  32. dataeval/metrics/bias/metadata.py +275 -0
  33. dataeval/{_internal/metrics → metrics/bias}/parity.py +21 -17
  34. dataeval/metrics/estimators/__init__.py +3 -3
  35. dataeval/{_internal/metrics → metrics/estimators}/ber.py +31 -28
  36. dataeval/{_internal/metrics → metrics/estimators}/divergence.py +15 -16
  37. dataeval/{_internal/metrics → metrics/estimators}/uap.py +8 -6
  38. dataeval/metrics/stats/__init__.py +7 -7
  39. dataeval/{_internal/metrics → metrics}/stats/base.py +66 -40
  40. dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +19 -15
  41. dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +19 -17
  42. dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +12 -10
  43. dataeval/metrics/stats/hashstats.py +156 -0
  44. dataeval/{_internal/metrics → metrics}/stats/labelstats.py +8 -6
  45. dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +12 -11
  46. dataeval/{_internal/metrics → metrics}/stats/visualstats.py +14 -13
  47. dataeval/{_internal/output.py → output.py} +26 -6
  48. dataeval/utils/__init__.py +8 -4
  49. dataeval/utils/image.py +71 -0
  50. dataeval/utils/shared.py +151 -0
  51. dataeval/utils/split_dataset.py +486 -0
  52. dataeval/utils/tensorflow/__init__.py +9 -7
  53. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +64 -68
  54. dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +10 -9
  55. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +18 -22
  56. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
  57. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +18 -18
  58. dataeval/utils/tensorflow/loss/__init__.py +6 -2
  59. dataeval/utils/torch/__init__.py +7 -3
  60. dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
  61. dataeval/{_internal → utils/torch}/datasets.py +49 -43
  62. dataeval/utils/torch/models.py +138 -0
  63. dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +12 -141
  64. dataeval/{_internal → utils/torch}/utils.py +3 -1
  65. dataeval/workflows/__init__.py +1 -1
  66. dataeval/{_internal/workflows → workflows}/sufficiency.py +42 -37
  67. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/METADATA +7 -5
  68. dataeval-0.72.2.dist-info/RECORD +72 -0
  69. dataeval/_internal/detectors/__init__.py +0 -0
  70. dataeval/_internal/detectors/drift/__init__.py +0 -0
  71. dataeval/_internal/detectors/ood/__init__.py +0 -0
  72. dataeval/_internal/metrics/__init__.py +0 -0
  73. dataeval/_internal/metrics/stats/hashstats.py +0 -75
  74. dataeval/_internal/metrics/utils.py +0 -447
  75. dataeval/_internal/models/__init__.py +0 -0
  76. dataeval/_internal/models/pytorch/__init__.py +0 -0
  77. dataeval/_internal/models/pytorch/utils.py +0 -67
  78. dataeval/_internal/models/tensorflow/__init__.py +0 -0
  79. dataeval/_internal/workflows/__init__.py +0 -0
  80. dataeval/detectors/drift/kernels/__init__.py +0 -10
  81. dataeval/detectors/drift/updates/__init__.py +0 -7
  82. dataeval/utils/tensorflow/models/__init__.py +0 -9
  83. dataeval/utils/tensorflow/recon/__init__.py +0 -3
  84. dataeval/utils/torch/datasets/__init__.py +0 -12
  85. dataeval/utils/torch/models/__init__.py +0 -11
  86. dataeval/utils/torch/trainer/__init__.py +0 -7
  87. dataeval-0.72.0.dist-info/RECORD +0 -80
  88. /dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
  89. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
  90. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0
dataeval/detectors/ood/metadata_least_likely.py
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+import numbers
+import warnings
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+
+def get_least_likely_features(
+    metadata: dict[str, list[Any] | NDArray[Any]],
+    new_metadata: dict[str, list[Any] | NDArray[Any]],
+    is_ood: NDArray[np.bool_],
+) -> list[tuple[str, float]]:
+    """Computes which metadata feature is most out-of-distribution (OOD) relative to a reference metadata set.
+
+    Given a reference metadata dictionary `metadata` (where each key maps to one scalar metadata feature), a second
+    metadata dictionary, and a corresponding boolean flag `is_ood` indicating whether each new example falls
+    out-of-distribution (OOD) relative to the reference, this function finds which metadata feature is the most OOD,
+    for each OOD example.
+
+    Parameters
+    ----------
+    metadata : dict[str, list[Any] | NDArray[Any]]
+        A reference set of arrays of values, indexed by metadata feature names, with one value per data example per
+        feature.
+    new_metadata : dict[str, list[Any] | NDArray[Any]]
+        A second metadata set, to be tested against the reference metadata. The two metadata sets may hold
+        different numbers of examples.
+    is_ood : NDArray[np.bool_]
+        A boolean array, with one value per new_metadata example, that indicates which examples are OOD.
+
+    Returns
+    -------
+    list[tuple[str, float]]
+        A list of (feature name, deviation) pairs, one per OOD new_metadata example, naming the feature that was
+        most OOD for that example and how far out it was.
+
+    Examples
+    --------
+    Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and
+    altitude, as shown below.
+
+    >>> from dataeval.detectors.ood.metadata_least_likely import get_least_likely_features
+    >>> import numpy
+    >>> metadata = {"time": [1.2, 3.4, 5.6], "altitude": [235, 6789, 101112]}
+    >>> new_metadata = {"time": [7.8, 11.12], "altitude": [532, -211101]}
+    >>> is_ood = numpy.array([True, True])
+    >>> get_least_likely_features(metadata, new_metadata, is_ood)
+    [('time', 2.0), ('altitude', 33.245346)]
+    """
+    # Raise errors for bad inputs...
+
+    if metadata.keys() != new_metadata.keys():
+        raise ValueError(f"Reference and test metadata keys must be identical: {list(metadata)}, {list(new_metadata)}")
+
+    md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
+    new_md_lengths = {len(np.atleast_1d(v)) for v in new_metadata.values()}
+    if len(md_lengths) > 1 or len(new_md_lengths) > 1:
+        raise ValueError(f"All features must have same length, got lengths {md_lengths}, {new_md_lengths}")
+
+    n_reference, n_new = md_lengths.pop(), new_md_lengths.pop()  # possibly different numbers of metadata examples
+
+    if n_new != len(is_ood):
+        raise ValueError(f"is_ood flag must have same length as new metadata {n_new} but has length {len(is_ood)}.")
+
+    if n_reference < 3:  # too hard to define "in-distribution" with this few reference samples.
+        warnings.warn(
+            "We need at least 3 reference metadata examples to determine which "
+            f"features are least likely, but only got {n_reference}",
+            UserWarning,
+        )
+        return []
+
+    if not any(is_ood):
+        return []
+
+    # ...inputs are good, look for most deviant standardized features.
+
+    # largest standardized absolute deviation from the median observed so far for each example
+    deviation = np.zeros_like(is_ood, dtype=np.float32)
+
+    # name of feature that corresponds to `deviation` for each example
+    kmax = np.empty(len(is_ood), dtype=object)
+
+    for k, v in metadata.items():
+        # exclude cases where random happens to be out on tails, not interesting.
+        if k == "random":
+            continue
+
+        # Skip non-numerical features
+        if not all(isinstance(vi, numbers.Number) for vi in v):  # NB: np.nan *is* a number in this context.
+            continue
+
+        # Get standardization parameters from metadata
+        loc = np.median(v)  # ok, because we checked all were numeric
+        dev = np.asarray(v) - loc  # need to make array from v since it could be a list here.
+        posdev, negdev = dev[dev > 0], dev[dev < 0]
+        pos_scale = np.median(posdev) if posdev.any() else 1.0
+        neg_scale = np.abs(np.median(negdev)) if negdev.any() else 1.0
+
+        x, x0, dxp, dxn = np.atleast_1d(new_metadata[k]), loc, pos_scale, neg_scale  # just abbreviations
+        dxp = dxp if dxp > 0 else 1.0  # avoids dividing by zero below
+        dxn = dxn if dxn > 0 else 1.0
+
+        # xdev must be floating-point to avoid getting zero in an integer division.
+        xdev = (x - x0).astype(np.float64)
+        pos = xdev >= 0
+
+        X = np.zeros_like(xdev)
+        X[pos], X[~pos] = xdev[pos] / dxp, xdev[~pos] / dxn  # keeping track of possible asymmetry of x, but...
+        # ...below here, only need to think about absolute deviation.
+
+        abig = np.abs(X) > deviation
+        kmax[abig] = k
+        deviation[abig] = np.abs(X[abig])
+
+    unlikely_features = list(zip(kmax[is_ood], deviation[is_ood]))  # feature names, along with how far out they are.
+    return unlikely_features
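
The scoring logic above centers each reference feature on its median and uses separate scales for positive and negative deviations, so skewed features are not penalized for their long tail. A minimal sketch of that standardization in isolation (variable names here are illustrative, not part of the package):

```python
import numpy as np

ref = np.array([1.2, 3.4, 5.6])           # reference "time" values from the doctest above
loc = np.median(ref)                       # center on the median: 3.4
dev = ref - loc
pos_scale = np.median(dev[dev > 0])        # scale for values above the median: 2.2
neg_scale = abs(np.median(dev[dev < 0]))   # scale for values below the median: 2.2

x = 7.8                                    # a new, possibly OOD value
scale = pos_scale if x >= loc else neg_scale
print(round((x - loc) / scale, 6))         # 2.0, matching the doctest result for "time"
```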
dataeval/detectors/ood/metadata_ood_mi.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+import numbers
+import warnings
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+from sklearn.feature_selection import mutual_info_classif
+
+# NATS2BITS is the reciprocal of the natural log of 2. If you have an information/entropy-type quantity measured in
+# nats, which is what many library functions return, multiply it by NATS2BITS to get it in bits.
+NATS2BITS = 1.442695
+
+
+def get_metadata_ood_mi(
+    metadata: dict[str, list[Any] | NDArray[Any]],
+    is_ood: NDArray[np.bool_],
+    discrete_features: str | bool | NDArray[np.bool_] = False,
+    random_state: int | None = None,
+) -> dict[str, float]:
+    """Computes mutual information between a set of metadata features and an out-of-distribution flag.
+
+    Given a metadata dictionary `metadata` (where each key maps to one scalar metadata feature per example), and a
+    corresponding boolean flag `is_ood` indicating whether each example falls out-of-distribution (OOD) relative to a
+    reference dataset, this function finds the strength of association between each metadata feature and `is_ood` by
+    computing their mutual information. Metadata features may be either discrete or continuous; set the
+    `discrete_features` keyword to a bool array with True for each discrete feature, or pass a single bool to apply
+    to all features. Returns a dict indicating the strength of association between each individual feature and the
+    OOD flag, measured in bits.
+
+    Parameters
+    ----------
+    metadata : dict[str, list[Any] | NDArray[Any]]
+        A set of arrays of values, indexed by metadata feature names, with one value per data example per feature.
+    is_ood : NDArray[np.bool_]
+        A boolean array, with one value per example, that indicates which examples are OOD.
+    discrete_features : str | bool | NDArray[np.bool_]
+        Either a boolean array or a single boolean value, indicating which features take on discrete values.
+    random_state : int, optional - default None
+        Determines random number generation for small noise added to continuous variables. Set to a value for
+        reproducible results.
+
+    Returns
+    -------
+    dict[str, float]
+        A dictionary with keys corresponding to metadata feature names, and values indicating the strength of
+        association between each named feature and the OOD flag, as mutual information measured in bits.
+
+    Examples
+    --------
+    Imagine we have 100 data examples, and that the corresponding metadata contains 2 features called time and
+    altitude.
+
+    >>> import numpy
+    >>> metadata = {"time": numpy.linspace(0, 10, 100), "altitude": numpy.linspace(0, 16, 100) ** 2}
+    >>> is_ood = metadata["altitude"] > 100
+    >>> print(get_metadata_ood_mi(metadata, is_ood, discrete_features=False))
+    {'time': 0.933074285817367, 'altitude': 0.9407686591507002}
+    """
+    numerical_keys = [k for k, v in metadata.items() if all(isinstance(vi, numbers.Number) for vi in v)]
+    if len(numerical_keys) < len(metadata):
+        warnings.warn(
+            f"Processing {numerical_keys}, others are non-numerical and will be skipped.",
+            UserWarning,
+        )
+
+    md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
+    if len(md_lengths) > 1:
+        raise ValueError(f"Metadata features have differing sizes: {md_lengths}")
+
+    if len(is_ood) != (mdl := md_lengths.pop()):
+        raise ValueError(
+            f"OOD flag and metadata features need to be same size, but are different sizes: {len(is_ood)} and {mdl}."
+        )
+
+    X = np.array([metadata[k] for k in numerical_keys]).T
+
+    X0, dX = np.mean(X, axis=0), np.std(X, axis=0, ddof=1)
+    Xscl = (X - X0) / dX
+
+    mutual_info_values = (
+        mutual_info_classif(
+            Xscl,
+            is_ood,
+            discrete_features=discrete_features,  # type: ignore
+            random_state=random_state,
+        )
+        * NATS2BITS
+    )
+
+    mi_dict = {k: mutual_info_values[i] for i, k in enumerate(numerical_keys)}
+    return mi_dict
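
As a sanity check on the nats-to-bits conversion: `mutual_info_classif` reports values in nats, and the entropy of a fair coin is ln 2 ≈ 0.6931 nats, which should come out to exactly one bit:

```python
import numpy as np

NATS2BITS = 1.442695               # 1 / ln(2), as defined above

fair_coin_nats = np.log(2)         # entropy of a fair coin, in nats
print(fair_coin_nats * NATS2BITS)  # ~1.0 bit
```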
dataeval/detectors/ood/vae.py
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
 
 from __future__ import annotations
 
+__all__ = ["OOD_VAE"]
+
 from typing import Callable
 
 import numpy as np
@@ -15,12 +17,11 @@ import tensorflow as tf
 import tf_keras as keras
 from numpy.typing import ArrayLike
 
-from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
-from dataeval._internal.interop import to_numpy
-from dataeval._internal.models.tensorflow.autoencoder import VAE
-from dataeval._internal.models.tensorflow.losses import Elbo
-from dataeval._internal.models.tensorflow.utils import predict_batch
-from dataeval._internal.output import set_metadata
+from dataeval.detectors.ood.base import OODBase, OODScoreOutput
+from dataeval.interop import to_numpy
+from dataeval.utils.tensorflow._internal.autoencoder import VAE
+from dataeval.utils.tensorflow._internal.loss import Elbo
+from dataeval.utils.tensorflow._internal.utils import predict_batch
 
 
 class OOD_VAE(OODBase):
@@ -38,14 +39,14 @@ class OOD_VAE(OODBase):
     --------
     Instantiate an OOD detector metric with a generic dataset - batch of images with shape (3,25,25)
 
-    >>> metric = OOD_VAE(create_model(VAE, dataset[0].shape))
+    >>> metric = OOD_VAE(create_model("VAE", dataset[0].shape))
 
     Adjusting fit parameters,
     including setting the fit threshold at 85% for a training set with about 15% out-of-distribution
 
     >>> metric.fit(dataset, threshold_perc=85, batch_size=128, verbose=False)
 
-    Detect out of distribution samples at the 'feature' level
+    Detect :term:`out of distribution<Out-of-Distribution (OOD)>` samples at the 'feature' level
 
     >>> result = metric.predict(dataset, ood_type="feature")
     """
@@ -58,18 +59,15 @@
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable[..., tf.Tensor] | None = None,
+        loss_fn: Callable[..., tf.Tensor] = Elbo(0.05),
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
-        if loss_fn is None:
-            loss_fn = Elbo(0.05)
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
 
-    @set_metadata("dataeval.detectors")
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
+    def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
 
         # sample reconstructed instances
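
The rename of `score` to `_score` (and the dropped `set_metadata` decorator) suggests the public `score` entry point now lives once on the base class instead of on each detector. The base.py hunk is not shown here, but the usual shape of that refactor is a template method; the sketch below is illustrative only, not dataeval's actual `OODBase` code:

```python
class OODBaseSketch:
    """Illustrative template-method pattern, not dataeval's actual base class."""

    def score(self, X, batch_size: int = int(1e10)):
        # Shared concerns (output metadata, input validation) are handled once here...
        return self._score(X, batch_size)

    def _score(self, X, batch_size: int = int(1e10)):
        raise NotImplementedError  # ...while each detector overrides only this hook.
```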
dataeval/detectors/ood/vaegmm.py
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
 
 from __future__ import annotations
 
+__all__ = ["OOD_VAEGMM"]
+
 from typing import Callable
 
 import numpy as np
@@ -15,13 +17,12 @@ import tensorflow as tf
 import tf_keras as keras
 from numpy.typing import ArrayLike
 
-from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
-from dataeval._internal.interop import to_numpy
-from dataeval._internal.models.tensorflow.autoencoder import VAEGMM
-from dataeval._internal.models.tensorflow.gmm import gmm_energy
-from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
-from dataeval._internal.models.tensorflow.utils import predict_batch
-from dataeval._internal.output import set_metadata
+from dataeval.detectors.ood.base import OODGMMBase, OODScoreOutput
+from dataeval.interop import to_numpy
+from dataeval.utils.tensorflow._internal.autoencoder import VAEGMM
+from dataeval.utils.tensorflow._internal.gmm import gmm_energy
+from dataeval.utils.tensorflow._internal.loss import Elbo, LossGMM
+from dataeval.utils.tensorflow._internal.utils import predict_batch
 
 
 class OOD_VAEGMM(OODGMMBase):
@@ -44,38 +45,15 @@ class OOD_VAEGMM(OODGMMBase):
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable[..., tf.Tensor] | None = None,
+        loss_fn: Callable[..., tf.Tensor] = LossGMM(elbo=Elbo(0.05)),
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
-        if loss_fn is None:
-            loss_fn = LossGMM(elbo=Elbo(0.05))
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
 
-    @set_metadata("dataeval.detectors")
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
-        """
-        Compute the out-of-distribution (OOD) score for a given dataset.
-
-        Parameters
-        ----------
-        X : ArrayLike
-            Input data to score.
-        batch_size : int, default 1e10
-            Number of instances to process in each batch.
-            Use a smaller batch size if your dataset is large or if you encounter memory issues.
-
-        Returns
-        -------
-        OODScoreOutput
-            An object containing the instance-level OOD score.
-
-        Note
-        ----
-        This model does not produce a feature level score like the OOD_AE or OOD_VAE models.
-        """
+    def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
 
         # draw samples from latent space
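
Note that moving the default from a `None` sentinel into the signature (`loss_fn: ... = LossGMM(elbo=Elbo(0.05))`) means the default loss object is constructed once, at import time, and shared by every call that omits `loss_fn`. That is harmless for a stateless callable, but it is the behavior change this hunk introduces; a small illustration of the underlying Python rule (generic code, not dataeval's):

```python
class Counter:
    def __init__(self) -> None:
        self.calls = 0

def fit(loss=Counter()):  # default is evaluated once, at function definition
    loss.calls += 1
    return loss.calls

print(fit(), fit())  # 1 2 -- both calls share the same default Counter instance
```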
dataeval/interop.py
@@ -1,43 +1,47 @@
 from __future__ import annotations
 
+__all__ = ["as_numpy", "to_numpy", "to_numpy_iter"]
+
 from importlib import import_module
 from typing import Any, Iterable, Iterator
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 
-module_cache = {}
+_MODULE_CACHE = {}
 
 
-def try_import(module_name):
-    if module_name in module_cache:
-        return module_cache[module_name]
+def _try_import(module_name):
+    if module_name in _MODULE_CACHE:
+        return _MODULE_CACHE[module_name]
 
     try:
         module = import_module(module_name)
     except ImportError:  # pragma: no cover - covered by test_mindeps.py
         module = None
 
-    module_cache[module_name] = module
+    _MODULE_CACHE[module_name] = module
     return module
 
 
 def as_numpy(array: ArrayLike | None) -> NDArray[Any]:
+    """Converts an ArrayLike to Numpy array without copying (if possible)"""
     return to_numpy(array, copy=False)
 
 
 def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
+    """Converts an ArrayLike to new Numpy array"""
     if array is None:
         return np.ndarray([])
 
     if isinstance(array, np.ndarray):
         return array.copy() if copy else array
 
-    tf = try_import("tensorflow")
+    tf = _try_import("tensorflow")
     if tf and tf.is_tensor(array):
         return array.numpy().copy() if copy else array.numpy()  # type: ignore
 
-    torch = try_import("torch")
+    torch = _try_import("torch")
     if torch and isinstance(array, torch.Tensor):
         return array.detach().cpu().numpy().copy() if copy else array.detach().cpu().numpy()  # type: ignore
 
@@ -45,5 +49,6 @@ def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
 
 
 def to_numpy_iter(iterable: Iterable[ArrayLike]) -> Iterator[NDArray[Any]]:
+    """Yields an iterator of numpy arrays from an ArrayLike"""
     for array in iterable:
         yield to_numpy(array)
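
With the module now public at `dataeval.interop`, typical usage looks like the sketch below (assuming the usual array-conversion fallback for plain sequences, which this hunk does not show):

```python
import numpy as np
from dataeval.interop import as_numpy, to_numpy

arr = np.arange(3)
copied = to_numpy(arr)   # copy=True by default: a new array
shared = as_numpy(arr)   # copy=False path: the same ndarray passes through
assert copied is not arr and shared is arr
```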
dataeval/metrics/__init__.py
@@ -3,6 +3,6 @@ Metrics are a way to measure the performance of your models or datasets that
 can then be analyzed in the context of a given problem.
 """
 
-from . import bias, estimators, stats
+from dataeval.metrics import bias, estimators, stats
 
 __all__ = ["bias", "estimators", "stats"]
dataeval/metrics/bias/__init__.py
@@ -3,10 +3,10 @@ Bias metrics check for skewed or imbalanced datasets and incomplete feature
 representation which may impact model performance.
 """
 
-from dataeval._internal.metrics.balance import BalanceOutput, balance
-from dataeval._internal.metrics.coverage import CoverageOutput, coverage
-from dataeval._internal.metrics.diversity import DiversityOutput, diversity
-from dataeval._internal.metrics.parity import ParityOutput, label_parity, parity
+from dataeval.metrics.bias.balance import BalanceOutput, balance
+from dataeval.metrics.bias.coverage import CoverageOutput, coverage
+from dataeval.metrics.bias.diversity import DiversityOutput, diversity
+from dataeval.metrics.bias.parity import ParityOutput, label_parity, parity
 
 __all__ = [
     "balance",
dataeval/metrics/bias/balance.py
@@ -1,15 +1,17 @@
 from __future__ import annotations
 
+__all__ = ["BalanceOutput", "balance"]
+
 import warnings
 from dataclasses import dataclass
-from typing import Mapping
+from typing import Any, Mapping
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
 
-from dataeval._internal.metrics.utils import entropy, preprocess_metadata
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.metrics.bias.metadata import entropy, heatmap, preprocess_metadata
+from dataeval.output import OutputMetadata, set_metadata
 
 
 @dataclass(frozen=True)
@@ -25,12 +27,73 @@ class BalanceOutput(OutputMetadata):
         Estimate of inter/intra-factor mutual information
     classwise : NDArray[np.float64]
         Estimate of mutual information between metadata factors and individual class labels
+    class_list : NDArray[np.int64]
+        Class labels for each value in the dataset
+    metadata_names : list[str]
+        Names of each metadata factor
     """
 
     balance: NDArray[np.float64]
     factors: NDArray[np.float64]
    classwise: NDArray[np.float64]
 
+    class_list: NDArray[np.int64]
+    metadata_names: list[str]
+
+    def plot(
+        self,
+        row_labels: NDArray[Any] | None = None,
+        col_labels: NDArray[Any] | None = None,
+        plot_classwise: bool = False,
+    ) -> None:
+        """
+        Plot a heatmap of balance information
+
+        Parameters
+        ----------
+        row_labels : NDArray | None, default None
+            Array containing the labels for rows in the heatmap
+        col_labels : NDArray | None, default None
+            Array containing the labels for columns in the heatmap
+        plot_classwise : bool, default False
+            Whether to plot per-class balance instead of global balance
+        """
+        if plot_classwise:
+            if row_labels is None:
+                row_labels = np.unique(self.class_list)
+            if col_labels is None:
+                col_labels = np.concatenate((["class"], self.metadata_names))
+
+            heatmap(
+                self.classwise,
+                row_labels,
+                col_labels,
+                xlabel="Factors",
+                ylabel="Class",
+                cbarlabel="Normalized Mutual Information",
+            )
+        else:
+            data = np.concatenate([self.balance[np.newaxis, 1:], self.factors], axis=0)
+            # Create a mask for the upper triangle of the symmetrical array, ignoring the diagonal
+            mask = np.triu(data + 1, k=0) < 1
+            # Finalize the data for the plot, last row is last factor x last factor so it gets dropped
+            heat_data = np.where(mask, np.nan, data)[:-1]
+            # Creating label array for heat map axes
+            heat_labels = np.concatenate((["class"], self.metadata_names))
+
+            if row_labels is None:
+                row_labels = heat_labels[:-1]
+            if col_labels is None:
+                col_labels = heat_labels[1:]
+
+            heatmap(
+                heat_data,
+                row_labels,
+                col_labels,
+                cbarlabel="Normalized Mutual Information",
+            )
+
 
 def validate_num_neighbors(num_neighbors: int) -> int:
     if not isinstance(num_neighbors, (int, float)):
@@ -114,6 +177,9 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
     mi = np.empty((num_factors, num_factors))
     mi[:] = np.nan
 
+    class_idx = names.index("class_label")
+    class_lbl = np.array(data[:, class_idx], dtype=int)
+
     for idx in range(num_factors):
         tgt = data[:, idx].astype(int)
 
@@ -174,4 +240,4 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
     norm_factor = 0.5 * np.add.outer(ent_tgt_bin, ent_all) + 1e-6
     classwise = classwise_mi / norm_factor
 
-    return BalanceOutput(balance, factors, classwise)
+    return BalanceOutput(balance, factors, classwise, class_lbl, list(metadata.keys()))
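
Putting the new fields and `plot` method together, usage would look roughly like the sketch below; the data is made up, `balance`'s signature is taken from the hunk headers above, and plotting presumably requires matplotlib:

```python
import numpy as np
from dataeval.metrics.bias import balance

rng = np.random.default_rng(0)
class_labels = rng.integers(0, 3, size=100)                               # 3 classes, 100 samples
metadata = {"time": rng.random(100), "altitude": rng.random(100) * 1000}  # illustrative factors

out = balance(class_labels, metadata)
out.plot()                     # factor-vs-factor mutual information heatmap
out.plot(plot_classwise=True)  # per-class association with each factor
```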
dataeval/metrics/bias/coverage.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+__all__ = ["CoverageOutput", "coverage"]
+
 import math
 from dataclasses import dataclass
 from typing import Literal
@@ -8,15 +10,15 @@ import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from scipy.spatial.distance import pdist, squareform
 
-from dataeval._internal.interop import to_numpy
-from dataeval._internal.metrics.utils import flatten
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.interop import to_numpy
+from dataeval.output import OutputMetadata, set_metadata
+from dataeval.utils.shared import flatten
 
 
 @dataclass(frozen=True)
 class CoverageOutput(OutputMetadata):
     """
-    Output class for :func:`coverage` bias metric
+    Output class for :func:`coverage` :term:`bias<Bias>` metric
 
     Attributes
     ----------
@@ -25,7 +27,7 @@ class CoverageOutput(OutputMetadata):
     radii : NDArray
         Array of critical value radii
     critical_value : float
-        Radius for coverage
+        Radius for :term:`coverage<Coverage>`
     """
 
     indices: NDArray[np.intp]
@@ -33,7 +35,7 @@ class CoverageOutput(OutputMetadata):
     critical_value: float
 
 
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def coverage(
     embeddings: ArrayLike,
     radius_type: Literal["adaptive", "naive"] = "adaptive",
@@ -41,7 +43,7 @@ def coverage(
     percent: np.float64 = np.float64(0.01),
 ) -> CoverageOutput:
     """
-    Class for evaluating coverage and identifying images/samples that are in undercovered regions.
+    Class for evaluating :term:`coverage<Coverage>` and identifying images/samples that are in undercovered regions.
 
     Parameters
     ----------
@@ -64,7 +66,7 @@ def coverage(
     Raises
     ------
     ValueError
-        If length of embeddings is less than or equal to k
+        If length of :term:`embeddings<Embeddings>` is less than or equal to k
     ValueError
         If radius_type is unknown
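
For reference, calling the updated `coverage` looks like the sketch below; the embeddings are random stand-ins, and the `k` default is not shown in these hunks:

```python
import numpy as np
from dataeval.metrics.bias import coverage

rng = np.random.default_rng(0)
embeddings = rng.random((50, 16))   # 50 samples of 16-d embeddings

result = coverage(embeddings, radius_type="adaptive")
print(result.indices)          # samples flagged as under-covered
print(result.critical_value)   # the coverage radius
```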