PyPI - dataeval - Versions diffs - 0.76.1__py3-none-any.whl → 0.82.0__py3-none-any.whl - Mend

dataeval 0.76.1__py3-none-any.whl → 0.82.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113)

dataeval/__init__.py +3 -3
dataeval/config.py +77 -0
dataeval/detectors/__init__.py +1 -1
dataeval/detectors/drift/__init__.py +6 -6
dataeval/detectors/drift/{base.py → _base.py} +40 -85
dataeval/detectors/drift/{cvm.py → _cvm.py} +21 -28
dataeval/detectors/drift/{ks.py → _ks.py} +20 -26
dataeval/detectors/drift/{mmd.py → _mmd.py} +31 -43
dataeval/detectors/drift/{torch.py → _torch.py} +2 -1
dataeval/detectors/drift/{uncertainty.py → _uncertainty.py} +24 -7
dataeval/detectors/drift/updates.py +20 -3
dataeval/detectors/linters/__init__.py +3 -5
dataeval/detectors/linters/duplicates.py +13 -36
dataeval/detectors/linters/outliers.py +23 -148
dataeval/detectors/ood/__init__.py +1 -1
dataeval/detectors/ood/ae.py +30 -9
dataeval/detectors/ood/base.py +5 -4
dataeval/detectors/ood/mixin.py +21 -7
dataeval/detectors/ood/vae.py +73 -0
dataeval/metadata/__init__.py +6 -0
dataeval/metadata/_distance.py +167 -0
dataeval/metadata/_ood.py +217 -0
dataeval/metadata/_utils.py +44 -0
dataeval/metrics/__init__.py +1 -1
dataeval/metrics/bias/__init__.py +6 -4
dataeval/metrics/bias/{balance.py → _balance.py} +15 -101
dataeval/metrics/bias/_coverage.py +98 -0
dataeval/metrics/bias/{diversity.py → _diversity.py} +18 -111
dataeval/metrics/bias/{parity.py → _parity.py} +39 -77
dataeval/metrics/estimators/__init__.py +15 -4
dataeval/metrics/estimators/{ber.py → _ber.py} +42 -29
dataeval/metrics/estimators/_clusterer.py +44 -0
dataeval/metrics/estimators/{divergence.py → _divergence.py} +18 -30
dataeval/metrics/estimators/{uap.py → _uap.py} +4 -18
dataeval/metrics/stats/__init__.py +16 -13
dataeval/metrics/stats/{base.py → _base.py} +82 -133
dataeval/metrics/stats/{boxratiostats.py → _boxratiostats.py} +15 -18
dataeval/metrics/stats/_dimensionstats.py +75 -0
dataeval/metrics/stats/{hashstats.py → _hashstats.py} +21 -37
dataeval/metrics/stats/_imagestats.py +94 -0
dataeval/metrics/stats/_labelstats.py +131 -0
dataeval/metrics/stats/{pixelstats.py → _pixelstats.py} +19 -50
dataeval/metrics/stats/{visualstats.py → _visualstats.py} +23 -54
dataeval/outputs/__init__.py +53 -0
dataeval/{output.py → outputs/_base.py} +55 -25
dataeval/outputs/_bias.py +381 -0
dataeval/outputs/_drift.py +83 -0
dataeval/outputs/_estimators.py +114 -0
dataeval/outputs/_linters.py +184 -0
dataeval/{detectors/ood/output.py → outputs/_ood.py} +22 -22
dataeval/outputs/_stats.py +387 -0
dataeval/outputs/_utils.py +44 -0
dataeval/outputs/_workflows.py +364 -0
dataeval/typing.py +234 -0
dataeval/utils/__init__.py +2 -2
dataeval/utils/_array.py +169 -0
dataeval/utils/_bin.py +199 -0
dataeval/utils/_clusterer.py +144 -0
dataeval/utils/_fast_mst.py +189 -0
dataeval/utils/{image.py → _image.py} +6 -4
dataeval/utils/_method.py +14 -0
dataeval/utils/{shared.py → _mst.py} +3 -65
dataeval/utils/{plot.py → _plot.py} +6 -6
dataeval/utils/data/__init__.py +26 -0
dataeval/utils/data/_dataset.py +217 -0
dataeval/utils/data/_embeddings.py +104 -0
dataeval/utils/data/_images.py +68 -0
dataeval/utils/data/_metadata.py +360 -0
dataeval/utils/data/_selection.py +126 -0
dataeval/utils/{dataset/split.py → data/_split.py} +12 -38
dataeval/utils/data/_targets.py +85 -0
dataeval/utils/data/collate.py +103 -0
dataeval/utils/data/datasets/__init__.py +17 -0
dataeval/utils/data/datasets/_base.py +254 -0
dataeval/utils/data/datasets/_cifar10.py +134 -0
dataeval/utils/data/datasets/_fileio.py +168 -0
dataeval/utils/data/datasets/_milco.py +153 -0
dataeval/utils/data/datasets/_mixin.py +56 -0
dataeval/utils/data/datasets/_mnist.py +183 -0
dataeval/utils/data/datasets/_ships.py +123 -0
dataeval/utils/data/datasets/_types.py +52 -0
dataeval/utils/data/datasets/_voc.py +352 -0
dataeval/utils/data/selections/__init__.py +15 -0
dataeval/utils/data/selections/_classfilter.py +57 -0
dataeval/utils/data/selections/_indices.py +26 -0
dataeval/utils/data/selections/_limit.py +26 -0
dataeval/utils/data/selections/_reverse.py +18 -0
dataeval/utils/data/selections/_shuffle.py +29 -0
dataeval/utils/metadata.py +51 -376
dataeval/utils/torch/{gmm.py → _gmm.py} +4 -2
dataeval/utils/torch/{internal.py → _internal.py} +21 -51
dataeval/utils/torch/models.py +43 -2
dataeval/workflows/__init__.py +2 -1
dataeval/workflows/sufficiency.py +11 -346
{dataeval-0.76.1.dist-info → dataeval-0.82.0.dist-info}/METADATA +5 -2
dataeval-0.82.0.dist-info/RECORD +104 -0
dataeval/detectors/linters/clusterer.py +0 -512
dataeval/detectors/linters/merged_stats.py +0 -49
dataeval/detectors/ood/metadata_ks_compare.py +0 -129
dataeval/detectors/ood/metadata_least_likely.py +0 -119
dataeval/interop.py +0 -69
dataeval/metrics/bias/coverage.py +0 -194
dataeval/metrics/stats/datasetstats.py +0 -202
dataeval/metrics/stats/dimensionstats.py +0 -115
dataeval/metrics/stats/labelstats.py +0 -210
dataeval/utils/dataset/__init__.py +0 -7
dataeval/utils/dataset/datasets.py +0 -412
dataeval/utils/dataset/read.py +0 -63
dataeval-0.76.1.dist-info/RECORD +0 -67
/dataeval/{log.py → _log.py} +0 -0
/dataeval/utils/torch/{blocks.py → _blocks.py} +0 -0
{dataeval-0.76.1.dist-info → dataeval-0.82.0.dist-info}/LICENSE.txt +0 -0
{dataeval-0.76.1.dist-info → dataeval-0.82.0.dist-info}/WHEEL +0 -0

dataeval/metrics/stats/{hashstats.py → _hashstats.py} RENAMED Viewed

@@ -4,41 +4,24 @@ import warnings
 __all__ = []
-from dataclasses import dataclass
-from typing import Callable, Iterable
+from typing import Any, Callable
 import numpy as np
 import xxhash as xxh
-from numpy.typing import ArrayLike
 from PIL import Image
 from scipy.fftpack import dct
-from dataeval.interop import as_numpy
-from dataeval.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
-from dataeval.output import set_metadata
-from dataeval.utils.image import normalize_image_shape, rescale
+from dataeval.metrics.stats._base import StatsProcessor, run_stats
+from dataeval.outputs import HashStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import ArrayLike, Dataset
+from dataeval.utils._array import as_numpy
+from dataeval.utils._image import normalize_image_shape, rescale
 HASH_SIZE = 8
 MAX_FACTOR = 4
-@dataclass(frozen=True)
-class HashStatsOutput(BaseStatsOutput):
-    """
-    Output class for :func:`hashstats` stats metric.
-    Attributes
-    ----------
-    xxhash : List[str]
-        xxHash hash of the images as a hex string
-    pchash : List[str]
-        :term:`Perception-based Hash` of the images as a hex string
-    """
-    xxhash: list[str]
-    pchash: list[str]
 def pchash(image: ArrayLike) -> str:
     """
     Performs a perceptual hash on an image by resizing to a square NxN image
@@ -122,8 +105,9 @@ class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
 @set_metadata
 def hashstats(
-    images: Iterable[ArrayLike],
-    bboxes: Iterable[ArrayLike] | None = None,
+    dataset: Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
+    *,
+    per_box: bool = False,
 ) -> HashStatsOutput:
     """
     Calculates hashes for each image.
@@ -133,10 +117,10 @@ def hashstats(
     Parameters
     ----------
-    images : ArrayLike
-        Images to hashing
-    bboxes : Iterable[ArrayLike] or None
-        Bounding boxes in `xyxy` format for each image
+    dataset : Dataset
+        Dataset to perform calculations on.
+    per_box : bool, default False
+        If True, perform calculations on each bounding box.
     Returns
     -------
@@ -149,12 +133,12 @@ def hashstats(
     Examples
     --------
-    Calculating the statistics on the images, whose shape is (C, H, W)
+    Calculate the hashes of a dataset of images, whose shape is (C, H, W)
-    >>> results = hashstats(stats_images)
-    >>> print(results.xxhash)
-    ['6274f837b34ed9f0', '256504fdb6e3d2a4', '7dd0c56ca8474fb0', '50956ad4592f5bbc', '5ba2354079d42aa5']
-    >>> print(results.pchash)
-    ['a666999999666666', 'e666999999266666', 'e666999966663299', 'e666999999266666', '96e91656e91616e9']
+    >>> results = hashstats(dataset)
+    >>> print(results.xxhash[:5])
+    ['66a93f556577c086', 'd8b686fb405c4105', '7ffdb4990ad44ac6', '42cd4c34c80f6006', 'c5519e36ac1f8839']
+    >>> print(results.pchash[:5])
+    ['e666999999266666', 'e666999999266666', 'e666999966666299', 'e666999999266666', '96e91656e91616e9']
     """
-    return run_stats(images, bboxes, False, [HashStatsProcessor])[0]
+    return run_stats(dataset, per_box, False, [HashStatsProcessor])[0]

dataeval/metrics/stats/_imagestats.py ADDED Viewed

@@ -0,0 +1,94 @@
+from __future__ import annotations
+__all__ = []
+from typing import Any, Literal, overload
+from dataeval.metrics.stats._base import run_stats
+from dataeval.metrics.stats._dimensionstats import DimensionStatsProcessor
+from dataeval.metrics.stats._pixelstats import PixelStatsProcessor
+from dataeval.metrics.stats._visualstats import VisualStatsProcessor
+from dataeval.outputs import ChannelStatsOutput, ImageStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import ArrayLike, Dataset
+@overload
+def imagestats(
+    dataset: Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
+    *,
+    per_box: bool = False,
+    per_channel: Literal[True],
+) -> ChannelStatsOutput: ...
+@overload
+def imagestats(
+    dataset: Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
+    *,
+    per_box: bool = False,
+    per_channel: Literal[False] = False,
+) -> ImageStatsOutput: ...
+@set_metadata
+def imagestats(
+    dataset: Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
+    *,
+    per_box: bool = False,
+    per_channel: bool = False,
+) -> ImageStatsOutput | ChannelStatsOutput:
+    """
+    Calculates various :term:`statistics<Statistics>` for each image.
+    This function computes dimension, pixel and visual metrics
+    on the images or individual bounding boxes for each image as
+    well as label statistics if provided.
+    Parameters
+    ----------
+    dataset : Dataset
+        Dataset to perform calculations on.
+    per_box : bool, default False
+        If True, perform calculations on each bounding box.
+    per_channel : bool, default False
+        If True, perform calculations on each channel.
+    Returns
+    -------
+    ImageStatsOutput or ChannelStatsOutput
+        Output class containing the outputs of various stats functions
+    See Also
+    --------
+    dimensionstats, labelstats, pixelstats, visualstats, Outliers
+    Examples
+    --------
+    Calculate dimension, pixel and visual statistics for a dataset containing 8
+    images.
+    >>> stats = imagestats(dataset)
+    >>> print(stats.aspect_ratio)
+    [1.    1.    1.333 1.    0.667 1.    1.    1.   ]
+    >>> print(stats.sharpness)
+    [20.23 20.23 23.33 20.23 77.06 20.23 20.23 20.23]
+    Calculate the pixel and visual stats for a dataset containing 6 3-channel
+    images and 2 1-channel images for a total of 20 channels.
+    >>> ch_stats = imagestats(dataset, per_channel=True)
+    >>> print(ch_stats.brightness)
+    [0.027 0.152 0.277 0.127 0.135 0.142 0.259 0.377 0.385 0.392 0.508 0.626
+     0.634 0.642 0.751 0.759 0.767 0.876 0.884 0.892]
+    """
+    if per_channel:
+        processors = [PixelStatsProcessor, VisualStatsProcessor]
+        output_cls = ChannelStatsOutput
+    else:
+        processors = [DimensionStatsProcessor, PixelStatsProcessor, VisualStatsProcessor]
+        output_cls = ImageStatsOutput
+    outputs = run_stats(dataset, per_box, per_channel, processors)
+    return output_cls(**{k: v for d in outputs for k, v in d.dict().items()})

dataeval/metrics/stats/_labelstats.py ADDED Viewed

@@ -0,0 +1,131 @@
+from __future__ import annotations
+__all__ = []
+from collections import Counter, defaultdict
+from typing import Any, Mapping, TypeVar
+import numpy as np
+from dataeval.outputs import LabelStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import AnnotatedDataset, ArrayLike
+from dataeval.utils._array import as_numpy
+from dataeval.utils.data._metadata import Metadata
+TValue = TypeVar("TValue")
+def _ensure_2d(labels: ArrayLike) -> ArrayLike:
+    if isinstance(labels, np.ndarray):
+        return labels[:, None]
+    else:
+        return [[lbl] for lbl in labels]  # type: ignore
+def _get_list_depth(lst):
+    if isinstance(lst, list) and lst:
+        return 1 + max(_get_list_depth(item) for item in lst)
+    return 0
+def _check_labels_dimension(labels: ArrayLike) -> ArrayLike:
+    # Check for nested lists beyond 2 levels
+    if isinstance(labels, np.ndarray):
+        if labels.ndim == 1:
+            return _ensure_2d(labels)
+        elif labels.ndim == 2:
+            return labels
+        else:
+            raise ValueError("The label array must not have more than 2 dimensions.")
+    elif isinstance(labels, list):
+        depth = _get_list_depth(labels)
+        if depth == 1:
+            return _ensure_2d(labels)
+        elif depth == 2:
+            return labels
+        else:
+            raise ValueError("The label list must not be empty or have more than 2 levels of nesting.")
+    else:
+        raise TypeError("Labels must be either a NumPy array or a list.")
+def _sort_to_list(d: Mapping[int, TValue]) -> list[TValue]:
+    return [v for _, v in sorted(d.items())]
+@set_metadata
+def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
+    """
+    Calculates :term:`statistics<Statistics>` for data labels.
+    This function computes counting metrics (e.g., total per class, total per image)
+    on the labels.
+    Parameters
+    ----------
+    dataset : Metadata or ImageClassificationDataset or ObjectDetect
+    Returns
+    -------
+    LabelStatsOutput
+        A dataclass containing the computed counting metrics for the labels.
+    Examples
+    --------
+    Calculate basic :term:`statistics<Statistics>` on labels for a dataset.
+    >>> from dataeval.utils.data import Metadata
+    >>> stats = labelstats(Metadata(dataset))
+    >>> print(stats.to_table())
+    Class Count: 5
+    Label Count: 15
+    Average # Labels per Image: 1.88
+    --------------------------------------
+      Label: Total Count - Image Count
+      horse:      2      -      2
+        cow:      4      -      3
+      sheep:      2      -      2
+        pig:      2      -      2
+    chicken:      5      -      5
+    """
+    dataset = Metadata(dataset) if isinstance(dataset, AnnotatedDataset) else dataset
+    label_counts: Counter[int] = Counter()
+    image_counts: Counter[int] = Counter()
+    index_location = defaultdict(list[int])
+    label_per_image: list[int] = []
+    index2label = dict(enumerate(dataset.class_names))
+    labels = [target.labels.tolist() for target in dataset.targets]
+    labels_2d = _check_labels_dimension(labels)
+    for i, group in enumerate(labels_2d):
+        group = as_numpy(group).tolist()
+        # Count occurrences of each label in all sublists
+        label_counts.update(group)
+        # Get the number of labels per image
+        label_per_image.append(len(group))
+        # Create a set of unique items in the current sublist
+        unique_items: set[int] = set(group)
+        # Update image counts and index locations
+        image_counts.update(unique_items)
+        for item in unique_items:
+            index_location[item].append(i)
+    return LabelStatsOutput(
+        label_counts_per_class=_sort_to_list(label_counts),
+        label_counts_per_image=label_per_image,
+        image_counts_per_class=_sort_to_list(image_counts),
+        image_indices_per_class=_sort_to_list(index_location),
+        image_count=len(label_per_image),
+        class_count=len(label_counts),
+        label_count=sum(label_counts.values()),
+        class_names=list(index2label.values()),
+    )

dataeval/metrics/stats/{pixelstats.py → _pixelstats.py} RENAMED Viewed

@@ -2,49 +2,15 @@ from __future__ import annotations
 __all__ = []
-from dataclasses import dataclass
-from typing import Any, Callable, Iterable
+from typing import Any, Callable
 import numpy as np
-from numpy.typing import ArrayLike, NDArray
 from scipy.stats import entropy, kurtosis, skew
-from dataeval.metrics.stats.base import BaseStatsOutput, HistogramPlotMixin, StatsProcessor, run_stats
-from dataeval.output import set_metadata
-@dataclass(frozen=True)
-class PixelStatsOutput(BaseStatsOutput, HistogramPlotMixin):
-    """
-    Output class for :func:`pixelstats` stats metric.
-    Attributes
-    ----------
-    mean : NDArray[np.float16]
-        Mean of the pixel values of the images
-    std : NDArray[np.float16]
-        Standard deviation of the pixel values of the images
-    var : NDArray[np.float16]
-        :term:`Variance` of the pixel values of the images
-    skew : NDArray[np.float16]
-        Skew of the pixel values of the images
-    kurtosis : NDArray[np.float16]
-        Kurtosis of the pixel values of the images
-    histogram : NDArray[np.uint32]
-        Histogram of the pixel values of the images across 256 bins scaled between 0 and 1
-    entropy : NDArray[np.float16]
-        Entropy of the pixel values of the images
-    """
-    mean: NDArray[np.float16]
-    std: NDArray[np.float16]
-    var: NDArray[np.float16]
-    skew: NDArray[np.float16]
-    kurtosis: NDArray[np.float16]
-    histogram: NDArray[np.uint32]
-    entropy: NDArray[np.float16]
-    _excluded_keys = ["histogram"]
+from dataeval.metrics.stats._base import StatsProcessor, run_stats
+from dataeval.outputs import PixelStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import ArrayLike, Dataset
 class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
@@ -71,8 +37,9 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
 @set_metadata
 def pixelstats(
-    images: Iterable[ArrayLike],
-    bboxes: Iterable[ArrayLike] | None = None,
+    dataset: Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
+    *,
+    per_box: bool = False,
     per_channel: bool = False,
 ) -> PixelStatsOutput:
     """
@@ -83,10 +50,12 @@ def pixelstats(
     Parameters
     ----------
-    images : Iterable[ArrayLike]
-        Images to perform calculations on
-    bboxes : Iterable[ArrayLike] or None
-        Bounding boxes in `xyxy` format for each image to perform calculations
+    dataset : Dataset
+        Dataset to perform calculations on.
+    per_box : bool, default False
+        If True, perform calculations on each bounding box.
+    per_channel : bool, default False
+        If True, perform calculations on each channel.
     Returns
     -------
@@ -106,12 +75,12 @@ def pixelstats(
     Examples
     --------
-    Calculating the statistics on the images, whose shape is (C, H, W)
+    Calculate the pixel statistics of a dataset of 8 images, whose shape is (C, H, W).
-    >>> results = pixelstats(stats_images)
+    >>> results = pixelstats(dataset)
     >>> print(results.mean)
-    [0.2903 0.2108 0.397  0.596  0.743 ]
+    [0.181 0.132 0.248 0.373 0.464 0.613 0.734 0.854]
     >>> print(results.entropy)
-    [4.99  2.371 1.179 2.406 0.668]
+    [4.527 1.883 0.811 1.883 0.298 1.883 1.883 1.883]
     """
-    return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]
+    return run_stats(dataset, per_box, per_channel, [PixelStatsProcessor])[0]

dataeval/metrics/stats/{visualstats.py → _visualstats.py} RENAMED Viewed

@@ -2,60 +2,26 @@ from __future__ import annotations
 __all__ = []
-from dataclasses import dataclass
-from typing import Any, Callable, Iterable
+from typing import Any, Callable
 import numpy as np
-from numpy.typing import ArrayLike, NDArray
-from dataeval.metrics.stats.base import BaseStatsOutput, HistogramPlotMixin, StatsProcessor, run_stats
-from dataeval.output import set_metadata
-from dataeval.utils.image import edge_filter
+from dataeval.metrics.stats._base import StatsProcessor, run_stats
+from dataeval.outputs import VisualStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import ArrayLike, Dataset
+from dataeval.utils._image import edge_filter
 QUARTILES = (0, 25, 50, 75, 100)
-@dataclass(frozen=True)
-class VisualStatsOutput(BaseStatsOutput, HistogramPlotMixin):
-    """
-    Output class for :func:`visualstats` stats metric.
-    Attributes
-    ----------
-    brightness : NDArray[np.float16]
-        Brightness of the images
-    contrast : NDArray[np.float16]
-        Image contrast ratio
-    darkness : NDArray[np.float16]
-        Darkness of the images
-    missing : NDArray[np.float16]
-        Percentage of the images with missing pixels
-    sharpness : NDArray[np.float16]
-        Sharpness of the images
-    zeros : NDArray[np.float16]
-        Percentage of the images with zero value pixels
-    percentiles : NDArray[np.float16]
-        Percentiles of the pixel values of the images with quartiles of (0, 25, 50, 75, 100)
-    """
-    brightness: NDArray[np.float16]
-    contrast: NDArray[np.float16]
-    darkness: NDArray[np.float16]
-    missing: NDArray[np.float16]
-    sharpness: NDArray[np.float16]
-    zeros: NDArray[np.float16]
-    percentiles: NDArray[np.float16]
-    _excluded_keys = ["percentiles"]
 class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
     output_class: type = VisualStatsOutput
     image_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
         "brightness": lambda x: x.get("percentiles")[1],
-        "contrast": lambda x: np.nan_to_num(
-            (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
-        ),
+        "contrast": lambda x: 0
+        if np.mean(x.get("percentiles")) == 0
+        else (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles")),
         "darkness": lambda x: x.get("percentiles")[-2],
         "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
         "sharpness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
@@ -78,8 +44,9 @@ class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
 @set_metadata
 def visualstats(
-    images: Iterable[ArrayLike],
-    bboxes: Iterable[ArrayLike] | None = None,
+    dataset: Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
+    *,
+    per_box: bool = False,
     per_channel: bool = False,
 ) -> VisualStatsOutput:
     """
@@ -90,10 +57,12 @@ def visualstats(
     Parameters
     ----------
-    images : Iterable[ArrayLike]
-        Images to perform calculations on
-    bboxes : Iterable[ArrayLike] or None
-        Bounding boxes in `xyxy` format for each image to perform calculations on
+    dataset : Dataset
+        Dataset to perform calculations on.
+    per_box : bool, default False
+        If True, perform calculations on each bounding box.
+    per_channel : bool, default False
+        If True, perform calculations on each channel.
     Returns
     -------
@@ -112,12 +81,12 @@ def visualstats(
     Examples
     --------
-    Calculating the :term:`statistics<Statistics>` on the images, whose shape is (C, H, W)
+    Calculate the visual statistics of a dataset of 8 images, whose shape is (C, H, W).
-    >>> results = visualstats(stats_images)
+    >>> results = visualstats(dataset)
     >>> print(results.brightness)
-    [0.1353 0.2085 0.4143 0.6084 0.8135]
+    [0.084 0.13  0.259 0.38  0.508 0.63  0.755 0.88 ]
     >>> print(results.contrast)
-    [2.04  1.331 1.261 1.279 1.253]
+    [2.04  1.331 1.261 1.279 1.253 1.268 1.265 1.263]
     """
-    return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]
+    return run_stats(dataset, per_box, per_channel, [VisualStatsProcessor])[0]

dataeval/outputs/__init__.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""
+Output classes for DataEval to store function and method outputs
+as well as runtime metadata for reproducibility and logging.
+"""
+from ._base import ExecutionMetadata
+from ._bias import BalanceOutput, CoverageOutput, DiversityOutput, LabelParityOutput, ParityOutput
+from ._drift import DriftMMDOutput, DriftOutput
+from ._estimators import BEROutput, ClustererOutput, DivergenceOutput, UAPOutput
+from ._linters import DuplicatesOutput, OutliersOutput
+from ._ood import OODOutput, OODScoreOutput
+from ._stats import (
+    ChannelStatsOutput,
+    DimensionStatsOutput,
+    HashStatsOutput,
+    ImageStatsOutput,
+    LabelStatsOutput,
+    PixelStatsOutput,
+    SourceIndex,
+    VisualStatsOutput,
+)
+from ._utils import SplitDatasetOutput, TrainValSplit
+from ._workflows import SufficiencyOutput
+__all__ = [
+    "BEROutput",
+    "BalanceOutput",
+    "ChannelStatsOutput",
+    "ClustererOutput",
+    "CoverageOutput",
+    "DimensionStatsOutput",
+    "DivergenceOutput",
+    "DiversityOutput",
+    "DriftMMDOutput",
+    "DriftOutput",
+    "DuplicatesOutput",
+    "ExecutionMetadata",
+    "HashStatsOutput",
+    "ImageStatsOutput",
+    "LabelParityOutput",
+    "LabelStatsOutput",
+    "OODOutput",
+    "OODScoreOutput",
+    "OutliersOutput",
+    "ParityOutput",
+    "PixelStatsOutput",
+    "SourceIndex",
+    "SplitDatasetOutput",
+    "SufficiencyOutput",
+    "TrainValSplit",
+    "UAPOutput",
+    "VisualStatsOutput",
+]

dataeval 0.76.1__py3-none-any.whl → 0.82.0__py3-none-any.whl

dataeval 0.76.1__py3-none-any.whl → 0.82.0__py3-none-any.whl