PyPI - dataeval - Versions diffs - 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl - Mend

dataeval 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

dataeval/__init__.py +4 -4
dataeval/detectors/__init__.py +4 -3
dataeval/detectors/drift/__init__.py +10 -11
dataeval/{_internal/detectors → detectors}/drift/base.py +51 -102
dataeval/{_internal/detectors → detectors}/drift/cvm.py +9 -8
dataeval/{_internal/detectors → detectors}/drift/ks.py +11 -10
dataeval/{_internal/detectors → detectors}/drift/mmd.py +33 -34
dataeval/{_internal/detectors → detectors}/drift/torch.py +15 -13
dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +12 -9
dataeval/detectors/drift/updates.py +61 -0
dataeval/detectors/linters/__init__.py +3 -3
dataeval/{_internal/detectors → detectors/linters}/clusterer.py +47 -45
dataeval/{_internal/detectors → detectors/linters}/duplicates.py +20 -10
dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
dataeval/{_internal/detectors → detectors/linters}/outliers.py +19 -26
dataeval/detectors/ood/__init__.py +8 -16
dataeval/{_internal/detectors → detectors}/ood/ae.py +9 -9
dataeval/{_internal/detectors → detectors}/ood/aegmm.py +10 -30
dataeval/{_internal/detectors → detectors}/ood/base.py +27 -21
dataeval/{_internal/detectors → detectors}/ood/llr.py +27 -23
dataeval/detectors/ood/metadata_ks_compare.py +99 -0
dataeval/detectors/ood/metadata_least_likely.py +119 -0
dataeval/detectors/ood/metadata_ood_mi.py +92 -0
dataeval/{_internal/detectors → detectors}/ood/vae.py +11 -13
dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
dataeval/{_internal/interop.py → interop.py} +12 -7
dataeval/metrics/__init__.py +1 -1
dataeval/metrics/bias/__init__.py +4 -4
dataeval/{_internal/metrics → metrics/bias}/balance.py +70 -4
dataeval/{_internal/metrics → metrics/bias}/coverage.py +10 -8
dataeval/{_internal/metrics → metrics/bias}/diversity.py +54 -20
dataeval/metrics/bias/metadata.py +275 -0
dataeval/{_internal/metrics → metrics/bias}/parity.py +21 -17
dataeval/metrics/estimators/__init__.py +3 -3
dataeval/{_internal/metrics → metrics/estimators}/ber.py +31 -28
dataeval/{_internal/metrics → metrics/estimators}/divergence.py +15 -16
dataeval/{_internal/metrics → metrics/estimators}/uap.py +8 -6
dataeval/metrics/stats/__init__.py +7 -7
dataeval/{_internal/metrics → metrics}/stats/base.py +66 -40
dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +19 -15
dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +19 -17
dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +12 -10
dataeval/metrics/stats/hashstats.py +156 -0
dataeval/{_internal/metrics → metrics}/stats/labelstats.py +8 -6
dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +12 -11
dataeval/{_internal/metrics → metrics}/stats/visualstats.py +14 -13
dataeval/{_internal/output.py → output.py} +26 -6
dataeval/utils/__init__.py +8 -4
dataeval/utils/image.py +71 -0
dataeval/utils/shared.py +151 -0
dataeval/utils/split_dataset.py +486 -0
dataeval/utils/tensorflow/__init__.py +9 -7
dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +64 -68
dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +10 -9
dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +18 -22
dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +18 -18
dataeval/utils/tensorflow/loss/__init__.py +6 -2
dataeval/utils/torch/__init__.py +7 -3
dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
dataeval/{_internal → utils/torch}/datasets.py +49 -43
dataeval/utils/torch/models.py +138 -0
dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +12 -141
dataeval/{_internal → utils/torch}/utils.py +3 -1
dataeval/workflows/__init__.py +1 -1
dataeval/{_internal/workflows → workflows}/sufficiency.py +42 -37
{dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/METADATA +7 -5
dataeval-0.72.2.dist-info/RECORD +72 -0
dataeval/_internal/detectors/__init__.py +0 -0
dataeval/_internal/detectors/drift/__init__.py +0 -0
dataeval/_internal/detectors/ood/__init__.py +0 -0
dataeval/_internal/metrics/__init__.py +0 -0
dataeval/_internal/metrics/stats/hashstats.py +0 -75
dataeval/_internal/metrics/utils.py +0 -447
dataeval/_internal/models/__init__.py +0 -0
dataeval/_internal/models/pytorch/__init__.py +0 -0
dataeval/_internal/models/pytorch/utils.py +0 -67
dataeval/_internal/models/tensorflow/__init__.py +0 -0
dataeval/_internal/workflows/__init__.py +0 -0
dataeval/detectors/drift/kernels/__init__.py +0 -10
dataeval/detectors/drift/updates/__init__.py +0 -7
dataeval/utils/tensorflow/models/__init__.py +0 -9
dataeval/utils/tensorflow/recon/__init__.py +0 -3
dataeval/utils/torch/datasets/__init__.py +0 -12
dataeval/utils/torch/models/__init__.py +0 -11
dataeval/utils/torch/trainer/__init__.py +0 -7
dataeval-0.72.0.dist-info/RECORD +0 -80
/dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
{dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
{dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0

dataeval/{_internal/metrics → metrics}/stats/datasetstats.py RENAMED Viewed

@@ -1,19 +1,21 @@
 from __future__ import annotations
+__all__ = ["DatasetStatsOutput", "ChannelStatsOutput", "datasetstats", "channelstats"]
 from dataclasses import dataclass
 from typing import Any, Iterable
 from numpy.typing import ArrayLike
-from dataeval._internal.metrics.stats.base import BaseStatsOutput, run_stats
-from dataeval._internal.metrics.stats.dimensionstats import (
+from dataeval.metrics.stats.base import BaseStatsOutput, run_stats
+from dataeval.metrics.stats.dimensionstats import (
     DimensionStatsOutput,
     DimensionStatsProcessor,
 )
-from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
-from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
-from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
+from dataeval.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
+from dataeval.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
+from dataeval.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
@@ -39,14 +41,14 @@ class DatasetStatsOutput(OutputMetadata):
     visualstats: VisualStatsOutput
     labelstats: LabelStatsOutput | None = None
-    def outputs(self) -> list[OutputMetadata]:
+    def _outputs(self) -> list[OutputMetadata]:
         return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
     def dict(self) -> dict[str, Any]:
-        return {k: v for o in self.outputs() for k, v in o.dict().items()}
+        return {k: v for o in self._outputs() for k, v in o.dict().items()}
-    def __post_init__(self):
-        lengths = [len(s) for s in self.outputs() if isinstance(s, BaseStatsOutput)]
+    def __post_init__(self) -> None:
+        lengths = [len(s) for s in self._outputs() if isinstance(s, BaseStatsOutput)]
         if not all(length == lengths[0] for length in lengths):
             raise ValueError("All StatsOutput classes must contain the same number of image sources.")
@@ -70,26 +72,26 @@ class ChannelStatsOutput(OutputMetadata):
     pixelstats: PixelStatsOutput
     visualstats: VisualStatsOutput
-    def outputs(self) -> list[BaseStatsOutput]:
-        return [self.pixelstats, self.visualstats]
+    def _outputs(self) -> tuple[PixelStatsOutput, VisualStatsOutput]:
+        return (self.pixelstats, self.visualstats)
     def dict(self) -> dict[str, Any]:
         return {**self.pixelstats.dict(), **self.visualstats.dict()}
-    def __post_init__(self):
-        lengths = [len(s) for s in self.outputs()]
+    def __post_init__(self) -> None:
+        lengths = [len(s) for s in self._outputs()]
         if not all(length == lengths[0] for length in lengths):
             raise ValueError("All StatsOutput classes must contain the same number of image sources.")
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def datasetstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
     labels: Iterable[ArrayLike] | None = None,
 ) -> DatasetStatsOutput:
     """
-    Calculates various statistics for each image
+    Calculates various :term:`statistics<Statistics>` for each image
     This function computes dimension, pixel and visual metrics
     on the images or individual bounding boxes for each image as
@@ -129,7 +131,7 @@ def datasetstats(
     return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None)  # type: ignore
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def channelstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,

dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py RENAMED Viewed

@@ -1,14 +1,16 @@
 from __future__ import annotations
+__all__ = ["DimensionStatsOutput", "dimensionstats"]
 from dataclasses import dataclass
-from typing import Iterable
+from typing import Any, Callable, Iterable
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
-from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
-from dataeval._internal.metrics.utils import get_bitdepth
-from dataeval._internal.output import set_metadata
+from dataeval.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
+from dataeval.output import set_metadata
+from dataeval.utils.image import get_bitdepth
 @dataclass(frozen=True)
@@ -31,7 +33,7 @@ class DimensionStatsOutput(BaseStatsOutput):
     size : NDArray[np.uint32]
         Size of the images in pixels
     aspect_ratio : NDArray[np.float16]
-        Aspect ratio of the images (width/height)
+        :term:`ASspect Ratio<Aspect Ratio>` of the images (width/height)
     depth : NDArray[np.uint8]
         Color depth of the images in bits
     center : NDArray[np.uint16]
@@ -53,8 +55,8 @@ class DimensionStatsOutput(BaseStatsOutput):
 class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
-    output_class = DimensionStatsOutput
-    image_function_map = {
+    output_class: type = DimensionStatsOutput
+    image_function_map: dict[str, Callable[[StatsProcessor[DimensionStatsOutput]], Any]] = {
         "left": lambda x: x.box[0],
         "top": lambda x: x.box[1],
         "width": lambda x: x.box[2] - x.box[0],
@@ -71,13 +73,13 @@ class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
     }
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def dimensionstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
 ) -> DimensionStatsOutput:
     """
-    Calculates dimension statistics for each image
+    Calculates dimension :term:`statistics<Statistics>` for each image
     This function computes various dimensional metrics (e.g., width, height, channels)
     on the images or individual bounding boxes for each image.
@@ -94,7 +96,7 @@ def dimensionstats(
     DimensionStatsOutput
         A dictionary-like object containing the computed dimension statistics for each image or bounding
         box. The keys correspond to the names of the statistics (e.g., 'width', 'height'), and the values
-        are lists of results for each image or numpy arrays when the results are multi-dimensional.
+        are lists of results for each image or :term:NumPy` arrays when the results are multi-dimensional.
     See Also
     --------

dataeval/metrics/stats/hashstats.py ADDED Viewed

@@ -0,0 +1,156 @@
+from __future__ import annotations
+__all__ = ["HashStatsOutput", "hashstats"]
+from dataclasses import dataclass
+from typing import Callable, Iterable
+import numpy as np
+import xxhash as xxh
+from numpy.typing import ArrayLike
+from PIL import Image
+from scipy.fftpack import dct
+from dataeval.interop import as_numpy
+from dataeval.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
+from dataeval.output import set_metadata
+from dataeval.utils.image import normalize_image_shape, rescale
+HASH_SIZE = 8
+MAX_FACTOR = 4
+@dataclass(frozen=True)
+class HashStatsOutput(BaseStatsOutput):
+    """
+    Output class for :func:`hashstats` stats metric
+    Attributes
+    ----------
+    xxhash : List[str]
+        xxHash hash of the images as a hex string
+    pchash : List[str]
+        :term:`Perception-based Hash` of the images as a hex string
+    """
+    xxhash: list[str]
+    pchash: list[str]
+def pchash(image: ArrayLike) -> str:
+    """
+    Performs a perceptual hash on an image by resizing to a square NxN image
+    using the Lanczos algorithm where N is 32x32 or the largest multiple of
+    8 that is smaller than the input image dimensions.  The resampled image
+    is compressed using a discrete cosine transform and the lowest frequency
+    component is encoded as a bit array of greater or less than median value
+    and returned as a hex string.
+    Parameters
+    ----------
+    image : ArrayLike
+        An image as a numpy array in CxHxW format
+    Returns
+    -------
+    str
+        The hex string hash of the image using perceptual hashing
+    """
+    # Verify that the image is at least larger than an 8x8 image
+    arr = as_numpy(image)
+    min_dim = min(arr.shape[-2:])
+    if min_dim < HASH_SIZE + 1:
+        raise ValueError(f"Image must be larger than {HASH_SIZE}x{HASH_SIZE} for fuzzy hashing.")
+    # Calculates the dimensions of the resized square image
+    resize_dim = HASH_SIZE * min((min_dim - 1) // HASH_SIZE, MAX_FACTOR)
+    # Normalizes the image to CxHxW and takes the mean over all the channels
+    normalized = np.mean(normalize_image_shape(arr), axis=0).squeeze()
+    # Rescales the pixel values to an 8-bit 0-255 image
+    rescaled = rescale(normalized, 8).astype(np.uint8)
+    # Resizes the image using the Lanczos algorithm to a square image
+    im = np.array(Image.fromarray(rescaled).resize((resize_dim, resize_dim), Image.Resampling.LANCZOS))
+    # Performs discrete cosine transforms to compress the image information and takes the lowest frequency component
+    transform = dct(dct(im.T).T)[:HASH_SIZE, :HASH_SIZE]
+    # Encodes the transform as a bit array over the median value
+    diff = transform > np.median(transform)
+    # Pads the front of the bit array to a multiple of 8 with False
+    padded = np.full(int(np.ceil(diff.size / 8) * 8), False)
+    padded[-diff.size :] = diff.ravel()
+    # Converts the bit array to a hex string and strips leading 0s
+    hash_hex = np.packbits(padded).tobytes().hex().lstrip("0")
+    return hash_hex if hash_hex else "0"
+def xxhash(image: ArrayLike) -> str:
+    """
+    Performs a fast non-cryptographic hash using the xxhash algorithm
+    (xxhash.com) against the image as a flattened bytearray.  The hash
+    is returned as a hex string.
+    Parameters
+    ----------
+    image : ArrayLike
+        An image as a numpy array
+    Returns
+    -------
+    str
+        The hex string hash of the image using the xxHash algorithm
+    """
+    return xxh.xxh3_64_hexdigest(as_numpy(image).ravel().tobytes())
+class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
+    output_class: type = HashStatsOutput
+    image_function_map: dict[str, Callable[[StatsProcessor[HashStatsOutput]], str]] = {
+        "xxhash": lambda x: xxhash(x.image),
+        "pchash": lambda x: pchash(x.image),
+    }
+@set_metadata()
+def hashstats(
+    images: Iterable[ArrayLike],
+    bboxes: Iterable[ArrayLike] | None = None,
+) -> HashStatsOutput:
+    """
+    Calculates hashes for each image
+    This function computes hashes from the images including exact hashes and perception-based
+    hashes. These hash values can be used to determine if images are exact or near matches.
+    Parameters
+    ----------
+    images : ArrayLike
+        Images to hashing
+    bboxes : Iterable[ArrayLike] or None
+        Bounding boxes in `xyxy` format for each image
+    Returns
+    -------
+    HashStatsOutput
+        A dictionary-like object containing the computed hashes for each image.
+    See Also
+    --------
+    Duplicates
+    Examples
+    --------
+    Calculating the statistics on the images, whose shape is (C, H, W)
+    >>> results = hashstats(images)
+    >>> print(results.xxhash)
+    ['a72434443d6e7336', 'efc12c2f14581d79', '4a1e03483a27d674', '3a3ecedbcf814226']
+    >>> print(results.pchash)
+    ['8f25506af46a7c6a', '8000808000008080', '8e71f18e0ef18e0e', 'a956d6a956d6a928']
+    """
+    return run_stats(images, bboxes, False, [HashStatsProcessor])[0]

dataeval/{_internal/metrics → metrics}/stats/labelstats.py RENAMED Viewed

@@ -1,13 +1,15 @@
 from __future__ import annotations
+__all__ = ["LabelStatsOutput", "labelstats"]
 from collections import Counter, defaultdict
 from dataclasses import dataclass
 from typing import Any, Iterable, Mapping, TypeVar
 from numpy.typing import ArrayLike
-from dataeval._internal.interop import to_numpy
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.interop import to_numpy
+from dataeval.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
@@ -55,12 +57,12 @@ def sort(d: Mapping[TKey, Any]) -> dict[TKey, Any]:
     return dict(sorted(d.items(), key=lambda x: x[0]))
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def labelstats(
     labels: Iterable[ArrayLike],
 ) -> LabelStatsOutput:
     """
-    Calculates statistics for data labels
+    Calculates :term:`statistics<Statistics>` for data labels
     This function computes counting metrics (e.g., total per class, total per image)
     on the labels.
@@ -68,7 +70,7 @@ def labelstats(
     Parameters
     ----------
     labels : ArrayLike, shape - [label] | [[label]] or (N,M) | (N,)
-        Lists or numpy array of labels.
+        Lists or :term:`NumPy` array of labels.
         A set of lists where each list contains all labels per image -
         (e.g. [[label1, label2], [label2], [label1, label3]] or [label1, label2, label1, label3]).
         If a numpy array, N is the number of images, M is the number of labels per image.
@@ -80,7 +82,7 @@ def labelstats(
     Examples
     --------
-    Calculating the statistics on labels for a set of data
+    Calculating the :term:`statistics<Statistics>` on labels for a set of data
     >>> stats = labelstats(labels)
     >>> stats.label_counts_per_class

dataeval/{_internal/metrics → metrics}/stats/pixelstats.py RENAMED Viewed

@@ -1,14 +1,16 @@
 from __future__ import annotations
+__all__ = ["PixelStatsOutput", "pixelstats"]
 from dataclasses import dataclass
-from typing import Iterable
+from typing import Any, Callable, Iterable
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from scipy.stats import entropy, kurtosis, skew
-from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
-from dataeval._internal.output import set_metadata
+from dataeval.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
+from dataeval.output import set_metadata
 @dataclass(frozen=True)
@@ -23,7 +25,7 @@ class PixelStatsOutput(BaseStatsOutput):
     std : NDArray[np.float16]
         Standard deviation of the pixel values of the images
     var : NDArray[np.float16]
-        Variance of the pixel values of the images
+        :term:`Variance` of the pixel values of the images
     skew : NDArray[np.float16]
         Skew of the pixel values of the images
     kurtosis : NDArray[np.float16]
@@ -44,9 +46,8 @@ class PixelStatsOutput(BaseStatsOutput):
 class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
-    output_class = PixelStatsOutput
-    cache_keys = ["histogram"]
-    image_function_map = {
+    output_class: type = PixelStatsOutput
+    image_function_map: dict[str, Callable[[StatsProcessor[PixelStatsOutput]], Any]] = {
         "mean": lambda self: np.mean(self.scaled),
         "std": lambda x: np.std(x.scaled),
         "var": lambda x: np.var(x.scaled),
@@ -55,7 +56,7 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
         "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
         "entropy": lambda x: entropy(x.get("histogram")),
     }
-    channel_function_map = {
+    channel_function_map: dict[str, Callable[[StatsProcessor[PixelStatsOutput]], Any]] = {
         "mean": lambda x: np.mean(x.scaled, axis=1),
         "std": lambda x: np.std(x.scaled, axis=1),
         "var": lambda x: np.var(x.scaled, axis=1),
@@ -66,14 +67,14 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
     }
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def pixelstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
     per_channel: bool = False,
 ) -> PixelStatsOutput:
     """
-    Calculates pixel statistics for each image
+    Calculates pixel :term:`statistics<Statistics>` for each image
     This function computes various statistical metrics (e.g., mean, standard deviation, entropy)
     on the images as a whole.
@@ -90,7 +91,7 @@ def pixelstats(
     PixelStatsOutput
         A dictionary-like object containing the computed statistics for each image. The keys correspond
         to the names of the statistics (e.g., 'mean', 'std'), and the values are lists of results for
-        each image or numpy arrays when the results are multi-dimensional.
+        each image or :term:`NumPy` arrays when the results are multi-dimensional.
     See Also
     --------

dataeval/{_internal/metrics → metrics}/stats/visualstats.py RENAMED Viewed

@@ -1,14 +1,16 @@
 from __future__ import annotations
+__all__ = ["VisualStatsOutput", "visualstats"]
 from dataclasses import dataclass
-from typing import Iterable
+from typing import Any, Callable, Iterable
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
-from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
-from dataeval._internal.metrics.utils import edge_filter
-from dataeval._internal.output import set_metadata
+from dataeval.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
+from dataeval.output import set_metadata
+from dataeval.utils.image import edge_filter
 QUARTILES = (0, 25, 50, 75, 100)
@@ -46,9 +48,8 @@ class VisualStatsOutput(BaseStatsOutput):
 class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
-    output_class = VisualStatsOutput
-    cache_keys = ["percentiles"]
-    image_function_map = {
+    output_class: type = VisualStatsOutput
+    image_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
         "brightness": lambda x: x.get("percentiles")[1],
         "contrast": lambda x: np.nan_to_num(
             (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
@@ -59,7 +60,7 @@ class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
         "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
         "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
     }
-    channel_function_map = {
+    channel_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
         "brightness": lambda x: x.get("percentiles")[:, 1],
         "contrast": lambda x: np.nan_to_num(
             (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
@@ -73,7 +74,7 @@ class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
     }
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def visualstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
@@ -82,7 +83,7 @@ def visualstats(
     """
     Calculates visual statistics for each image
-    This function computes various visual metrics (e.g., brightness, darkness, contrast, sharpness)
+    This function computes various visual metrics (e.g., :term:`brightness<Brightness>`, darkness, contrast, blurriness)
     on the images as a whole.
     Parameters
@@ -96,8 +97,8 @@ def visualstats(
     -------
     VisualStatsOutput
         A dictionary-like object containing the computed visual statistics for each image. The keys correspond
-        to the names of the statistics (e.g., 'brightness', 'sharpness'), and the values are lists of results for
-        each image or numpy arrays when the results are multi-dimensional.
+        to the names of the statistics (e.g., 'brightness', 'blurriness'), and the values are lists of results for
+        each image or :term:`NumPy` arrays when the results are multi-dimensional.
     See Also
     --------
@@ -109,7 +110,7 @@ def visualstats(
     Examples
     --------
-    Calculating the statistics on the images, whose shape is (C, H, W)
+    Calculating the :term:`statistics<Statistics>` on the images, whose shape is (C, H, W)
     >>> results = visualstats(images)
     >>> print(results.brightness)

dataeval/{_internal/output.py → output.py} RENAMED Viewed

@@ -1,12 +1,20 @@
 from __future__ import annotations
+__all__ = []
 import inspect
+import sys
 from datetime import datetime, timezone
 from functools import wraps
-from typing import Any
+from typing import Any, Callable, Iterable, TypeVar
 import numpy as np
+if sys.version_info >= (3, 10):
+    from typing import ParamSpec
+else:
+    from typing_extensions import ParamSpec
 from dataeval import __version__
@@ -25,10 +33,18 @@ class OutputMetadata:
         return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
-def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
-    def decorator(fn):
+P = ParamSpec("P")
+R = TypeVar("R", bound=OutputMetadata)
+def set_metadata(
+    state_attr: Iterable[str] | None = None,
+) -> Callable[[Callable[P, R]], Callable[P, R]]:
+    """Decorator to stamp OutputMetadata classes with runtime metadata"""
+    def decorator(fn: Callable[P, R]) -> Callable[P, R]:
         @wraps(fn)
-        def wrapper(*args, **kwargs):
+        def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
             def fmt(v):
                 if np.isscalar(v):
                     return v
@@ -52,9 +68,13 @@ def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
                 if "self" in arguments and state_attr
                 else {}
             )
-            name = args[0].__class__.__name__ if "self" in arguments else fn.__name__
+            name = (
+                f"{args[0].__class__.__module__}.{args[0].__class__.__name__}.{fn.__name__}"
+                if "self" in arguments
+                else f"{fn.__module__}.{fn.__qualname__}"
+            )
             metadata = {
-                "_name": f"{module_name}.{name}",
+                "_name": name,
                 "_execution_time": time,
                 "_execution_duration": duration,
                 "_arguments": {k: v for k, v in arguments.items() if k != "self"},

dataeval/utils/__init__.py CHANGED Viewed

@@ -1,19 +1,23 @@
 """
 The utility classes and functions are provided by DataEval to assist users
 in setting up architectures that are guaranteed to work with applicable DataEval
-metrics. Currently DataEval supports both Tensorflow and PyTorch backends.
+metrics. Currently DataEval supports both :term:`TensorFlow` and PyTorch backends.
 """
 from dataeval import _IS_TENSORFLOW_AVAILABLE, _IS_TORCH_AVAILABLE
+from dataeval.utils.split_dataset import split_dataset
-__all__ = []
+__all__ = ["split_dataset"]
 if _IS_TORCH_AVAILABLE:  # pragma: no cover
-    from . import torch
+    from dataeval.utils import torch
     __all__ += ["torch"]
 if _IS_TENSORFLOW_AVAILABLE:  # pragma: no cover
-    from . import tensorflow
+    from dataeval.utils import tensorflow
     __all__ += ["tensorflow"]
+del _IS_TENSORFLOW_AVAILABLE
+del _IS_TORCH_AVAILABLE

dataeval/utils/image.py ADDED Viewed

@@ -0,0 +1,71 @@
+from __future__ import annotations
+__all__ = []
+from typing import Any, NamedTuple
+import numpy as np
+from numpy.typing import ArrayLike, NDArray
+from scipy.signal import convolve2d
+EDGE_KERNEL = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=np.int8)
+BIT_DEPTH = (1, 8, 12, 16, 32)
+class BitDepth(NamedTuple):
+    depth: int
+    pmin: float | int
+    pmax: float | int
+def get_bitdepth(image: NDArray[Any]) -> BitDepth:
+    """
+    Approximates the bit depth of the image using the
+    min and max pixel values.
+    """
+    pmin, pmax = np.min(image), np.max(image)
+    if pmin < 0:
+        return BitDepth(0, pmin, pmax)
+    else:
+        depth = ([x for x in BIT_DEPTH if 2**x > pmax] or [max(BIT_DEPTH)])[0]
+        return BitDepth(depth, 0, 2**depth - 1)
+def rescale(image: NDArray[Any], depth: int = 1) -> NDArray[Any]:
+    """
+    Rescales the image using the bit depth provided.
+    """
+    bitdepth = get_bitdepth(image)
+    if bitdepth.depth == depth:
+        return image
+    else:
+        normalized = (image + bitdepth.pmin) / (bitdepth.pmax - bitdepth.pmin)
+        return normalized * (2**depth - 1)
+def normalize_image_shape(image: NDArray[Any]) -> NDArray[Any]:
+    """
+    Normalizes the image shape into (C,H,W).
+    """
+    ndim = image.ndim
+    if ndim == 2:
+        return np.expand_dims(image, axis=0)
+    elif ndim == 3:
+        return image
+    elif ndim > 3:
+        # Slice all but the last 3 dimensions
+        return image[(0,) * (ndim - 3)]
+    else:
+        raise ValueError("Images must have 2 or more dimensions.")
+def edge_filter(image: ArrayLike, offset: float = 0.5) -> NDArray[np.uint8]:
+    """
+    Returns the image filtered using a 3x3 edge detection kernel:
+    [[ -1, -1, -1 ],
+     [ -1,  8, -1 ],
+     [ -1, -1, -1 ]]
+    """
+    edges = convolve2d(image, EDGE_KERNEL, mode="same", boundary="symm") + offset
+    np.clip(edges, 0, 255, edges)
+    return edges

dataeval 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl

dataeval 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl