PyPI - dataeval - Versions diffs - 0.70.0__py3-none-any.whl → 0.71.0__py3-none-any.whl - Mend

dataeval 0.70.0-py3-none-any.whl → 0.71.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

dataeval/__init__.py +6 -6
dataeval/_internal/datasets.py +235 -131
dataeval/_internal/detectors/clusterer.py +2 -0
dataeval/_internal/detectors/drift/base.py +2 -2
dataeval/_internal/detectors/drift/mmd.py +1 -1
dataeval/_internal/detectors/duplicates.py +2 -0
dataeval/_internal/detectors/ood/ae.py +5 -3
dataeval/_internal/detectors/ood/aegmm.py +6 -4
dataeval/_internal/detectors/ood/base.py +12 -7
dataeval/_internal/detectors/ood/llr.py +6 -4
dataeval/_internal/detectors/ood/vae.py +5 -3
dataeval/_internal/detectors/ood/vaegmm.py +6 -4
dataeval/_internal/detectors/outliers.py +6 -9
dataeval/_internal/metrics/balance.py +4 -2
dataeval/_internal/metrics/ber.py +2 -0
dataeval/_internal/metrics/coverage.py +4 -0
dataeval/_internal/metrics/divergence.py +6 -2
dataeval/_internal/metrics/diversity.py +8 -6
dataeval/_internal/metrics/parity.py +8 -6
dataeval/_internal/metrics/stats/base.py +105 -46
dataeval/_internal/metrics/stats/datasetstats.py +96 -22
dataeval/_internal/metrics/stats/dimensionstats.py +22 -20
dataeval/_internal/metrics/stats/hashstats.py +11 -9
dataeval/_internal/metrics/stats/labelstats.py +1 -1
dataeval/_internal/metrics/stats/pixelstats.py +28 -26
dataeval/_internal/metrics/stats/visualstats.py +37 -35
dataeval/_internal/metrics/uap.py +6 -2
dataeval/_internal/metrics/utils.py +2 -2
dataeval/_internal/models/pytorch/autoencoder.py +5 -5
dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
dataeval/_internal/utils.py +11 -16
dataeval/_internal/workflows/sufficiency.py +44 -33
dataeval/detectors/__init__.py +4 -0
dataeval/detectors/drift/__init__.py +8 -3
dataeval/detectors/drift/kernels/__init__.py +4 -0
dataeval/detectors/drift/updates/__init__.py +4 -0
dataeval/detectors/linters/__init__.py +15 -4
dataeval/detectors/ood/__init__.py +14 -2
dataeval/metrics/__init__.py +5 -0
dataeval/metrics/bias/__init__.py +13 -4
dataeval/metrics/estimators/__init__.py +8 -8
dataeval/metrics/stats/__init__.py +24 -6
dataeval/utils/__init__.py +16 -3
dataeval/utils/tensorflow/__init__.py +11 -0
dataeval/utils/torch/__init__.py +12 -0
dataeval/utils/torch/datasets/__init__.py +7 -0
dataeval/workflows/__init__.py +4 -0
{dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/METADATA +11 -2
dataeval-0.71.0.dist-info/RECORD +80 -0
dataeval/tensorflow/__init__.py +0 -3
dataeval/torch/__init__.py +0 -3
dataeval-0.70.0.dist-info/RECORD +0 -79
/dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
/dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
/dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
/dataeval/{torch → utils/torch}/models/__init__.py +0 -0
/dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
{dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/LICENSE.txt +0 -0
{dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/WHEEL +0 -0

dataeval/_internal/metrics/stats/datasetstats.py CHANGED Viewed

@@ -1,21 +1,26 @@
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import Iterable
+from typing import Any, Iterable
 from numpy.typing import ArrayLike
-from dataeval._internal.metrics.stats.base import BaseStatsOutput
-from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
+from dataeval._internal.metrics.stats.base import BaseStatsOutput, run_stats
+from dataeval._internal.metrics.stats.dimensionstats import (
+    DimensionStatsOutput,
+    DimensionStatsProcessor,
+)
 from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
-from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
-from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, visualstats
+from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
+from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
 from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DatasetStatsOutput(OutputMetadata):
     """
+    Output class for :func:`datasetstats` stats metric
     This class represents the outputs of various stats functions against a single
     dataset, such that each index across all stat outputs is representative of
     the same source image.  Modifying or mixing outputs will result in inaccurate
@@ -23,19 +28,53 @@ class DatasetStatsOutput(OutputMetadata):
     Attributes
     ----------
-    dimensionstats : DimensionStatsOutput or None
-    pixelstats: PixelStatsOutput or None
-    visualstats: VisualStatsOutput or None
-    labelstats: LabelStatsOutput or None, default None
+    dimensionstats : DimensionStatsOutput
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
+    labelstats: LabelStatsOutput or None
     """
-    dimensionstats: DimensionStatsOutput | None
-    pixelstats: PixelStatsOutput | None
-    visualstats: VisualStatsOutput | None
+    dimensionstats: DimensionStatsOutput
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
     labelstats: LabelStatsOutput | None = None
+    def outputs(self) -> list[OutputMetadata]:
+        return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
+    def dict(self) -> dict[str, Any]:
+        return {k: v for o in self.outputs() for k, v in o.dict().items()}
+    def __post_init__(self):
+        lengths = [len(s) for s in self.outputs() if isinstance(s, BaseStatsOutput)]
+        if not all(length == lengths[0] for length in lengths):
+            raise ValueError("All StatsOutput classes must contain the same number of image sources.")
+@dataclass(frozen=True)
+class ChannelStatsOutput(OutputMetadata):
+    """
+    Output class for :func:`channelstats` stats metric
+    This class represents the outputs of various per-channel stats functions against
+    a single dataset, such that each index across all stat outputs is representative
+    of the same source image.  Modifying or mixing outputs will result in inaccurate
+    outlier calculations if not created correctly.
+    Attributes
+    ----------
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
+    """
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
     def outputs(self) -> list[BaseStatsOutput]:
-        return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats) if s is not None]
+        return [self.pixelstats, self.visualstats]
+    def dict(self) -> dict[str, Any]:
+        return {**self.pixelstats.dict(), **self.visualstats.dict()}
     def __post_init__(self):
         lengths = [len(s) for s in self.outputs()]
@@ -48,9 +87,6 @@ def datasetstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
     labels: Iterable[ArrayLike] | None = None,
-    use_dimension: bool = True,
-    use_pixel: bool = True,
-    use_visual: bool = True,
 ) -> DatasetStatsOutput:
     """
     Calculates various statistics for each image
@@ -89,9 +125,47 @@ def datasetstats(
     [1.744   1.946   0.1164  0.0635  0.0633  0.06274 0.0429  0.0317  0.0317
      0.02576 0.02081 0.02171 0.01915 0.01767 0.01799 0.01595 0.01433 0.01478]
     """
-    return DatasetStatsOutput(
-        dimensionstats(images, bboxes) if use_dimension else None,
-        pixelstats(images, bboxes) if use_pixel else None,
-        visualstats(images, bboxes) if use_visual else None,
-        labelstats(labels) if labels else None,
-    )
+    outputs = run_stats(images, bboxes, False, [DimensionStatsProcessor, PixelStatsProcessor, VisualStatsProcessor])
+    return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None)  # type: ignore
+@set_metadata("dataeval.metrics")
+def channelstats(
+    images: Iterable[ArrayLike],
+    bboxes: Iterable[ArrayLike] | None = None,
+) -> ChannelStatsOutput:
+    """
+    Calculates various per-channel statistics for each image
+    This function computes pixel and visual metrics on the images
+    or individual bounding boxes for each image.
+    Parameters
+    ----------
+    images : Iterable[ArrayLike]
+        Images to perform calculations on
+    bboxes : Iterable[ArrayLike] or None
+        Bounding boxes in `xyxy` format for each image to perform calculations on
+    Returns
+    -------
+    ChannelStatsOutput
+        Output class containing the per-channel outputs of various stats functions
+    See Also
+    --------
+    pixelstats, visualstats
+    Examples
+    --------
+    Calculating the per-channel pixel and visual stats for a dataset
+    >>> stats = channelstats(images)
+    >>> print(stats.visualstats.darkness)
+    [0.02124 0.1213  0.2212  0.1013  0.1076  0.11383 0.2013  0.2076  0.2139
+     0.3013  0.3076  0.3137  0.4014  0.4075  0.4138  0.5015  0.508   0.5137
+     0.6016  0.6074  0.614   0.701   0.7075  0.714   0.8013  0.8076  0.814
+     0.9014  0.9077  0.914  ]
+    """
+    outputs = run_stats(images, bboxes, True, [PixelStatsProcessor, VisualStatsProcessor])
+    return ChannelStatsOutput(*outputs)  # type: ignore

dataeval/_internal/metrics/stats/dimensionstats.py CHANGED Viewed

@@ -11,27 +11,11 @@ from dataeval._internal.metrics.utils import get_bitdepth
 from dataeval._internal.output import set_metadata
-class DimensionStatsProcessor(StatsProcessor):
-    image_function_map = {
-        "left": lambda x: x.box[0],
-        "top": lambda x: x.box[1],
-        "width": lambda x: x.shape[-1],
-        "height": lambda x: x.shape[-2],
-        "channels": lambda x: x.shape[-3],
-        "size": lambda x: np.prod(x.shape[-2:]),
-        "aspect_ratio": lambda x: x.shape[-1] / x.shape[-2],
-        "depth": lambda x: get_bitdepth(x.image).depth,
-        "center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
-        "distance": lambda x: np.sqrt(
-            np.square(((x.box[0] + x.box[2]) / 2) - (x.width / 2))
-            + np.square(((x.box[1] + x.box[3]) / 2) - (x.height / 2))
-        ),
-    }
 @dataclass(frozen=True)
 class DimensionStatsOutput(BaseStatsOutput):
     """
+    Output class for :func:`dimensionstats` stats metric
     Attributes
     ----------
     left : NDArray[np.int32]
@@ -68,6 +52,25 @@ class DimensionStatsOutput(BaseStatsOutput):
     distance: NDArray[np.float16]
+class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
+    output_class = DimensionStatsOutput
+    image_function_map = {
+        "left": lambda x: x.box[0],
+        "top": lambda x: x.box[1],
+        "width": lambda x: x.shape[-1],
+        "height": lambda x: x.shape[-2],
+        "channels": lambda x: x.shape[-3],
+        "size": lambda x: np.prod(x.shape[-2:]),
+        "aspect_ratio": lambda x: x.shape[-1] / x.shape[-2],
+        "depth": lambda x: get_bitdepth(x.image).depth,
+        "center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
+        "distance": lambda x: np.sqrt(
+            np.square(((x.box[0] + x.box[2]) / 2) - (x.width / 2))
+            + np.square(((x.box[1] + x.box[3]) / 2) - (x.height / 2))
+        ),
+    }
 @set_metadata("dataeval.metrics")
 def dimensionstats(
     images: Iterable[ArrayLike],
@@ -107,5 +110,4 @@ def dimensionstats(
     >>> print(results.channels)
     [1 1 1 1 1 1 3 1 1 3]
     """
-    output = run_stats(images, bboxes, False, DimensionStatsProcessor, DimensionStatsOutput)
-    return DimensionStatsOutput(**output)
+    return run_stats(images, bboxes, False, [DimensionStatsProcessor])[0]

dataeval/_internal/metrics/stats/hashstats.py CHANGED Viewed

@@ -10,16 +10,11 @@ from dataeval._internal.metrics.utils import pchash, xxhash
 from dataeval._internal.output import set_metadata
-class HashStatsProcessor(StatsProcessor):
-    image_function_map = {
-        "xxhash": lambda x: xxhash(x.image),
-        "pchash": lambda x: pchash(x.image),
-    }
 @dataclass(frozen=True)
 class HashStatsOutput(BaseStatsOutput):
     """
+    Output class for :func:`hashstats` stats metric
     Attributes
     ----------
     xxhash : List[str]
@@ -32,6 +27,14 @@ class HashStatsOutput(BaseStatsOutput):
     pchash: list[str]
+class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
+    output_class = HashStatsOutput
+    image_function_map = {
+        "xxhash": lambda x: xxhash(x.image),
+        "pchash": lambda x: pchash(x.image),
+    }
 @set_metadata("dataeval.metrics")
 def hashstats(
     images: Iterable[ArrayLike],
@@ -69,5 +72,4 @@ def hashstats(
     >>> print(results.pchash)
     ['8f25506af46a7c6a', '8000808000008080', '8e71f18e0ef18e0e', 'a956d6a956d6a928']
     """
-    output = run_stats(images, bboxes, False, HashStatsProcessor, HashStatsOutput)
-    return HashStatsOutput(**output)
+    return run_stats(images, bboxes, False, [HashStatsProcessor])[0]

dataeval/_internal/metrics/stats/labelstats.py CHANGED Viewed

@@ -13,7 +13,7 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class LabelStatsOutput(OutputMetadata):
     """
-    Output class for `labelstats` metrics function
+    Output class for :func:`labelstats` stats metric
     Attributes
     ----------

dataeval/_internal/metrics/stats/pixelstats.py CHANGED Viewed

@@ -11,31 +11,11 @@ from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcesso
 from dataeval._internal.output import set_metadata
-class PixelStatsProcessor(StatsProcessor):
-    cache_keys = ["histogram"]
-    image_function_map = {
-        "mean": lambda self: np.mean(self.scaled),
-        "std": lambda x: np.std(x.scaled),
-        "var": lambda x: np.var(x.scaled),
-        "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
-        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
-        "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
-        "entropy": lambda x: entropy(x.get("histogram")),
-    }
-    channel_function_map = {
-        "mean": lambda x: np.mean(x.scaled, axis=1),
-        "std": lambda x: np.std(x.scaled, axis=1),
-        "var": lambda x: np.var(x.scaled, axis=1),
-        "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
-        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
-        "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
-        "entropy": lambda x: entropy(x.get("histogram"), axis=1),
-    }
 @dataclass(frozen=True)
 class PixelStatsOutput(BaseStatsOutput):
     """
+    Output class for :func:`pixelstats` stats metric
     Attributes
     ----------
     mean : NDArray[np.float16]
@@ -63,6 +43,29 @@ class PixelStatsOutput(BaseStatsOutput):
     entropy: NDArray[np.float16]
+class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
+    output_class = PixelStatsOutput
+    cache_keys = ["histogram"]
+    image_function_map = {
+        "mean": lambda self: np.mean(self.scaled),
+        "std": lambda x: np.std(x.scaled),
+        "var": lambda x: np.var(x.scaled),
+        "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
+        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
+        "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
+        "entropy": lambda x: entropy(x.get("histogram")),
+    }
+    channel_function_map = {
+        "mean": lambda x: np.mean(x.scaled, axis=1),
+        "std": lambda x: np.std(x.scaled, axis=1),
+        "var": lambda x: np.var(x.scaled, axis=1),
+        "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
+        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
+        "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
+        "entropy": lambda x: entropy(x.get("histogram"), axis=1),
+    }
 @set_metadata("dataeval.metrics")
 def pixelstats(
     images: Iterable[ArrayLike],
@@ -93,8 +96,8 @@ def pixelstats(
     --------
     dimensionstats, visualstats, Outliers
-    Notes
-    -----
+    Note
+    ----
     - All metrics are scaled based on the perceived bit depth (which is derived from the largest pixel value)
       to allow for better comparison between images stored in different formats and different resolutions.
@@ -113,5 +116,4 @@ def pixelstats(
      0.812  0.9883 0.795  0.9243 0.9243 0.795  0.9907 0.8125 1.028  0.8223
      1.046  0.8247 1.041  0.8203 1.012  0.812  0.9883 0.795  0.9243 0.9243]
     """
-    output = run_stats(images, bboxes, per_channel, PixelStatsProcessor, PixelStatsOutput)
-    return PixelStatsOutput(**output)
+    return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]

dataeval/_internal/metrics/stats/visualstats.py CHANGED Viewed

@@ -13,41 +13,16 @@ from dataeval._internal.output import set_metadata
 QUARTILES = (0, 25, 50, 75, 100)
-class VisualStatsProcessor(StatsProcessor):
-    cache_keys = ["percentiles"]
-    image_function_map = {
-        "brightness": lambda x: x.get("percentiles")[-2],
-        "blurriness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
-        "contrast": lambda x: np.nan_to_num(
-            (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
-        ),
-        "darkness": lambda x: x.get("percentiles")[1],
-        "missing": lambda x: np.sum(np.isnan(x.image)) / np.prod(x.shape[-2:]),
-        "zeros": lambda x: np.count_nonzero(x.image == 0) / np.prod(x.shape[-2:]),
-        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
-    }
-    channel_function_map = {
-        "brightness": lambda x: x.get("percentiles")[:, -2],
-        "blurriness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
-        "contrast": lambda x: np.nan_to_num(
-            (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
-            / np.mean(x.get("percentiles"), axis=1)
-        ),
-        "darkness": lambda x: x.get("percentiles")[:, 1],
-        "missing": lambda x: np.sum(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
-        "zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
-        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
-    }
 @dataclass(frozen=True)
 class VisualStatsOutput(BaseStatsOutput):
     """
+    Output class for :func:`visualstats` stats metric
     Attributes
     ----------
     brightness : NDArray[np.float16]
         Brightness of the images
-    blurriness : NDArray[np.float16]
+    sharpness : NDArray[np.float16]
         Sharpness of the images
     contrast : NDArray[np.float16]
         Image contrast ratio
@@ -62,7 +37,7 @@ class VisualStatsOutput(BaseStatsOutput):
     """
     brightness: NDArray[np.float16]
-    blurriness: NDArray[np.float16]
+    sharpness: NDArray[np.float16]
     contrast: NDArray[np.float16]
     darkness: NDArray[np.float16]
     missing: NDArray[np.float16]
@@ -70,6 +45,34 @@ class VisualStatsOutput(BaseStatsOutput):
     percentiles: NDArray[np.float16]
+class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
+    output_class = VisualStatsOutput
+    cache_keys = ["percentiles"]
+    image_function_map = {
+        "brightness": lambda x: x.get("percentiles")[-2],
+        "sharpness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
+        "contrast": lambda x: np.nan_to_num(
+            (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
+        ),
+        "darkness": lambda x: x.get("percentiles")[1],
+        "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
+        "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
+        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
+    }
+    channel_function_map = {
+        "brightness": lambda x: x.get("percentiles")[:, -2],
+        "sharpness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
+        "contrast": lambda x: np.nan_to_num(
+            (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
+            / np.mean(x.get("percentiles"), axis=1)
+        ),
+        "darkness": lambda x: x.get("percentiles")[:, 1],
+        "missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
+        "zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
+        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
+    }
 @set_metadata("dataeval.metrics")
 def visualstats(
     images: Iterable[ArrayLike],
@@ -79,7 +82,7 @@ def visualstats(
     """
     Calculates visual statistics for each image
-    This function computes various visual metrics (e.g., brightness, darkness, contrast, blurriness)
+    This function computes various visual metrics (e.g., brightness, darkness, contrast, sharpness)
     on the images as a whole.
     Parameters
@@ -93,15 +96,15 @@ def visualstats(
     -------
     VisualStatsOutput
         A dictionary-like object containing the computed visual statistics for each image. The keys correspond
-        to the names of the statistics (e.g., 'brightness', 'blurriness'), and the values are lists of results for
+        to the names of the statistics (e.g., 'brightness', 'sharpness'), and the values are lists of results for
         each image or numpy arrays when the results are multi-dimensional.
     See Also
     --------
     dimensionstats, pixelstats, Outliers
-    Notes
-    -----
+    Note
+    ----
     - `zeros` and `missing` are presented as a percentage of total pixel counts
     Examples
@@ -118,5 +121,4 @@ def visualstats(
      1.258 1.257 1.257 1.256 1.256 1.255 1.255 1.255 1.255 1.254 1.254 1.254
      1.254 1.254 1.254 1.253 1.253 1.253]
     """
-    output = run_stats(images, bboxes, per_channel, VisualStatsProcessor, VisualStatsOutput)
-    return VisualStatsOutput(**output)
+    return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]

dataeval/_internal/metrics/uap.py CHANGED Viewed

@@ -4,6 +4,8 @@ FR Test Statistic based estimate for the upperbound
 average precision using empirical mean precision
 """
+from __future__ import annotations
 from dataclasses import dataclass
 from numpy.typing import ArrayLike
@@ -16,6 +18,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class UAPOutput(OutputMetadata):
     """
+    Output class for :func:`uap` estimator metric
     Attributes
     ----------
     uap : float
@@ -48,8 +52,8 @@ def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
     ValueError
         If unique classes M < 2
-    Notes
-    -----
+    Note
+    ----
     This function calculates the empirical mean precision using the
     ``average_precision_score`` from scikit-learn, weighted by the class distribution.

dataeval/_internal/metrics/utils.py CHANGED Viewed

@@ -91,8 +91,8 @@ def entropy(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class).  True -> include in histogram counts
-    Notes
-    -----
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically.  See
     numpy.histogram for details.

dataeval/_internal/models/pytorch/autoencoder.py CHANGED Viewed

@@ -61,9 +61,9 @@ class AETrainer:
         List[float]
             A list of average loss values for each epoch.
-        Notes
+        Note
         ----
-        To replace this function with a custom function, do
+        To replace this function with a custom function, do:
             AETrainer.train = custom_function
         """
         # Setup training
@@ -120,7 +120,7 @@ class AETrainer:
         Note
         ----
-        To replace this function with a custom function, do
+        To replace this function with a custom function, do:
             AETrainer.eval = custom_function
         """
         self.model.eval()
@@ -155,8 +155,8 @@ class AETrainer:
         torch.Tensor
             Data encoded by the model
-        Notes
-        -----
+        Note
+        ----
         This function should be run after the model has been trained and evaluated.
         """
         self.model.eval()

dataeval/_internal/models/tensorflow/pixelcnn.py CHANGED Viewed

@@ -272,8 +272,6 @@ class PixelCNN(distribution.Distribution):
         The minimum value of the input data.
     dtype : tensorflow dtype, default tf.float32
         Data type of the `Distribution`.
-    name : str, default "PixelCNN"
-        The name of the `Distribution`.
     """
     def __init__(
@@ -293,10 +291,9 @@ class PixelCNN(distribution.Distribution):
         high: int = 255,
         low: int = 0,
         dtype=tf.float32,
-        name: str = "PixelCNN",
     ) -> None:
         parameters = dict(locals())
-        with tf.name_scope(name) as name:
+        with tf.name_scope("PixelCNN") as name:
             super().__init__(
                 dtype=dtype,
                 reparameterization_type=reparameterization.NOT_REPARAMETERIZED,

dataeval/_internal/utils.py CHANGED Viewed

@@ -8,7 +8,7 @@ from torch.utils.data import Dataset
 def read_dataset(dataset: Dataset) -> list[list[Any]]:
     """
-    Extract information from a dataset at each index into a individual lists of each information position
+    Extract information from a dataset at each index into individual lists of each information position
     Parameters
     ----------
@@ -31,36 +31,31 @@ def read_dataset(dataset: Dataset) -> list[list[Any]]:
     Examples
     --------
     >>> import numpy as np
-    >>> data = np.ones((10, 3, 3))
+    >>> data = np.ones((10, 1, 3, 3))
     >>> labels = np.ones((10,))
     >>> class ICDataset:
     ...     def __init__(self, data, labels):
     ...         self.data = data
     ...         self.labels = labels
+    ...
     ...     def __getitem__(self, idx):
     ...         return self.data[idx], self.labels[idx]
     >>> ds = ICDataset(data, labels)
     >>> result = read_dataset(ds)
-    >>> assert len(result) == 2
-    True
-    >>> assert result[0].shape == (10, 3, 3)  # 10 3x3 images
-    True
-    >>> assert result[1].shape == (10,)  # 10 labels
-    True
+    >>> len(result)  # images and labels
+    2
+    >>> np.asarray(result[0]).shape  # images
+    (10, 1, 3, 3)
+    >>> np.asarray(result[1]).shape  # labels
+    (10,)
     """
-    ddict: dict[int, list] = defaultdict(list)
+    ddict: dict[int, list[Any]] = defaultdict(list[Any])
     for data in dataset:
-        # Convert to tuple if single return (e.g. images only)
-        if not isinstance(data, tuple):
-            data = (data,)
-        for i, d in enumerate(data):
+        for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
             ddict[i].append(d)
     return list(ddict.values())

dataeval 0.70.0__py3-none-any.whl → 0.71.0__py3-none-any.whl

dataeval 0.70.0-py3-none-any.whl → 0.71.0-py3-none-any.whl