PyPI - dataeval - Versions diffs - 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl - Mend

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

dataeval/__init__.py +1 -1
dataeval/config.py +68 -11
dataeval/detectors/drift/__init__.py +2 -2
dataeval/detectors/drift/_base.py +8 -64
dataeval/detectors/drift/_mmd.py +12 -38
dataeval/detectors/drift/_torch.py +7 -7
dataeval/detectors/drift/_uncertainty.py +6 -5
dataeval/detectors/drift/updates.py +20 -3
dataeval/detectors/linters/__init__.py +3 -2
dataeval/detectors/linters/duplicates.py +14 -46
dataeval/detectors/linters/outliers.py +25 -159
dataeval/detectors/ood/__init__.py +1 -1
dataeval/detectors/ood/ae.py +6 -5
dataeval/detectors/ood/base.py +2 -2
dataeval/detectors/ood/metadata_ood_mi.py +4 -6
dataeval/detectors/ood/mixin.py +3 -4
dataeval/detectors/ood/vae.py +3 -2
dataeval/metadata/__init__.py +2 -1
dataeval/metadata/_distance.py +134 -0
dataeval/metadata/_ood.py +30 -49
dataeval/metadata/_utils.py +44 -0
dataeval/metrics/bias/__init__.py +5 -4
dataeval/metrics/bias/_balance.py +17 -149
dataeval/metrics/bias/_coverage.py +4 -106
dataeval/metrics/bias/_diversity.py +12 -107
dataeval/metrics/bias/_parity.py +7 -71
dataeval/metrics/estimators/__init__.py +5 -4
dataeval/metrics/estimators/_ber.py +2 -20
dataeval/metrics/estimators/_clusterer.py +1 -61
dataeval/metrics/estimators/_divergence.py +2 -19
dataeval/metrics/estimators/_uap.py +2 -16
dataeval/metrics/stats/__init__.py +15 -12
dataeval/metrics/stats/_base.py +41 -128
dataeval/metrics/stats/_boxratiostats.py +13 -13
dataeval/metrics/stats/_dimensionstats.py +17 -58
dataeval/metrics/stats/_hashstats.py +19 -35
dataeval/metrics/stats/_imagestats.py +94 -0
dataeval/metrics/stats/_labelstats.py +42 -121
dataeval/metrics/stats/_pixelstats.py +19 -51
dataeval/metrics/stats/_visualstats.py +19 -51
dataeval/outputs/__init__.py +57 -0
dataeval/outputs/_base.py +182 -0
dataeval/outputs/_bias.py +381 -0
dataeval/outputs/_drift.py +83 -0
dataeval/outputs/_estimators.py +114 -0
dataeval/outputs/_linters.py +186 -0
dataeval/outputs/_metadata.py +54 -0
dataeval/{detectors/ood/output.py → outputs/_ood.py} +22 -22
dataeval/outputs/_stats.py +393 -0
dataeval/outputs/_utils.py +44 -0
dataeval/outputs/_workflows.py +364 -0
dataeval/typing.py +187 -7
dataeval/utils/_method.py +1 -5
dataeval/utils/_plot.py +2 -2
dataeval/utils/data/__init__.py +5 -1
dataeval/utils/data/_dataset.py +217 -0
dataeval/utils/data/_embeddings.py +12 -14
dataeval/utils/data/_images.py +30 -27
dataeval/utils/data/_metadata.py +28 -11
dataeval/utils/data/_selection.py +25 -22
dataeval/utils/data/_split.py +5 -29
dataeval/utils/data/_targets.py +14 -2
dataeval/utils/data/datasets/_base.py +5 -5
dataeval/utils/data/datasets/_cifar10.py +1 -1
dataeval/utils/data/datasets/_milco.py +1 -1
dataeval/utils/data/datasets/_mnist.py +1 -1
dataeval/utils/data/datasets/_ships.py +1 -1
dataeval/utils/data/{_types.py → datasets/_types.py} +10 -16
dataeval/utils/data/datasets/_voc.py +1 -1
dataeval/utils/data/selections/_classfilter.py +4 -5
dataeval/utils/data/selections/_indices.py +2 -2
dataeval/utils/data/selections/_limit.py +2 -2
dataeval/utils/data/selections/_reverse.py +2 -2
dataeval/utils/data/selections/_shuffle.py +2 -2
dataeval/utils/torch/_internal.py +5 -5
dataeval/utils/torch/trainer.py +8 -8
dataeval/workflows/__init__.py +2 -1
dataeval/workflows/sufficiency.py +6 -342
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/METADATA +2 -2
dataeval-0.82.1.dist-info/RECORD +105 -0
dataeval/_output.py +0 -137
dataeval/detectors/ood/metadata_ks_compare.py +0 -129
dataeval/metrics/stats/_datasetstats.py +0 -198
dataeval-0.81.0.dist-info/RECORD +0 -94
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/LICENSE.txt +0 -0
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/WHEEL +0 -0

dataeval/__init__.py CHANGED Viewed

@@ -8,7 +8,7 @@ shifts that impact performance of deployed models.
 from __future__ import annotations
 __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
-__version__ = "0.81.0"
+__version__ = "0.82.1"
 import logging

dataeval/config.py CHANGED Viewed

@@ -4,36 +4,61 @@ Global configuration settings for DataEval.
 from __future__ import annotations
-__all__ = ["get_device", "set_device", "get_max_processes", "set_max_processes"]
+__all__ = ["get_device", "set_device", "get_max_processes", "set_max_processes", "DeviceLike"]
+import sys
+from typing import Union
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+else:
+    from typing_extensions import TypeAlias
+import numpy as np
 import torch
-from torch import device
-_device: device | None = None
+_device: torch.device | None = None
 _processes: int | None = None
+_seed: int | None = None
+DeviceLike: TypeAlias = Union[int, str, tuple[str, int], torch.device]
+"""
+Type alias for types that are acceptable for specifying a torch.device.
+See Also
+--------
+`torch.device <https://pytorch.org/docs/stable/tensor_attributes.html#torch.device>`_
+"""
+def _todevice(device: DeviceLike) -> torch.device:
+    return torch.device(*device) if isinstance(device, tuple) else torch.device(device)
-def set_device(device: str | device | int) -> None:
+def set_device(device: DeviceLike) -> None:
     """
     Sets the default device to use when executing against a PyTorch backend.
     Parameters
     ----------
-    device : str or int or `torch.device`
-        The default device to use. See `torch.device <https://pytorch.org/docs/stable/tensor_attributes.html#torch.device>`_
-        documentation for more information.
+    device : DeviceLike
+        The default device to use. See documentation for more information.
+    See Also
+    --------
+    `torch.device <https://pytorch.org/docs/stable/tensor_attributes.html#torch.device>`_
     """
     global _device
-    _device = torch.device(device)
+    _device = _todevice(device)
-def get_device(override: str | device | int | None = None) -> torch.device:
+def get_device(override: DeviceLike | None = None) -> torch.device:
     """
     Returns the PyTorch device to use.
     Parameters
     ----------
-    override : str or int or `torch.device` or None, default None
+    override : DeviceLike or None, default None
         The user specified override if provided, otherwise returns the default device.
     Returns
@@ -44,7 +69,7 @@ def get_device(override: str | device | int | None = None) -> torch.device:
         global _device
         return torch.get_default_device() if _device is None else _device
     else:
-        return torch.device(override)
+        return _todevice(override)
 def set_max_processes(processes: int | None) -> None:
@@ -75,3 +100,35 @@ def get_max_processes() -> int | None:
     """
     global _processes
     return _processes
+def set_seed(seed: int | None, all_generators: bool = False) -> None:
+    """
+    Sets the seed for use by classes that allow for a random state or seed.
+    Parameters
+    ----------
+    seed : int or None
+        The seed to use.
+    all_generators : bool, default False
+        Whether to set the seed for all generators, including NumPy and PyTorch.
+    """
+    global _seed
+    _seed = seed
+    if all_generators:
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+def get_seed() -> int | None:
+    """
+    Returns the seed for random state or seed.
+    Returns
+    -------
+    int or None
+        The seed to use.
+    """
+    global _seed
+    return _seed

dataeval/detectors/drift/__init__.py CHANGED Viewed

@@ -14,9 +14,9 @@ __all__ = [
 ]
 from dataeval.detectors.drift import updates
-from dataeval.detectors.drift._base import DriftOutput
 from dataeval.detectors.drift._cvm import DriftCVM
 from dataeval.detectors.drift._ks import DriftKS
-from dataeval.detectors.drift._mmd import DriftMMD, DriftMMDOutput
+from dataeval.detectors.drift._mmd import DriftMMD
 from dataeval.detectors.drift._torch import preprocess_drift
 from dataeval.detectors.drift._uncertainty import DriftUncertainty
+from dataeval.outputs._drift import DriftMMDOutput, DriftOutput

dataeval/detectors/drift/_base.py CHANGED Viewed

@@ -11,84 +11,28 @@ from __future__ import annotations
 __all__ = []
 import math
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
+from abc import abstractmethod
 from functools import wraps
-from typing import Any, Callable, Literal, TypeVar
+from typing import Any, Callable, Literal, Protocol, TypeVar, runtime_checkable
 import numpy as np
 from numpy.typing import NDArray
-from dataeval._output import Output, set_metadata
+from dataeval.outputs import DriftOutput
+from dataeval.outputs._base import set_metadata
 from dataeval.typing import Array, ArrayLike
 from dataeval.utils._array import as_numpy, to_numpy
 R = TypeVar("R")
-class UpdateStrategy(ABC):
+@runtime_checkable
+class UpdateStrategy(Protocol):
     """
-    Updates reference dataset for drift detector
-    Parameters
-    ----------
-    n : int
-        Update with last n instances seen by the detector.
-    """
-    def __init__(self, n: int) -> None:
-        self.n = n
-    @abstractmethod
-    def __call__(self, x_ref: NDArray[Any], x: NDArray[Any], count: int) -> NDArray[Any]:
-        """Abstract implementation of update strategy"""
-@dataclass(frozen=True)
-class DriftBaseOutput(Output):
-    """
-    Base output class for Drift Detector classes
-    """
-    drifted: bool
-    threshold: float
-    p_val: float
-    distance: float
-@dataclass(frozen=True)
-class DriftOutput(DriftBaseOutput):
-    """
-    Output class for :class:`.DriftCVM`, :class:`.DriftKS`, and :class:`.DriftUncertainty` drift detectors.
-    Attributes
-    ----------
-    drifted : bool
-        :term:`Drift` prediction for the images
-    threshold : float
-        Threshold after multivariate correction if needed
-    p_val : float
-        Instance-level p-value
-    distance : float
-        Instance-level distance
-    feature_drift : NDArray
-        Feature-level array of images detected to have drifted
-    feature_threshold : float
-        Feature-level threshold to determine drift
-    p_vals : NDArray
-        Feature-level p-values
-    distances : NDArray
-        Feature-level distances
+    Protocol for reference dataset update strategy for drift detectors
     """
-    # drifted: bool
-    # threshold: float
-    # p_val: float
-    # distance: float
-    feature_drift: NDArray[np.bool_]
-    feature_threshold: float
-    p_vals: NDArray[np.float32]
-    distances: NDArray[np.float32]
+    def __call__(self, x_ref: NDArray[Any], x: NDArray[Any], count: int) -> NDArray[Any]: ...
 def update_x_ref(fn: Callable[..., R]) -> Callable[..., R]:

dataeval/detectors/drift/_mmd.py CHANGED Viewed

@@ -10,44 +10,18 @@ from __future__ import annotations
 __all__ = []
-from dataclasses import dataclass
 from typing import Callable
 import torch
-from dataeval._output import set_metadata
-from dataeval.config import get_device
-from dataeval.detectors.drift._base import BaseDrift, DriftBaseOutput, UpdateStrategy, preprocess_x, update_x_ref
+from dataeval.config import DeviceLike, get_device
+from dataeval.detectors.drift._base import BaseDrift, UpdateStrategy, preprocess_x, update_x_ref
 from dataeval.detectors.drift._torch import GaussianRBF, mmd2_from_kernel_matrix
+from dataeval.outputs import DriftMMDOutput
+from dataeval.outputs._base import set_metadata
 from dataeval.typing import ArrayLike
-@dataclass(frozen=True)
-class DriftMMDOutput(DriftBaseOutput):
-    """
-    Output class for :class:`.DriftMMD` :term:`drift<Drift>` detector.
-    Attributes
-    ----------
-    drifted : bool
-        Drift prediction for the images
-    threshold : float
-        :term:`P-Value` used for significance of the permutation test
-    p_val : float
-        P-value obtained from the permutation test
-    distance : float
-        MMD^2 between the reference and test set
-    distance_threshold : float
-        MMD^2 threshold above which drift is flagged
-    """
-    # drifted: bool
-    # threshold: float
-    # p_val: float
-    # distance: float
-    distance_threshold: float
 class DriftMMD(BaseDrift):
     """
     :term:`Maximum Mean Discrepancy (MMD) Drift Detection` algorithm \
@@ -57,7 +31,7 @@ class DriftMMD(BaseDrift):
     ----------
     x_ref : ArrayLike
         Data used as reference distribution.
-    p_val : float | None, default 0.05
+    p_val : float or None, default 0.05
         :term:`P-value` used for significance of the statistical test for each feature.
         If the FDR correction method is used, this corresponds to the acceptable
         q-value.
@@ -65,14 +39,14 @@ class DriftMMD(BaseDrift):
         Whether the given reference data ``x_ref`` has been preprocessed yet.
         If ``True``, only the test data ``x`` will be preprocessed at prediction time.
         If ``False``, the reference data will also be preprocessed.
-    update_x_ref : UpdateStrategy | None, default None
+    update_x_ref : UpdateStrategy or None, default None
         Reference data can optionally be updated using an UpdateStrategy class. Update
         using the last n instances seen by the detector with LastSeenUpdateStrategy
         or via reservoir sampling with ReservoirSamplingUpdateStrategy.
-    preprocess_fn : Callable | None, default None
+    preprocess_fn : Callable or None, default None
         Function to preprocess the data before computing the data drift metrics.
         Typically a :term:`dimensionality reduction<Dimensionality Reduction>` technique.
-    sigma : ArrayLike | None, default None
+    sigma : ArrayLike or None, default None
         Optionally set the internal GaussianRBF kernel bandwidth. Can also pass multiple
         bandwidth values as an array. The kernel evaluation is then averaged over
         those bandwidths.
@@ -80,9 +54,9 @@ class DriftMMD(BaseDrift):
         Whether to already configure the kernel bandwidth from the reference data.
     n_permutations : int, default 100
         Number of permutations used in the permutation test.
-    device : str | None, default None
-        Device type used. The default None uses the GPU and falls back on CPU.
-        Can be specified by passing either 'cuda', 'gpu' or 'cpu'.
+    device : DeviceLike or None, default None
+        The hardware device to use if specified, otherwise uses the DataEval
+        default or torch default.
     Example
     -------
@@ -110,7 +84,7 @@ class DriftMMD(BaseDrift):
         sigma: ArrayLike | None = None,
         configure_kernel_from_x_ref: bool = True,
         n_permutations: int = 100,
-        device: str | torch.device | None = None,
+        device: DeviceLike | None = None,
     ) -> None:
         super().__init__(x_ref, p_val, x_ref_preprocessed, update_x_ref, preprocess_fn)

dataeval/detectors/drift/_torch.py CHANGED Viewed

@@ -17,7 +17,7 @@ import torch
 import torch.nn as nn
 from numpy.typing import NDArray
-from dataeval.config import get_device
+from dataeval.config import DeviceLike, get_device
 from dataeval.utils.torch._internal import predict_batch
@@ -59,7 +59,7 @@ def mmd2_from_kernel_matrix(
 def preprocess_drift(
     x: NDArray[Any],
     model: nn.Module,
-    device: str | torch.device | None = None,
+    device: DeviceLike | None = None,
     preprocess_batch_fn: Callable | None = None,
     batch_size: int = int(1e10),
     dtype: type[np.generic] | torch.dtype = np.float32,
@@ -73,15 +73,15 @@ def preprocess_drift(
         Batch of instances.
     model : nn.Module
         Model used for preprocessing.
-    device : torch.device | None, default None
-        Device type used. The default None tries to use the GPU and falls back on CPU.
-        Can be specified by passing either torch.device('cuda') or torch.device('cpu').
-    preprocess_batch_fn : Callable | None, default None
+    device : DeviceLike or None, default None
+        The hardware device to use if specified, otherwise uses the DataEval
+        default or torch default.
+    preprocess_batch_fn : Callable or None, default None
         Optional batch preprocessing function. For example to convert a list of objects
         to a batch which can be processed by the PyTorch model.
     batch_size : int, default 1e10
         Batch size used during prediction.
-    dtype : np.dtype | torch.dtype, default np.float32
+    dtype : np.dtype or torch.dtype, default np.float32
         Model output type, either a :term:`NumPy` or torch dtype, e.g. np.float32 or torch.float32.
     Returns

dataeval/detectors/drift/_uncertainty.py CHANGED Viewed

@@ -19,9 +19,10 @@ from scipy.special import softmax
 from scipy.stats import entropy
 from dataeval.config import get_device
-from dataeval.detectors.drift._base import DriftOutput, UpdateStrategy
+from dataeval.detectors.drift._base import UpdateStrategy
 from dataeval.detectors.drift._ks import DriftKS
 from dataeval.detectors.drift._torch import preprocess_drift
+from dataeval.outputs import DriftOutput
 from dataeval.typing import ArrayLike
@@ -84,20 +85,20 @@ class DriftUncertainty:
         Whether the given reference data ``x_ref`` has been preprocessed yet.
         If ``True``, only the test data ``x`` will be preprocessed at prediction time.
         If ``False``, the reference data will also be preprocessed.
-    update_x_ref : UpdateStrategy | None, default None
+    update_x_ref : UpdateStrategy or None, default None
         Reference data can optionally be updated using an UpdateStrategy class. Update
         using the last n instances seen by the detector with LastSeenUpdateStrategy
         or via reservoir sampling with ReservoirSamplingUpdateStrategy.
-    preds_type : "probs" | "logits", default "probs"
+    preds_type : "probs" or "logits", default "probs"
         Type of prediction output by the model. Options are 'probs' (in [0,1]) or
         'logits' (in [-inf,inf]).
     batch_size : int, default 32
         Batch size used to evaluate model. Only relevant when backend has been
         specified for batch prediction.
-    preprocess_batch_fn : Callable | None, default None
+    preprocess_batch_fn : Callable or None, default None
         Optional batch preprocessing function. For example to convert a list of
         objects to a batch which can be processed by the model.
-    device : str | None, default None
+    device : DeviceLike or None, default None
         Device type used. The default None tries to use the GPU and falls back on
         CPU if needed. Can be specified by passing either 'cuda' or 'cpu'.

dataeval/detectors/drift/updates.py CHANGED Viewed

@@ -7,15 +7,32 @@ from __future__ import annotations
 __all__ = ["LastSeenUpdate", "ReservoirSamplingUpdate"]
+from abc import ABC, abstractmethod
 from typing import Any
 import numpy as np
 from numpy.typing import NDArray
-from dataeval.detectors.drift._base import UpdateStrategy
+class BaseUpdateStrategy(ABC):
+    """
+    Updates reference dataset for drift detector
+    Parameters
+    ----------
+    n : int
+        Update with last n instances seen by the detector.
+    """
+    def __init__(self, n: int) -> None:
+        self.n = n
+    @abstractmethod
+    def __call__(self, x_ref: NDArray[Any], x: NDArray[Any], count: int) -> NDArray[Any]:
+        """Abstract implementation of update strategy"""
-class LastSeenUpdate(UpdateStrategy):
+class LastSeenUpdate(BaseUpdateStrategy):
     """
     Updates reference dataset for :term:`drift<Drift>` detector using last seen method.
@@ -30,7 +47,7 @@ class LastSeenUpdate(UpdateStrategy):
         return x_updated[-self.n :]
-class ReservoirSamplingUpdate(UpdateStrategy):
+class ReservoirSamplingUpdate(BaseUpdateStrategy):
     """
     Updates reference dataset for :term:`drift<Drift>` detector using reservoir sampling method.

dataeval/detectors/linters/__init__.py CHANGED Viewed

@@ -9,5 +9,6 @@ __all__ = [
     "OutliersOutput",
 ]
-from dataeval.detectors.linters.duplicates import Duplicates, DuplicatesOutput
-from dataeval.detectors.linters.outliers import Outliers, OutliersOutput
+from dataeval.detectors.linters.duplicates import Duplicates
+from dataeval.detectors.linters.outliers import Outliers
+from dataeval.outputs._linters import DuplicatesOutput, OutliersOutput

dataeval/detectors/linters/duplicates.py CHANGED Viewed

@@ -2,40 +2,15 @@ from __future__ import annotations
 __all__ = []
-from dataclasses import dataclass
-from typing import Any, Generic, Iterable, Sequence, TypeVar, overload
+from typing import Any, Sequence, overload
-from torch.utils.data import Dataset
-from dataeval._output import Output, set_metadata
+from dataeval.metrics.stats import hashstats
 from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
-from dataeval.metrics.stats._hashstats import HashStatsOutput, hashstats
-from dataeval.typing import ArrayLike
-DuplicateGroup = list[int]
-DatasetDuplicateGroupMap = dict[int, DuplicateGroup]
-TIndexCollection = TypeVar("TIndexCollection", DuplicateGroup, DatasetDuplicateGroupMap)
-@dataclass(frozen=True)
-class DuplicatesOutput(Generic[TIndexCollection], Output):
-    """
-    Output class for :class:`.Duplicates` lint detector.
-    Attributes
-    ----------
-    exact : list[list[int] | dict[int, list[int]]]
-        Indices of images that are exact matches
-    near: list[list[int] | dict[int, list[int]]]
-        Indices of images that are near matches
-    - For a single dataset, indices are returned as a list of index groups.
-    - For multiple datasets, indices are returned as dictionaries where the key is the
-      index of the dataset, and the value is the list index groups from that dataset.
-    """
-    exact: list[TIndexCollection]
-    near: list[TIndexCollection]
+from dataeval.outputs import DuplicatesOutput, HashStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.outputs._linters import DatasetDuplicateGroupMap, DuplicateGroup
+from dataeval.typing import Array, Dataset
+from dataeval.utils.data._images import Images
 class Duplicates:
@@ -113,13 +88,13 @@ class Duplicates:
         """
         if isinstance(hashes, HashStatsOutput):
-            return DuplicatesOutput(**self._get_duplicates(hashes.dict()))
+            return DuplicatesOutput(**self._get_duplicates(hashes.data()))
         if not isinstance(hashes, Sequence):
             raise TypeError("Invalid stats output type; only use output from hashstats.")
         combined, dataset_steps = combine_stats(hashes)
-        duplicates = self._get_duplicates(combined.dict())
+        duplicates = self._get_duplicates(combined.data())
         # split up results from combined dataset into individual dataset buckets
         for dup_type, dup_list in duplicates.items():
@@ -134,22 +109,15 @@ class Duplicates:
         return DuplicatesOutput(**duplicates)
-    @overload
-    def evaluate(self, data: Iterable[ArrayLike]) -> DuplicatesOutput[DuplicateGroup]: ...
-    @overload
-    def evaluate(self, data: Dataset[tuple[ArrayLike, Any, dict[str, Any]]]) -> DuplicatesOutput[DuplicateGroup]: ...
     @set_metadata(state=["only_exact"])
-    def evaluate(
-        self, data: Iterable[ArrayLike] | Dataset[tuple[ArrayLike, Any, dict[str, Any]]]
-    ) -> DuplicatesOutput[DuplicateGroup]:
+    def evaluate(self, data: Dataset[Array] | Dataset[tuple[Array, Any, Any]]) -> DuplicatesOutput[DuplicateGroup]:
         """
         Returns duplicate image indices for both exact matches and near matches
         Parameters
         ----------
-        data : Iterable[ArrayLike], shape - (N, C, H, W) | StatsOutput | Sequence[StatsOutput]
-            A dataset of images in an ArrayLike format or the output(s) from a hashstats analysis
+        data : Iterable[Array], shape - (N, C, H, W) | Dataset[tuple[Array, Any, Any]]
+            A dataset of images in an Array format or the output(s) from a hashstats analysis
         Returns
         -------
@@ -166,7 +134,7 @@ class Duplicates:
         >>> all_dupes.evaluate(duplicate_images)
         DuplicatesOutput(exact=[[3, 20], [16, 37]], near=[[3, 20, 22], [12, 18], [13, 36], [14, 31], [17, 27], [19, 38, 47]])
         """  # noqa: E501
-        images = (d[0] for d in data) if isinstance(data, Dataset) else data
+        images = Images(data) if isinstance(data, Dataset) else data
         self.stats = hashstats(images)
-        duplicates = self._get_duplicates(self.stats.dict())
+        duplicates = self._get_duplicates(self.stats.data())
         return DuplicatesOutput(**duplicates)

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl