dataeval 0.82.0__py3-none-any.whl → 0.82.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/config.py +68 -11
- dataeval/detectors/drift/_mmd.py +9 -9
- dataeval/detectors/drift/_torch.py +7 -7
- dataeval/detectors/drift/_uncertainty.py +4 -4
- dataeval/detectors/linters/duplicates.py +3 -3
- dataeval/detectors/linters/outliers.py +3 -3
- dataeval/detectors/ood/ae.py +5 -4
- dataeval/detectors/ood/base.py +2 -2
- dataeval/detectors/ood/metadata_ood_mi.py +4 -6
- dataeval/detectors/ood/mixin.py +1 -1
- dataeval/detectors/ood/vae.py +2 -1
- dataeval/metadata/_distance.py +11 -44
- dataeval/metadata/_ood.py +9 -7
- dataeval/metrics/bias/_balance.py +7 -3
- dataeval/metrics/bias/_diversity.py +3 -0
- dataeval/metrics/bias/_parity.py +2 -0
- dataeval/metrics/stats/_base.py +3 -3
- dataeval/metrics/stats/_boxratiostats.py +1 -1
- dataeval/metrics/stats/_imagestats.py +4 -4
- dataeval/outputs/__init__.py +4 -0
- dataeval/outputs/_base.py +50 -21
- dataeval/outputs/_bias.py +1 -1
- dataeval/outputs/_linters.py +4 -2
- dataeval/outputs/_metadata.py +54 -0
- dataeval/outputs/_stats.py +12 -6
- dataeval/utils/data/_embeddings.py +8 -9
- dataeval/utils/data/_metadata.py +16 -7
- dataeval/utils/data/_selection.py +4 -8
- dataeval/utils/data/_split.py +3 -2
- dataeval/utils/data/selections/_classfilter.py +5 -3
- dataeval/utils/torch/_internal.py +5 -5
- dataeval/utils/torch/trainer.py +8 -8
- {dataeval-0.82.0.dist-info → dataeval-0.82.1.dist-info}/METADATA +1 -1
- {dataeval-0.82.0.dist-info → dataeval-0.82.1.dist-info}/RECORD +37 -36
- {dataeval-0.82.0.dist-info → dataeval-0.82.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.82.0.dist-info → dataeval-0.82.1.dist-info}/WHEEL +0 -0
dataeval/metrics/stats/_base.py
CHANGED
@@ -248,13 +248,13 @@ def add_stats(a: TStatsOutput, b: TStatsOutput) -> TStatsOutput:
     if type(a) is not type(b):
         raise TypeError(f"Types {type(a)} and {type(b)} cannot be added.")
 
-    sum_dict = deepcopy(a.dict())
+    sum_dict = deepcopy(a.data())
 
     for k in sum_dict:
         if isinstance(sum_dict[k], list):
-            sum_dict[k].extend(b.dict()[k])
+            sum_dict[k].extend(b.data()[k])
         else:
-            sum_dict[k] = np.concatenate((sum_dict[k], b.dict()[k]))
+            sum_dict[k] = np.concatenate((sum_dict[k], b.data()[k]))
 
     return type(a)(**sum_dict)
 

dataeval/metrics/stats/_boxratiostats.py
CHANGED
@@ -153,7 +153,7 @@ def boxratiostats(
         raise ValueError("Input for boxstats and imgstats must have matching channel information.")
 
     output_dict = {}
-    for key in boxstats.dict():
+    for key in boxstats.data():
        output_dict[key] = calculate_ratios(key, boxstats, imgstats)
 
     return output_cls(**output_dict)

dataeval/metrics/stats/_imagestats.py
CHANGED
@@ -42,8 +42,8 @@ def imagestats(
     Calculates various :term:`statistics<Statistics>` for each image.
 
     This function computes dimension, pixel and visual metrics
-    on the images or individual bounding boxes for each image
-
+    on the images or individual bounding boxes for each image. If
+    performing calculations per channel dimension stats are excluded.
 
     Parameters
     ----------
@@ -61,7 +61,7 @@ def imagestats(
 
     See Also
     --------
-    dimensionstats,
+    dimensionstats, pixelstats, visualstats
 
     Examples
     --------
@@ -91,4 +91,4 @@ def imagestats(
     output_cls = ImageStatsOutput
 
     outputs = run_stats(dataset, per_box, per_channel, processors)
-    return output_cls(**{k: v for d in outputs for k, v in d.dict().items()})
+    return output_cls(**{k: v for d in outputs for k, v in d.data().items()})
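The common thread in these three stats modules is the rename of the output accessor from dict() to data(), matching the new GenericOutput interface in dataeval/outputs/_base.py below. A minimal caller-side sketch; `images` is an assumed stand-in for any dataset imagestats accepts:

    from dataeval.metrics.stats import imagestats

    stats = imagestats(images)                   # `images`: assumed image dataset
    for name, values in stats.data().items():    # was stats.dict() in 0.82.0
        print(name, len(values))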
dataeval/outputs/__init__.py
CHANGED
@@ -8,6 +8,7 @@ from ._bias import BalanceOutput, CoverageOutput, DiversityOutput, LabelParityOu
 from ._drift import DriftMMDOutput, DriftOutput
 from ._estimators import BEROutput, ClustererOutput, DivergenceOutput, UAPOutput
 from ._linters import DuplicatesOutput, OutliersOutput
+from ._metadata import MetadataDistanceOutput, MetadataDistanceValues, MostDeviatedFactorsOutput
 from ._ood import OODOutput, OODScoreOutput
 from ._stats import (
     ChannelStatsOutput,
@@ -39,6 +40,9 @@ __all__ = [
     "ImageStatsOutput",
     "LabelParityOutput",
     "LabelStatsOutput",
+    "MetadataDistanceOutput",
+    "MetadataDistanceValues",
+    "MostDeviatedFactorsOutput",
     "OODOutput",
     "OODScoreOutput",
     "OutliersOutput",
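For reference, the three new classes re-exported here come from the new dataeval/outputs/_metadata.py (shown later in this diff) and become importable from the public namespace:

    from dataeval.outputs import (
        MetadataDistanceOutput,
        MetadataDistanceValues,
        MostDeviatedFactorsOutput,
    )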
dataeval/outputs/_base.py
CHANGED
@@ -4,11 +4,11 @@ __all__ = []
 
 import inspect
 import logging
-from collections.abc import Mapping
+from collections.abc import Collection, Mapping, Sequence
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from functools import partial, wraps
-from typing import Any, Callable, Iterator, TypeVar
+from typing import Any, Callable, Generic, Iterator, TypeVar, overload
 
 import numpy as np
 from typing_extensions import ParamSpec
@@ -56,16 +56,13 @@ class ExecutionMetadata:
         )
 
 
-
-    _meta: ExecutionMetadata | None = None
+T = TypeVar("T", covariant=True)
 
-    def __str__(self) -> str:
-        return f"{self.__class__.__name__}: {str(self.dict())}"
 
-
-
+class GenericOutput(Generic[T]):
+    _meta: ExecutionMetadata | None = None
 
-
+    def data(self) -> T: ...
     def meta(self) -> ExecutionMetadata:
         """
         Metadata about the execution of the function or method for the Output class.
@@ -73,34 +70,66 @@ class Output:
         return self._meta or ExecutionMetadata.empty()
 
 
-
-
+class Output(GenericOutput[dict[str, Any]]):
+    def data(self) -> dict[str, Any]:
+        return {k: v for k, v in self.__dict__.items() if k != "_meta"}
 
+    def __repr__(self) -> str:
+        return str(self)
 
-
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}({', '.join([f'{k}={v}' for k, v in self.data().items()])})"
+
+
+class BaseCollectionMixin(Collection[Any]):
     __slots__ = ["_data"]
 
+    def data(self) -> Any:
+        return self._data
+
+    def __len__(self) -> int:
+        return len(self._data)
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({repr(self._data)})"
+
+    def __str__(self) -> str:
+        return str(self._data)
+
+
+TKey = TypeVar("TKey", str, int, float, set)
+TValue = TypeVar("TValue")
+
+
+class MappingOutput(Mapping[TKey, TValue], BaseCollectionMixin, GenericOutput[Mapping[TKey, TValue]]):
     def __init__(self, data: Mapping[TKey, TValue]):
         self._data = data
 
     def __getitem__(self, key: TKey) -> TValue:
-        return self._data
+        return self._data[key]
 
     def __iter__(self) -> Iterator[TKey]:
-        return self._data
+        return iter(self._data)
 
-    def __len__(self) -> int:
-        return self._data.__len__()
 
-
-
+class SequenceOutput(Sequence[TValue], BaseCollectionMixin, GenericOutput[Sequence[TValue]]):
+    def __init__(self, data: Sequence[TValue]):
+        self._data = data
+
+    @overload
+    def __getitem__(self, index: int) -> TValue: ...
+    @overload
+    def __getitem__(self, index: slice) -> Sequence[TValue]: ...
 
-    def
-        return
+    def __getitem__(self, index: int | slice) -> TValue | Sequence[TValue]:
+        return self._data[index]
+
+    def __iter__(self) -> Iterator[TValue]:
+        return iter(self._data)
 
 
 P = ParamSpec("P")
-R = TypeVar("R", bound=Output)
+R = TypeVar("R", bound=GenericOutput)
 
 
 def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
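A short sketch of the corrected container semantics (these classes live in the private module dataeval.outputs._base, so import paths may shift): in 0.82.0, MappingOutput.__getitem__ and __iter__ returned the raw _data object instead of indexing or iterating it, so both asserts below would have failed:

    from dataeval.outputs._base import MappingOutput, SequenceOutput

    m = MappingOutput({"a": 1.0, "b": 2.0})
    assert m["a"] == 1.0          # 0.82.0 returned the whole mapping here
    assert list(m) == ["a", "b"]  # 0.82.0 returned _data, not iter(_data)

    s = SequenceOutput([("mean", 0.4), ("std", 0.1)])
    assert s[0] == ("mean", 0.4)  # int/slice indexing via the new overloads
    assert len(s) == 2            # __len__ now comes from BaseCollectionMixin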
dataeval/outputs/_bias.py
CHANGED
@@ -364,7 +364,7 @@ class DiversityOutput(Output):
                col_labels,
                xlabel="Factors",
                ylabel="Class",
-               cbarlabel=f"Normalized {asdict(self.meta)['arguments']['method'].title()} Index",
+               cbarlabel=f"Normalized {asdict(self.meta())['arguments']['method'].title()} Index",
            )
 
        else:
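The one-character fix matters because meta is a method returning the ExecutionMetadata dataclass, not an attribute, so asdict(self.meta) raised at plot time. A sketch, with `result` an assumed DiversityOutput instance:

    from dataclasses import asdict

    method = asdict(result.meta())["arguments"]["method"]  # e.g. a diversity method name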
dataeval/outputs/_linters.py
CHANGED
@@ -24,7 +24,7 @@ TIndexIssueMap = TypeVar("TIndexIssueMap", IndexIssueMap, list[IndexIssueMap])
 
 
 @dataclass(frozen=True)
-class DuplicatesOutput(Generic[TIndexCollection], Output):
+class DuplicatesOutput(Output, Generic[TIndexCollection]):
     """
     Output class for :class:`.Duplicates` lint detector.
 
@@ -35,6 +35,8 @@ class DuplicatesOutput(Generic[TIndexCollection], Output):
     near: list[list[int] | dict[int, list[int]]]
         Indices of images that are near matches
 
+    Notes
+    -----
     - For a single dataset, indices are returned as a list of index groups.
     - For multiple datasets, indices are returned as dictionaries where the key is the
       index of the dataset, and the value is the list index groups from that dataset.
@@ -99,7 +101,7 @@ def _create_pandas_dataframe(class_wise):
 
 
 @dataclass(frozen=True)
-class OutliersOutput(Generic[TIndexIssueMap], Output):
+class OutliersOutput(Output, Generic[TIndexIssueMap]):
     """
     Output class for :class:`.Outliers` lint detector.
 

dataeval/outputs/_metadata.py
ADDED
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+__all__ = []
+
+from typing import NamedTuple
+
+from dataeval.outputs._base import MappingOutput, SequenceOutput
+
+
+class MostDeviatedFactorsOutput(SequenceOutput[tuple[str, float]]):
+    """
+    Output class for results of :func:`.most_deviated_factors` for OOD samples with metadata.
+
+    Attributes
+    ----------
+    value : tuple[str, float]
+        A tuple of the factor name and deviation of the highest metadata deviation
+    """
+
+
+class MetadataDistanceValues(NamedTuple):
+    """
+    Statistics comparing metadata distance.
+
+    Attributes
+    ----------
+    statistic : float
+        the KS statistic
+    location : float
+        The value at which the KS statistic has its maximum, measured in IQR-normalized units relative
+        to the median of the reference distribution.
+    dist : float
+        The Earth Mover's Distance normalized by the interquartile range (IQR) of the reference
+    pvalue : float
+        The p-value from the KS two-sample test
+    """
+
+    statistic: float
+    location: float
+    dist: float
+    pvalue: float
+
+
+class MetadataDistanceOutput(MappingOutput[str, MetadataDistanceValues]):
+    """
+    Output class for results of ks_2samp featurewise comparisons of new metadata to reference metadata.
+
+    Attributes
+    ----------
+    key : str
+        Metadata feature names
+    value : :class:`.MetadataDistanceValues`
+        Output per feature name containing the statistic, statistic location, distance, and pvalue.
+    """
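A hedged sketch of consuming these new types; metadata_distance is assumed to be the producing function in dataeval/metadata/_distance.py (changed in this release), and ref_md/test_md are assumed metadata inputs:

    from dataeval.metadata import metadata_distance  # assumed import path

    distances = metadata_distance(ref_md, test_md)   # MetadataDistanceOutput
    for feature, vals in distances.items():          # Mapping semantics
        if vals.pvalue < 0.05:
            print(feature, vals.statistic, vals.dist)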
dataeval/outputs/_stats.py
CHANGED
@@ -4,7 +4,7 @@ __all__ = []
 
 import contextlib
 from dataclasses import dataclass
-from typing import Iterable, Optional, Union
+from typing import Any, Iterable, Optional, Union
 
 import numpy as np
 from numpy.typing import NDArray
@@ -63,7 +63,7 @@ class BaseStatsOutput(Output):
 
     def __post_init__(self) -> None:
         length = len(self.source_index)
-        bad = {k: len(v) for k, v in self.dict().items() if k not in [SOURCE_INDEX, BOX_COUNT] and len(v) != length}
+        bad = {k: len(v) for k, v in self.data().items() if k not in [SOURCE_INDEX, BOX_COUNT] and len(v) != length}
        if bad:
            raise ValueError(f"All values must have the same length as source_index. Bad values: {str(bad)}.")
 
@@ -105,7 +105,7 @@ class BaseStatsOutput(Output):
     def _get_channels(
         self, channel_limit: int | None = None, channel_index: int | Iterable[int] | None = None
     ) -> tuple[int, list[bool] | None]:
-        source_index = self.dict()[SOURCE_INDEX]
+        source_index = self.data()[SOURCE_INDEX]
        raw_channels = int(max([si.channel or 0 for si in source_index])) + 1
        if isinstance(channel_index, int):
            max_channels = 1 if channel_index < raw_channels else raw_channels
@@ -127,15 +127,21 @@ class BaseStatsOutput(Output):
 
         return max_channels, ch_mask
 
+    def factors(self) -> dict[str, NDArray[Any]]:
+        return {
+            k: v
+            for k, v in self.data().items()
+            if k not in (SOURCE_INDEX, BOX_COUNT) and isinstance(v, np.ndarray) and v[v != 0].size > 0 and v.ndim == 1
+        }
+
     def plot(
         self, log: bool, channel_limit: int | None = None, channel_index: int | Iterable[int] | None = None
     ) -> None:
         max_channels, ch_mask = self._get_channels(channel_limit, channel_index)
-        d = {k: v for k, v in self.dict().items() if isinstance(v, np.ndarray) and v[v != 0].size > 0 and v.ndim == 1}
         if max_channels == 1:
-            histogram_plot(d, log)
+            histogram_plot(self.factors(), log)
        else:
-            channel_histogram_plot(d, log, max_channels, ch_mask)
+            channel_histogram_plot(self.factors(), log, max_channels, ch_mask)
 
 
 @dataclass(frozen=True)
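The new factors() helper centralizes the filtering that plot() previously inlined in the local `d`: only one-dimensional, non-empty ndarray fields survive. A sketch, reusing the `stats` output from the earlier example:

    plottable = stats.factors()   # dict[str, NDArray[Any]] of 1-D factors
    stats.plot(log=True)          # now calls histogram_plot(self.factors(), log)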
dataeval/utils/data/_embeddings.py
CHANGED
@@ -9,7 +9,7 @@ import torch
 from torch.utils.data import DataLoader, Subset
 from tqdm import tqdm
 
-from dataeval.config import get_device
+from dataeval.config import DeviceLike, get_device
 from dataeval.typing import Array, Dataset
 from dataeval.utils.torch.models import SupportsEncode
 
@@ -24,13 +24,14 @@ class Embeddings:
     ----------
     dataset : ImageClassificationDataset or ObjectDetectionDataset
         Dataset to access original images from.
-    batch_size : int
+    batch_size : int
         Batch size to use when encoding images.
-    model : torch.nn.Module,
+    model : torch.nn.Module or None, default None
         Model to use for encoding images.
-    device :
-
-
+    device : DeviceLike or None, default None
+        The hardware device to use if specified, otherwise uses the DataEval
+        default or torch default.
+    verbose : bool, default False
         Whether to print progress bar when encoding images.
     """
 
@@ -42,9 +43,8 @@ class Embeddings:
         self,
         dataset: Dataset[tuple[Array, Any, Any]],
         batch_size: int,
-        indices: Sequence[int] | None = None,
         model: torch.nn.Module | None = None,
-        device:
+        device: DeviceLike | None = None,
         verbose: bool = False,
     ) -> None:
         self.device = get_device(device)
@@ -52,7 +52,6 @@ class Embeddings:
         self.verbose = verbose
 
         self._dataset = dataset
-        self._indices = indices if indices is not None else range(len(dataset))
         model = torch.nn.Flatten() if model is None else model
         self._model = model.to(self.device).eval()
         self._encoder = model.encode if isinstance(model, SupportsEncode) else model
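Net effect on the constructor: the `indices` parameter is removed and `device` adopts the DeviceLike alias from dataeval.config. A sketch; `ds` and `encoder` are stand-ins, and the export path is assumed:

    from dataeval.utils.data import Embeddings  # assumed export path

    emb = Embeddings(ds, batch_size=64, model=encoder, device=None, verbose=True)
    # device=None resolves through get_device() to the DataEval or torch default.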
dataeval/utils/data/_metadata.py
CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 __all__ = []
 
 import warnings
-from typing import TYPE_CHECKING, Any, Literal, Mapping, Sequence
+from typing import TYPE_CHECKING, Any, Literal, Mapping, Sequence, cast
 
 import numpy as np
 from numpy.typing import NDArray
@@ -11,6 +11,7 @@ from numpy.typing import NDArray
 from dataeval.typing import (
     AnnotatedDataset,
     Array,
+    ArrayLike,
     ObjectDetectionTarget,
 )
 from dataeval.utils._array import as_numpy, to_numpy
@@ -276,16 +277,12 @@ class Metadata:
         if self._processed and not force:
             return
 
-        #
-        self.
-        self._merge()
+        # Create image indices from targets
+        self._image_indices = np.arange(len(self.raw)) if self.targets.source is None else self.targets.source
 
         # Validate the metadata dimensions
         self._validate()
 
-        # Create image indices from targets
-        self._image_indices = np.arange(len(self.raw)) if self.targets.source is None else self.targets.source
-
         # Include specified metadata keys
         if self.include:
             metadata = {i: self.merged[i] for i in self.include if i in self.merged}
@@ -358,3 +355,15 @@ class Metadata:
         )
         self._total_num_factors = len(self._discrete_factor_names + self._continuous_factor_names) + 1
         self._processed = True
+
+    def add_factors(self, factors: Mapping[str, ArrayLike]) -> None:
+        self._merge()
+        self._processed = False
+        target_len = len(self.targets.source) if self.targets.source is not None else len(self.targets)
+        if any(len(v) != target_len for v in factors.values()):
+            raise ValueError(
+                "The lists/arrays in the provided factors have a different length than the current metadata factors."
+            )
+        merged = cast(tuple[dict[str, ArrayLike], dict[str, list[str]]], self._merged)[0]
+        for k, v in factors.items():
+            merged[k] = v
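A sketch of the new add_factors API defined above; `metadata` is an assumed Metadata instance over a dataset with three targets, so every added array must have length 3:

    metadata.add_factors({
        "altitude": [1200, 880, 1430],   # hypothetical factor
        "sensor": ["A", "B", "A"],       # hypothetical factor
    })
    # Mismatched lengths raise ValueError; _processed is reset so the new
    # factors are picked up on the next processing pass.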
dataeval/utils/data/_selection.py
CHANGED
@@ -3,11 +3,11 @@ from __future__ import annotations
 __all__ = []
 
 from enum import IntEnum
-from typing import Any, Generic, Iterator, Sequence, TypeVar
+from typing import Generic, Iterator, Sequence, TypeVar
 
 from dataeval.typing import AnnotatedDataset, DatasetMetadata
 
-_TDatum = TypeVar("_TDatum")
+_TDatum = TypeVar("_TDatum", covariant=True)
 
 
 class SelectionStage(IntEnum):
@@ -69,11 +69,11 @@ class Select(AnnotatedDataset[_TDatum]):
         dataset: AnnotatedDataset[_TDatum],
         selections: Selection[_TDatum] | list[Selection[_TDatum]] | None = None,
     ) -> None:
+        self.__dict__.update(dataset.__dict__)
         self._dataset = dataset
         self._size_limit = len(dataset)
         self._selection = list(range(self._size_limit))
         self._selections = self._sort_selections(selections)
-        self.__dict__.update(dataset.__dict__)
 
         # Ensure metadata is populated correctly as DatasetMetadata TypedDict
         _metadata = getattr(dataset, "metadata", {})
@@ -93,7 +93,7 @@ class Select(AnnotatedDataset[_TDatum]):
         title = f"{self.__class__.__name__} Dataset"
         sep = "-" * len(title)
         selections = f"Selections: [{', '.join([str(s) for s in self._sort_selections(self._selections)])}]"
-        return f"{title}\n{sep}{nt}{selections}\n\n{self._dataset}"
+        return f"{title}\n{sep}{nt}{selections}{nt}Selected Size: {len(self)}\n\n{self._dataset}"
 
     def _sort_selections(self, selections: Selection[_TDatum] | Sequence[Selection[_TDatum]] | None) -> list[Selection]:
         if not selections:
@@ -111,10 +111,6 @@ class Select(AnnotatedDataset[_TDatum]):
             selection(self)
         self._selection = self._selection[: self._size_limit]
 
-    def __getattr__(self, name: str, /) -> Any:
-        selfattr = getattr(self._dataset, name, None)
-        return selfattr if selfattr is not None else getattr(self._dataset, name)
-
     def __getitem__(self, index: int) -> _TDatum:
         return self._dataset[self._selection[index]]
 
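The removed __getattr__ proxy (note its quirk: a failed lookup fell through to a second getattr on the same dataset) is replaced by copying the wrapped dataset's __dict__ up front, before Select assigns its own attributes so they are not clobbered. A sketch of the updated repr; the Select and ClassFilter export paths and the ClassFilter signature are assumptions:

    from dataeval.utils.data import Select                  # assumed path
    from dataeval.utils.data.selections import ClassFilter  # assumed path

    subset = Select(ds, selections=[ClassFilter(classes=[0, 1])])
    print(subset)   # now includes a "Selected Size: N" line after Selections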
dataeval/utils/data/_split.py
CHANGED
@@ -12,6 +12,7 @@ from sklearn.metrics import silhouette_score
 from sklearn.model_selection import GroupKFold, KFold, StratifiedGroupKFold, StratifiedKFold
 from sklearn.utils.multiclass import type_of_target
 
+from dataeval.config import get_seed
 from dataeval.outputs._base import set_metadata
 from dataeval.outputs._utils import SplitDatasetOutput, TrainValSplit
 
@@ -212,9 +213,9 @@ def bin_kmeans(array: NDArray[Any]) -> NDArray[np.intp]:
     best_score = 0.50
     bin_index = np.zeros(len(array), dtype=np.intp)
     for k in range(2, 20):
-        clusterer = KMeans(n_clusters=k)
+        clusterer = KMeans(n_clusters=k, random_state=get_seed())
        cluster_labels = clusterer.fit_predict(array)
-        score = silhouette_score(array, cluster_labels, sample_size=25_000)
+        score = silhouette_score(array, cluster_labels, sample_size=25_000, random_state=get_seed())
        if score > best_score:
            best_score = score
            bin_index = cluster_labels.astype(np.intp)
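get_seed() threads DataEval's global seed (part of the expanded dataeval/config.py in this release) into scikit-learn's random_state, making bin_kmeans reproducible. A sketch, assuming set_seed is the companion setter added alongside get_seed:

    from dataeval.config import get_seed, set_seed  # set_seed assumed

    set_seed(42)
    assert get_seed() == 42   # the value forwarded to KMeans/silhouette_score above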
dataeval/utils/data/selections/_classfilter.py
CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 __all__ = []
 
-from typing import Sequence
+from typing import Sequence, TypeVar
 
 import numpy as np
 
@@ -10,8 +10,10 @@ from dataeval.typing import Array, ImageClassificationDatum
 from dataeval.utils._array import as_numpy
 from dataeval.utils.data._selection import Select, Selection, SelectionStage
 
+TImageClassificationDatum = TypeVar("TImageClassificationDatum", bound=ImageClassificationDatum, covariant=True)
 
-class ClassFilter(Selection[ImageClassificationDatum]):
+
+class ClassFilter(Selection[TImageClassificationDatum]):
     """
     Filter and balance the dataset by class.
 
@@ -34,7 +36,7 @@ class ClassFilter(Selection[ImageClassificationDatum]):
         self.classes = classes
         self.balance = balance
 
-    def __call__(self, dataset: Select[ImageClassificationDatum]) -> None:
+    def __call__(self, dataset: Select[TImageClassificationDatum]) -> None:
        if self.classes is None and not self.balance:
            return
 
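The covariant TImageClassificationDatum keeps the wrapped dataset's concrete datum type flowing through the filter instead of erasing it to ImageClassificationDatum. A type-level sketch; MyDatum is hypothetical:

    # MyDatum is any type satisfying ImageClassificationDatum; with the
    # covariant TypeVar, ClassFilter now type-checks as Selection[MyDatum].
    flt: Selection[MyDatum] = ClassFilter(classes=[0, 1])  # hypothetical usage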
dataeval/utils/torch/_internal.py
CHANGED
@@ -11,13 +11,13 @@ from numpy.typing import NDArray
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
 
-from dataeval.config import get_device
+from dataeval.config import DeviceLike, get_device
 
 
 def predict_batch(
     x: NDArray[Any] | torch.Tensor,
     model: Callable | torch.nn.Module | torch.nn.Sequential,
-    device:
+    device: DeviceLike | None = None,
     batch_size: int = int(1e10),
     preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
     dtype: type[np.generic] | torch.dtype = np.float32,
@@ -31,9 +31,9 @@ def predict_batch(
         Batch of instances.
     model : Callable | nn.Module | nn.Sequential
         PyTorch model.
-    device :
-
-
+    device : DeviceLike or None, default None
+        The hardware device to use if specified, otherwise uses the DataEval
+        default or torch default.
     batch_size : int, default 1e10
         Batch size used during prediction.
     preprocess_fn : Callable | None, default None
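Sketch of calling the updated predict_batch (an internal helper in dataeval.utils.torch._internal); the model is a stand-in, and device=None now resolves through get_device:

    import numpy as np
    import torch

    from dataeval.utils.torch._internal import predict_batch

    model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(4, 2))
    x = np.random.rand(8, 4).astype(np.float32)
    preds = predict_batch(x, model, device=None, batch_size=4)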
dataeval/utils/torch/trainer.py
CHANGED
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+from dataeval.config import DeviceLike, get_device
+
 __all__ = ["AETrainer"]
 
 from typing import Any
@@ -25,9 +27,9 @@ class AETrainer:
     ----------
     model : nn.Module
         The model to be trained.
-    device :
-        The hardware device to use
-
+    device : DeviceLike or None, default None
+        The hardware device to use if specified, otherwise uses the DataEval
+        default or torch default.
     batch_size : int, default 8
         The number of images to process in a batch.
     """
@@ -35,13 +37,11 @@ class AETrainer:
     def __init__(
         self,
         model: nn.Module,
-        device:
+        device: DeviceLike | None = None,
        batch_size: int = 8,
    ):
-
-
-        self.device: torch.device = torch.device(device)
-        self.model: nn.Module = model.to(device)
+        self.device: torch.device = get_device(device)
+        self.model: nn.Module = model.to(self.device)
        self.batch_size = batch_size
 
    def train(self, dataset: Dataset[Any], epochs: int = 25) -> list[float]:
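With this change AETrainer follows the same DeviceLike-or-None convention as the rest of the library: 0.82.0 passed `device` straight into torch.device() and model.to(device), while 0.82.1 resolves it via get_device first, so None falls back to the DataEval or torch default. Sketch; `ae` and `train_ds` are stand-ins:

    from dataeval.utils.torch.trainer import AETrainer

    trainer = AETrainer(ae, device=None, batch_size=8)
    losses = trainer.train(train_ds, epochs=25)   # list[float], per the signature above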
{dataeval-0.82.0.dist-info → dataeval-0.82.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.82.0
+Version: 0.82.1
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT