PyPI - dataeval - Versions diffs - 0.76.1__py3-none-any.whl → 0.81.0__py3-none-any.whl - Mend

dataeval 0.76.1__py3-none-any.whl → 0.81.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)

dataeval/__init__.py +3 -3
dataeval/{output.py → _output.py} +14 -0
dataeval/config.py +77 -0
dataeval/detectors/__init__.py +1 -1
dataeval/detectors/drift/__init__.py +6 -6
dataeval/detectors/drift/{base.py → _base.py} +41 -30
dataeval/detectors/drift/{cvm.py → _cvm.py} +21 -28
dataeval/detectors/drift/{ks.py → _ks.py} +20 -26
dataeval/detectors/drift/{mmd.py → _mmd.py} +33 -19
dataeval/detectors/drift/{torch.py → _torch.py} +2 -1
dataeval/detectors/drift/{uncertainty.py → _uncertainty.py} +23 -7
dataeval/detectors/drift/updates.py +1 -1
dataeval/detectors/linters/__init__.py +0 -3
dataeval/detectors/linters/duplicates.py +17 -8
dataeval/detectors/linters/outliers.py +23 -14
dataeval/detectors/ood/ae.py +29 -8
dataeval/detectors/ood/base.py +5 -4
dataeval/detectors/ood/metadata_ks_compare.py +1 -1
dataeval/detectors/ood/mixin.py +20 -5
dataeval/detectors/ood/output.py +1 -1
dataeval/detectors/ood/vae.py +73 -0
dataeval/metadata/__init__.py +5 -0
dataeval/metadata/_ood.py +238 -0
dataeval/metrics/__init__.py +1 -1
dataeval/metrics/bias/__init__.py +5 -4
dataeval/metrics/bias/{balance.py → _balance.py} +67 -17
dataeval/metrics/bias/{coverage.py → _coverage.py} +41 -35
dataeval/metrics/bias/{diversity.py → _diversity.py} +17 -12
dataeval/metrics/bias/{parity.py → _parity.py} +89 -61
dataeval/metrics/estimators/__init__.py +14 -4
dataeval/metrics/estimators/{ber.py → _ber.py} +42 -11
dataeval/metrics/estimators/_clusterer.py +104 -0
dataeval/metrics/estimators/{divergence.py → _divergence.py} +18 -13
dataeval/metrics/estimators/{uap.py → _uap.py} +4 -4
dataeval/metrics/stats/__init__.py +7 -7
dataeval/metrics/stats/{base.py → _base.py} +52 -16
dataeval/metrics/stats/{boxratiostats.py → _boxratiostats.py} +6 -9
dataeval/metrics/stats/{datasetstats.py → _datasetstats.py} +10 -14
dataeval/metrics/stats/{dimensionstats.py → _dimensionstats.py} +6 -5
dataeval/metrics/stats/{hashstats.py → _hashstats.py} +6 -6
dataeval/metrics/stats/{labelstats.py → _labelstats.py} +4 -4
dataeval/metrics/stats/{pixelstats.py → _pixelstats.py} +5 -4
dataeval/metrics/stats/{visualstats.py → _visualstats.py} +9 -8
dataeval/typing.py +54 -0
dataeval/utils/__init__.py +2 -2
dataeval/utils/_array.py +169 -0
dataeval/utils/_bin.py +199 -0
dataeval/utils/_clusterer.py +144 -0
dataeval/utils/_fast_mst.py +189 -0
dataeval/utils/{image.py → _image.py} +6 -4
dataeval/utils/_method.py +18 -0
dataeval/utils/{shared.py → _mst.py} +3 -65
dataeval/utils/{plot.py → _plot.py} +4 -4
dataeval/utils/data/__init__.py +22 -0
dataeval/utils/data/_embeddings.py +105 -0
dataeval/utils/data/_images.py +65 -0
dataeval/utils/data/_metadata.py +352 -0
dataeval/utils/data/_selection.py +119 -0
dataeval/utils/{dataset/split.py → data/_split.py} +13 -14
dataeval/utils/data/_targets.py +73 -0
dataeval/utils/data/_types.py +58 -0
dataeval/utils/data/collate.py +103 -0
dataeval/utils/data/datasets/__init__.py +17 -0
dataeval/utils/data/datasets/_base.py +254 -0
dataeval/utils/data/datasets/_cifar10.py +134 -0
dataeval/utils/data/datasets/_fileio.py +168 -0
dataeval/utils/data/datasets/_milco.py +153 -0
dataeval/utils/data/datasets/_mixin.py +56 -0
dataeval/utils/data/datasets/_mnist.py +183 -0
dataeval/utils/data/datasets/_ships.py +123 -0
dataeval/utils/data/datasets/_voc.py +352 -0
dataeval/utils/data/selections/__init__.py +15 -0
dataeval/utils/data/selections/_classfilter.py +60 -0
dataeval/utils/data/selections/_indices.py +26 -0
dataeval/utils/data/selections/_limit.py +26 -0
dataeval/utils/data/selections/_reverse.py +18 -0
dataeval/utils/data/selections/_shuffle.py +29 -0
dataeval/utils/metadata.py +51 -376
dataeval/utils/torch/{gmm.py → _gmm.py} +4 -2
dataeval/utils/torch/{internal.py → _internal.py} +21 -51
dataeval/utils/torch/models.py +43 -2
dataeval/workflows/sufficiency.py +10 -9
{dataeval-0.76.1.dist-info → dataeval-0.81.0.dist-info}/METADATA +4 -1
dataeval-0.81.0.dist-info/RECORD +94 -0
dataeval/detectors/linters/clusterer.py +0 -512
dataeval/detectors/linters/merged_stats.py +0 -49
dataeval/detectors/ood/metadata_least_likely.py +0 -119
dataeval/interop.py +0 -69
dataeval/utils/dataset/__init__.py +0 -7
dataeval/utils/dataset/datasets.py +0 -412
dataeval/utils/dataset/read.py +0 -63
dataeval-0.76.1.dist-info/RECORD +0 -67
/dataeval/{log.py → _log.py} +0 -0
/dataeval/utils/torch/{blocks.py → _blocks.py} +0 -0
{dataeval-0.76.1.dist-info → dataeval-0.81.0.dist-info}/LICENSE.txt +0 -0
{dataeval-0.76.1.dist-info → dataeval-0.81.0.dist-info}/WHEEL +0 -0

dataeval/utils/_fast_mst.py ADDED Viewed

@@ -0,0 +1,189 @@
+# Adapted from fast_hdbscan python module
+# Original Authors: Leland McInnes <https://github.com/TutteInstitute/fast_hdbscan>
+# Adapted for DataEval by Ryan Wood
+# License: BSD 2-Clause
+__all__ = []
+import warnings
+import numba
+import numpy as np
+from sklearn.neighbors import NearestNeighbors
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore", category=FutureWarning)
+    from fast_hdbscan.disjoint_set import ds_find, ds_rank_create
+@numba.njit()
+def _ds_union_by_rank(disjoint_set, point, nbr):
+    y = ds_find(disjoint_set, point)
+    x = ds_find(disjoint_set, nbr)
+    if x == y:
+        return 0
+    if disjoint_set.rank[x] < disjoint_set.rank[y]:
+        x, y = y, x
+    disjoint_set.parent[y] = x
+    if disjoint_set.rank[x] == disjoint_set.rank[y]:
+        disjoint_set.rank[x] += 1
+    return 1
+@numba.njit(locals={"i": numba.types.uint32, "nbr": numba.types.uint32, "dist": numba.types.float32})
+def _init_tree(n_neighbors, n_distance):
+    # Initial graph to hold tree connections
+    tree = np.zeros((n_neighbors.size - 1, 3), dtype=np.float32)
+    disjoint_set = ds_rank_create(n_neighbors.size)
+    cluster_points = np.empty(n_neighbors.size, dtype=np.uint32)
+    int_tree = 0
+    for i in range(n_neighbors.size):
+        nbr = n_neighbors[i]
+        connect = _ds_union_by_rank(disjoint_set, i, nbr)
+        if connect == 1:
+            dist = n_distance[i]
+            tree[int_tree] = (np.float32(i), np.float32(nbr), dist)
+            int_tree += 1
+    for i in range(cluster_points.size):
+        cluster_points[i] = ds_find(disjoint_set, i)
+    return tree, int_tree, disjoint_set, cluster_points
+@numba.njit(locals={"i": numba.types.uint32, "nbr": numba.types.uint32})
+def _update_tree_by_distance(tree, int_tree, disjoint_set, n_neighbors, n_distance):
+    cluster_points = np.empty(n_neighbors.size, dtype=np.uint32)
+    sort_dist = np.argsort(n_distance)
+    dist_sorted = n_distance[sort_dist]
+    nbrs_sorted = n_neighbors[sort_dist]
+    points = np.arange(n_neighbors.size)
+    point_sorted = points[sort_dist]
+    for i in range(n_neighbors.size):
+        point = point_sorted[i]
+        nbr = nbrs_sorted[i]
+        connect = _ds_union_by_rank(disjoint_set, point, nbr)
+        if connect == 1:
+            dist = dist_sorted[i]
+            tree[int_tree] = (np.float32(point), np.float32(nbr), dist)
+            int_tree += 1
+    for i in range(cluster_points.size):
+        cluster_points[i] = ds_find(disjoint_set, i)
+    return tree, int_tree, disjoint_set, cluster_points
+@numba.njit(locals={"i": numba.types.uint32})
+def _cluster_edges(tracker, last_idx, cluster_distances):
+    cluster_ids = np.unique(tracker)
+    edge_points = []
+    for idx in range(cluster_ids.size):
+        cluster_points = np.nonzero(tracker == cluster_ids[idx])[0]
+        cluster_size = cluster_points.size
+        cluster_mean = cluster_distances[: last_idx + 1, cluster_points].mean()
+        cluster_std = cluster_distances[: last_idx + 1, cluster_points].std()
+        threshold = cluster_mean + cluster_std
+        points_mean = np.empty_like(cluster_points, dtype=np.float32)
+        for i in range(cluster_size):
+            points_mean[i] = cluster_distances[: last_idx + 1, cluster_points[i]].mean()
+        pts_to_add = cluster_points[np.nonzero(points_mean > threshold)[0]]
+        threshold = int(cluster_size * 0.01) if np.floor(np.log10(cluster_size)) > 2 else int(cluster_size * 0.1)
+        threshold = max(10, threshold)
+        if pts_to_add.size > threshold:
+            edge_points.append(pts_to_add)
+        else:
+            edge_points.append(cluster_points)
+    return edge_points
+def _compute_nn(dataA, dataB, k):
+    distances, neighbors = NearestNeighbors(n_neighbors=k + 1, algorithm="brute").fit(dataA).kneighbors(dataB)
+    neighbors = np.array(neighbors[:, 1 : k + 1], dtype=np.int32)
+    distances = np.array(distances[:, 1 : k + 1], dtype=np.float32)
+    return neighbors, distances
+def _calculate_cluster_neighbors(data, groups, point_array):
+    """Rerun nearest neighbor based on clusters"""
+    cluster_neighbors = np.zeros(point_array.size, dtype=np.uint32)
+    cluster_nbr_distances = np.full(point_array.size, np.inf, dtype=np.float32)
+    for i in range(len(groups)):
+        selectionA = groups[i]
+        groupA = data[selectionA]
+        selectionB = np.concatenate([arr for j, arr in enumerate(groups) if j != i])
+        groupB = data[selectionB]
+        new_neighbors, new_distances = _compute_nn(groupB, groupA, 2)
+        cluster_neighbors[selectionA] = selectionB[new_neighbors[:, 1]]
+        cluster_nbr_distances[selectionA] = new_distances[:, 1]
+    return cluster_neighbors, cluster_nbr_distances
+def minimum_spanning_tree(data, neighbors, distances):
+    # Transpose arrays to get number of samples along a row
+    k_neighbors = neighbors.T.astype(np.uint32).copy()
+    k_distances = distances.T.astype(np.float32).copy()
+    # Create cluster merging tracker
+    merge_tracker = np.full((k_neighbors.shape[0] + 1, k_neighbors.shape[1]), -1, dtype=np.int32)
+    # Initialize tree
+    tree, int_tree, tree_disjoint_set, merge_tracker[0] = _init_tree(k_neighbors[0], k_distances[0])
+    # Loop through all of the neighbors, updating the tree
+    last_idx = 0
+    for i in range(1, k_neighbors.shape[0]):
+        tree, int_tree, tree_disjoint_set, merge_tracker[i] = _update_tree_by_distance(
+            tree, int_tree, tree_disjoint_set, k_neighbors[i], k_distances[i]
+        )
+        last_idx = i
+        if (merge_tracker[i] == merge_tracker[i - 1]).all():
+            last_idx -= 1
+            break
+    # Identify final clusters
+    cluster_ids = np.unique(merge_tracker[last_idx])
+    if cluster_ids.size > 1:
+        # Determining the edge points
+        edge_points = _cluster_edges(merge_tracker[last_idx], last_idx, k_distances)
+        # Run nearest neighbor again between clusters to reach single cluster
+        additional_neighbors, additional_distances = _calculate_cluster_neighbors(
+            data, edge_points, merge_tracker[last_idx]
+        )
+        # Update clusters
+        last_idx += 1
+        tree, int_tree, tree_disjoint_set, merge_tracker[last_idx] = _update_tree_by_distance(
+            tree, int_tree, tree_disjoint_set, additional_neighbors, additional_distances
+        )
+    return tree
+def calculate_neighbor_distances(data: np.ndarray, k: int = 10):
+    # Have the potential to add in other distance calculations - supported calculations:
+    # https://github.com/lmcinnes/pynndescent/blob/master/pynndescent/pynndescent_.py#L524
+    try:
+        from pynndescent import NNDescent
+        max_descent = 30 if k <= 20 else k + 16
+        index = NNDescent(
+            data,
+            metric="euclidean",
+            n_neighbors=max_descent,
+        )
+        neighbors, distances = index.neighbor_graph
+    except ImportError:
+        distances, neighbors = NearestNeighbors(n_neighbors=k + 1, algorithm="brute").fit(data).kneighbors(data)
+    neighbors = np.array(neighbors[:, 1 : k + 1], dtype=np.int32)
+    distances = np.array(distances[:, 1 : k + 1], dtype=np.float32)
+    return neighbors, distances

dataeval/utils/{image.py → _image.py} RENAMED Viewed

@@ -2,17 +2,19 @@ from __future__ import annotations
 __all__ = []
-from typing import Any, NamedTuple
+from dataclasses import dataclass
+from typing import Any
 import numpy as np
-from numpy.typing import ArrayLike, NDArray
+from numpy.typing import NDArray
 from scipy.signal import convolve2d
 EDGE_KERNEL = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=np.int8)
 BIT_DEPTH = (1, 8, 12, 16, 32)
-class BitDepth(NamedTuple):
+@dataclass
+class BitDepth:
     depth: int
     pmin: float | int
     pmax: float | int
@@ -59,7 +61,7 @@ def normalize_image_shape(image: NDArray[Any]) -> NDArray[Any]:
         raise ValueError("Images must have 2 or more dimensions.")
-def edge_filter(image: ArrayLike, offset: float = 0.5) -> NDArray[np.uint8]:
+def edge_filter(image: NDArray[Any], offset: float = 0.5) -> NDArray[np.uint8]:
     """
     Returns the image filtered using a 3x3 edge detection kernel:
     [[ -1, -1, -1 ],

dataeval/utils/_method.py ADDED Viewed

@@ -0,0 +1,18 @@
+from __future__ import annotations
+import sys
+from typing import Callable, TypeVar
+if sys.version_info >= (3, 10):
+    from typing import ParamSpec
+else:
+    from typing_extensions import ParamSpec
+P = ParamSpec("P")
+R = TypeVar("R")
+def get_method(method_map: dict[str, Callable[P, R]], method: str) -> Callable[P, R]:
+    if method not in method_map:
+        raise ValueError(f"Specified method {method} is not a valid method: {method_map}.")
+    return method_map[method]

dataeval/utils/{shared.py → _mst.py} RENAMED Viewed

@@ -2,53 +2,17 @@ from __future__ import annotations
 __all__ = []
-import sys
-from typing import Any, Callable, Literal, TypeVar
+from typing import Any, Literal
-import numpy as np
-from numpy.typing import ArrayLike, NDArray
+from numpy.typing import NDArray
 from scipy.sparse import csr_matrix
 from scipy.sparse.csgraph import minimum_spanning_tree as mst
 from scipy.spatial.distance import pdist, squareform
 from sklearn.neighbors import NearestNeighbors
-if sys.version_info >= (3, 10):
-    from typing import ParamSpec
-else:
-    from typing_extensions import ParamSpec
-from dataeval.interop import as_numpy
+from dataeval.utils._array import flatten
 EPSILON = 1e-5
-HASH_SIZE = 8
-MAX_FACTOR = 4
-P = ParamSpec("P")
-R = TypeVar("R")
-def get_method(method_map: dict[str, Callable[P, R]], method: str) -> Callable[P, R]:
-    if method not in method_map:
-        raise ValueError(f"Specified method {method} is not a valid method: {method_map}.")
-    return method_map[method]
-def flatten(array: ArrayLike) -> NDArray[Any]:
-    """
-    Flattens input array from (N, ... ) to (N, -1) where all samples N have all data in their last dimension
-    Parameters
-    ----------
-    X : NDArray, shape - (N, ... )
-        Input array
-    Returns
-    -------
-    NDArray, shape - (N, -1)
-    """
-    nparr = as_numpy(array)
-    return nparr.reshape((nparr.shape[0], -1))
 def minimum_spanning_tree(X: NDArray[Any]) -> Any:
@@ -73,32 +37,6 @@ def minimum_spanning_tree(X: NDArray[Any]) -> Any:
     return mst(eudist_csr)
-def get_classes_counts(labels: NDArray[np.int_]) -> tuple[int, int]:
-    """
-    Returns the classes and counts of from an array of labels
-    Parameters
-    ----------
-    label : NDArray
-        Numpy labels array
-    Returns
-    -------
-        Classes and counts
-    Raises
-    ------
-    ValueError
-        If the number of unique classes is less than 2
-    """
-    classes, counts = np.unique(labels, return_counts=True)
-    M = len(classes)
-    if M < 2:
-        raise ValueError("Label vector contains less than 2 classes!")
-    N = int(np.sum(counts))
-    return M, N
 def compute_neighbors(
     A: NDArray[Any],
     B: NDArray[Any],

dataeval/utils/{plot.py → _plot.py} RENAMED Viewed

@@ -6,9 +6,9 @@ import contextlib
 from typing import Any
 import numpy as np
-from numpy.typing import ArrayLike
-from dataeval.interop import to_numpy
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import to_numpy
 with contextlib.suppress(ImportError):
     from matplotlib.figure import Figure
@@ -171,7 +171,7 @@ def histogram_plot(
         data_dict,
     ):
         # Plot the histogram for the chosen metric
-        ax.hist(data_dict[metric], bins=20, log=log)
+        ax.hist(data_dict[metric].astype(np.float64), bins=20, log=log)
         # Add labels to the histogram
         ax.set_title(metric)
@@ -229,7 +229,7 @@ def channel_histogram_plot(
         # Plot the histogram for the chosen metric
         data = data_dict[metric][ch_mask].reshape(-1, max_channels)
         ax.hist(
-            data,
+            data.astype(np.float64),
             bins=20,
             density=True,
             log=log,

dataeval/utils/data/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""Provides utility functions for interacting with Computer Vision datasets."""
+__all__ = [
+    "collate",
+    "datasets",
+    "Embeddings",
+    "Images",
+    "Metadata",
+    "Select",
+    "SplitDatasetOutput",
+    "Targets",
+    "split_dataset",
+]
+from dataeval.utils.data._embeddings import Embeddings
+from dataeval.utils.data._images import Images
+from dataeval.utils.data._metadata import Metadata
+from dataeval.utils.data._selection import Select
+from dataeval.utils.data._split import SplitDatasetOutput, split_dataset
+from dataeval.utils.data._targets import Targets
+from . import collate, datasets

dataeval/utils/data/_embeddings.py ADDED Viewed

@@ -0,0 +1,105 @@
+from __future__ import annotations
+__all__ = []
+import math
+from typing import Any, Iterator, Sequence
+import torch
+from torch.utils.data import DataLoader, Subset
+from tqdm import tqdm
+from dataeval.config import get_device
+from dataeval.typing import TArray
+from dataeval.utils.data._types import Dataset
+from dataeval.utils.torch.models import SupportsEncode
+class Embeddings:
+    """
+    Collection of image embeddings from a dataset.
+    Embeddings are accessed by index or slice and are only loaded on-demand.
+    Parameters
+    ----------
+    dataset : ImageClassificationDataset or ObjectDetectionDataset
+        Dataset to access original images from.
+    batch_size : int, optional
+        Batch size to use when encoding images.
+    model : torch.nn.Module, optional
+        Model to use for encoding images.
+    device : torch.device, optional
+        Device to use for encoding images.
+    verbose : bool, optional
+        Whether to print progress bar when encoding images.
+    """
+    device: torch.device
+    batch_size: int
+    verbose: bool
+    def __init__(
+        self,
+        dataset: Dataset[TArray, Any],
+        batch_size: int,
+        indices: Sequence[int] | None = None,
+        model: torch.nn.Module | None = None,
+        device: torch.device | str | None = None,
+        verbose: bool = False,
+    ) -> None:
+        self.device = get_device(device)
+        self.batch_size = batch_size
+        self.verbose = verbose
+        self._dataset = dataset
+        self._indices = indices if indices is not None else range(len(dataset))
+        model = torch.nn.Flatten() if model is None else model
+        self._model = model.to(self.device).eval()
+        self._encoder = model.encode if isinstance(model, SupportsEncode) else model
+        self._collate_fn = lambda datum: [torch.as_tensor(i) for i, _, _ in datum]
+    def to_tensor(self) -> torch.Tensor:
+        """
+        Converts entire dataset to embeddings.
+        Warning
+        -------
+        Will process the entire dataset in batches and return
+        embeddings as a single Tensor in memory.
+        Returns
+        -------
+        torch.Tensor
+        """
+        return self[:]
+    # Reduce overhead cost by not tracking tensor gradients
+    @torch.no_grad
+    def _batch(self, indices: Sequence[int]) -> Iterator[torch.Tensor]:
+        # manual batching
+        dataloader = DataLoader(Subset(self._dataset, indices), batch_size=self.batch_size, collate_fn=self._collate_fn)
+        for i, images in (
+            tqdm(enumerate(dataloader), total=math.ceil(len(indices) / self.batch_size), desc="Batch processing")
+            if self.verbose
+            else enumerate(dataloader)
+        ):
+            embeddings = self._encoder(torch.stack(images).to(self.device))
+            yield embeddings
+    def __getitem__(self, key: int | slice | list[int]) -> torch.Tensor:
+        if isinstance(key, list):
+            return torch.vstack(list(self._batch(key))).to(self.device)
+        if isinstance(key, slice):
+            return torch.vstack(list(self._batch(range(len(self._dataset))[key]))).to(self.device)
+        elif isinstance(key, int):
+            return self._encoder(torch.as_tensor(self._dataset[key][0]).to(self.device))
+        raise TypeError("Invalid argument type.")
+    def __iter__(self) -> Iterator[torch.Tensor]:
+        # process in batches while yielding individual embeddings
+        for batch in self._batch(range(len(self._dataset))):
+            yield from batch
+    def __len__(self) -> int:
+        return len(self._dataset)

dataeval/utils/data/_images.py ADDED Viewed

@@ -0,0 +1,65 @@
+from __future__ import annotations
+__all__ = []
+from typing import Any, Generic, Iterator, Sequence, overload
+from dataeval.typing import TArray
+from dataeval.utils.data._types import Dataset
+class Images(Generic[TArray]):
+    """
+    Collection of image data from a dataset.
+    Images are accessed by index or slice and are only loaded on-demand.
+    Parameters
+    ----------
+    dataset : ImageClassificationDataset or ObjectDetectionDataset
+        Dataset to access images from.
+    """
+    def __init__(
+        self,
+        dataset: Dataset[TArray, Any],
+    ) -> None:
+        self._dataset = dataset
+    def to_list(self) -> Sequence[TArray]:
+        """
+        Converts entire dataset to a sequence of images.
+        Warning
+        -------
+        Will load the entire dataset and return the images as a
+        single sequence of images in memory.
+        Returns
+        -------
+        list[TArray]
+        """
+        return self[:]
+    @overload
+    def __getitem__(self, key: slice | list[int]) -> Sequence[TArray]: ...
+    @overload
+    def __getitem__(self, key: int) -> TArray: ...
+    def __getitem__(self, key: int | slice | list[int]) -> Sequence[TArray] | TArray:
+        if isinstance(key, list):
+            return [self._dataset[i][0] for i in key]
+        if isinstance(key, slice):
+            indices = list(range(len(self._dataset))[key])
+            return [self._dataset[i][0] for i in indices]
+        elif isinstance(key, int):
+            return self._dataset[key][0]
+        raise TypeError("Invalid argument type.")
+    def __iter__(self) -> Iterator[TArray]:
+        for i in range(len(self._dataset)):
+            yield self._dataset[i][0]
+    def __len__(self) -> int:
+        return len(self._dataset)

dataeval 0.76.1__py3-none-any.whl → 0.81.0__py3-none-any.whl

dataeval 0.76.1__py3-none-any.whl → 0.81.0__py3-none-any.whl