dataeval 0.70.0__py3-none-any.whl → 0.70.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +6 -6
- dataeval/_internal/datasets.py +235 -131
- dataeval/_internal/detectors/clusterer.py +2 -0
- dataeval/_internal/detectors/drift/base.py +2 -2
- dataeval/_internal/detectors/drift/mmd.py +1 -1
- dataeval/_internal/detectors/duplicates.py +2 -0
- dataeval/_internal/detectors/ood/ae.py +5 -3
- dataeval/_internal/detectors/ood/aegmm.py +6 -4
- dataeval/_internal/detectors/ood/base.py +12 -7
- dataeval/_internal/detectors/ood/llr.py +6 -4
- dataeval/_internal/detectors/ood/vae.py +5 -3
- dataeval/_internal/detectors/ood/vaegmm.py +6 -4
- dataeval/_internal/detectors/outliers.py +4 -2
- dataeval/_internal/metrics/balance.py +4 -2
- dataeval/_internal/metrics/ber.py +2 -0
- dataeval/_internal/metrics/coverage.py +4 -0
- dataeval/_internal/metrics/divergence.py +6 -2
- dataeval/_internal/metrics/diversity.py +8 -6
- dataeval/_internal/metrics/parity.py +8 -6
- dataeval/_internal/metrics/stats/base.py +2 -2
- dataeval/_internal/metrics/stats/datasetstats.py +2 -0
- dataeval/_internal/metrics/stats/dimensionstats.py +2 -0
- dataeval/_internal/metrics/stats/hashstats.py +2 -0
- dataeval/_internal/metrics/stats/labelstats.py +1 -1
- dataeval/_internal/metrics/stats/pixelstats.py +4 -2
- dataeval/_internal/metrics/stats/visualstats.py +4 -2
- dataeval/_internal/metrics/uap.py +6 -2
- dataeval/_internal/metrics/utils.py +2 -2
- dataeval/_internal/models/pytorch/autoencoder.py +5 -5
- dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
- dataeval/_internal/utils.py +11 -16
- dataeval/_internal/workflows/sufficiency.py +44 -33
- dataeval/detectors/__init__.py +4 -0
- dataeval/detectors/drift/__init__.py +8 -3
- dataeval/detectors/drift/kernels/__init__.py +4 -0
- dataeval/detectors/drift/updates/__init__.py +4 -0
- dataeval/detectors/linters/__init__.py +15 -4
- dataeval/detectors/ood/__init__.py +14 -2
- dataeval/metrics/__init__.py +5 -0
- dataeval/metrics/bias/__init__.py +13 -4
- dataeval/metrics/estimators/__init__.py +8 -8
- dataeval/metrics/stats/__init__.py +17 -6
- dataeval/utils/__init__.py +16 -3
- dataeval/utils/tensorflow/__init__.py +11 -0
- dataeval/utils/torch/__init__.py +12 -0
- dataeval/utils/torch/datasets/__init__.py +7 -0
- dataeval/workflows/__init__.py +4 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/METADATA +10 -2
- dataeval-0.70.1.dist-info/RECORD +80 -0
- dataeval/tensorflow/__init__.py +0 -3
- dataeval/torch/__init__.py +0 -3
- dataeval-0.70.0.dist-info/RECORD +0 -79
- /dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/models/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/WHEEL +0 -0
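The renamed `__init__.py` paths above move the framework-specific utilities under `dataeval.utils`. A minimal migration sketch, assuming the names these modules re-export are unchanged by the move:

    # 0.70.0 import roots (removed):  dataeval.torch.*, dataeval.tensorflow.*
    # 0.70.1 import roots (added):    dataeval.utils.torch.*, dataeval.utils.tensorflow.*
    import dataeval.utils.tensorflow.models as tf_models
    import dataeval.utils.torch.models as torch_models
    import dataeval.utils.torch.datasets as torch_datasets  # new package in 0.70.1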
dataeval/_internal/detectors/ood/base.py
CHANGED
@@ -10,7 +10,7 @@ from __future__ import annotations

 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Callable, Literal,
+from typing import Callable, Literal, cast

 import keras
 import numpy as np
@@ -26,6 +26,9 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class OODOutput(OutputMetadata):
     """
+    Output class for predictions from :class:`OOD_AE`, :class:`OOD_AEGMM`, :class:`OOD_LLR`,
+    :class:`OOD_VAE`, and :class:`OOD_VAEGMM` out-of-distribution detectors
+
     Attributes
     ----------
     is_ood : NDArray
@@ -41,9 +44,11 @@ class OODOutput(OutputMetadata):
     feature_score: NDArray[np.float32] | None


-
+@dataclass(frozen=True)
+class OODScoreOutput(OutputMetadata):
     """
-
+    Output class for instance and feature scores from :class:`OOD_AE`, :class:`OOD_AEGMM`,
+    :class:`OOD_LLR`, :class:`OOD_VAE`, and :class:`OOD_VAEGMM` out-of-distribution detectors

     Parameters
     ----------
@@ -76,7 +81,7 @@ class OODBase(ABC):
     def __init__(self, model: keras.Model) -> None:
         self.model = model

-        self._ref_score:
+        self._ref_score: OODScoreOutput
         self._threshold_perc: float
         self._data_info: tuple[tuple, type] | None = None

@@ -102,7 +107,7 @@ class OODBase(ABC):
         self._validate(X)

     @abstractmethod
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) ->
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         """
         Compute the out-of-distribution (OOD) scores for a given dataset.

@@ -116,7 +121,7 @@ class OODBase(ABC):

         Returns
         -------
-
+        OODScoreOutput
            An object containing the instance-level and feature-level OOD scores.
         """

@@ -197,7 +202,7 @@ class OODBase(ABC):
         # compute outlier scores
         score = self.score(X, batch_size=batch_size)
         ood_pred = score.get(ood_type) > self._threshold_score(ood_type)
-        return OODOutput(is_ood=ood_pred, **score.
+        return OODOutput(is_ood=ood_pred, **score.dict())


 class OODGMMBase(OODBase):
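Net effect of the hunks above: the score container is now the frozen dataclass `OODScoreOutput`, and `predict` rebuilds `OODOutput` from it via `dict()`. A hedged usage sketch, assuming `detector` is an already-fit `OODBase` subclass, `images` is a NumPy batch, and `"instance"` is a valid `ood_type` key for `get()`:

    score = detector.score(images)            # OODScoreOutput dataclass
    instance_scores = score.get("instance")   # the accessor predict() uses internally
    result = detector.predict(images)         # OODOutput(is_ood=..., **score.dict())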
dataeval/_internal/detectors/ood/llr.py
CHANGED
@@ -18,11 +18,12 @@ from keras.layers import Input
 from keras.models import Model
 from numpy.typing import ArrayLike, NDArray

-from dataeval._internal.detectors.ood.base import OODBase,
+from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
 from dataeval._internal.models.tensorflow.trainer import trainer
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata


 def build_model(
@@ -124,7 +125,7 @@ class OOD_LLR(OODBase):
         self.sequential = sequential
         self.log_prob = log_prob

-        self._ref_score:
+        self._ref_score: OODScoreOutput
         self._threshold_perc: float
         self._data_info: tuple[tuple, type] | None = None

@@ -279,12 +280,13 @@ class OOD_LLR(OODBase):
         logp_b = logp_fn(self.dist_b, X, return_per_feature=return_per_feature, batch_size=batch_size)
         return logp_s - logp_b

+    @set_metadata("dataeval.detectors")
     def score(
         self,
         X: ArrayLike,
         batch_size: int = int(1e10),
-    ) ->
+    ) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
         fscore = -self._llr(X, True, batch_size=batch_size)
         iscore = -self._llr(X, False, batch_size=batch_size)
-        return
+        return OODScoreOutput(iscore, fscore)
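`score` is now wrapped in `@set_metadata("dataeval.detectors")`, dataeval's internal output decorator. Purely as an illustration of the pattern (not dataeval's actual implementation), a decorator of this shape tags the returned frozen dataclass with the namespace that produced it:

    from functools import wraps

    def set_metadata(namespace: str):
        def decorator(fn):
            @wraps(fn)
            def wrapper(*args, **kwargs):
                output = fn(*args, **kwargs)
                # frozen dataclasses require object.__setattr__;
                # the attribute name here is hypothetical
                object.__setattr__(output, "_meta_namespace", namespace)
                return output
            return wrapper
        return decorator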
dataeval/_internal/detectors/ood/vae.py
CHANGED
@@ -15,11 +15,12 @@ import numpy as np
 import tensorflow as tf
 from numpy.typing import ArrayLike

-from dataeval._internal.detectors.ood.base import OODBase,
+from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.autoencoder import VAE
 from dataeval._internal.models.tensorflow.losses import Elbo
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata


 class OOD_VAE(OODBase):
@@ -67,7 +68,8 @@ class OOD_VAE(OODBase):
         loss_fn = Elbo(0.05)
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)

-
+    @set_metadata("dataeval.detectors")
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := to_numpy(X))

         # sample reconstructed instances
@@ -86,4 +88,4 @@ class OOD_VAE(OODBase):
         sorted_fscore_perc = sorted_fscore[:, -n_score_features:]
         iscore = np.mean(sorted_fscore_perc, axis=1)

-        return
+        return OODScoreOutput(iscore, fscore)
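The tail of `OOD_VAE.score` aggregates per-feature scores into an instance score by averaging the top-k features per image. A toy rerun of that arithmetic (values and k are made up):

    import numpy as np

    fscore = np.array([[0.1, 0.9, 0.3],
                       [0.2, 0.2, 0.2]])       # (n_images, n_features)
    n_score_features = 2
    sorted_fscore = np.sort(fscore, axis=1)
    iscore = np.mean(sorted_fscore[:, -n_score_features:], axis=1)
    print(iscore)                              # [0.6 0.2]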
dataeval/_internal/detectors/ood/vaegmm.py
CHANGED
@@ -15,12 +15,13 @@ import numpy as np
 import tensorflow as tf
 from numpy.typing import ArrayLike

-from dataeval._internal.detectors.ood.base import OODGMMBase,
+from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.autoencoder import VAEGMM
 from dataeval._internal.models.tensorflow.gmm import gmm_energy
 from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata


 class OOD_VAEGMM(OODGMMBase):
@@ -53,7 +54,8 @@ class OOD_VAEGMM(OODGMMBase):
         loss_fn = LossGMM(elbo=Elbo(0.05))
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)

-
+    @set_metadata("dataeval.detectors")
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         """
         Compute the out-of-distribution (OOD) score for a given dataset.

@@ -67,7 +69,7 @@ class OOD_VAEGMM(OODGMMBase):

         Returns
         -------
-
+        OODScoreOutput
            An object containing the instance-level OOD score.

         Note
@@ -84,4 +86,4 @@ class OOD_VAEGMM(OODGMMBase):
         energy, _ = gmm_energy(z, self.gmm_params, return_mean=False)
         energy_samples = energy.numpy().reshape((-1, self.samples))  # type: ignore
         iscore = np.mean(energy_samples, axis=-1)
-        return
+        return OODScoreOutput(iscore)
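`OOD_VAEGMM` draws several VAE samples per image, so its final lines average the GMM energies over the sample axis. The same reshape-and-mean on toy numbers:

    import numpy as np

    samples = 3
    energy = np.array([1.0, 2.0, 3.0, 10.0, 10.0, 10.0])   # 2 images x 3 samples, flattened
    iscore = np.mean(energy.reshape((-1, samples)), axis=-1)
    print(iscore)                                           # [ 2. 10.]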
dataeval/_internal/detectors/outliers.py
CHANGED
@@ -22,6 +22,8 @@ TIndexIssueMap = TypeVar("TIndexIssueMap", IndexIssueMap, list[IndexIssueMap])
 @dataclass(frozen=True)
 class OutliersOutput(Generic[TIndexIssueMap], OutputMetadata):
     """
+    Output class for :class:`Outliers` lint detector
+
     Attributes
     ----------
     issues : dict[int, dict[str, float]] | list[dict[int, dict[str, float]]]
@@ -86,8 +88,8 @@ class Outliers:
     --------
     Duplicates

-
-
+    Note
+    ----
     There are 3 different statistical methods:

     - zscore
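The first method the new `Note` section lists is zscore. A toy illustration of z-score flagging (data and threshold are made up; dataeval's own defaults may differ):

    import numpy as np

    values = np.array([1.0, 1.1, 0.9, 1.0, 6.0])
    z = (values - values.mean()) / values.std()
    flagged = np.flatnonzero(np.abs(z) > 1.5)   # -> array([4])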
dataeval/_internal/metrics/balance.py
CHANGED
@@ -15,6 +15,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class BalanceOutput(OutputMetadata):
     """
+    Output class for :func:`balance` bias metric
+
     Attributes
     ----------
     balance : NDArray[np.float64]
@@ -71,8 +73,8 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
         (num_factors+1) x (num_factors+1) estimate of mutual information
         between num_factors metadata factors and class label. Symmetry is enforced.

-
-
+    Note
+    ----
     We use `mutual_info_classif` from sklearn since class label is categorical.
     `mutual_info_classif` outputs are consistent up to O(1e-4) and depend on a random
     seed. MI is computed differently for categorical and continuous variables, and
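The note names sklearn's `mutual_info_classif` as the MI estimator for the categorical class label. A self-contained sketch of that call on toy factors (the encoding shown is illustrative, not dataeval's preprocessing):

    import numpy as np
    from sklearn.feature_selection import mutual_info_classif

    factors = np.array([[0, 1], [0, 0], [1, 1], [1, 0]])  # two discrete metadata factors
    labels = np.array([0, 0, 1, 1])
    mi = mutual_info_classif(factors, labels, discrete_features=True, random_state=0)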
dataeval/_internal/metrics/coverage.py
CHANGED
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import math
 from dataclasses import dataclass
 from typing import Literal
@@ -14,6 +16,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class CoverageOutput(OutputMetadata):
     """
+    Output class for :func:`coverage` bias metric
+
     Attributes
     ----------
     indices : NDArray
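Several modules in this release gain `from __future__ import annotations`, which defers annotation evaluation (PEP 563) so that `X | None` union hints parse on Python versions before 3.10. A minimal demonstration:

    from __future__ import annotations  # must precede all other statements

    import numpy as np

    # Without the future import, `np.ndarray | None` is evaluated eagerly and
    # raises TypeError on Python <= 3.9; with it, annotations stay as strings.
    def apply_mask(mask: np.ndarray | None = None) -> None:
        ...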
dataeval/_internal/metrics/divergence.py
CHANGED
@@ -3,6 +3,8 @@ This module contains the implementation of HP Divergence
 using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
 """

+from __future__ import annotations
+
 from dataclasses import dataclass
 from typing import Literal

@@ -17,6 +19,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DivergenceOutput(OutputMetadata):
     """
+    Output class for :func:`divergence` estimator metric
+
     Attributes
     ----------
     divergence : float
@@ -96,8 +100,8 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
     DivergenceOutput
         The divergence value (0.0..1.0) and the number of differing edges between the datasets

-
-
+    Note
+    ----
     The divergence value indicates how similar the 2 datasets are
     with 0 indicating approximately identical data distributions.
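A hedged usage sketch for the estimator documented above, assuming `divergence` is exported from `dataeval.metrics.estimators` (consistent with this release's `__init__` updates):

    import numpy as np
    from dataeval.metrics.estimators import divergence

    a = np.random.default_rng(0).normal(size=(100, 16))
    b = np.random.default_rng(1).normal(size=(100, 16))
    out = divergence(a, b, method="FNN")
    print(out.divergence)   # 0.0 means approximately identical distributions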
dataeval/_internal/metrics/diversity.py
CHANGED
@@ -13,6 +13,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DiversityOutput(OutputMetadata):
     """
+    Output class for :func:`diversity` bias metric
+
     Attributes
     ----------
     diversity_index : NDArray[np.float64]
@@ -52,8 +54,8 @@ def diversity_shannon(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts

-
-
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically. See `numpy.histogram` for details.

     Returns
@@ -103,8 +105,8 @@ def diversity_simpson(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts

-
-
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically. See
     numpy.histogram for details.
     If there is only one category, the diversity index takes a value of 0.
@@ -162,8 +164,8 @@ def diversity(
     method: Literal["shannon", "simpson"], default "simpson"
         Indicates which diversity index should be computed

-
-
+    Note
+    ----
     - For continuous variables, histogram bins are chosen automatically. See numpy.histogram for details.
     - The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
     - If there is only one category, the diversity index takes a value of 1 = 1/N = 1/1. Entropy will take a value of 0.
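For reference, the two indices the docstrings name, computed on toy category counts (the normalizations shown are illustrative; dataeval's exact scaling may differ):

    import numpy as np

    counts = np.array([5, 3, 2])
    p = counts / counts.sum()
    shannon = -np.sum(p * np.log(p)) / np.log(p.size)   # normalized Shannon entropy
    simpson = (1.0 / np.sum(p**2)) / p.size             # inverse Simpson index over N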
dataeval/_internal/metrics/parity.py
CHANGED
@@ -17,6 +17,8 @@ TData = TypeVar("TData", np.float64, NDArray[np.float64])
 @dataclass(frozen=True)
 class ParityOutput(Generic[TData], OutputMetadata):
     """
+    Output class for :func:`parity` and :func:`label_parity` bias metrics
+
     Attributes
     ----------
     score : np.float64 | NDArray[np.float64]
@@ -137,8 +139,8 @@ def normalize_expected_dist(expected_dist: NDArray, observed_dist: NDArray) -> N
     ValueError
         If the expected distribution is all zeros.

-
-
+    Note
+    ----
     The function ensures that the total number of labels in the expected distribution matches the total
     number of labels in the observed distribution by scaling the expected distribution.
     """
@@ -224,8 +226,8 @@ def label_parity(
         of unique classes between the observed and expected distributions.


-
-
+    Note
+    ----
     - Providing ``num_classes`` can be helpful if there are classes with zero instances in one of the distributions.
     - The function first validates the observed distribution and normalizes the expected distribution so that it
       has the same total number of labels as the observed distribution.
@@ -317,8 +319,8 @@ def parity(
        factor values either 0 times or at least 5 times. Alternatively, continuous-valued factors can be digitized
        into fewer bins.

-
-
+    Note
+    ----
     - Each key of the ``continuous_factor_bincounts`` dictionary must occur as a key in data_factors.
     - A high score with a low p-value suggests that a metadata factor is strongly correlated with a class label.
     - The function creates a contingency matrix for each factor, where each entry represents the frequency of a
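The parity notes describe building a contingency matrix per factor and applying a chi-square test; scipy's `chi2_contingency` is the standard call for that step, shown here on a made-up matrix as an illustration rather than dataeval's exact code path:

    import numpy as np
    from scipy.stats import chi2_contingency

    # rows: values of one metadata factor; columns: class labels
    contingency = np.array([[20, 5],
                            [10, 25]])
    chi2, p_value, dof, expected = chi2_contingency(contingency)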
dataeval/_internal/metrics/stats/base.py
CHANGED
@@ -179,8 +179,8 @@ def run_stats(
     The dictionary keys correspond to the names of the statistics, and the values are NumPy arrays
     with the results of the computations.

-
-
+    Note
+    ----
     - The function performs image normalization (rescaling the image values)
       before applying some of the statistics.
     - Pixel-level statistics (e.g., brightness, entropy) are computed after
dataeval/_internal/metrics/stats/datasetstats.py
CHANGED
@@ -16,6 +16,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DatasetStatsOutput(OutputMetadata):
     """
+    Output class for :func:`datasetstats` stats metric
+
     This class represents the outputs of various stats functions against a single
     dataset, such that each index across all stat outputs are representative of
     the same source image. Modifying or mixing outputs will result in inaccurate
dataeval/_internal/metrics/stats/labelstats.py
CHANGED
@@ -13,7 +13,7 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class LabelStatsOutput(OutputMetadata):
     """
-    Output class for
+    Output class for :func:`labelstats` stats metric

     Attributes
     ----------
dataeval/_internal/metrics/stats/pixelstats.py
CHANGED
@@ -36,6 +36,8 @@ class PixelStatsProcessor(StatsProcessor):
 @dataclass(frozen=True)
 class PixelStatsOutput(BaseStatsOutput):
     """
+    Output class for :func:`pixelstats` stats metric
+
     Attributes
     ----------
     mean : NDArray[np.float16]
@@ -93,8 +95,8 @@ def pixelstats(
     --------
     dimensionstats, visualstats, Outliers

-
-
+    Note
+    ----
     - All metrics are scaled based on the perceived bit depth (which is derived from the largest pixel value)
       to allow for better comparison between images stored in different formats and different resolutions.
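On the bit-depth note: an illustrative version of "perceived bit depth" scaling, inferring depth from the largest pixel value (the exact rule dataeval applies may differ):

    import numpy as np

    image = np.array([[0, 128, 255]], dtype=np.uint16)  # stored wide, but 8-bit content
    bit_depth = int(np.ceil(np.log2(image.max() + 1)))  # -> 8
    scaled = image / (2**bit_depth - 1)                 # values rescaled to [0, 1]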
dataeval/_internal/metrics/stats/visualstats.py
CHANGED
@@ -43,6 +43,8 @@ class VisualStatsProcessor(StatsProcessor):
 @dataclass(frozen=True)
 class VisualStatsOutput(BaseStatsOutput):
     """
+    Output class for :func:`visualstats` stats metric
+
     Attributes
     ----------
     brightness : NDArray[np.float16]
@@ -100,8 +102,8 @@ def visualstats(
     --------
     dimensionstats, pixelstats, Outliers

-
-
+    Note
+    ----
     - `zeros` and `missing` are presented as a percentage of total pixel counts

     Examples
dataeval/_internal/metrics/uap.py
CHANGED
@@ -4,6 +4,8 @@ FR Test Statistic based estimate for the upperbound
 average precision using empirical mean precision
 """

+from __future__ import annotations
+
 from dataclasses import dataclass

 from numpy.typing import ArrayLike
@@ -16,6 +18,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class UAPOutput(OutputMetadata):
     """
+    Output class for :func:`uap` estimator metric
+
     Attributes
     ----------
     uap : float
@@ -48,8 +52,8 @@ def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
     ValueError
         If unique classes M < 2

-
-
+    Note
+    ----
     This function calculates the empirical mean precision using the
     ``average_precision_score`` from scikit-learn, weighted by the class distribution.
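The note points to scikit-learn's `average_precision_score` with class-distribution weighting. A toy sketch of that call (one-hot labels and random scores are stand-ins):

    import numpy as np
    from sklearn.metrics import average_precision_score

    labels = np.array([0, 0, 1, 1, 2, 2])
    scores = np.random.default_rng(0).random((6, 3))   # per-class model scores
    uap_estimate = average_precision_score(np.eye(3)[labels], scores, average="weighted")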
dataeval/_internal/metrics/utils.py
CHANGED
@@ -91,8 +91,8 @@ def entropy(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts

-
-
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically. See
     numpy.histogram for details.
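The entropy note defers continuous binning to `numpy.histogram`; a toy equivalent with automatic bin selection:

    import numpy as np

    values = np.random.default_rng(0).normal(size=200)   # one continuous factor
    counts, _ = np.histogram(values, bins="auto")
    p = counts / counts.sum()
    ent = -np.sum(p[p > 0] * np.log(p[p > 0]))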
dataeval/_internal/models/pytorch/autoencoder.py
CHANGED
@@ -61,9 +61,9 @@ class AETrainer:
         List[float]
             A list of average loss values for each epoch.

-
+        Note
         ----
-        To replace this function with a custom function, do
+        To replace this function with a custom function, do:
            AETrainer.train = custom_function
         """
         # Setup training
@@ -120,7 +120,7 @@ class AETrainer:

         Note
         ----
-        To replace this function with a custom function, do
+        To replace this function with a custom function, do:
            AETrainer.eval = custom_function
         """
         self.model.eval()
@@ -155,8 +155,8 @@ class AETrainer:
         torch.Tensor
             Data encoded by the model

-
-
+        Note
+        ----
         This function should be run after the model has been trained and evaluated.
         """
         self.model.eval()
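The docstrings fixed above describe swapping in a custom loop by assignment. A hedged sketch of that monkey-patch; the import path assumes the 0.70.1 layout from the file moves, and `custom_train`'s signature and body are placeholders:

    from dataeval.utils.torch.trainer import AETrainer  # assumed 0.70.1 export

    def custom_train(self, dataset, epochs=25):
        losses = []
        # ... custom training loop goes here ...
        return losses

    AETrainer.train = custom_train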
dataeval/_internal/models/tensorflow/pixelcnn.py
CHANGED
@@ -272,8 +272,6 @@ class PixelCNN(distribution.Distribution):
         The minimum value of the input data.
     dtype : tensorflow dtype, default tf.float32
         Data type of the `Distribution`.
-    name : str, default "PixelCNN"
-        The name of the `Distribution`.
     """

     def __init__(
@@ -293,10 +291,9 @@ class PixelCNN(distribution.Distribution):
         high: int = 255,
         low: int = 0,
         dtype=tf.float32,
-        name: str = "PixelCNN",
     ) -> None:
         parameters = dict(locals())
-        with tf.name_scope(
+        with tf.name_scope("PixelCNN") as name:
             super().__init__(
                 dtype=dtype,
                 reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
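With the `name` parameter gone, the scope is fixed to "PixelCNN" internally. Before/after construction, using the internal import path visible in the llr.py hunk (`image_shape` is an assumed first argument, mirroring the TFP distribution this class is based on):

    from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN

    # 0.70.0 (removed): PixelCNN(image_shape=(28, 28, 1), name="PixelCNN")
    model = PixelCNN(image_shape=(28, 28, 1))   # 0.70.1: name is fixed internally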
dataeval/_internal/utils.py
CHANGED
@@ -8,7 +8,7 @@ from torch.utils.data import Dataset

 def read_dataset(dataset: Dataset) -> list[list[Any]]:
     """
-    Extract information from a dataset at each index into
+    Extract information from a dataset at each index into individual lists of each information position

     Parameters
     ----------
@@ -31,36 +31,31 @@ def read_dataset(dataset: Dataset) -> list[list[Any]]:
     Examples
     --------
     >>> import numpy as np
-
-    >>> data = np.ones((10, 3, 3))
+    >>> data = np.ones((10, 1, 3, 3))
     >>> labels = np.ones((10,))
     >>> class ICDataset:
     ...     def __init__(self, data, labels):
     ...         self.data = data
     ...         self.labels = labels
-
+    ...
     ...     def __getitem__(self, idx):
     ...         return self.data[idx], self.labels[idx]

     >>> ds = ICDataset(data, labels)

     >>> result = read_dataset(ds)
-    >>>
-
-    >>>
-
-    >>>
-
+    >>> len(result) # images and labels
+    2
+    >>> np.asarray(result[0]).shape # images
+    (10, 1, 3, 3)
+    >>> np.asarray(result[1]).shape # labels
+    (10,)
     """

-    ddict: dict[int, list] = defaultdict(list)
+    ddict: dict[int, list[Any]] = defaultdict(list[Any])

     for data in dataset:
-
-        if not isinstance(data, tuple):
-            data = (data,)
-
-        for i, d in enumerate(data):
+        for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
            ddict[i].append(d)

     return list(ddict.values())
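A standalone equivalent of the refactored loop, for readers who want the transposition behavior outside dataeval:

    from collections import defaultdict
    from typing import Any

    def transpose(dataset) -> list[list[Any]]:
        # mirror read_dataset: fan each tuple position out into its own list
        ddict: dict[int, list[Any]] = defaultdict(list)
        for data in dataset:
            for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
                ddict[i].append(d)
        return list(ddict.values())

    pairs = [(1, "a"), (2, "b")]
    assert transpose(pairs) == [[1, 2], ["a", "b"]]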