PyPI - dataeval - Versions diffs - 0.86.2__py3-none-any.whl → 0.86.4__py3-none-any.whl - Mend

dataeval 0.86.2__py3-none-any.whl → 0.86.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

dataeval/__init__.py +1 -1
dataeval/data/__init__.py +0 -2
dataeval/data/_images.py +3 -1
dataeval/data/_metadata.py +40 -63
dataeval/data/selections/_classfilter.py +2 -2
dataeval/metadata/_distance.py +1 -1
dataeval/metadata/_utils.py +4 -2
dataeval/metrics/bias/_balance.py +6 -5
dataeval/metrics/bias/_parity.py +2 -1
dataeval/metrics/stats/_labelstats.py +24 -32
dataeval/outputs/_base.py +1 -1
dataeval/outputs/_bias.py +21 -18
dataeval/outputs/_estimators.py +2 -1
dataeval/outputs/_linters.py +18 -18
dataeval/outputs/_stats.py +20 -20
dataeval/outputs/_utils.py +3 -2
dataeval/outputs/_workflows.py +9 -7
dataeval/typing.py +4 -4
dataeval/utils/_plot.py +10 -10
{dataeval-0.86.2.dist-info → dataeval-0.86.4.dist-info}/METADATA +1 -1
{dataeval-0.86.2.dist-info → dataeval-0.86.4.dist-info}/RECORD +23 -24
dataeval/data/_targets.py +0 -89
{dataeval-0.86.2.dist-info → dataeval-0.86.4.dist-info}/LICENSE.txt +0 -0
{dataeval-0.86.2.dist-info → dataeval-0.86.4.dist-info}/WHEEL +0 -0

dataeval/__init__.py CHANGED Viewed

@@ -8,7 +8,7 @@ shifts that impact performance of deployed models.
 from __future__ import annotations
 __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
-__version__ = "0.86.2"
+__version__ = "0.86.4"
 import logging

dataeval/data/__init__.py CHANGED Viewed

@@ -6,7 +6,6 @@ __all__ = [
     "Metadata",
     "Select",
     "SplitDatasetOutput",
-    "Targets",
     "split_dataset",
 ]
@@ -15,5 +14,4 @@ from dataeval.data._images import Images
 from dataeval.data._metadata import Metadata
 from dataeval.data._selection import Select
 from dataeval.data._split import split_dataset
-from dataeval.data._targets import Targets
 from dataeval.outputs._utils import SplitDatasetOutput

dataeval/data/_images.py CHANGED Viewed

@@ -4,6 +4,8 @@ __all__ = []
 from typing import TYPE_CHECKING, Any, Generic, Iterator, Sequence, TypeVar, cast, overload
+import numpy as np
 from dataeval.typing import Array, ArrayLike, Dataset
 from dataeval.utils._array import as_numpy, channels_first_to_last
@@ -58,7 +60,7 @@ class Images(Generic[T]):
         num_images = len(indices)
         num_rows = (num_images + images_per_row - 1) // images_per_row
         fig, axes = plt.subplots(num_rows, images_per_row, figsize=figsize)
-        for i, ax in enumerate(axes.flatten()):
+        for i, ax in enumerate(np.asarray(axes).flatten()):
             image = channels_first_to_last(as_numpy(self[i]))
             ax.imshow(image)
             ax.axis("off")

dataeval/data/_metadata.py CHANGED Viewed

@@ -4,7 +4,7 @@ __all__ = []
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Iterable, Literal, Mapping, Sequence, Sized
+from typing import Any, Iterable, Literal, Mapping, Sequence
 import numpy as np
 import polars as pl
@@ -19,10 +19,9 @@ from dataeval.utils._array import as_numpy
 from dataeval.utils._bin import bin_data, digitize_data
 from dataeval.utils.data.metadata import merge
-if TYPE_CHECKING:
-    from dataeval.data import Targets
-else:
-    from dataeval.data._targets import Targets
+def _binned(name: str) -> str:
+    return f"{name}[]"
 @dataclass
@@ -51,20 +50,20 @@ class Metadata:
     def __init__(
         self,
-        dataset: AnnotatedDataset[tuple[Any, Any, dict[str, Any]]],
+        dataset: AnnotatedDataset[tuple[Any, Any, Mapping[str, Any]]],
         *,
         continuous_factor_bins: Mapping[str, int | Sequence[float]] | None = None,
         auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = "uniform_width",
         exclude: Sequence[str] | None = None,
         include: Sequence[str] | None = None,
     ) -> None:
-        self._targets: Targets
         self._class_labels: NDArray[np.intp]
         self._class_names: list[str]
         self._image_indices: NDArray[np.intp]
         self._factors: dict[str, FactorInfo]
         self._dropped_factors: dict[str, list[str]]
         self._dataframe: pl.DataFrame
+        self._raw: Sequence[Mapping[str, Any]]
         self._is_structured = False
         self._is_binned = False
@@ -80,13 +79,7 @@ class Metadata:
         self._include = set(include or ())
     @property
-    def targets(self) -> Targets:
-        """Target information for the dataset."""
-        self._structure()
-        return self._targets
-    @property
-    def raw(self) -> list[dict[str, Any]]:
+    def raw(self) -> Sequence[Mapping[str, Any]]:
         """The raw list of metadata dictionaries for the dataset."""
         self._structure()
         return self._raw
@@ -146,7 +139,7 @@ class Metadata:
         return self._dataframe
     @property
-    def dropped_factors(self) -> dict[str, list[str]]:
+    def dropped_factors(self) -> Mapping[str, Sequence[str]]:
         """Factors that were dropped during preprocessing and the reasons why they were dropped."""
         self._structure()
         return self._dropped_factors
@@ -165,16 +158,16 @@ class Metadata:
         )
     @property
-    def factor_names(self) -> list[str]:
+    def factor_names(self) -> Sequence[str]:
         """Factor names of the metadata."""
         self._structure()
-        return list(self._factors)
+        return list(filter(self._filter, self._factors))
     @property
-    def factor_info(self) -> dict[str, FactorInfo]:
+    def factor_info(self) -> Mapping[str, FactorInfo]:
         """Factor types of the metadata."""
         self._bin()
-        return self._factors
+        return dict(filter(self._filter, self._factors.items()))
     @property
     def factor_data(self) -> NDArray[Any]:
@@ -192,7 +185,7 @@ class Metadata:
         return self._class_labels
     @property
-    def class_names(self) -> list[str]:
+    def class_names(self) -> Sequence[str]:
         """Class names as a list of strings."""
         self._structure()
         return self._class_names
@@ -206,13 +199,17 @@ class Metadata:
     @property
     def image_count(self) -> int:
         self._bin()
-        return int(self._image_indices.max() + 1)
+        return 0 if self._image_indices.size == 0 else int(self._image_indices.max() + 1)
+    def _filter(self, factor: str | tuple[str, Any]) -> bool:
+        factor = factor[0] if isinstance(factor, tuple) else factor
+        return factor in self.include if self.include else factor not in self.exclude
     def _reset_bins(self, cols: Iterable[str] | None = None) -> None:
         if self._is_binned:
             columns = self._dataframe.columns
-            for col in (col for col in cols or columns if f"{col}[|]" in columns):
-                self._dataframe.drop_in_place(f"{col}[|]")
+            for col in (col for col in cols or columns if _binned(col) in columns):
+                self._dataframe.drop_in_place(_binned(col))
                 self._factors[col] = FactorInfo()
             self._is_binned = False
@@ -220,7 +217,7 @@ class Metadata:
         if self._is_structured:
             return
-        raw: list[dict[str, Any]] = []
+        raw: Sequence[Mapping[str, Any]] = []
         labels = []
         bboxes = []
@@ -255,6 +252,14 @@ class Metadata:
         bboxes = as_numpy(bboxes).astype(np.float32) if is_od else None
         srcidx = as_numpy(srcidx).astype(np.intp) if is_od else None
+        index2label = self._dataset.metadata.get("index2label", {i: str(i) for i in np.unique(labels)})
+        targets_per_image = None if srcidx is None else np.unique(srcidx, return_counts=True)[1].tolist()
+        merged = merge(raw, return_dropped=True, ignore_lists=False, targets_per_image=targets_per_image)
+        reserved = ["image_index", "class_label", "score", "box"]
+        factor_dict = {f"metadata_{k}" if k in reserved else k: v for k, v in merged[0].items() if k != "_image_index"}
         target_dict = {
             "image_index": srcidx if srcidx is not None else np.arange(len(labels)),
             "class_label": labels,
@@ -262,20 +267,11 @@ class Metadata:
             "box": bboxes if bboxes is not None else [None] * len(labels),
         }
-        self._targets = Targets(labels, scores, bboxes, srcidx)
         self._raw = raw
-        index2label = self._dataset.metadata.get("index2label", {})
+        self._index2label = index2label
         self._class_labels = labels
-        self._class_names = [index2label.get(i, str(i)) for i in np.unique(self._class_labels)]
+        self._class_names = list(index2label.values())
         self._image_indices = target_dict["image_index"]
-        targets_per_image = None if srcidx is None else np.unique(srcidx, return_counts=True)[1].tolist()
-        merged = merge(raw, return_dropped=True, ignore_lists=False, targets_per_image=targets_per_image)
-        reserved = ["image_index", "class_label", "score", "box"]
-        factor_dict = {f"metadata_{k}" if k in reserved else k: v for k, v in merged[0].items() if k != "_image_index"}
         self._factors = dict.fromkeys(factor_dict, FactorInfo())
         self._dataframe = pl.DataFrame({**target_dict, **factor_dict})
         self._dropped_factors = merged[1]
@@ -302,10 +298,10 @@ class Metadata:
             )
         column_set = set(df.columns)
-        for col in (col for col in self.factor_names if f"{col}[|]" not in column_set):
+        for col in (col for col in self.factor_names if _binned(col) not in column_set):
             # Get data as numpy array for processing
             data = df[col].to_numpy()
-            col_dz = f"{col}[|]"
+            col_dz = _binned(col)
             if col in factor_bins:
                 # User provided binning
                 bins = factor_bins[col]
@@ -332,31 +328,14 @@ class Metadata:
                     df = df.with_columns(pl.Series(name=col_dz, values=binned_data.astype(np.int64)))
                     factor_info[col] = FactorInfo("continuous", col_dz)
                 else:
-                    factor_info[col] = FactorInfo("discrete", col_dz)
+                    factor_info[col] = FactorInfo("discrete", col)
         # Store the results
         self._dataframe = df
         self._factors.update(factor_info)
         self._is_binned = True
-    def get_factors_by_type(self, factor_type: Literal["categorical", "continuous", "discrete"]) -> list[str]:
-        """
-        Get the names of factors of a specific type.
-        Parameters
-        ----------
-        factor_type : Literal["categorical", "continuous", "discrete"]
-            The type of factors to retrieve.
-        Returns
-        -------
-        list[str]
-            List of factor names of the specified type.
-        """
-        self._bin()
-        return [name for name, info in self.factor_info.items() if info.factor_type == factor_type]
-    def add_factors(self, factors: Mapping[str, Any]) -> None:
+    def add_factors(self, factors: Mapping[str, Array | Sequence[Any]]) -> None:
         """
         Add additional factors to the metadata.
@@ -365,16 +344,15 @@ class Metadata:
         Parameters
         ----------
-        factors : Mapping[str, ArrayLike]
+        factors : Mapping[str, Array | Sequence[Any]]
             Dictionary of factors to add to the metadata.
         """
         self._structure()
-        targets = len(self.targets.source) if self.targets.source is not None else len(self.targets)
+        targets = len(self.dataframe)
         images = self.image_count
-        lengths = {k: len(v if isinstance(v, Sized) else np.atleast_1d(as_numpy(v))) for k, v in factors.items()}
-        targets_match = all(f == targets for f in lengths.values())
-        images_match = targets_match if images == targets else all(f == images for f in lengths.values())
+        targets_match = all(len(v) == targets for v in factors.values())
+        images_match = targets_match if images == targets else all(len(v) == images for v in factors.values())
         if not targets_match and not images_match:
             raise ValueError(
                 "The lists/arrays in the provided factors have a different length than the current metadata factors."
@@ -382,8 +360,7 @@ class Metadata:
         new_columns = []
         for k, v in factors.items():
-            v = as_numpy(v)
-            data = v if (self.targets.source is None or lengths[k] == targets) else v[self.targets.source]
+            data = as_numpy(v)[self.image_indices]
             new_columns.append(pl.Series(name=k, values=data))
             self._factors[k] = FactorInfo()

dataeval/data/selections/_classfilter.py CHANGED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 __all__ = []
-from typing import Any, Generic, Iterable, Sequence, Sized, TypeVar, cast
+from typing import Any, Generic, Iterable, Mapping, Sequence, Sized, TypeVar, cast
 import numpy as np
 from numpy.typing import NDArray
@@ -92,7 +92,7 @@ class ClassFilterSubSelection(Subselection[Any]):
     def __init__(self, classes: Sequence[int]) -> None:
         self.classes = classes
-    def _filter(self, d: dict[str, Any], mask: NDArray[np.bool_]) -> dict[str, Any]:
+    def _filter(self, d: Mapping[str, Any], mask: NDArray[np.bool_]) -> dict[str, Any]:
         return {k: self._filter(v, mask) if isinstance(v, dict) else _try_mask_object(v, mask) for k, v in d.items()}
     def __call__(self, datum: _TDatum) -> _TDatum:

dataeval/metadata/_distance.py CHANGED Viewed

@@ -81,7 +81,7 @@ def metadata_distance(metadata1: Metadata, metadata2: Metadata) -> MetadataDista
     """
     _compare_keys(metadata1.factor_names, metadata2.factor_names)
-    cont_fnames = metadata1.get_factors_by_type("continuous")
+    cont_fnames = [name for name, info in metadata1.factor_info.items() if info.factor_type == "continuous"]
     if not cont_fnames:
         return MetadataDistanceOutput({})

dataeval/metadata/_utils.py CHANGED Viewed

@@ -1,9 +1,11 @@
 __all__ = []
+from typing import Sequence
 from numpy.typing import NDArray
-def _compare_keys(keys1: list[str], keys2: list[str]) -> None:
+def _compare_keys(keys1: Sequence[str], keys2: Sequence[str]) -> None:
     """
     Raises error when two lists are not equivalent including ordering
@@ -24,7 +26,7 @@ def _compare_keys(keys1: list[str], keys2: list[str]) -> None:
         raise ValueError(f"Metadata keys must be identical, got {keys1} and {keys2}")
-def _validate_factors_and_data(factors: list[str], data: NDArray) -> None:
+def _validate_factors_and_data(factors: Sequence[str], data: NDArray) -> None:
     """
     Raises error when the number of factors and number of rows do not match

dataeval/metrics/bias/_balance.py CHANGED Viewed

@@ -99,9 +99,10 @@ def balance(
     factor_types = {"class_label": "categorical"} | {k: v.factor_type for k, v in metadata.factor_info.items()}
     is_discrete = [factor_type != "continuous" for factor_type in factor_types.values()]
     num_factors = len(factor_types)
+    class_labels = metadata.class_labels
     mi = np.full((num_factors, num_factors), np.nan, dtype=np.float32)
-    data = np.hstack((metadata.class_labels[:, np.newaxis], data))
+    data = np.hstack((class_labels[:, np.newaxis], data))
     for idx, factor_type in enumerate(factor_types.values()):
         if factor_type != "continuous":
@@ -132,12 +133,12 @@ def balance(
     factors = nmi[1:, 1:]
     # assume class is a factor
-    num_classes = len(metadata.class_names)
+    u_classes = np.unique(class_labels)
+    num_classes = len(u_classes)
     classwise_mi = np.full((num_classes, num_factors), np.nan, dtype=np.float32)
     # classwise targets
-    classes = np.unique(metadata.class_labels)
-    tgt_bin = data[:, 0][:, None] == classes
+    tgt_bin = data[:, 0][:, None] == u_classes
     # classification MI for discrete/categorical features
     for idx in range(num_classes):
@@ -157,6 +158,6 @@ def balance(
     classwise = classwise_mi / norm_factor
     # Grabbing factor names for plotting function
-    factor_names = ["class_label"] + metadata.factor_names
+    factor_names = ["class_label"] + list(metadata.factor_names)
     return BalanceOutput(balance, factors, classwise, factor_names, metadata.class_names)

dataeval/metrics/bias/_parity.py CHANGED Viewed

@@ -259,7 +259,8 @@ def parity(metadata: Metadata) -> ParityOutput:
         counts = np.nonzero(contingency_matrix < 5)
         unique_factor_values = np.unique(col_data)
         current_factor_name = metadata.factor_names[i]
-        for int_factor, int_class in zip(counts[0], counts[1]):
+        for _factor, _class in zip(counts[0], counts[1]):
+            int_factor, int_class = int(_factor), int(_class)
             if contingency_matrix[int_factor, int_class] > 0:
                 factor_category = unique_factor_values[int_factor].item()
                 class_name = metadata.class_names[int_class]

dataeval/metrics/stats/_labelstats.py CHANGED Viewed

@@ -2,8 +2,9 @@ from __future__ import annotations
 __all__ = []
-from collections import Counter, defaultdict
-from typing import Any, Mapping, TypeVar
+from typing import Any, TypeVar
+import polars as pl
 from dataeval.data._metadata import Metadata
 from dataeval.outputs import LabelStatsOutput
@@ -13,10 +14,6 @@ from dataeval.typing import AnnotatedDataset
 TValue = TypeVar("TValue")
-def _sort_to_list(d: Mapping[int, TValue]) -> list[TValue]:
-    return [t[1] for t in sorted(d.items())]
 @set_metadata
 def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
     """
@@ -52,39 +49,34 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
         pig:      2      -      2
     chicken:      5      -      5
     """
-    dataset = Metadata(dataset) if isinstance(dataset, AnnotatedDataset) else dataset
-    label_counts: Counter[int] = Counter()
-    image_counts: Counter[int] = Counter()
-    index_location = defaultdict(list[int])
-    label_per_image: list[int] = []
-    index2label = dict(enumerate(dataset.class_names))
-    for i, target in enumerate(dataset.targets):
-        group = target.labels.tolist()
+    metadata = Metadata(dataset) if isinstance(dataset, AnnotatedDataset) else dataset
+    metadata_df = metadata.dataframe
-        # Count occurrences of each label in all sublists
-        label_counts.update(group)
+    # Count occurrences of each label across all images
+    label_counts_df = metadata_df.group_by("class_label").len()
+    label_counts = dict(zip(label_counts_df["class_label"], label_counts_df["len"]))
-        # Get the number of labels per image
-        label_per_image.append(len(group))
+    # Count unique images per label (how many images contain each label)
+    image_counts_df = metadata_df.select(["image_index", "class_label"]).unique().group_by("class_label").len()
+    image_counts = dict(zip(image_counts_df["class_label"], image_counts_df["len"]))
-        # Create a set of unique items in the current sublist
-        unique_items: set[int] = set(group)
+    # Create index_location mapping (which images contain each label)
+    index_location: dict[int, list[int]] = {}
+    for row in metadata_df.group_by("class_label").agg(pl.col("image_index")).to_dicts():
+        indices = row["image_index"]
+        index_location[row["class_label"]] = sorted(dict.fromkeys(indices)) if isinstance(indices, list) else [indices]
-        # Update image counts and index locations
-        image_counts.update(unique_items)
-        for item in unique_items:
-            index_location[item].append(i)
+    # Count labels per image
+    label_per_image_df = metadata_df.group_by("image_index").agg(pl.len().alias("label_count"))
+    label_per_image = label_per_image_df.sort("image_index")["label_count"].to_list()
     return LabelStatsOutput(
-        label_counts_per_class=_sort_to_list(label_counts),
+        label_counts_per_class=label_counts,
         label_counts_per_image=label_per_image,
-        image_counts_per_class=_sort_to_list(image_counts),
-        image_indices_per_class=_sort_to_list(index_location),
+        image_counts_per_class=image_counts,
+        image_indices_per_class=index_location,
         image_count=len(label_per_image),
-        class_count=len(label_counts),
+        class_count=len(metadata.class_names),
         label_count=sum(label_counts.values()),
-        class_names=list(index2label.values()),
+        class_names=metadata.class_names,
     )

dataeval/outputs/_base.py CHANGED Viewed

@@ -147,7 +147,7 @@ P = ParamSpec("P")
 R = TypeVar("R", bound=GenericOutput)
-def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
+def set_metadata(fn: Callable[P, R] | None = None, *, state: Sequence[str] | None = None) -> Callable[P, R]:
     """Decorator to stamp Output classes with runtime metadata"""
     if fn is None:

dataeval/outputs/_bias.py CHANGED Viewed

@@ -4,7 +4,7 @@ __all__ = []
 import contextlib
 from dataclasses import asdict, dataclass
-from typing import Any, TypeVar
+from typing import Any, Mapping, Sequence, TypeVar
 import numpy as np
 import pandas as pd
@@ -39,7 +39,7 @@ class ToDataFrameMixin:
         This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
         """
         return pd.DataFrame(
-            index=self.factor_names,  # type: ignore - list[str] is documented as acceptable index type
+            index=self.factor_names,  # type: ignore - Sequence[str] is documented as acceptable index type
             data={
                 "score": self.score.round(2),
                 "p-value": self.p_value.round(2),
@@ -58,7 +58,7 @@ class ParityOutput(ToDataFrameMixin, Output):
         chi-squared score(s) of the test
     p_value : NDArray[np.float64]
         p-value(s) of the test
-    factor_names : list[str]
+    factor_names : Sequence[str]
         Names of each metadata factor
     insufficient_data: dict
         Dictionary of metadata factors with less than 5 class occurrences per value
@@ -66,8 +66,8 @@ class ParityOutput(ToDataFrameMixin, Output):
     score: NDArray[np.float64]
     p_value: NDArray[np.float64]
-    factor_names: list[str]
-    insufficient_data: dict[str, dict[int, dict[str, int]]]
+    factor_names: Sequence[str]
+    insufficient_data: Mapping[str, Mapping[int, Mapping[str, int]]]
 @dataclass(frozen=True)
@@ -145,12 +145,15 @@ class CoverageOutput(Output):
         cols = min(3, num_images)
         fig, axs = plt.subplots(rows, cols, figsize=(3 * cols, 3 * rows))
-        for image, ax in zip(images[:num_images], axs.flat):
+        # Flatten axes using numpy array explicitly for compatibility
+        axs_flat = np.asarray(axs).flatten()
+        for image, ax in zip(images[:num_images], axs_flat):
             image = channels_first_to_last(as_numpy(image))
             ax.imshow(image)
             ax.axis("off")
-        for ax in axs.flat[num_images:]:
+        for ax in axs_flat[num_images:]:
             ax.axis("off")
         fig.tight_layout()
@@ -187,22 +190,22 @@ class BalanceOutput(Output):
         Estimate of inter/intra-factor mutual information
     classwise : NDArray[np.float64]
         Estimate of mutual information between metadata factors and individual class labels
-    factor_names : list[str]
+    factor_names : Sequence[str]
         Names of each metadata factor
-    class_names : list[str]
+    class_names : Sequence[str]
         List of the class labels present in the dataset
     """
     balance: NDArray[np.float64]
     factors: NDArray[np.float64]
     classwise: NDArray[np.float64]
-    factor_names: list[str]
-    class_names: list[str]
+    factor_names: Sequence[str]
+    class_names: Sequence[str]
     def plot(
         self,
-        row_labels: list[Any] | NDArray[Any] | None = None,
-        col_labels: list[Any] | NDArray[Any] | None = None,
+        row_labels: Sequence[Any] | NDArray[Any] | None = None,
+        col_labels: Sequence[Any] | NDArray[Any] | None = None,
         plot_classwise: bool = False,
     ) -> Figure:
         """
@@ -276,16 +279,16 @@ class DiversityOutput(Output):
         :term:`Diversity` index for classes and factors
     classwise : NDArray[np.double]
         Classwise diversity index [n_class x n_factor]
-    factor_names : list[str]
+    factor_names : Sequence[str]
         Names of each metadata factor
-    class_names : list[str]
+    class_names : Sequence[str]
         Class labels for each value in the dataset
     """
     diversity_index: NDArray[np.double]
     classwise: NDArray[np.double]
-    factor_names: list[str]
-    class_names: list[str]
+    factor_names: Sequence[str]
+    class_names: Sequence[str]
     def plot(
         self,
@@ -333,7 +336,7 @@ class DiversityOutput(Output):
             import matplotlib.pyplot as plt
             fig, ax = plt.subplots(figsize=(8, 8))
-            heat_labels = ["class_labels"] + self.factor_names
+            heat_labels = ["class_labels"] + list(self.factor_names)
             ax.bar(heat_labels, self.diversity_index)
             ax.set_xlabel("Factors")
             plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

dataeval/outputs/_estimators.py CHANGED Viewed

@@ -3,6 +3,7 @@ from __future__ import annotations
 __all__ = []
 from dataclasses import dataclass
+from typing import Sequence
 import numpy as np
 from numpy.typing import NDArray
@@ -64,7 +65,7 @@ class ClustererOutput(Output):
         """
         return np.nonzero(self.clusters == -1)[0]
-    def find_duplicates(self) -> tuple[list[list[int]], list[list[int]]]:
+    def find_duplicates(self) -> tuple[Sequence[Sequence[int]], Sequence[Sequence[int]]]:
         """
         Finds duplicate and near duplicate data based on cluster average distance

dataeval/outputs/_linters.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 __all__ = []
 from dataclasses import dataclass
-from typing import Generic, TypeVar, Union
+from typing import Generic, Mapping, Sequence, TypeVar, Union
 import pandas as pd
 from typing_extensions import TypeAlias
@@ -11,13 +11,13 @@ from typing_extensions import TypeAlias
 from dataeval.outputs._base import Output
 from dataeval.outputs._stats import DimensionStatsOutput, LabelStatsOutput, PixelStatsOutput, VisualStatsOutput
-DuplicateGroup: TypeAlias = list[int]
-DatasetDuplicateGroupMap: TypeAlias = dict[int, DuplicateGroup]
+DuplicateGroup: TypeAlias = Sequence[int]
+DatasetDuplicateGroupMap: TypeAlias = Mapping[int, DuplicateGroup]
 TIndexCollection = TypeVar("TIndexCollection", DuplicateGroup, DatasetDuplicateGroupMap)
-IndexIssueMap: TypeAlias = dict[int, dict[str, float]]
+IndexIssueMap: TypeAlias = Mapping[int, Mapping[str, float]]
 OutlierStatsOutput: TypeAlias = Union[DimensionStatsOutput, PixelStatsOutput, VisualStatsOutput]
-TIndexIssueMap = TypeVar("TIndexIssueMap", IndexIssueMap, list[IndexIssueMap])
+TIndexIssueMap = TypeVar("TIndexIssueMap", IndexIssueMap, Sequence[IndexIssueMap])
 @dataclass(frozen=True)
@@ -27,9 +27,9 @@ class DuplicatesOutput(Output, Generic[TIndexCollection]):
     Attributes
     ----------
-    exact : list[list[int] | dict[int, list[int]]]
+    exact : Sequence[Sequence[int] | Mapping[int, Sequence[int]]]
         Indices of images that are exact matches
-    near: list[list[int] | dict[int, list[int]]]
+    near: Sequence[Sequence[int] | Mapping[int, Sequence[int]]]
         Indices of images that are near matches
     Notes
@@ -39,13 +39,13 @@ class DuplicatesOutput(Output, Generic[TIndexCollection]):
       index of the dataset, and the value is the list index groups from that dataset.
     """
-    exact: list[TIndexCollection]
-    near: list[TIndexCollection]
+    exact: Sequence[TIndexCollection]
+    near: Sequence[TIndexCollection]
 def _reorganize_by_class_and_metric(
     result: IndexIssueMap, lstats: LabelStatsOutput
-) -> tuple[dict[str, list[int]], dict[str, dict[str, int]]]:
+) -> tuple[Mapping[str, Sequence[int]], Mapping[str, Mapping[str, int]]]:
     """Flip result from grouping by image to grouping by class and metric"""
     metrics: dict[str, list[int]] = {}
     class_wise: dict[str, dict[str, int]] = {label: {} for label in lstats.class_names}
@@ -54,14 +54,14 @@ def _reorganize_by_class_and_metric(
     for img, group in result.items():
         for extreme in group:
             metrics.setdefault(extreme, []).append(img)
-            for i, images in enumerate(lstats.image_indices_per_class):
+            for i, images in lstats.image_indices_per_class.items():
                 if img in images:
                     class_wise[lstats.class_names[i]][extreme] = class_wise[lstats.class_names[i]].get(extreme, 0) + 1
     return metrics, class_wise
-def _create_table(metrics: dict[str, list[int]], class_wise: dict[str, dict[str, int]]) -> list[str]:
+def _create_table(metrics: Mapping[str, Sequence[int]], class_wise: Mapping[str, Mapping[str, int]]) -> Sequence[str]:
     """Create table for displaying the results"""
     max_class_length = max(len(str(label)) for label in class_wise) + 2
     max_total = max(len(metrics[group]) for group in metrics) + 2
@@ -71,7 +71,7 @@ def _create_table(metrics: dict[str, list[int]], class_wise: dict[str, dict[str,
         + [f"{group:^{max(5, len(str(group))) + 2}}" for group in sorted(metrics.keys())]
         + [f"{'Total':<{max_total}}"]
     )
-    table_rows: list[str] = []
+    table_rows: Sequence[str] = []
     for class_cat, results in class_wise.items():
         table_value = [f"{class_cat:>{max_class_length}}"]
@@ -86,7 +86,7 @@ def _create_table(metrics: dict[str, list[int]], class_wise: dict[str, dict[str,
     return [table_header] + table_rows
-def _create_pandas_dataframe(class_wise: dict[str, dict[str, int]]) -> list[dict[str, str | int]]:
+def _create_pandas_dataframe(class_wise: Mapping[str, Mapping[str, int]]) -> Sequence[Mapping[str, str | int]]:
     """Create data for pandas dataframe"""
     data = []
     for label, metrics_dict in class_wise.items():
@@ -105,7 +105,7 @@ class OutliersOutput(Output, Generic[TIndexIssueMap]):
     Attributes
     ----------
-    issues : dict[int, dict[str, float]] | list[dict[int, dict[str, float]]]
+    issues : Mapping[int, Mapping[str, float]] | Sequence[Mapping[int, Mapping[str, float]]]
         Indices of image Outliers with their associated issue type and calculated values.
     - For a single dataset, a dictionary containing the indices of outliers and
@@ -117,7 +117,7 @@ class OutliersOutput(Output, Generic[TIndexIssueMap]):
     issues: TIndexIssueMap
     def __len__(self) -> int:
-        if isinstance(self.issues, dict):
+        if isinstance(self.issues, Mapping):
             return len(self.issues)
         return sum(len(d) for d in self.issues)
@@ -134,7 +134,7 @@ class OutliersOutput(Output, Generic[TIndexIssueMap]):
         -------
         str
         """
-        if isinstance(self.issues, dict):
+        if isinstance(self.issues, Mapping):
             metrics, classwise = _reorganize_by_class_and_metric(self.issues, labelstats)
             listed_table = _create_table(metrics, classwise)
             table = "\n".join(listed_table)
@@ -165,7 +165,7 @@ class OutliersOutput(Output, Generic[TIndexIssueMap]):
         -----
         This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
         """
-        if isinstance(self.issues, dict):
+        if isinstance(self.issues, Mapping):
             _, classwise = _reorganize_by_class_and_metric(self.issues, labelstats)
             data = _create_pandas_dataframe(classwise)
             df = pd.DataFrame(data)

dataeval/outputs/_stats.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 __all__ = []
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Any, Iterable, Mapping, NamedTuple, Optional, Sequence, Union
 import numpy as np
 import pandas as pd
@@ -61,7 +61,7 @@ class BaseStatsOutput(Output):
         The number of detected objects in each image
     """
-    source_index: list[SourceIndex]
+    source_index: Sequence[SourceIndex]
     object_count: NDArray[np.uint16]
     image_count: int
@@ -80,7 +80,7 @@ class BaseStatsOutput(Output):
         self,
         channel_index: OptionalRange,
         channel_count: OptionalRange = None,
-    ) -> list[bool]:
+    ) -> Sequence[bool]:
         """
         Boolean mask for results filtered to specified channel index and optionally the count
         of the channels per image.
@@ -92,8 +92,8 @@ class BaseStatsOutput(Output):
         channel_count : int | Iterable[int] | None
             Optional count(s) of channels to filter for
         """
-        mask: list[bool] = []
-        cur_mask: list[bool] = []
+        mask: Sequence[bool] = []
+        cur_mask: Sequence[bool] = []
         cur_image = 0
         cur_max_channel = 0
         for source_index in list(self.source_index) + [None]:
@@ -113,7 +113,7 @@ class BaseStatsOutput(Output):
     def _get_channels(
         self, channel_limit: int | None = None, channel_index: int | Iterable[int] | None = None
-    ) -> tuple[int, list[bool] | None]:
+    ) -> tuple[int, Sequence[bool] | None]:
         source_index = self.data()[SOURCE_INDEX]
         raw_channels = int(max([si.channel or 0 for si in source_index])) + 1
         if isinstance(channel_index, int):
@@ -140,7 +140,7 @@ class BaseStatsOutput(Output):
         self,
         filter: str | Sequence[str] | None = None,  # noqa: A002
         exclude_constant: bool = False,
-    ) -> dict[str, NDArray[Any]]:
+    ) -> Mapping[str, NDArray[Any]]:
         """
         Returns all 1-dimensional data as a dictionary of numpy arrays.
@@ -153,7 +153,7 @@ class BaseStatsOutput(Output):
         Returns
         -------
-        dict[str, NDArray[Any]]
+        Mapping[str, NDArray[Any]]
         """
         filter_ = [filter] if isinstance(filter, str) else filter
         return {
@@ -253,8 +253,8 @@ class HashStatsOutput(BaseStatsOutput):
         :term:`Perception-based Hash` of the images as a hex string
     """
-    xxhash: list[str]
-    pchash: list[str]
+    xxhash: Sequence[str]
+    pchash: Sequence[str]
 @dataclass(frozen=True)
@@ -264,15 +264,15 @@ class LabelStatsOutput(Output):
     Attributes
     ----------
-    label_counts_per_class : dict[int, int]
+    label_counts_per_class : Mapping[int, int]
         Dictionary whose keys are the different label classes and
         values are total counts of each class
-    label_counts_per_image : list[int]
+    label_counts_per_image : Sequence[int]
         Number of labels per image
-    image_counts_per_class : dict[int, int]
+    image_counts_per_class : Mapping[int, int]
         Dictionary whose keys are the different label classes and
         values are total counts of each image the class is present in
-    image_indices_per_class : dict[int, list]
+    image_indices_per_class : Mapping[int, Sequence[int]]
         Dictionary whose keys are the different label classes and
         values are lists containing the images that have that label
     image_count : int
@@ -281,17 +281,17 @@ class LabelStatsOutput(Output):
         Total number of classes present
     label_count : int
         Total number of labels present
-    class_names : list[str]
+    class_names : Sequence[str]
     """
-    label_counts_per_class: list[int]
-    label_counts_per_image: list[int]
-    image_counts_per_class: list[int]
-    image_indices_per_class: list[list[int]]
+    label_counts_per_class: Mapping[int, int]
+    label_counts_per_image: Sequence[int]
+    image_counts_per_class: Mapping[int, int]
+    image_indices_per_class: Mapping[int, Sequence[int]]
     image_count: int
     class_count: int
     label_count: int
-    class_names: list[str]
+    class_names: Sequence[str]
     def to_table(self) -> str:
         """

dataeval/outputs/_utils.py CHANGED Viewed

@@ -3,6 +3,7 @@ from __future__ import annotations
 __all__ = []
 from dataclasses import dataclass
+from typing import Sequence
 import numpy as np
 from numpy.typing import NDArray
@@ -36,9 +37,9 @@ class SplitDatasetOutput(Output):
     ----------
     test: NDArray[np.intp]
         Indices for the test set
-    folds: list[TrainValSplit]
+    folds: Sequence[TrainValSplit]
         List of train and validation split indices
     """
     test: NDArray[np.intp]
-    folds: list[TrainValSplit]
+    folds: Sequence[TrainValSplit]

dataeval/outputs/_workflows.py CHANGED Viewed

@@ -177,7 +177,9 @@ def calc_params(p_i: NDArray[Any], n_i: NDArray[Any], niter: int) -> NDArray[Any
     return res.x
-def get_curve_params(measures: dict[str, NDArray[Any]], ranges: NDArray[Any], niter: int) -> dict[str, NDArray[Any]]:
+def get_curve_params(
+    measures: Mapping[str, NDArray[Any]], ranges: NDArray[Any], niter: int
+) -> Mapping[str, NDArray[Any]]:
     """Calculates and aggregates parameters for both single and multi-class metrics"""
     output = {}
     for name, measure in measures.items():
@@ -208,7 +210,7 @@ class SufficiencyOutput(Output):
     """
     steps: NDArray[np.uint32]
-    measures: dict[str, NDArray[np.float64]]
+    measures: Mapping[str, NDArray[np.float64]]
     n_iter: int = 1000
     def __post_init__(self) -> None:
@@ -220,7 +222,7 @@ class SufficiencyOutput(Output):
         self._params = None
     @property
-    def params(self) -> dict[str, NDArray[Any]]:
+    def params(self) -> Mapping[str, NDArray[Any]]:
         if self._params is None:
             self._params = {}
         if self.n_iter not in self._params:
@@ -270,7 +272,7 @@ class SufficiencyOutput(Output):
         proj._params = self._params
         return proj
-    def plot(self, class_names: Sequence[str] | None = None) -> list[Figure]:
+    def plot(self, class_names: Sequence[str] | None = None) -> Sequence[Figure]:
         """
         Plotting function for data :term:`sufficience<Sufficiency>` tasks.
@@ -281,7 +283,7 @@ class SufficiencyOutput(Output):
         Returns
         -------
-        list[Figure]
+        Sequence[Figure]
             List of Figures for each measure
         Raises
@@ -325,7 +327,7 @@ class SufficiencyOutput(Output):
     def inv_project(
         self, targets: Mapping[str, ArrayLike], n_iter: int | None = None
-    ) -> dict[str, NDArray[np.float64]]:
+    ) -> Mapping[str, NDArray[np.float64]]:
         """
         Calculate training samples needed to achieve target model metric values.
@@ -339,7 +341,7 @@ class SufficiencyOutput(Output):
         Returns
         -------
-        dict[str, NDArray]
+        Mapping[str, NDArray]
             List of the number of training samples needed to achieve each
             corresponding entry in targets
         """

dataeval/typing.py CHANGED Viewed

@@ -21,7 +21,7 @@ __all__ = [
 import sys
-from typing import Any, Generic, Iterator, Protocol, TypedDict, TypeVar, runtime_checkable
+from typing import Any, Generic, Iterator, Mapping, Protocol, TypedDict, TypeVar, runtime_checkable
 import numpy.typing
 from typing_extensions import NotRequired, ReadOnly, Required
@@ -159,7 +159,7 @@ class AnnotatedDataset(Dataset[_T_co], Generic[_T_co], Protocol):
 # ========== IMAGE CLASSIFICATION DATASETS ==========
-ImageClassificationDatum: TypeAlias = tuple[ArrayLike, ArrayLike, dict[str, Any]]
+ImageClassificationDatum: TypeAlias = tuple[ArrayLike, ArrayLike, Mapping[str, Any]]
 """
 Type alias for an image classification datum tuple.
@@ -199,7 +199,7 @@ class ObjectDetectionTarget(Protocol):
     def scores(self) -> ArrayLike: ...
-ObjectDetectionDatum: TypeAlias = tuple[ArrayLike, ObjectDetectionTarget, dict[str, Any]]
+ObjectDetectionDatum: TypeAlias = tuple[ArrayLike, ObjectDetectionTarget, Mapping[str, Any]]
 """
 Type alias for an object detection datum tuple.
@@ -240,7 +240,7 @@ class SegmentationTarget(Protocol):
     def scores(self) -> ArrayLike: ...
-SegmentationDatum: TypeAlias = tuple[ArrayLike, SegmentationTarget, dict[str, Any]]
+SegmentationDatum: TypeAlias = tuple[ArrayLike, SegmentationTarget, Mapping[str, Any]]
 """
 Type alias for an image classification datum tuple.

dataeval/utils/_plot.py CHANGED Viewed

@@ -4,7 +4,7 @@ __all__ = []
 import contextlib
 import math
-from typing import Any
+from typing import Any, Mapping, Sequence
 import numpy as np
@@ -134,7 +134,7 @@ def format_text(*args: str) -> str:
 def histogram_plot(
-    data_dict: dict[str, Any],
+    data_dict: Mapping[str, Any],
     log: bool = True,
     xlabel: str = "values",
     ylabel: str = "counts",
@@ -164,9 +164,9 @@ def histogram_plot(
     rows = math.ceil(num_metrics / 3)
     cols = min(num_metrics, 3)
     fig, axs = plt.subplots(rows, 3, figsize=(cols * 3 + 1, rows * 3))
+    axs_flat = np.asarray(axs).flatten()
     for ax, metric in zip(
-        axs.flat,
+        axs_flat,
         data_dict,
     ):
         # Plot the histogram for the chosen metric
@@ -177,7 +177,7 @@ def histogram_plot(
         ax.set_ylabel(ylabel)
         ax.set_xlabel(xlabel)
-    for ax in axs.flat[num_metrics:]:
+    for ax in axs_flat[num_metrics:]:
         ax.axis("off")
         ax.set_visible(False)
@@ -186,10 +186,10 @@ def histogram_plot(
 def channel_histogram_plot(
-    data_dict: dict[str, Any],
+    data_dict: Mapping[str, Any],
     log: bool = True,
     max_channels: int = 3,
-    ch_mask: list[bool] | None = None,
+    ch_mask: Sequence[bool] | None = None,
     xlabel: str = "values",
     ylabel: str = "counts",
 ) -> Figure:
@@ -222,9 +222,9 @@ def channel_histogram_plot(
     rows = math.ceil(num_metrics / 3)
     cols = min(num_metrics, 3)
     fig, axs = plt.subplots(rows, 3, figsize=(cols * 3 + 1, rows * 3))
+    axs_flat = np.asarray(axs).flatten()
     for ax, metric in zip(
-        axs.flat,
+        axs_flat,
         data_keys,
     ):
         # Plot the histogram for the chosen metric
@@ -246,7 +246,7 @@ def channel_histogram_plot(
         ax.set_ylabel(ylabel)
         ax.set_xlabel(xlabel)
-    for ax in axs.flat[num_metrics:]:
+    for ax in axs_flat[num_metrics:]:
         ax.axis("off")
         ax.set_visible(False)

{dataeval-0.86.2.dist-info → dataeval-0.86.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.86.2
+Version: 0.86.4
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT

{dataeval-0.86.2.dist-info → dataeval-0.86.4.dist-info}/RECORD RENAMED Viewed

@@ -1,16 +1,15 @@
-dataeval/__init__.py,sha256=7Q_nGiQN6g8Le7VtOsemNgn5mC_6gR3NhazolD_arSQ,1636
+dataeval/__init__.py,sha256=6gfYCGo82QKKO58jQSma27Mr-R316vmCDbTjXRh5B7o,1636
 dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
 dataeval/config.py,sha256=hjad0TK1UmaKQlUuxqxt64_OAUqZkHjicBf06cvTyrQ,4082
-dataeval/data/__init__.py,sha256=qNnRRiVP_sLthkkHpUrMgI_r8dQK-cC-xoGrrjQeRKc,544
+dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
 dataeval/data/_embeddings.py,sha256=PFjpdV9bfusCB4taTIYSzx1hP8nJb_KCkZTN8kMw-Hs,12885
-dataeval/data/_images.py,sha256=3d4Cv-xg5z6_LVtw1eL_QdFwzbDI1cwvPNQblkrMEMk,2622
-dataeval/data/_metadata.py,sha256=GzXtecy7EvrB3ZJJbaCQjmpsdHXRL5788ckKbzeI54w,14994
+dataeval/data/_images.py,sha256=Rc_59CuU4zfN7Xm7an1XUx8ZghQg6a56VJWMZD9edRw,2654
+dataeval/data/_metadata.py,sha256=5pND6IZ5KeEGrhCDiBVxhU_BXWU0okBxt8oNkZ9a2_M,14309
 dataeval/data/_selection.py,sha256=r06xeiyK8nTWPLyItkoPQRWZI1i6LATSue_cuEbCdc4,4463
 dataeval/data/_split.py,sha256=nQABR05vxil2Qx7-uX4Fm0_DWpibskBGDJOYj_b1u3I,16737
-dataeval/data/_targets.py,sha256=pXrHBwT4Pi8DauaOxDVnIMwowWWlXuvSb07ShW7O2zk,3119
 dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
 dataeval/data/selections/_classbalance.py,sha256=7v8ApoL3X8eCZ6fGDNTehE_bZ1loaP3TlhsJLaICVWg,1458
-dataeval/data/selections/_classfilter.py,sha256=KQOmcTIcV3ZPWuiwqOmwX0SB5I2qlbxLSlwINUZWOjU,4339
+dataeval/data/selections/_classfilter.py,sha256=bXfoYnWnAfUGsAQSlLufJeF2PfgRKekFHfBx8hv1r3w,4351
 dataeval/data/selections/_indices.py,sha256=RFsR9z10aM3N0gJSfKrukFpi-LkiQGXoOwXhmOQ5cpg,630
 dataeval/data/selections/_limit.py,sha256=JG4GmEiNKt3sk4PbOUbBnGGzNlyz72H-kQrt8COMm4Y,512
 dataeval/data/selections/_prioritize.py,sha256=4dGUvgR7m6NGzzPU0N_bw0Xhujo8b72Wo8L4PGHbvBo,11233
@@ -39,16 +38,16 @@ dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,
 dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
 dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
 dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
-dataeval/metadata/_distance.py,sha256=AABrGoQyD13z9Fqlz3NyfX0Iow_vjBwAugIv6OSRTTE,4187
+dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
 dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,8981
-dataeval/metadata/_utils.py,sha256=r8qBJT83RblobD5W5zyTVi6vYi51Dwkqswizdbzss-M,1169
+dataeval/metadata/_utils.py,sha256=BcGoYVfA4AkAWpInY5txOc3QBpsGf6cnnUAsHOQTJAE,1210
 dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
 dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
-dataeval/metrics/bias/_balance.py,sha256=FcMOA3ge-sQ-0Id2E0K_6hTjNAV3ejJhlB5r4lxlJWI,5519
+dataeval/metrics/bias/_balance.py,sha256=fREtoMLUZPOf_ivqNKwij6oPiKMTk02ECO5rWURf3KY,5541
 dataeval/metrics/bias/_completeness.py,sha256=BysXU2Jpw33n5dl3acJFEqF3mFGiJLsfG4n5Q2fkTaY,4608
 dataeval/metrics/bias/_coverage.py,sha256=PeUoOiaghUEdn6Ov8z2-am7-fnBVIPcFbJK7Ty5JObA,3647
 dataeval/metrics/bias/_diversity.py,sha256=25udDKmel9IjeVT5nM4dOa1apda66QdRxBc922yuUvI,5830
-dataeval/metrics/bias/_parity.py,sha256=OHUSHPOeC8e1I3acALHbQv5bK4V7SqAT7ds9gNVNzSU,11371
+dataeval/metrics/bias/_parity.py,sha256=Kmzr9-NXxGzGtj6A-qUa88FTGaRyJU2xQj7tsplXJH4,11427
 dataeval/metrics/estimators/__init__.py,sha256=Pnds8uIyAovt2fKqZjiHCIP_kVoBWlVllekYuK5UmmU,568
 dataeval/metrics/estimators/_ber.py,sha256=C30E5LiGGTAfo31zWFYDptDg0R7CTJGJ-a60YgzSkYY,5382
 dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1RxKmoKT5MOgt0,1434
@@ -60,22 +59,22 @@ dataeval/metrics/stats/_boxratiostats.py,sha256=ROZrlqgbowkGfCR5PJ5TL7Og40iMOdUq
 dataeval/metrics/stats/_dimensionstats.py,sha256=EVO-BlxrZl8qrP09lwPbyWdrG1ZeDtgj4LiswDwEZ1I,2896
 dataeval/metrics/stats/_hashstats.py,sha256=qa1CYRgOebkxqkALfffaPM-kJ074ZbyfpWbfOfuObSs,4758
 dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
-dataeval/metrics/stats/_labelstats.py,sha256=lz8I6eSd8tFkmQqy5cOG8hn9yxs0mP-Ic9ratFHiuoU,2813
+dataeval/metrics/stats/_labelstats.py,sha256=UG7aKpFctLJvca3rC9sPT_25sCes77KpgZguJYMXfU0,2949
 dataeval/metrics/stats/_pixelstats.py,sha256=5RCQh0OQkHiCkn3DgCPVxKoFfifX_FOtwsnotADSZ0I,3265
 dataeval/metrics/stats/_visualstats.py,sha256=0k6bvAL_d66nQMfG7bydCOFJb7B0dhgG7fqCjVTp1sg,3707
 dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
-dataeval/outputs/_base.py,sha256=7KRWFIEw0UHdhb1em92bPE1YqbMYumAW1QD0QfPwVLc,5900
-dataeval/outputs/_bias.py,sha256=W5QWjtZzMfCaztw6lf0VTZsuSDrNgCcdAvNx6P4fIAo,10254
+dataeval/outputs/_base.py,sha256=-Wa0gFcBVLbfWPMZyCql7x4vGsnkLP4pecsQIeUZ2_Y,5904
+dataeval/outputs/_bias.py,sha256=1OZpKncYTryjPLRHb4d6NlhE27uPT57gCob_5jtjKDI,10456
 dataeval/outputs/_drift.py,sha256=rKn5vqMR6XNujgSqfHsH76oFkoGsUusquZL2Qy4Ae6Y,4581
-dataeval/outputs/_estimators.py,sha256=a2oAIxxEDZ9WLGfMWH8KD-BVUS_SnULRPR-iI9hFPoQ,3047
-dataeval/outputs/_linters.py,sha256=3vI8zsSF-JecQut500A629sICidQLWqhEZcj7o7_cfs,6554
+dataeval/outputs/_estimators.py,sha256=mh-R08CgYtmq9ffANDMYR-V4vrZnSjOjEyOMiMDZ2Ic,3091
+dataeval/outputs/_linters.py,sha256=k8lkd8EZ23q0m-HOD-FgqMcLQFy1UH7vws2ucLPyn08,6697
 dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
 dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
-dataeval/outputs/_stats.py,sha256=YDdVQmFcOvb4_NYc_d2a2JCA0Zkuh1o6_qupQkc_X1w,15142
-dataeval/outputs/_utils.py,sha256=HHlGC7sk416m_3Bgn075Qdblz_aPup_UOafJpB0RuXY,893
-dataeval/outputs/_workflows.py,sha256=0xSwPxBATa29tvwJtpovjYrq4la9fkbamHM_qsw-Llc,10799
+dataeval/outputs/_stats.py,sha256=F-515PGBNB69DXM-YaCkGHAyaXkCD-yYvKfj4-q7R4w,15247
+dataeval/outputs/_utils.py,sha256=NfhYaGT2PZlhIs8ICKUsPWHZXjhWYDkEJqBDdqMeaOM,929
+dataeval/outputs/_workflows.py,sha256=K786mOgegxVi81diUA-qpbwGEkwa8YA7Fk4ttgjJeaY,10831
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/typing.py,sha256=GDMuef-oFFukNtsiKFmsExHdNvYR_j-tQcsCwZ9reow,7198
+dataeval/typing.py,sha256=W8rqFFkAqE5a5ar3MmB-O5gcMJqvoDKXC8Y0ggBqAKo,7216
 dataeval/utils/__init__.py,sha256=hRvyUK7b3d6JBEV5u47rFcOHEcmDYqAvZQw_T5pDAWw,264
 dataeval/utils/_array.py,sha256=ftX8S6HKAIUOuc1xd30VC3Pz5yUzRglDpCLisWY_tHs,5888
 dataeval/utils/_bin.py,sha256=w3eJ2Szw5eapqQ0cGv731rhNgLFGW0cCz2pXo9I6CuY,7296
@@ -84,7 +83,7 @@ dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8
 dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
 dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
 dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
-dataeval/utils/_plot.py,sha256=zP0bEvtrLdws7r1Jte8Camq-q5K5F6T8iuv3bStnEJc,7116
+dataeval/utils/_plot.py,sha256=1rnMkBRvTFLoTAHqXwF7c7GJ5_5iqlgarZKAzmYciLk,7225
 dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
 dataeval/utils/data/_dataset.py,sha256=CFK9h-XPN7J-iF2nXol6keMDbGm6VIweFAMAjXRUlhg,9527
 dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
@@ -108,7 +107,7 @@ dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Z
 dataeval/utils/torch/trainer.py,sha256=Oc2lK13uPGhmLYbmAqlPWyKxgG4YJFlnSXCqFHUZbdA,5528
 dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
 dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
-dataeval-0.86.2.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
-dataeval-0.86.2.dist-info/METADATA,sha256=6y6bI8GBv_VjBs1mpjAZJ9R5UBTKT7RHQRRUGJdyPCk,5353
-dataeval-0.86.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-dataeval-0.86.2.dist-info/RECORD,,
+dataeval-0.86.4.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+dataeval-0.86.4.dist-info/METADATA,sha256=qdxTuVh3WxpHvsdRZhAvQIYxiATJLDixoF97xMFYrXM,5353
+dataeval-0.86.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.86.4.dist-info/RECORD,,

dataeval/data/_targets.py DELETED Viewed

@@ -1,89 +0,0 @@
-from __future__ import annotations
-from typing import Iterator
-__all__ = []
-from dataclasses import dataclass
-import numpy as np
-from numpy.typing import NDArray
-def _len(arr: NDArray, dim: int) -> int:
-    return 0 if len(arr) == 0 else len(np.atleast_1d(arr) if dim == 1 else np.atleast_2d(arr))
-@dataclass(frozen=True)
-class Targets:
-    """
-    Dataclass defining targets for image classification or object detection.
-    Attributes
-    ----------
-    labels : NDArray[np.intp]
-        Labels (N,) for N images or objects
-    scores : NDArray[np.float32]
-        Probability scores (N, M) for N images of M classes or confidence score (N,) of objects
-    bboxes : NDArray[np.float32] | None
-        Bounding boxes (N, 4) for N objects in (x0, y0, x1, y1) format
-    source : NDArray[np.intp] | None
-        Source image index (N,) for N objects
-    size : int
-        Count of objects
-    """
-    labels: NDArray[np.intp]
-    scores: NDArray[np.float32]
-    bboxes: NDArray[np.float32] | None
-    source: NDArray[np.intp] | None
-    def __post_init__(self) -> None:
-        if (self.bboxes is None) != (self.source is None):
-            raise ValueError("Either both bboxes and source must be provided or neither.")
-        labels = _len(self.labels, 1)
-        scores = _len(self.scores, 2) if self.bboxes is None else _len(self.scores, 1)
-        bboxes = labels if self.bboxes is None else _len(self.bboxes, 2)
-        source = labels if self.source is None else _len(self.source, 1)
-        if labels != scores or labels != bboxes or labels != source:
-            raise ValueError(
-                "Labels, scores, bboxes and source must be the same length (if provided).\n"
-                + f"    labels: {self.labels.shape}\n"
-                + f"    scores: {self.scores.shape}\n"
-                + f"    bboxes: {None if self.bboxes is None else self.bboxes.shape}\n"
-                + f"    source: {None if self.source is None else self.source.shape}\n"
-            )
-        if self.bboxes is not None and len(self.bboxes) > 0 and self.bboxes.shape[-1] != 4:
-            raise ValueError("Bounding boxes must be in (x0, y0, x1, y1) format.")
-    @property
-    def size(self) -> int:
-        return len(self.labels)
-    def __len__(self) -> int:
-        if self.source is None:
-            return len(self.labels)
-        return len(np.unique(self.source))
-    def __getitem__(self, idx: int, /) -> Targets:
-        if self.source is None or self.bboxes is None:
-            return Targets(
-                np.atleast_1d(self.labels[idx]),
-                np.atleast_2d(self.scores[idx]),
-                None,
-                None,
-            )
-        mask = np.where(self.source == idx, True, False)
-        return Targets(
-            np.atleast_1d(self.labels[mask]),
-            np.atleast_1d(self.scores[mask]),
-            np.atleast_2d(self.bboxes[mask]),
-            np.atleast_1d(self.source[mask]),
-        )
-    def __iter__(self) -> Iterator[Targets]:
-        for i in range(len(self.labels)) if self.source is None else np.unique(self.source):
-            yield self[i]

{dataeval-0.86.2.dist-info → dataeval-0.86.4.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{dataeval-0.86.2.dist-info → dataeval-0.86.4.dist-info}/WHEEL RENAMED Viewed

File without changes

dataeval 0.86.2__py3-none-any.whl → 0.86.4__py3-none-any.whl

dataeval 0.86.2__py3-none-any.whl → 0.86.4__py3-none-any.whl