PyPI - dataeval - Versions diffs - 0.86.2__tar.gz → 0.86.4__tar.gz - Mend

dataeval 0.86.2__tar.gz → 0.86.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

{dataeval-0.86.2 → dataeval-0.86.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.86.2
+Version: 0.86.4
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT

{dataeval-0.86.2 → dataeval-0.86.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dataeval"
-version = "0.86.2" # dynamic
+version = "0.86.4" # dynamic
 description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
 license = "MIT"
 readme = "README.md"

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/__init__.py RENAMED Viewed

@@ -8,7 +8,7 @@ shifts that impact performance of deployed models.
 from __future__ import annotations
 __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
-__version__ = "0.86.2"
+__version__ = "0.86.4"
 import logging

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/data/__init__.py RENAMED Viewed

@@ -6,7 +6,6 @@ __all__ = [
     "Metadata",
     "Select",
     "SplitDatasetOutput",
-    "Targets",
     "split_dataset",
 ]
@@ -15,5 +14,4 @@ from dataeval.data._images import Images
 from dataeval.data._metadata import Metadata
 from dataeval.data._selection import Select
 from dataeval.data._split import split_dataset
-from dataeval.data._targets import Targets
 from dataeval.outputs._utils import SplitDatasetOutput

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/data/_images.py RENAMED Viewed

@@ -4,6 +4,8 @@ __all__ = []
 from typing import TYPE_CHECKING, Any, Generic, Iterator, Sequence, TypeVar, cast, overload
+import numpy as np
 from dataeval.typing import Array, ArrayLike, Dataset
 from dataeval.utils._array import as_numpy, channels_first_to_last
@@ -58,7 +60,7 @@ class Images(Generic[T]):
         num_images = len(indices)
         num_rows = (num_images + images_per_row - 1) // images_per_row
         fig, axes = plt.subplots(num_rows, images_per_row, figsize=figsize)
-        for i, ax in enumerate(axes.flatten()):
+        for i, ax in enumerate(np.asarray(axes).flatten()):
             image = channels_first_to_last(as_numpy(self[i]))
             ax.imshow(image)
             ax.axis("off")

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/data/_metadata.py RENAMED Viewed

@@ -4,7 +4,7 @@ __all__ = []
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Iterable, Literal, Mapping, Sequence, Sized
+from typing import Any, Iterable, Literal, Mapping, Sequence
 import numpy as np
 import polars as pl
@@ -19,10 +19,9 @@ from dataeval.utils._array import as_numpy
 from dataeval.utils._bin import bin_data, digitize_data
 from dataeval.utils.data.metadata import merge
-if TYPE_CHECKING:
-    from dataeval.data import Targets
-else:
-    from dataeval.data._targets import Targets
+def _binned(name: str) -> str:
+    return f"{name}[]"
 @dataclass
@@ -51,20 +50,20 @@ class Metadata:
     def __init__(
         self,
-        dataset: AnnotatedDataset[tuple[Any, Any, dict[str, Any]]],
+        dataset: AnnotatedDataset[tuple[Any, Any, Mapping[str, Any]]],
         *,
         continuous_factor_bins: Mapping[str, int | Sequence[float]] | None = None,
         auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = "uniform_width",
         exclude: Sequence[str] | None = None,
         include: Sequence[str] | None = None,
     ) -> None:
-        self._targets: Targets
         self._class_labels: NDArray[np.intp]
         self._class_names: list[str]
         self._image_indices: NDArray[np.intp]
         self._factors: dict[str, FactorInfo]
         self._dropped_factors: dict[str, list[str]]
         self._dataframe: pl.DataFrame
+        self._raw: Sequence[Mapping[str, Any]]
         self._is_structured = False
         self._is_binned = False
@@ -80,13 +79,7 @@ class Metadata:
         self._include = set(include or ())
     @property
-    def targets(self) -> Targets:
-        """Target information for the dataset."""
-        self._structure()
-        return self._targets
-    @property
-    def raw(self) -> list[dict[str, Any]]:
+    def raw(self) -> Sequence[Mapping[str, Any]]:
         """The raw list of metadata dictionaries for the dataset."""
         self._structure()
         return self._raw
@@ -146,7 +139,7 @@ class Metadata:
         return self._dataframe
     @property
-    def dropped_factors(self) -> dict[str, list[str]]:
+    def dropped_factors(self) -> Mapping[str, Sequence[str]]:
         """Factors that were dropped during preprocessing and the reasons why they were dropped."""
         self._structure()
         return self._dropped_factors
@@ -165,16 +158,16 @@ class Metadata:
         )
     @property
-    def factor_names(self) -> list[str]:
+    def factor_names(self) -> Sequence[str]:
         """Factor names of the metadata."""
         self._structure()
-        return list(self._factors)
+        return list(filter(self._filter, self._factors))
     @property
-    def factor_info(self) -> dict[str, FactorInfo]:
+    def factor_info(self) -> Mapping[str, FactorInfo]:
         """Factor types of the metadata."""
         self._bin()
-        return self._factors
+        return dict(filter(self._filter, self._factors.items()))
     @property
     def factor_data(self) -> NDArray[Any]:
@@ -192,7 +185,7 @@ class Metadata:
         return self._class_labels
     @property
-    def class_names(self) -> list[str]:
+    def class_names(self) -> Sequence[str]:
         """Class names as a list of strings."""
         self._structure()
         return self._class_names
@@ -206,13 +199,17 @@ class Metadata:
     @property
     def image_count(self) -> int:
         self._bin()
-        return int(self._image_indices.max() + 1)
+        return 0 if self._image_indices.size == 0 else int(self._image_indices.max() + 1)
+    def _filter(self, factor: str | tuple[str, Any]) -> bool:
+        factor = factor[0] if isinstance(factor, tuple) else factor
+        return factor in self.include if self.include else factor not in self.exclude
     def _reset_bins(self, cols: Iterable[str] | None = None) -> None:
         if self._is_binned:
             columns = self._dataframe.columns
-            for col in (col for col in cols or columns if f"{col}[|]" in columns):
-                self._dataframe.drop_in_place(f"{col}[|]")
+            for col in (col for col in cols or columns if _binned(col) in columns):
+                self._dataframe.drop_in_place(_binned(col))
                 self._factors[col] = FactorInfo()
             self._is_binned = False
@@ -220,7 +217,7 @@ class Metadata:
         if self._is_structured:
             return
-        raw: list[dict[str, Any]] = []
+        raw: Sequence[Mapping[str, Any]] = []
         labels = []
         bboxes = []
@@ -255,6 +252,14 @@ class Metadata:
         bboxes = as_numpy(bboxes).astype(np.float32) if is_od else None
         srcidx = as_numpy(srcidx).astype(np.intp) if is_od else None
+        index2label = self._dataset.metadata.get("index2label", {i: str(i) for i in np.unique(labels)})
+        targets_per_image = None if srcidx is None else np.unique(srcidx, return_counts=True)[1].tolist()
+        merged = merge(raw, return_dropped=True, ignore_lists=False, targets_per_image=targets_per_image)
+        reserved = ["image_index", "class_label", "score", "box"]
+        factor_dict = {f"metadata_{k}" if k in reserved else k: v for k, v in merged[0].items() if k != "_image_index"}
         target_dict = {
             "image_index": srcidx if srcidx is not None else np.arange(len(labels)),
             "class_label": labels,
@@ -262,20 +267,11 @@ class Metadata:
             "box": bboxes if bboxes is not None else [None] * len(labels),
         }
-        self._targets = Targets(labels, scores, bboxes, srcidx)
         self._raw = raw
-        index2label = self._dataset.metadata.get("index2label", {})
+        self._index2label = index2label
         self._class_labels = labels
-        self._class_names = [index2label.get(i, str(i)) for i in np.unique(self._class_labels)]
+        self._class_names = list(index2label.values())
         self._image_indices = target_dict["image_index"]
-        targets_per_image = None if srcidx is None else np.unique(srcidx, return_counts=True)[1].tolist()
-        merged = merge(raw, return_dropped=True, ignore_lists=False, targets_per_image=targets_per_image)
-        reserved = ["image_index", "class_label", "score", "box"]
-        factor_dict = {f"metadata_{k}" if k in reserved else k: v for k, v in merged[0].items() if k != "_image_index"}
         self._factors = dict.fromkeys(factor_dict, FactorInfo())
         self._dataframe = pl.DataFrame({**target_dict, **factor_dict})
         self._dropped_factors = merged[1]
@@ -302,10 +298,10 @@ class Metadata:
             )
         column_set = set(df.columns)
-        for col in (col for col in self.factor_names if f"{col}[|]" not in column_set):
+        for col in (col for col in self.factor_names if _binned(col) not in column_set):
             # Get data as numpy array for processing
             data = df[col].to_numpy()
-            col_dz = f"{col}[|]"
+            col_dz = _binned(col)
             if col in factor_bins:
                 # User provided binning
                 bins = factor_bins[col]
@@ -332,31 +328,14 @@ class Metadata:
                     df = df.with_columns(pl.Series(name=col_dz, values=binned_data.astype(np.int64)))
                     factor_info[col] = FactorInfo("continuous", col_dz)
                 else:
-                    factor_info[col] = FactorInfo("discrete", col_dz)
+                    factor_info[col] = FactorInfo("discrete", col)
         # Store the results
         self._dataframe = df
         self._factors.update(factor_info)
         self._is_binned = True
-    def get_factors_by_type(self, factor_type: Literal["categorical", "continuous", "discrete"]) -> list[str]:
-        """
-        Get the names of factors of a specific type.
-        Parameters
-        ----------
-        factor_type : Literal["categorical", "continuous", "discrete"]
-            The type of factors to retrieve.
-        Returns
-        -------
-        list[str]
-            List of factor names of the specified type.
-        """
-        self._bin()
-        return [name for name, info in self.factor_info.items() if info.factor_type == factor_type]
-    def add_factors(self, factors: Mapping[str, Any]) -> None:
+    def add_factors(self, factors: Mapping[str, Array | Sequence[Any]]) -> None:
         """
         Add additional factors to the metadata.
@@ -365,16 +344,15 @@ class Metadata:
         Parameters
         ----------
-        factors : Mapping[str, ArrayLike]
+        factors : Mapping[str, Array | Sequence[Any]]
             Dictionary of factors to add to the metadata.
         """
         self._structure()
-        targets = len(self.targets.source) if self.targets.source is not None else len(self.targets)
+        targets = len(self.dataframe)
         images = self.image_count
-        lengths = {k: len(v if isinstance(v, Sized) else np.atleast_1d(as_numpy(v))) for k, v in factors.items()}
-        targets_match = all(f == targets for f in lengths.values())
-        images_match = targets_match if images == targets else all(f == images for f in lengths.values())
+        targets_match = all(len(v) == targets for v in factors.values())
+        images_match = targets_match if images == targets else all(len(v) == images for v in factors.values())
         if not targets_match and not images_match:
             raise ValueError(
                 "The lists/arrays in the provided factors have a different length than the current metadata factors."
@@ -382,8 +360,7 @@ class Metadata:
         new_columns = []
         for k, v in factors.items():
-            v = as_numpy(v)
-            data = v if (self.targets.source is None or lengths[k] == targets) else v[self.targets.source]
+            data = as_numpy(v)[self.image_indices]
             new_columns.append(pl.Series(name=k, values=data))
             self._factors[k] = FactorInfo()

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/data/selections/_classfilter.py RENAMED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 __all__ = []
-from typing import Any, Generic, Iterable, Sequence, Sized, TypeVar, cast
+from typing import Any, Generic, Iterable, Mapping, Sequence, Sized, TypeVar, cast
 import numpy as np
 from numpy.typing import NDArray
@@ -92,7 +92,7 @@ class ClassFilterSubSelection(Subselection[Any]):
     def __init__(self, classes: Sequence[int]) -> None:
         self.classes = classes
-    def _filter(self, d: dict[str, Any], mask: NDArray[np.bool_]) -> dict[str, Any]:
+    def _filter(self, d: Mapping[str, Any], mask: NDArray[np.bool_]) -> dict[str, Any]:
         return {k: self._filter(v, mask) if isinstance(v, dict) else _try_mask_object(v, mask) for k, v in d.items()}
     def __call__(self, datum: _TDatum) -> _TDatum:

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/metadata/_distance.py RENAMED Viewed

@@ -81,7 +81,7 @@ def metadata_distance(metadata1: Metadata, metadata2: Metadata) -> MetadataDista
     """
     _compare_keys(metadata1.factor_names, metadata2.factor_names)
-    cont_fnames = metadata1.get_factors_by_type("continuous")
+    cont_fnames = [name for name, info in metadata1.factor_info.items() if info.factor_type == "continuous"]
     if not cont_fnames:
         return MetadataDistanceOutput({})

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/metadata/_utils.py RENAMED Viewed

@@ -1,9 +1,11 @@
 __all__ = []
+from typing import Sequence
 from numpy.typing import NDArray
-def _compare_keys(keys1: list[str], keys2: list[str]) -> None:
+def _compare_keys(keys1: Sequence[str], keys2: Sequence[str]) -> None:
     """
     Raises error when two lists are not equivalent including ordering
@@ -24,7 +26,7 @@ def _compare_keys(keys1: list[str], keys2: list[str]) -> None:
         raise ValueError(f"Metadata keys must be identical, got {keys1} and {keys2}")
-def _validate_factors_and_data(factors: list[str], data: NDArray) -> None:
+def _validate_factors_and_data(factors: Sequence[str], data: NDArray) -> None:
     """
     Raises error when the number of factors and number of rows do not match

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/metrics/bias/_balance.py RENAMED Viewed

@@ -99,9 +99,10 @@ def balance(
     factor_types = {"class_label": "categorical"} | {k: v.factor_type for k, v in metadata.factor_info.items()}
     is_discrete = [factor_type != "continuous" for factor_type in factor_types.values()]
     num_factors = len(factor_types)
+    class_labels = metadata.class_labels
     mi = np.full((num_factors, num_factors), np.nan, dtype=np.float32)
-    data = np.hstack((metadata.class_labels[:, np.newaxis], data))
+    data = np.hstack((class_labels[:, np.newaxis], data))
     for idx, factor_type in enumerate(factor_types.values()):
         if factor_type != "continuous":
@@ -132,12 +133,12 @@ def balance(
     factors = nmi[1:, 1:]
     # assume class is a factor
-    num_classes = len(metadata.class_names)
+    u_classes = np.unique(class_labels)
+    num_classes = len(u_classes)
     classwise_mi = np.full((num_classes, num_factors), np.nan, dtype=np.float32)
     # classwise targets
-    classes = np.unique(metadata.class_labels)
-    tgt_bin = data[:, 0][:, None] == classes
+    tgt_bin = data[:, 0][:, None] == u_classes
     # classification MI for discrete/categorical features
     for idx in range(num_classes):
@@ -157,6 +158,6 @@ def balance(
     classwise = classwise_mi / norm_factor
     # Grabbing factor names for plotting function
-    factor_names = ["class_label"] + metadata.factor_names
+    factor_names = ["class_label"] + list(metadata.factor_names)
     return BalanceOutput(balance, factors, classwise, factor_names, metadata.class_names)

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/metrics/bias/_parity.py RENAMED Viewed

@@ -259,7 +259,8 @@ def parity(metadata: Metadata) -> ParityOutput:
         counts = np.nonzero(contingency_matrix < 5)
         unique_factor_values = np.unique(col_data)
         current_factor_name = metadata.factor_names[i]
-        for int_factor, int_class in zip(counts[0], counts[1]):
+        for _factor, _class in zip(counts[0], counts[1]):
+            int_factor, int_class = int(_factor), int(_class)
             if contingency_matrix[int_factor, int_class] > 0:
                 factor_category = unique_factor_values[int_factor].item()
                 class_name = metadata.class_names[int_class]

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/metrics/stats/_labelstats.py RENAMED Viewed

@@ -2,8 +2,9 @@ from __future__ import annotations
 __all__ = []
-from collections import Counter, defaultdict
-from typing import Any, Mapping, TypeVar
+from typing import Any, TypeVar
+import polars as pl
 from dataeval.data._metadata import Metadata
 from dataeval.outputs import LabelStatsOutput
@@ -13,10 +14,6 @@ from dataeval.typing import AnnotatedDataset
 TValue = TypeVar("TValue")
-def _sort_to_list(d: Mapping[int, TValue]) -> list[TValue]:
-    return [t[1] for t in sorted(d.items())]
 @set_metadata
 def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
     """
@@ -52,39 +49,34 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
         pig:      2      -      2
     chicken:      5      -      5
     """
-    dataset = Metadata(dataset) if isinstance(dataset, AnnotatedDataset) else dataset
-    label_counts: Counter[int] = Counter()
-    image_counts: Counter[int] = Counter()
-    index_location = defaultdict(list[int])
-    label_per_image: list[int] = []
-    index2label = dict(enumerate(dataset.class_names))
-    for i, target in enumerate(dataset.targets):
-        group = target.labels.tolist()
+    metadata = Metadata(dataset) if isinstance(dataset, AnnotatedDataset) else dataset
+    metadata_df = metadata.dataframe
-        # Count occurrences of each label in all sublists
-        label_counts.update(group)
+    # Count occurrences of each label across all images
+    label_counts_df = metadata_df.group_by("class_label").len()
+    label_counts = dict(zip(label_counts_df["class_label"], label_counts_df["len"]))
-        # Get the number of labels per image
-        label_per_image.append(len(group))
+    # Count unique images per label (how many images contain each label)
+    image_counts_df = metadata_df.select(["image_index", "class_label"]).unique().group_by("class_label").len()
+    image_counts = dict(zip(image_counts_df["class_label"], image_counts_df["len"]))
-        # Create a set of unique items in the current sublist
-        unique_items: set[int] = set(group)
+    # Create index_location mapping (which images contain each label)
+    index_location: dict[int, list[int]] = {}
+    for row in metadata_df.group_by("class_label").agg(pl.col("image_index")).to_dicts():
+        indices = row["image_index"]
+        index_location[row["class_label"]] = sorted(dict.fromkeys(indices)) if isinstance(indices, list) else [indices]
-        # Update image counts and index locations
-        image_counts.update(unique_items)
-        for item in unique_items:
-            index_location[item].append(i)
+    # Count labels per image
+    label_per_image_df = metadata_df.group_by("image_index").agg(pl.len().alias("label_count"))
+    label_per_image = label_per_image_df.sort("image_index")["label_count"].to_list()
     return LabelStatsOutput(
-        label_counts_per_class=_sort_to_list(label_counts),
+        label_counts_per_class=label_counts,
         label_counts_per_image=label_per_image,
-        image_counts_per_class=_sort_to_list(image_counts),
-        image_indices_per_class=_sort_to_list(index_location),
+        image_counts_per_class=image_counts,
+        image_indices_per_class=index_location,
         image_count=len(label_per_image),
-        class_count=len(label_counts),
+        class_count=len(metadata.class_names),
         label_count=sum(label_counts.values()),
-        class_names=list(index2label.values()),
+        class_names=metadata.class_names,
     )

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/outputs/_base.py RENAMED Viewed

@@ -147,7 +147,7 @@ P = ParamSpec("P")
 R = TypeVar("R", bound=GenericOutput)
-def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
+def set_metadata(fn: Callable[P, R] | None = None, *, state: Sequence[str] | None = None) -> Callable[P, R]:
     """Decorator to stamp Output classes with runtime metadata"""
     if fn is None:

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/outputs/_bias.py RENAMED Viewed

@@ -4,7 +4,7 @@ __all__ = []
 import contextlib
 from dataclasses import asdict, dataclass
-from typing import Any, TypeVar
+from typing import Any, Mapping, Sequence, TypeVar
 import numpy as np
 import pandas as pd
@@ -39,7 +39,7 @@ class ToDataFrameMixin:
         This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
         """
         return pd.DataFrame(
-            index=self.factor_names,  # type: ignore - list[str] is documented as acceptable index type
+            index=self.factor_names,  # type: ignore - Sequence[str] is documented as acceptable index type
             data={
                 "score": self.score.round(2),
                 "p-value": self.p_value.round(2),
@@ -58,7 +58,7 @@ class ParityOutput(ToDataFrameMixin, Output):
         chi-squared score(s) of the test
     p_value : NDArray[np.float64]
         p-value(s) of the test
-    factor_names : list[str]
+    factor_names : Sequence[str]
         Names of each metadata factor
     insufficient_data: dict
         Dictionary of metadata factors with less than 5 class occurrences per value
@@ -66,8 +66,8 @@ class ParityOutput(ToDataFrameMixin, Output):
     score: NDArray[np.float64]
     p_value: NDArray[np.float64]
-    factor_names: list[str]
-    insufficient_data: dict[str, dict[int, dict[str, int]]]
+    factor_names: Sequence[str]
+    insufficient_data: Mapping[str, Mapping[int, Mapping[str, int]]]
 @dataclass(frozen=True)
@@ -145,12 +145,15 @@ class CoverageOutput(Output):
         cols = min(3, num_images)
         fig, axs = plt.subplots(rows, cols, figsize=(3 * cols, 3 * rows))
-        for image, ax in zip(images[:num_images], axs.flat):
+        # Flatten axes using numpy array explicitly for compatibility
+        axs_flat = np.asarray(axs).flatten()
+        for image, ax in zip(images[:num_images], axs_flat):
             image = channels_first_to_last(as_numpy(image))
             ax.imshow(image)
             ax.axis("off")
-        for ax in axs.flat[num_images:]:
+        for ax in axs_flat[num_images:]:
             ax.axis("off")
         fig.tight_layout()
@@ -187,22 +190,22 @@ class BalanceOutput(Output):
         Estimate of inter/intra-factor mutual information
     classwise : NDArray[np.float64]
         Estimate of mutual information between metadata factors and individual class labels
-    factor_names : list[str]
+    factor_names : Sequence[str]
         Names of each metadata factor
-    class_names : list[str]
+    class_names : Sequence[str]
         List of the class labels present in the dataset
     """
     balance: NDArray[np.float64]
     factors: NDArray[np.float64]
     classwise: NDArray[np.float64]
-    factor_names: list[str]
-    class_names: list[str]
+    factor_names: Sequence[str]
+    class_names: Sequence[str]
     def plot(
         self,
-        row_labels: list[Any] | NDArray[Any] | None = None,
-        col_labels: list[Any] | NDArray[Any] | None = None,
+        row_labels: Sequence[Any] | NDArray[Any] | None = None,
+        col_labels: Sequence[Any] | NDArray[Any] | None = None,
         plot_classwise: bool = False,
     ) -> Figure:
         """
@@ -276,16 +279,16 @@ class DiversityOutput(Output):
         :term:`Diversity` index for classes and factors
     classwise : NDArray[np.double]
         Classwise diversity index [n_class x n_factor]
-    factor_names : list[str]
+    factor_names : Sequence[str]
         Names of each metadata factor
-    class_names : list[str]
+    class_names : Sequence[str]
         Class labels for each value in the dataset
     """
     diversity_index: NDArray[np.double]
     classwise: NDArray[np.double]
-    factor_names: list[str]
-    class_names: list[str]
+    factor_names: Sequence[str]
+    class_names: Sequence[str]
     def plot(
         self,
@@ -333,7 +336,7 @@ class DiversityOutput(Output):
             import matplotlib.pyplot as plt
             fig, ax = plt.subplots(figsize=(8, 8))
-            heat_labels = ["class_labels"] + self.factor_names
+            heat_labels = ["class_labels"] + list(self.factor_names)
             ax.bar(heat_labels, self.diversity_index)
             ax.set_xlabel("Factors")
             plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

{dataeval-0.86.2 → dataeval-0.86.4}/src/dataeval/outputs/_estimators.py RENAMED Viewed

@@ -3,6 +3,7 @@ from __future__ import annotations
 __all__ = []
 from dataclasses import dataclass
+from typing import Sequence
 import numpy as np
 from numpy.typing import NDArray
@@ -64,7 +65,7 @@ class ClustererOutput(Output):
         """
         return np.nonzero(self.clusters == -1)[0]
-    def find_duplicates(self) -> tuple[list[list[int]], list[list[int]]]:
+    def find_duplicates(self) -> tuple[Sequence[Sequence[int]], Sequence[Sequence[int]]]:
         """
         Finds duplicate and near duplicate data based on cluster average distance

dataeval 0.86.2__tar.gz → 0.86.4__tar.gz

dataeval 0.86.2__tar.gz → 0.86.4__tar.gz