dataeval 0.86.0__tar.gz → 0.86.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {dataeval-0.86.0 → dataeval-0.86.2}/PKG-INFO +2 -1
  2. {dataeval-0.86.0 → dataeval-0.86.2}/pyproject.toml +6 -4
  3. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/__init__.py +1 -1
  4. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/_log.py +1 -1
  5. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/config.py +21 -4
  6. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_embeddings.py +2 -2
  7. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_images.py +2 -3
  8. dataeval-0.86.2/src/dataeval/data/_metadata.py +392 -0
  9. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_selection.py +1 -2
  10. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_split.py +4 -5
  11. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_targets.py +17 -13
  12. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/_classfilter.py +2 -5
  13. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/_prioritize.py +6 -9
  14. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/_shuffle.py +3 -1
  15. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_base.py +4 -5
  16. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_mmd.py +3 -6
  17. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_nml/_base.py +4 -2
  18. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_nml/_chunk.py +11 -19
  19. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_nml/_domainclassifier.py +8 -19
  20. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_nml/_result.py +8 -9
  21. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_nml/_thresholds.py +66 -77
  22. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/linters/outliers.py +7 -7
  23. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metadata/_distance.py +10 -7
  24. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metadata/_ood.py +11 -103
  25. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/bias/_balance.py +23 -33
  26. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/bias/_diversity.py +16 -14
  27. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/bias/_parity.py +18 -18
  28. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/estimators/_divergence.py +2 -4
  29. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/_base.py +103 -42
  30. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/_boxratiostats.py +21 -19
  31. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/_dimensionstats.py +14 -10
  32. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/_hashstats.py +1 -1
  33. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/_pixelstats.py +6 -6
  34. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/_visualstats.py +3 -3
  35. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_base.py +22 -7
  36. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_bias.py +24 -70
  37. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_drift.py +1 -9
  38. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_linters.py +11 -11
  39. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_stats.py +82 -23
  40. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_workflows.py +2 -2
  41. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/_array.py +6 -9
  42. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/_bin.py +1 -2
  43. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/_clusterer.py +7 -4
  44. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/_fast_mst.py +27 -13
  45. dataeval-0.86.2/src/dataeval/utils/_image.py +127 -0
  46. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/_mst.py +1 -3
  47. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/_plot.py +15 -10
  48. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/data/_dataset.py +54 -28
  49. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/data/metadata.py +104 -82
  50. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/__init__.py +2 -0
  51. dataeval-0.86.2/src/dataeval/utils/datasets/_antiuav.py +189 -0
  52. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/_base.py +11 -8
  53. dataeval-0.86.2/src/dataeval/utils/datasets/_cifar10.py +201 -0
  54. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/_fileio.py +21 -47
  55. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/_milco.py +22 -12
  56. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/_mixin.py +2 -4
  57. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/_mnist.py +3 -4
  58. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/_ships.py +14 -7
  59. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/_voc.py +229 -42
  60. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/torch/models.py +5 -10
  61. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/torch/trainer.py +3 -3
  62. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/workflows/sufficiency.py +2 -2
  63. dataeval-0.86.0/src/dataeval/data/_metadata.py +0 -382
  64. dataeval-0.86.0/src/dataeval/detectors/ood/vae.py +0 -74
  65. dataeval-0.86.0/src/dataeval/utils/_image.py +0 -73
  66. dataeval-0.86.0/src/dataeval/utils/datasets/_cifar10.py +0 -142
  67. {dataeval-0.86.0 → dataeval-0.86.2}/LICENSE.txt +0 -0
  68. {dataeval-0.86.0 → dataeval-0.86.2}/README.md +0 -0
  69. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/__init__.py +0 -0
  70. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/__init__.py +0 -0
  71. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/_classbalance.py +0 -0
  72. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/_indices.py +0 -0
  73. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/_limit.py +0 -0
  74. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/_reverse.py +0 -0
  75. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/__init__.py +0 -0
  76. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/__init__.py +0 -0
  77. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_cvm.py +0 -0
  78. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_ks.py +0 -0
  79. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_mvdc.py +0 -0
  80. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_nml/__init__.py +0 -0
  81. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/_uncertainty.py +0 -0
  82. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/drift/updates.py +0 -0
  83. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/linters/__init__.py +0 -0
  84. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/linters/duplicates.py +0 -0
  85. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/ood/__init__.py +0 -0
  86. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/ood/ae.py +0 -0
  87. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/ood/base.py +0 -0
  88. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/detectors/ood/mixin.py +0 -0
  89. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metadata/__init__.py +0 -0
  90. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metadata/_utils.py +0 -0
  91. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/__init__.py +0 -0
  92. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/bias/__init__.py +0 -0
  93. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/bias/_completeness.py +0 -0
  94. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/bias/_coverage.py +0 -0
  95. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/estimators/__init__.py +0 -0
  96. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/estimators/_ber.py +0 -0
  97. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/estimators/_clusterer.py +0 -0
  98. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/estimators/_uap.py +0 -0
  99. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/__init__.py +0 -0
  100. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/_imagestats.py +0 -0
  101. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/metrics/stats/_labelstats.py +0 -0
  102. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/__init__.py +0 -0
  103. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_estimators.py +0 -0
  104. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_metadata.py +0 -0
  105. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_ood.py +0 -0
  106. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/outputs/_utils.py +0 -0
  107. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/py.typed +0 -0
  108. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/typing.py +0 -0
  109. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/__init__.py +0 -0
  110. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/_method.py +0 -0
  111. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/data/__init__.py +0 -0
  112. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/data/collate.py +0 -0
  113. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/datasets/_types.py +0 -0
  114. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/torch/__init__.py +0 -0
  115. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/torch/_blocks.py +0 -0
  116. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/torch/_gmm.py +0 -0
  117. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/utils/torch/_internal.py +0 -0
  118. {dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/workflows/__init__.py +0 -0
{dataeval-0.86.0 → dataeval-0.86.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.86.0
+Version: 0.86.2
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
@@ -29,6 +29,7 @@ Requires-Dist: numba (>=0.59.1)
 Requires-Dist: numpy (>=1.24.2)
 Requires-Dist: pandas (>=2.0)
 Requires-Dist: pillow (>=10.3.0)
+Requires-Dist: polars (>=1.0.0)
 Requires-Dist: requests
 Requires-Dist: scikit-learn (>=1.5.0)
 Requires-Dist: scipy (>=1.10)
{dataeval-0.86.0 → dataeval-0.86.2}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dataeval"
-version = "0.86.0" # dynamic
+version = "0.86.2" # dynamic
 description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
 license = "MIT"
 readme = "README.md"
@@ -49,6 +49,7 @@ numba = {version = ">=0.59.1"}
 numpy = {version = ">=1.24.2"}
 pandas = {version = ">=2.0"}
 pillow = {version = ">=10.3.0"}
+polars = {version = ">=1.0.0"}
 requests = {version = "*"}
 scipy = {version = ">=1.10"}
 scikit-learn = {version = ">=1.5.0"}
@@ -134,6 +135,7 @@ markers = [
     "optional: marks tests for optional features",
     "requires_all: marks tests that require the all extras",
     "cuda: marks tests that require cuda",
+    "year: marks tests that need a specified dataset year",
 ]

 [tool.coverage.run]
@@ -175,12 +177,12 @@ target-version = "py38"
 extend-include = ["*.ipynb"]

 [tool.ruff.lint]
-select = ["A", "E", "F", "C4", "I", "UP", "NPY", "SIM", "RUF100"]
-ignore = ["NPY002"]
+select = ["A", "ANN", "C4", "C90", "E", "F", "I", "NPY", "S", "SIM", "RET", "RUF100", "UP"]
+ignore = ["ANN401", "NPY002"]
 fixable = ["ALL"]
 unfixable = []
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
-per-file-ignores = { "*.ipynb" = ["E402"] }
+per-file-ignores = { "*.ipynb" = ["E402"], "!src/*" = ["ANN", "S", "RET"]}

 [tool.ruff.lint.isort]
 known-first-party = ["dataeval"]
{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/__init__.py

@@ -8,7 +8,7 @@ shifts that impact performance of deployed models.
 from __future__ import annotations

 __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
-__version__ = "0.86.0"
+__version__ = "0.86.2"

 import logging

{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/_log.py

@@ -8,7 +8,7 @@ class LogMessage:
     Deferred message callback for logging expensive messages.
     """

-    def __init__(self, fn: Callable[..., str]):
+    def __init__(self, fn: Callable[..., str]) -> None:
         self._fn = fn
         self._str = None

{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/config.py

@@ -4,10 +4,10 @@ Global configuration settings for DataEval.

 from __future__ import annotations

-__all__ = ["get_device", "set_device", "get_max_processes", "set_max_processes", "DeviceLike"]
+__all__ = ["get_device", "set_device", "get_max_processes", "set_max_processes", "use_max_processes", "DeviceLike"]

 import sys
-from typing import Union
+from typing import Any, Union

 if sys.version_info >= (3, 10):
     from typing import TypeAlias
@@ -78,8 +78,7 @@ def get_device(override: DeviceLike | None = None) -> torch.device:
     if override is None:
         global _device
         return torch.get_default_device() if _device is None else _device
-    else:
-        return _todevice(override)
+    return _todevice(override)


 def set_max_processes(processes: int | None) -> None:
@@ -112,6 +111,24 @@ def get_max_processes() -> int | None:
     return _processes


+class MaxProcessesContextManager:
+    def __init__(self, processes: int) -> None:
+        self._processes = processes
+
+    def __enter__(self) -> None:
+        global _processes
+        self._old = _processes
+        set_max_processes(self._processes)
+
+    def __exit__(self, *args: tuple[Any, ...]) -> None:
+        global _processes
+        _processes = self._old
+
+
+def use_max_processes(processes: int) -> MaxProcessesContextManager:
+    return MaxProcessesContextManager(processes)
+
+
 def set_seed(seed: int | None, all_generators: bool = False) -> None:
     """
     Sets the seed for use by classes that allow for a random state or seed.
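Note: the new `use_max_processes` helper scopes the global process cap and restores the prior value on exit. A minimal usage sketch (assuming dataeval 0.86.2; the surrounding workload is illustrative only):

    # Temporarily cap worker processes for a block of work.
    from dataeval.config import get_max_processes, use_max_processes

    with use_max_processes(4):
        assert get_max_processes() == 4
        # ... run stats/metrics that honor get_max_processes() ...
    # the previous setting is restored when the block exits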
{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_embeddings.py

@@ -144,8 +144,7 @@ class Embeddings:
         """
         if indices is not None:
             return torch.vstack(list(self._batch(indices))).to(self.device)
-        else:
-            return self[:]
+        return self[:]

     def to_numpy(self, indices: Sequence[int] | None = None) -> NDArray[Any]:
         """
@@ -248,6 +247,7 @@ class Embeddings:
             _logger.log(logging.DEBUG, f"Saved embeddings cache from {path}")
         except Exception as e:
             _logger.log(logging.ERROR, f"Failed to save embeddings cache: {e}")
+            raise e

     @classmethod
     def load(cls, path: Path | str) -> Embeddings:
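Note: with the added `raise e`, cache-save failures now propagate instead of being only logged. A hedged caller-side sketch (assuming `Embeddings.save(path)` mirrors the `load` signature shown above; the path is illustrative):

    # Cache-save failures are now re-raised after being logged.
    try:
        embeddings.save("embeddings.pt")
    except Exception:
        ...  # handle or surface the failure; in 0.86.0 it was swallowed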
{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_images.py

@@ -73,15 +73,14 @@ class Images(Generic[T]):
     def __getitem__(self, key: int | slice, /) -> Sequence[T] | T:
         if isinstance(key, slice):
             return [self._get_image(k) for k in range(len(self._dataset))[key]]
-        elif hasattr(key, "__int__"):
+        if hasattr(key, "__int__"):
             return self._get_image(int(key))
         raise TypeError(f"Key must be integers or slices, not {type(key)}")

     def _get_image(self, index: int) -> T:
         if self._is_tuple_datum:
             return cast(Dataset[tuple[T, Any, Any]], self._dataset)[index][0]
-        else:
-            return cast(Dataset[T], self._dataset)[index]
+        return cast(Dataset[T], self._dataset)[index]

     def __iter__(self) -> Iterator[T]:
         for i in range(len(self._dataset)):
dataeval-0.86.2/src/dataeval/data/_metadata.py (new file)

@@ -0,0 +1,392 @@
+from __future__ import annotations
+
+__all__ = []
+
+import warnings
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Mapping, Sequence, Sized
+
+import numpy as np
+import polars as pl
+from numpy.typing import NDArray
+
+from dataeval.typing import (
+    AnnotatedDataset,
+    Array,
+    ObjectDetectionTarget,
+)
+from dataeval.utils._array import as_numpy
+from dataeval.utils._bin import bin_data, digitize_data
+from dataeval.utils.data.metadata import merge
+
+if TYPE_CHECKING:
+    from dataeval.data import Targets
+else:
+    from dataeval.data._targets import Targets
+
+
+@dataclass
+class FactorInfo:
+    factor_type: Literal["categorical", "continuous", "discrete"] | None = None
+    discretized_col: str | None = None
+
+
+class Metadata:
+    """
+    Class containing binned metadata using Polars DataFrames.
+
+    Parameters
+    ----------
+    dataset : ImageClassificationDataset or ObjectDetectionDataset
+        Dataset to access original targets and metadata from.
+    continuous_factor_bins : Mapping[str, int | Sequence[float]] | None, default None
+        Mapping from continuous factor name to the number of bins or bin edges
+    auto_bin_method : Literal["uniform_width", "uniform_count", "clusters"], default "uniform_width"
+        Method for automatically determining the number of bins for continuous factors
+    exclude : Sequence[str] | None, default None
+        Filter metadata factors to exclude the specified factors, cannot be set with `include`
+    include : Sequence[str] | None, default None
+        Filter metadata factors to include the specified factors, cannot be set with `exclude`
+    """
+
+    def __init__(
+        self,
+        dataset: AnnotatedDataset[tuple[Any, Any, dict[str, Any]]],
+        *,
+        continuous_factor_bins: Mapping[str, int | Sequence[float]] | None = None,
+        auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = "uniform_width",
+        exclude: Sequence[str] | None = None,
+        include: Sequence[str] | None = None,
+    ) -> None:
+        self._targets: Targets
+        self._class_labels: NDArray[np.intp]
+        self._class_names: list[str]
+        self._image_indices: NDArray[np.intp]
+        self._factors: dict[str, FactorInfo]
+        self._dropped_factors: dict[str, list[str]]
+        self._dataframe: pl.DataFrame
+
+        self._is_structured = False
+        self._is_binned = False
+
+        self._dataset = dataset
+        self._continuous_factor_bins = dict(continuous_factor_bins) if continuous_factor_bins else {}
+        self._auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = auto_bin_method
+
+        if exclude is not None and include is not None:
+            raise ValueError("Filters for `exclude` and `include` are mutually exclusive.")
+
+        self._exclude = set(exclude or ())
+        self._include = set(include or ())
+
+    @property
+    def targets(self) -> Targets:
+        """Target information for the dataset."""
+        self._structure()
+        return self._targets
+
+    @property
+    def raw(self) -> list[dict[str, Any]]:
+        """The raw list of metadata dictionaries for the dataset."""
+        self._structure()
+        return self._raw
+
+    @property
+    def exclude(self) -> set[str]:
+        """Factors to exclude from the metadata."""
+        return self._exclude
+
+    @exclude.setter
+    def exclude(self, value: Sequence[str]) -> None:
+        exclude = set(value)
+        if self._exclude != exclude:
+            self._exclude = exclude
+            self._include = set()
+            self._is_binned = False
+
+    @property
+    def include(self) -> set[str]:
+        """Factors to include from the metadata."""
+        return self._include
+
+    @include.setter
+    def include(self, value: Sequence[str]) -> None:
+        include = set(value)
+        if self._include != include:
+            self._include = include
+            self._exclude = set()
+            self._is_binned = False
+
+    @property
+    def continuous_factor_bins(self) -> Mapping[str, int | Sequence[float]]:
+        """Map of factor names to bin counts or bin edges."""
+        return self._continuous_factor_bins
+
+    @continuous_factor_bins.setter
+    def continuous_factor_bins(self, bins: Mapping[str, int | Sequence[float]]) -> None:
+        if self._continuous_factor_bins != bins:
+            self._continuous_factor_bins = dict(bins)
+            self._reset_bins(bins)
+
+    @property
+    def auto_bin_method(self) -> Literal["uniform_width", "uniform_count", "clusters"]:
+        """Binning method to use when continuous_factor_bins is not defined."""
+        return self._auto_bin_method
+
+    @auto_bin_method.setter
+    def auto_bin_method(self, method: Literal["uniform_width", "uniform_count", "clusters"]) -> None:
+        if self._auto_bin_method != method:
+            self._auto_bin_method = method
+            self._reset_bins()
+
+    @property
+    def dataframe(self) -> pl.DataFrame:
+        """Dataframe containing target information and metadata factors."""
+        self._structure()
+        return self._dataframe
+
+    @property
+    def dropped_factors(self) -> dict[str, list[str]]:
+        """Factors that were dropped during preprocessing and the reasons why they were dropped."""
+        self._structure()
+        return self._dropped_factors
+
+    @property
+    def discretized_data(self) -> NDArray[np.int64]:
+        """Factor data with continuous data discretized."""
+        if not self.factor_names:
+            return np.array([], dtype=np.int64)
+
+        self._bin()
+        return (
+            self.dataframe.select([info.discretized_col or name for name, info in self.factor_info.items()])
+            .to_numpy()
+            .astype(np.int64)
+        )
+
+    @property
+    def factor_names(self) -> list[str]:
+        """Factor names of the metadata."""
+        self._structure()
+        return list(self._factors)
+
+    @property
+    def factor_info(self) -> dict[str, FactorInfo]:
+        """Factor types of the metadata."""
+        self._bin()
+        return self._factors
+
+    @property
+    def factor_data(self) -> NDArray[Any]:
+        """Factor data as a NumPy array."""
+        if not self.factor_names:
+            return np.array([], dtype=np.float64)
+
+        # Extract continuous columns and convert to NumPy array
+        return self.dataframe.select(self.factor_names).to_numpy()
+
+    @property
+    def class_labels(self) -> NDArray[np.intp]:
+        """Class labels as a NumPy array."""
+        self._structure()
+        return self._class_labels
+
+    @property
+    def class_names(self) -> list[str]:
+        """Class names as a list of strings."""
+        self._structure()
+        return self._class_names
+
+    @property
+    def image_indices(self) -> NDArray[np.intp]:
+        """Indices of images as a NumPy array."""
+        self._bin()
+        return self._image_indices
+
+    @property
+    def image_count(self) -> int:
+        self._bin()
+        return int(self._image_indices.max() + 1)
+
+    def _reset_bins(self, cols: Iterable[str] | None = None) -> None:
+        if self._is_binned:
+            columns = self._dataframe.columns
+            for col in (col for col in cols or columns if f"{col}[|]" in columns):
+                self._dataframe.drop_in_place(f"{col}[|]")
+                self._factors[col] = FactorInfo()
+            self._is_binned = False
+
+    def _structure(self) -> None:
+        if self._is_structured:
+            return
+
+        raw: list[dict[str, Any]] = []
+
+        labels = []
+        bboxes = []
+        scores = []
+        srcidx = []
+        is_od = None
+        for i in range(len(self._dataset)):
+            _, target, metadata = self._dataset[i]
+
+            raw.append(metadata)
+
+            if is_od_target := isinstance(target, ObjectDetectionTarget):
+                target_labels = as_numpy(target.labels)
+                target_len = len(target_labels)
+                labels.extend(target_labels.tolist())
+                bboxes.extend(as_numpy(target.boxes).tolist())
+                scores.extend(as_numpy(target.scores).tolist())
+                srcidx.extend([i] * target_len)
+            elif isinstance(target, Array):
+                target_len = 1
+                labels.append(int(np.argmax(as_numpy(target))))
+                scores.append(target)
+            else:
+                raise TypeError("Encountered unsupported target type in dataset")
+
+            is_od = is_od_target if is_od is None else is_od
+            if is_od != is_od_target:
+                raise ValueError("Encountered unexpected target type in dataset")
+
+        labels = as_numpy(labels).astype(np.intp)
+        scores = as_numpy(scores).astype(np.float32)
+        bboxes = as_numpy(bboxes).astype(np.float32) if is_od else None
+        srcidx = as_numpy(srcidx).astype(np.intp) if is_od else None
+
+        target_dict = {
+            "image_index": srcidx if srcidx is not None else np.arange(len(labels)),
+            "class_label": labels,
+            "score": scores,
+            "box": bboxes if bboxes is not None else [None] * len(labels),
+        }
+
+        self._targets = Targets(labels, scores, bboxes, srcidx)
+        self._raw = raw
+
+        index2label = self._dataset.metadata.get("index2label", {})
+        self._class_labels = labels
+        self._class_names = [index2label.get(i, str(i)) for i in np.unique(self._class_labels)]
+        self._image_indices = target_dict["image_index"]
+
+        targets_per_image = None if srcidx is None else np.unique(srcidx, return_counts=True)[1].tolist()
+        merged = merge(raw, return_dropped=True, ignore_lists=False, targets_per_image=targets_per_image)
+
+        reserved = ["image_index", "class_label", "score", "box"]
+        factor_dict = {f"metadata_{k}" if k in reserved else k: v for k, v in merged[0].items() if k != "_image_index"}
+
+        self._factors = dict.fromkeys(factor_dict, FactorInfo())
+        self._dataframe = pl.DataFrame({**target_dict, **factor_dict})
+        self._dropped_factors = merged[1]
+        self._is_structured = True
+
+    def _bin(self) -> None:
+        """Populate factor info and bin non-categorical factors."""
+        if self._is_binned:
+            return
+
+        # Start with an empty set of factor info
+        factor_info: dict[str, FactorInfo] = {}
+
+        # Create a mutable DataFrame for updates
+        df = self.dataframe.clone()
+        factor_bins = self.continuous_factor_bins
+
+        # Check for invalid keys
+        invalid_keys = set(factor_bins.keys()) - set(df.columns)
+        if invalid_keys:
+            warnings.warn(
+                f"The keys - {invalid_keys} - are present in the `continuous_factor_bins` dictionary "
+                "but are not columns in the metadata DataFrame. Unknown keys will be ignored."
+            )
+
+        column_set = set(df.columns)
+        for col in (col for col in self.factor_names if f"{col}[|]" not in column_set):
+            # Get data as numpy array for processing
+            data = df[col].to_numpy()
+            col_dz = f"{col}[|]"
+            if col in factor_bins:
+                # User provided binning
+                bins = factor_bins[col]
+                df = df.with_columns(pl.Series(name=col_dz, values=digitize_data(data, bins).astype(np.int64)))
+                factor_info[col] = FactorInfo("continuous", col_dz)
+            else:
+                # Check if data is numeric
+                unique, ordinal = np.unique(data, return_inverse=True)
+                if not np.issubdtype(data.dtype, np.number) or unique.size <= max(20, data.size * 0.01):
+                    # Non-numeric data or small number of unique values - convert to categorical
+                    df = df.with_columns(pl.Series(name=col_dz, values=ordinal.astype(np.int64)))
+                    factor_info[col] = FactorInfo("categorical", col_dz)
+                elif data.dtype == float:
+                    # Many unique values - discretize by binning
+                    warnings.warn(
+                        f"A user defined binning was not provided for {col}. "
+                        f"Using the {self.auto_bin_method} method to discretize the data. "
+                        "It is recommended that the user rerun and supply the desired "
+                        "bins using the continuous_factor_bins parameter.",
+                        UserWarning,
+                    )
+                    # Create binned version
+                    binned_data = bin_data(data, self.auto_bin_method)
+                    df = df.with_columns(pl.Series(name=col_dz, values=binned_data.astype(np.int64)))
+                    factor_info[col] = FactorInfo("continuous", col_dz)
+                else:
+                    factor_info[col] = FactorInfo("discrete", col_dz)
+
+        # Store the results
+        self._dataframe = df
+        self._factors.update(factor_info)
+        self._is_binned = True
+
+    def get_factors_by_type(self, factor_type: Literal["categorical", "continuous", "discrete"]) -> list[str]:
+        """
+        Get the names of factors of a specific type.
+
+        Parameters
+        ----------
+        factor_type : Literal["categorical", "continuous", "discrete"]
+            The type of factors to retrieve.
+
+        Returns
+        -------
+        list[str]
+            List of factor names of the specified type.
+        """
+        self._bin()
+        return [name for name, info in self.factor_info.items() if info.factor_type == factor_type]
+
+    def add_factors(self, factors: Mapping[str, Any]) -> None:
+        """
+        Add additional factors to the metadata.
+
+        The number of measures per factor must match the number of images
+        in the dataset or the number of detections in the dataset.
+
+        Parameters
+        ----------
+        factors : Mapping[str, ArrayLike]
+            Dictionary of factors to add to the metadata.
+        """
+        self._structure()
+
+        targets = len(self.targets.source) if self.targets.source is not None else len(self.targets)
+        images = self.image_count
+        lengths = {k: len(v if isinstance(v, Sized) else np.atleast_1d(as_numpy(v))) for k, v in factors.items()}
+        targets_match = all(f == targets for f in lengths.values())
+        images_match = targets_match if images == targets else all(f == images for f in lengths.values())
+        if not targets_match and not images_match:
+            raise ValueError(
+                "The lists/arrays in the provided factors have a different length than the current metadata factors."
+            )
+
+        new_columns = []
+        for k, v in factors.items():
+            v = as_numpy(v)
+            data = v if (self.targets.source is None or lengths[k] == targets) else v[self.targets.source]
+            new_columns.append(pl.Series(name=k, values=data))
+            self._factors[k] = FactorInfo()
+
+        if new_columns:
+            self._dataframe = self.dataframe.with_columns(new_columns)
+            self._is_binned = False
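Note: the rewritten `Metadata` class stores targets and factors in a Polars DataFrame, structuring and binning lazily on first property access. A usage sketch (assuming `Metadata` is exported from `dataeval.data`; the `dataset` object and the "altitude" factor are hypothetical):

    from dataeval.data import Metadata

    # Bin a hypothetical continuous "altitude" factor into 4 bins; other
    # factors are auto-typed as categorical/continuous/discrete on access.
    metadata = Metadata(dataset, continuous_factor_bins={"altitude": 4})
    print(metadata.factor_names)                       # all retained factor names
    print(metadata.get_factors_by_type("continuous"))  # e.g. ["altitude"]
    binned = metadata.discretized_data                 # int64 array, one column per factor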
{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_selection.py

@@ -110,8 +110,7 @@ class Select(AnnotatedDataset[_TDatum]):
         grouped: dict[int, list[Selection[_TDatum]]] = {}
         for selection in selections_list:
             grouped.setdefault(selection.stage, []).append(selection)
-        selection_list = [selection for category in sorted(grouped) for selection in grouped[category]]
-        return selection_list
+        return [selection for category in sorted(grouped) for selection in grouped[category]]

     def _apply_selections(self) -> None:
         for selection in self._selections:
{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_split.py

@@ -23,7 +23,7 @@ _logger = logging.getLogger(__name__)
 class KFoldSplitter(Protocol):
     """Protocol covering sklearn KFold variant splitters"""

-    def __init__(self, n_splits: int): ...
+    def __init__(self, n_splits: int) -> None: ...
     def split(self, X: Any, y: Any, groups: Any) -> Iterator[tuple[NDArray[Any], NDArray[Any]]]: ...


@@ -207,10 +207,9 @@ def get_groups(metadata: Metadata, split_on: Sequence[str] | None) -> NDArray[np
         return None

     split_set = set(split_on)
-    indices = [i for i, name in enumerate(metadata.discrete_factor_names) if name in split_set]
-    binned_features = metadata.discrete_data[:, indices]
-    group_ids = np.unique(binned_features, axis=0, return_inverse=True)[1]
-    return group_ids
+    indices = [i for i, name in enumerate(metadata.factor_names) if name in split_set]
+    binned_features = metadata.discretized_data[:, indices]
+    return np.unique(binned_features, axis=0, return_inverse=True)[1]


 def make_splits(
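Note: `get_groups` now reads the renamed `factor_names`/`discretized_data` properties of the new `Metadata` class (replacing `discrete_factor_names`/`discrete_data`). The grouping trick itself is plain NumPy; a self-contained sketch with illustrative values:

    import numpy as np

    # Rows with identical binned factor values collapse to the same group id.
    binned_features = np.array([[0, 1], [0, 1], [2, 1]])
    group_ids = np.unique(binned_features, axis=0, return_inverse=True)[1]
    print(group_ids)  # [0 0 1]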
{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/_targets.py

@@ -24,11 +24,13 @@ class Targets:
     labels : NDArray[np.intp]
         Labels (N,) for N images or objects
     scores : NDArray[np.float32]
-        Probability scores (N,M) for N images of M classes or confidence score (N,) of objects
+        Probability scores (N, M) for N images of M classes or confidence score (N,) of objects
     bboxes : NDArray[np.float32] | None
-        Bounding boxes (N,4) for N objects in (x0,y0,x1,y1) format
+        Bounding boxes (N, 4) for N objects in (x0, y0, x1, y1) format
     source : NDArray[np.intp] | None
         Source image index (N,) for N objects
+    size : int
+        Count of objects
     """

     labels: NDArray[np.intp]
@@ -55,13 +57,16 @@ class Targets:
         )

         if self.bboxes is not None and len(self.bboxes) > 0 and self.bboxes.shape[-1] != 4:
-            raise ValueError("Bounding boxes must be in (x0,y0,x1,y1) format.")
+            raise ValueError("Bounding boxes must be in (x0, y0, x1, y1) format.")
+
+    @property
+    def size(self) -> int:
+        return len(self.labels)

     def __len__(self) -> int:
         if self.source is None:
             return len(self.labels)
-        else:
-            return len(np.unique(self.source))
+        return len(np.unique(self.source))

     def __getitem__(self, idx: int, /) -> Targets:
         if self.source is None or self.bboxes is None:
@@ -71,14 +76,13 @@ class Targets:
                 None,
                 None,
             )
-        else:
-            mask = np.where(self.source == idx, True, False)
-            return Targets(
-                np.atleast_1d(self.labels[mask]),
-                np.atleast_1d(self.scores[mask]),
-                np.atleast_2d(self.bboxes[mask]),
-                np.atleast_1d(self.source[mask]),
-            )
+        mask = np.where(self.source == idx, True, False)
+        return Targets(
+            np.atleast_1d(self.labels[mask]),
+            np.atleast_1d(self.scores[mask]),
+            np.atleast_2d(self.bboxes[mask]),
+            np.atleast_1d(self.source[mask]),
+        )

     def __iter__(self) -> Iterator[Targets]:
         for i in range(len(self.labels)) if self.source is None else np.unique(self.source):
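Note: the new `size` property counts individual detections, while `__len__` still counts unique source images for object-detection targets. A hedged sketch (assuming `Targets` is exported from `dataeval.data`; values are illustrative):

    import numpy as np
    from dataeval.data import Targets

    targets = Targets(
        labels=np.array([0, 1, 1], dtype=np.intp),
        scores=np.array([0.9, 0.8, 0.7], dtype=np.float32),
        bboxes=np.zeros((3, 4), dtype=np.float32),  # (x0, y0, x1, y1) per object
        source=np.array([0, 0, 1], dtype=np.intp),
    )
    assert targets.size == 3  # three detections
    assert len(targets) == 2  # two unique source images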
{dataeval-0.86.0 → dataeval-0.86.2}/src/dataeval/data/selections/_classfilter.py

@@ -68,11 +68,8 @@ _TTarget = TypeVar("_TTarget", ObjectDetectionTarget, SegmentationTarget)


 def _try_mask_object(obj: _T, mask: NDArray[np.bool_]) -> _T:
-    if isinstance(obj, Sized) and not isinstance(obj, (str, bytes, bytearray)) and len(obj) == len(mask):
-        if isinstance(obj, Array):
-            return obj[mask]
-        elif isinstance(obj, Sequence):
-            return cast(_T, [item for i, item in enumerate(obj) if mask[i]])
+    if not isinstance(obj, (str, bytes, bytearray)) and isinstance(obj, (Sequence, Array)) and len(obj) == len(mask):
+        return obj[mask] if isinstance(obj, Array) else cast(_T, [item for i, item in enumerate(obj) if mask[i]])
     return obj


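Note: the consolidated `_try_mask_object` passes strings and bytes through untouched and masks `Array` and `Sequence` objects whose length matches the mask. An illustrative sketch (this imports a private helper, so the behavior shown is an assumption subject to change):

    import numpy as np
    from dataeval.data.selections._classfilter import _try_mask_object

    mask = np.array([True, False, True])
    print(_try_mask_object(np.array([1, 2, 3]), mask))  # [1 3]
    print(_try_mask_object([10, 20, 30], mask))         # [10, 30]
    print(_try_mask_object("abc", mask))                # abc (unchanged)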