dataeval 0.86.3__py3-none-any.whl → 0.86.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/config.py +1 -1
- dataeval/data/_images.py +3 -1
- dataeval/data/_metadata.py +22 -28
- dataeval/detectors/drift/_nml/_result.py +2 -2
- dataeval/detectors/linters/outliers.py +52 -21
- dataeval/metadata/_distance.py +1 -1
- dataeval/metrics/bias/_balance.py +5 -4
- dataeval/metrics/stats/_base.py +4 -4
- dataeval/metrics/stats/_labelstats.py +11 -11
- dataeval/metrics/stats/_pixelstats.py +5 -4
- dataeval/metrics/stats/_visualstats.py +7 -8
- dataeval/outputs/_drift.py +1 -1
- dataeval/outputs/_linters.py +1 -1
- dataeval/outputs/_stats.py +64 -19
- dataeval/utils/_plot.py +6 -6
- dataeval/utils/data/_dataset.py +2 -3
- dataeval/utils/datasets/_fileio.py +1 -1
- dataeval/utils/torch/_internal.py +1 -1
- {dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/METADATA +1 -1
- {dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/RECORD +23 -23
- {dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/WHEEL +0 -0
dataeval/__init__.py
CHANGED
dataeval/config.py
CHANGED
dataeval/data/_images.py
CHANGED
@@ -4,6 +4,8 @@ __all__ = []
 
 from typing import TYPE_CHECKING, Any, Generic, Iterator, Sequence, TypeVar, cast, overload
 
+import numpy as np
+
 from dataeval.typing import Array, ArrayLike, Dataset
 from dataeval.utils._array import as_numpy, channels_first_to_last
 
@@ -58,7 +60,7 @@ class Images(Generic[T]):
         num_images = len(indices)
         num_rows = (num_images + images_per_row - 1) // images_per_row
         fig, axes = plt.subplots(num_rows, images_per_row, figsize=figsize)
-        for i, ax in enumerate(axes.flatten()):
+        for i, ax in enumerate(np.asarray(axes).flatten()):
             image = channels_first_to_last(as_numpy(self[i]))
             ax.imshow(image)
             ax.axis("off")
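The `np.asarray(axes).flatten()` change guards against `plt.subplots` returning a single `Axes` object instead of an array when only one subplot is created. A minimal standalone sketch of the pattern (illustrative code, not DataEval itself):

```python
import matplotlib.pyplot as plt
import numpy as np

fig, axes = plt.subplots(1, 1)         # a lone Axes object, which has no .flatten()
for ax in np.asarray(axes).flatten():  # np.asarray() lets one code path handle 1 axis or a grid
    ax.axis("off")
```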
dataeval/data/_metadata.py
CHANGED
@@ -4,7 +4,7 @@ __all__ = []
 
 import warnings
 from dataclasses import dataclass
-from typing import Any, Iterable, Literal, Mapping, Sequence
+from typing import Any, Iterable, Literal, Mapping, Sequence, Sized
 
 import numpy as np
 import polars as pl
@@ -20,6 +20,10 @@ from dataeval.utils._bin import bin_data, digitize_data
 from dataeval.utils.data.metadata import merge
 
 
+def _binned(name: str) -> str:
+    return f"{name}[]"
+
+
 @dataclass
 class FactorInfo:
     factor_type: Literal["categorical", "continuous", "discrete"] | None = None
@@ -65,6 +69,7 @@ class Metadata:
         self._is_binned = False
 
         self._dataset = dataset
+        self._count = len(dataset) if isinstance(dataset, Sized) else 0
         self._continuous_factor_bins = dict(continuous_factor_bins) if continuous_factor_bins else {}
        self._auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = auto_bin_method
 
@@ -157,13 +162,13 @@ class Metadata:
     def factor_names(self) -> Sequence[str]:
         """Factor names of the metadata."""
         self._structure()
-        return list(self._factors)
+        return list(filter(self._filter, self._factors))
 
     @property
     def factor_info(self) -> Mapping[str, FactorInfo]:
         """Factor types of the metadata."""
         self._bin()
-        return self._factors
+        return dict(filter(self._filter, self._factors.items()))
 
     @property
     def factor_data(self) -> NDArray[Any]:
@@ -194,14 +199,19 @@ class Metadata:
 
     @property
     def image_count(self) -> int:
-        self.
-
+        if self._count == 0:
+            self._structure()
+        return self._count
+
+    def _filter(self, factor: str | tuple[str, Any]) -> bool:
+        factor = factor[0] if isinstance(factor, tuple) else factor
+        return factor in self.include if self.include else factor not in self.exclude
 
     def _reset_bins(self, cols: Iterable[str] | None = None) -> None:
         if self._is_binned:
             columns = self._dataframe.columns
-            for col in (col for col in cols or columns if
-                self._dataframe.drop_in_place(
+            for col in (col for col in cols or columns if _binned(col) in columns):
+                self._dataframe.drop_in_place(_binned(col))
                 self._factors[col] = FactorInfo()
             self._is_binned = False
 
@@ -244,7 +254,7 @@ class Metadata:
         bboxes = as_numpy(bboxes).astype(np.float32) if is_od else None
         srcidx = as_numpy(srcidx).astype(np.intp) if is_od else None
 
-        index2label = self._dataset.metadata.get("index2label", {})
+        index2label = self._dataset.metadata.get("index2label", {i: str(i) for i in np.unique(labels)})
 
         targets_per_image = None if srcidx is None else np.unique(srcidx, return_counts=True)[1].tolist()
         merged = merge(raw, return_dropped=True, ignore_lists=False, targets_per_image=targets_per_image)
@@ -260,8 +270,9 @@ class Metadata:
         }
 
         self._raw = raw
+        self._index2label = index2label
         self._class_labels = labels
-        self._class_names =
+        self._class_names = list(index2label.values())
         self._image_indices = target_dict["image_index"]
         self._factors = dict.fromkeys(factor_dict, FactorInfo())
         self._dataframe = pl.DataFrame({**target_dict, **factor_dict})
@@ -289,10 +300,10 @@ class Metadata:
         )
 
         column_set = set(df.columns)
-        for col in (col for col in self.factor_names if
+        for col in (col for col in self.factor_names if _binned(col) not in column_set):
             # Get data as numpy array for processing
             data = df[col].to_numpy()
-            col_dz =
+            col_dz = _binned(col)
             if col in factor_bins:
                 # User provided binning
                 bins = factor_bins[col]
@@ -326,23 +337,6 @@ class Metadata:
         self._factors.update(factor_info)
         self._is_binned = True
 
-    def get_factors_by_type(self, factor_type: Literal["categorical", "continuous", "discrete"]) -> Sequence[str]:
-        """
-        Get the names of factors of a specific type.
-
-        Parameters
-        ----------
-        factor_type : Literal["categorical", "continuous", "discrete"]
-            The type of factors to retrieve.
-
-        Returns
-        -------
-        list[str]
-            List of factor names of the specified type.
-        """
-        self._bin()
-        return [name for name, info in self.factor_info.items() if info.factor_type == factor_type]
-
     def add_factors(self, factors: Mapping[str, Array | Sequence[Any]]) -> None:
         """
         Add additional factors to the metadata.
dataeval/detectors/drift/_nml/_result.py
CHANGED
@@ -29,7 +29,7 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
         self._data = results_data.copy(deep=True)
 
     def data(self) -> pd.DataFrame:
-        return self.
+        return self.to_dataframe()
 
     @property
     def empty(self) -> bool:
@@ -38,7 +38,7 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
     def __len__(self) -> int:
         return 0 if self.empty else len(self._data)
 
-    def
+    def to_dataframe(self, multilevel: bool = True) -> pd.DataFrame:
         """Export results to pandas dataframe."""
         if multilevel:
             return self._data
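A minimal sketch of the delegation shown above, where `data()` now routes through the public `to_dataframe()` accessor (toy `Result` class, not the actual `AbstractResult`):

```python
import pandas as pd

class Result:
    def __init__(self, frame: pd.DataFrame) -> None:
        self._data = frame.copy(deep=True)

    def data(self) -> pd.DataFrame:
        return self.to_dataframe()           # data() now routes through to_dataframe()

    def to_dataframe(self, multilevel: bool = True) -> pd.DataFrame:
        return self._data                    # multilevel handling elided in this sketch

print(Result(pd.DataFrame({"value": [1, 2]})).data())
```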
dataeval/detectors/linters/outliers.py
CHANGED
@@ -7,6 +7,7 @@ from typing import Any, Literal, Sequence, overload
 import numpy as np
 from numpy.typing import NDArray
 
+from dataeval.config import EPSILON
 from dataeval.data._images import Images
 from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
 from dataeval.metrics.stats._imagestats import imagestats
@@ -18,26 +19,56 @@ from dataeval.typing import ArrayLike, Dataset
 
 
 def _get_outlier_mask(
-    values: NDArray, method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
-) -> NDArray:
+    values: NDArray[Any], method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
+) -> NDArray[np.bool_]:
+    if len(values) == 0:
+        return np.array([], dtype=bool)
+
     values = values.astype(np.float64)
+
+    valid_mask = ~np.isnan(values)
+    outliers = np.full(values.shape, False, dtype=bool)
+
+    if not np.any(valid_mask):
+        return outliers
+
     if method == "zscore":
-        threshold = threshold if threshold else 3.0
-        (old lines 26-40 not captured in this rendering)
+        threshold = threshold if threshold is not None else 3.0
+
+        std_val = np.nanstd(values)
+
+        if std_val > EPSILON:
+            mean_val = np.nanmean(values)
+            abs_diff = np.abs(values - mean_val)
+            outliers = (abs_diff / std_val) > threshold
+
+    elif method == "modzscore":
+        threshold = threshold if threshold is not None else 3.5
+
+        median_val = np.nanmedian(values)
+        abs_diff = np.abs(values - median_val)
+        m_abs_diff = np.nanmedian(abs_diff)
+        m_abs_diff = np.nanmean(abs_diff) if m_abs_diff <= EPSILON else m_abs_diff
+
+        if m_abs_diff > EPSILON:
+            mod_z_score = 0.6745 * abs_diff / m_abs_diff
+            outliers = mod_z_score > threshold
+
+    elif method == "iqr":
+        threshold = threshold if threshold is not None else 1.5
+
+        qrt = np.nanpercentile(values, q=(25, 75), method="midpoint")
+        iqr_val = qrt[1] - qrt[0]
+
+        if iqr_val > EPSILON:
+            iqr_threshold = iqr_val * threshold
+            outliers = (values < (qrt[0] - iqr_threshold)) | (values > (qrt[1] + iqr_threshold))
+
+    else:
+        raise ValueError("Outlier method must be 'zscore' 'modzscore' or 'iqr'.")
+
+    outliers[~valid_mask] = False
+    return outliers
 
 
 class Outliers:
@@ -164,10 +195,10 @@ class Outliers:
         >>> len(results)
         2
         >>> results.issues[0]
-        {10: {'
+        {10: {'entropy': 0.2128}, 12: {'std': 0.00536, 'var': 2.87e-05, 'entropy': 0.2128}}
         >>> results.issues[1]
         {}
-        """
+        """
         if isinstance(stats, (ImageStatsOutput, DimensionStatsOutput, PixelStatsOutput, VisualStatsOutput)):
             return OutliersOutput(self._get_outliers(stats.data()))
 
@@ -221,7 +252,7 @@ class Outliers:
        >>> list(results.issues)
         [10, 12]
         >>> results.issues[10]
-        {'contrast': 1.25, 'zeros': 0.05493, '
+        {'contrast': 1.25, 'zeros': 0.05493, 'entropy': 0.2128}
         """
         images = Images(data) if isinstance(data, Dataset) else data
         self.stats = imagestats(images)
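For reference, the modified z-score branch above can be exercised in isolation. The sketch below mirrors its NaN handling and MAD fallback under an assumed `EPSILON` value; `modzscore_outliers` is a toy name, not part of the DataEval API:

```python
import numpy as np

EPSILON = 1e-10  # assumed small constant, analogous to dataeval.config.EPSILON

def modzscore_outliers(values: np.ndarray, threshold: float = 3.5) -> np.ndarray:
    values = values.astype(np.float64)
    valid = ~np.isnan(values)
    outliers = np.full(values.shape, False, dtype=bool)
    if not valid.any():
        return outliers
    median = np.nanmedian(values)
    abs_diff = np.abs(values - median)
    mad = np.nanmedian(abs_diff)
    mad = np.nanmean(abs_diff) if mad <= EPSILON else mad  # fall back when the MAD collapses
    if mad > EPSILON:
        outliers = 0.6745 * abs_diff / mad > threshold
    outliers[~valid] = False  # NaN entries are never flagged
    return outliers

print(modzscore_outliers(np.array([1.0, 1.1, 0.9, np.nan, 50.0])))
# -> [False False False False  True]
```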
dataeval/metadata/_distance.py
CHANGED
@@ -81,7 +81,7 @@ def metadata_distance(metadata1: Metadata, metadata2: Metadata) -> MetadataDista
     """
 
     _compare_keys(metadata1.factor_names, metadata2.factor_names)
-    cont_fnames = metadata1.
+    cont_fnames = [name for name, info in metadata1.factor_info.items() if info.factor_type == "continuous"]
 
     if not cont_fnames:
         return MetadataDistanceOutput({})
dataeval/metrics/bias/_balance.py
CHANGED
@@ -99,9 +99,10 @@ def balance(
     factor_types = {"class_label": "categorical"} | {k: v.factor_type for k, v in metadata.factor_info.items()}
     is_discrete = [factor_type != "continuous" for factor_type in factor_types.values()]
     num_factors = len(factor_types)
+    class_labels = metadata.class_labels
 
     mi = np.full((num_factors, num_factors), np.nan, dtype=np.float32)
-    data = np.hstack((
+    data = np.hstack((class_labels[:, np.newaxis], data))
 
     for idx, factor_type in enumerate(factor_types.values()):
         if factor_type != "continuous":
@@ -132,12 +133,12 @@ def balance(
     factors = nmi[1:, 1:]
 
     # assume class is a factor
-
+    u_classes = np.unique(class_labels)
+    num_classes = len(u_classes)
     classwise_mi = np.full((num_classes, num_factors), np.nan, dtype=np.float32)
 
     # classwise targets
-
-    tgt_bin = data[:, 0][:, None] == classes
+    tgt_bin = data[:, 0][:, None] == u_classes
 
     # classification MI for discrete/categorical features
     for idx in range(num_classes):
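The `tgt_bin` line above builds a one-vs-rest membership matrix from the class-label column; a standalone illustration with toy labels:

```python
import numpy as np

class_labels = np.array([0, 1, 1, 2, 0])
u_classes = np.unique(class_labels)
tgt_bin = class_labels[:, None] == u_classes  # shape (samples, classes)
print(tgt_bin.astype(int))
# [[1 0 0]
#  [0 1 0]
#  [0 1 0]
#  [0 0 1]
#  [1 0 0]]
```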
dataeval/metrics/stats/_base.py
CHANGED
@@ -13,8 +13,8 @@ from multiprocessing import Pool
 from typing import Any, Callable, Generic, Iterable, Iterator, Sequence, TypeVar
 
 import numpy as np
-import tqdm
 from numpy.typing import NDArray
+from tqdm.auto import tqdm
 
 from dataeval.config import get_max_processes
 from dataeval.outputs._stats import BASE_ATTRS, BaseStatsOutput, SourceIndex
@@ -77,7 +77,7 @@ class PoolWrapper:
     """
 
     def __init__(self, processes: int | None) -> None:
-        self.pool = Pool(processes) if processes is
+        self.pool = Pool(processes) if processes is None or processes > 1 else None
 
     def imap(self, func: Callable[[_S], _T], iterable: Iterable[_S]) -> Iterator[_T]:
         return map(func, iterable) if self.pool is None else self.pool.imap(func, iterable)
@@ -93,7 +93,7 @@ class PoolWrapper:
 
 class StatsProcessor(Generic[TStatsOutput]):
     output_class: type[TStatsOutput]
-    cache_keys:
+    cache_keys: set[str] = set()
     image_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
     channel_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
 
@@ -267,7 +267,7 @@ def run_stats(
     stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
 
     with PoolWrapper(processes=get_max_processes()) as p:
-        for r in tqdm
+        for r in tqdm(
             p.imap(
                 partial(
                     process_stats_unpack,
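The `PoolWrapper` change above means a worker pool is only created when more than one process is requested, or when the count is left to the runtime; with `processes=1` it falls back to a plain `map()`. A toy predicate capturing that rule:

```python
def uses_pool(processes):
    # None lets the runtime pick the worker count; anything above 1 still gets a Pool
    return processes is None or processes > 1

print([(p, uses_pool(p)) for p in (None, 1, 2, 8)])
# [(None, True), (1, False), (2, True), (8, True)]
```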
dataeval/metrics/stats/_labelstats.py
CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 __all__ = []
 
-from typing import Any,
+from typing import Any, TypeVar
 
 import polars as pl
 
@@ -14,10 +14,6 @@ from dataeval.typing import AnnotatedDataset
 TValue = TypeVar("TValue")
 
 
-def _sort_to_list(d: Mapping[int, TValue]) -> list[TValue]:
-    return [t[1] for t in sorted(d.items())]
-
-
 @set_metadata
 def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
     """
@@ -58,21 +54,25 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
 
     # Count occurrences of each label across all images
     label_counts_df = metadata_df.group_by("class_label").len()
-    label_counts = label_counts_df
+    label_counts = dict(zip(label_counts_df["class_label"], label_counts_df["len"]))
 
     # Count unique images per label (how many images contain each label)
     image_counts_df = metadata_df.select(["image_index", "class_label"]).unique().group_by("class_label").len()
-    image_counts = image_counts_df
+    image_counts = dict(zip(image_counts_df["class_label"], image_counts_df["len"]))
 
     # Create index_location mapping (which images contain each label)
-    index_location:
+    index_location: dict[int, list[int]] = {}
     for row in metadata_df.group_by("class_label").agg(pl.col("image_index")).to_dicts():
         indices = row["image_index"]
         index_location[row["class_label"]] = sorted(dict.fromkeys(indices)) if isinstance(indices, list) else [indices]
 
     # Count labels per image
-    label_per_image_df = metadata_df.group_by("image_index").agg(pl.
-
+    label_per_image_df = metadata_df.group_by("image_index").agg(pl.len().alias("label_count"))
+
+    # Join with all indices to include missing ones with 0 count
+    all_indices = pl.DataFrame({"image_index": range(metadata.image_count)})
+    complete_label_df = all_indices.join(label_per_image_df, on="image_index", how="left").fill_null(0)
+    label_per_image = complete_label_df.sort("image_index")["label_count"].to_list()
 
     return LabelStatsOutput(
         label_counts_per_class=label_counts,
@@ -81,6 +81,6 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
         image_indices_per_class=index_location,
         image_count=len(label_per_image),
         class_count=len(metadata.class_names),
-        label_count=sum(label_counts),
+        label_count=sum(label_counts.values()),
         class_names=metadata.class_names,
     )
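The left join against the full index range above is what gives images with no labels an explicit count of 0; a small standalone polars illustration of the same pattern (toy data):

```python
import polars as pl

per_image = pl.DataFrame({"image_index": [0, 2], "label_count": [3, 1]})
all_indices = pl.DataFrame({"image_index": range(4)})          # every image index, labelled or not
complete = all_indices.join(per_image, on="image_index", how="left").fill_null(0)
print(complete.sort("image_index")["label_count"].to_list())   # [3, 0, 1, 0]
```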
dataeval/metrics/stats/_pixelstats.py
CHANGED
@@ -15,12 +15,13 @@ from dataeval.typing import ArrayLike, Dataset
 
 class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
     output_class: type = PixelStatsOutput
+    cache_keys = {"histogram"}
     image_function_map: dict[str, Callable[[StatsProcessor[PixelStatsOutput]], Any]] = {
         "mean": lambda x: np.nanmean(x.scaled),
         "std": lambda x: np.nanstd(x.scaled),
         "var": lambda x: np.nanvar(x.scaled),
-        "skew": lambda x:
-        "kurtosis": lambda x:
+        "skew": lambda x: skew(x.scaled.ravel(), nan_policy="omit"),
+        "kurtosis": lambda x: kurtosis(x.scaled.ravel(), nan_policy="omit"),
         "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
         "entropy": lambda x: entropy(x.get("histogram")),
     }
@@ -28,8 +29,8 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
         "mean": lambda x: np.nanmean(x.scaled, axis=1),
         "std": lambda x: np.nanstd(x.scaled, axis=1),
         "var": lambda x: np.nanvar(x.scaled, axis=1),
-        "skew": lambda x:
-        "kurtosis": lambda x:
+        "skew": lambda x: skew(x.scaled, axis=1, nan_policy="omit"),
+        "kurtosis": lambda x: kurtosis(x.scaled, axis=1, nan_policy="omit"),
         "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
         "entropy": lambda x: entropy(x.get("histogram"), axis=1),
     }
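With `nan_policy="omit"`, NaN pixels are dropped from the skew and kurtosis computations instead of propagating; a toy example of the scipy calls used above:

```python
import numpy as np
from scipy.stats import kurtosis, skew

pixels = np.array([0.1, 0.2, np.nan, 0.4, 0.9])
# both statistics are computed over the non-NaN values only
print(skew(pixels, nan_policy="omit"), kurtosis(pixels, nan_policy="omit"))
```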
dataeval/metrics/stats/_visualstats.py
CHANGED
@@ -6,6 +6,7 @@ from typing import Any, Callable
 
 import numpy as np
 
+from dataeval.config import EPSILON
 from dataeval.metrics.stats._base import StatsProcessor, run_stats
 from dataeval.outputs import VisualStatsOutput
 from dataeval.outputs._base import set_metadata
@@ -17,23 +18,21 @@ QUARTILES = (0, 25, 50, 75, 100)
 
 class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
     output_class: type = VisualStatsOutput
+    cache_keys: set[str] = {"percentiles"}
     image_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
         "brightness": lambda x: x.get("percentiles")[1],
-        "contrast": lambda x:
-
-        else (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles")),
+        "contrast": lambda x: (np.max(x.get("percentiles")) - np.min(x.get("percentiles")))
+        / (np.mean(x.get("percentiles")) + EPSILON),
         "darkness": lambda x: x.get("percentiles")[-2],
         "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
         "sharpness": lambda x: np.nanstd(edge_filter(np.mean(x.image, axis=0))),
-        "zeros": lambda x: np.count_nonzero(np.
+        "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
         "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
     }
     channel_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
         "brightness": lambda x: x.get("percentiles")[:, 1],
-        "contrast": lambda x: np.
-
-        / np.mean(x.get("percentiles"), axis=1)
-        ),
+        "contrast": lambda x: (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
+        / (np.mean(x.get("percentiles"), axis=1) + EPSILON),
         "darkness": lambda x: x.get("percentiles")[:, -2],
         "missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
         "sharpness": lambda x: np.nanstd(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
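Adding `EPSILON` to the denominator keeps the contrast metric finite on constant (for example all-black) images; a standalone sketch with an assumed `EPSILON` value:

```python
import numpy as np

EPSILON = 1e-10  # assumed small constant, analogous to dataeval.config.EPSILON
percentiles = np.zeros(5)  # percentile values of an all-zero image
contrast = (percentiles.max() - percentiles.min()) / (percentiles.mean() + EPSILON)
print(contrast)  # 0.0 instead of a 0/0 division producing NaN
```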
dataeval/outputs/_drift.py
CHANGED
@@ -114,7 +114,7 @@ class DriftMVDCOutput(PerMetricResult):
         import matplotlib.pyplot as plt
 
         fig, ax = plt.subplots(dpi=300)
-        resdf = self.
+        resdf = self.to_dataframe()
         xticks = np.arange(resdf.shape[0])
         trndf = resdf[resdf["chunk"]["period"] == "reference"]
         tstdf = resdf[resdf["chunk"]["period"] == "analysis"]
dataeval/outputs/_linters.py
CHANGED
@@ -54,7 +54,7 @@ def _reorganize_by_class_and_metric(
     for img, group in result.items():
         for extreme in group:
             metrics.setdefault(extreme, []).append(img)
-            for i, images in
+            for i, images in lstats.image_indices_per_class.items():
                 if img in images:
                     class_wise[lstats.class_names[i]][extreme] = class_wise[lstats.class_names[i]].get(extreme, 0) + 1
 
dataeval/outputs/_stats.py
CHANGED
@@ -6,7 +6,7 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Iterable, Mapping, NamedTuple, Optional, Sequence, Union
 
 import numpy as np
-import
+import polars as pl
 from numpy.typing import NDArray
 from typing_extensions import TypeAlias
 
@@ -22,7 +22,7 @@ SOURCE_INDEX = "source_index"
 OBJECT_COUNT = "object_count"
 IMAGE_COUNT = "image_count"
 
-BASE_ATTRS =
+BASE_ATTRS = [SOURCE_INDEX, OBJECT_COUNT, IMAGE_COUNT]
 
 
 class SourceIndex(NamedTuple):
@@ -156,14 +156,21 @@ class BaseStatsOutput(Output):
         Mapping[str, NDArray[Any]]
         """
         filter_ = [filter] if isinstance(filter, str) else filter
+
+        """
+        Performs validation checks to ensure selected keys and constant or 1-D values
+        Each set of checks returns True if a valid value.
+        Only one set of final checks needs to be True to allow the value through
+        """
         return {
             k: v
            for k, v in self.data().items()
-            if
-
-
-
-
+            if (
+                k not in BASE_ATTRS  # Ignore BaseStatsOutput attributes
+                and (filter_ is None or k in filter_)  # Key is selected
+                and (isinstance(v, np.ndarray) and v.ndim == 1)  # Check valid array
+                and (not exclude_constant or len(np.unique(v)) > 1)  # Check valid numpy "constant"
+            )
         }
 
     def plot(
@@ -195,6 +202,11 @@ class BaseStatsOutput(Output):
             return histogram_plot(factors, log)
         return channel_histogram_plot(factors, log, max_channels, ch_mask)
 
+    def to_dataframe(self) -> pl.DataFrame:
+        """Returns the processed factors a polars dataframe of shape (factors, samples)"""
+
+        return pl.DataFrame(self.factors())
+
 
 @dataclass(frozen=True)
 class DimensionStatsOutput(BaseStatsOutput):
@@ -256,6 +268,43 @@ class HashStatsOutput(BaseStatsOutput):
     xxhash: Sequence[str]
     pchash: Sequence[str]
 
+    def to_dataframe(self) -> pl.DataFrame:
+        """
+        Returns a polars dataframe for the xxhash and pchash attributes of each sample
+
+        Note
+        ----
+        xxhash and pchash do not follow the normal definition of factors but are
+        helpful attributes of the data
+
+        Examples
+        --------
+        Display the hashes of a dataset of images, whose shape is (C, H, W),
+        as a polars DataFrame
+
+        >>> from dataeval.metrics.stats import hashstats
+        >>> results = hashstats(dataset)
+        >>> print(results.to_dataframe())
+        shape: (8, 2)
+        ┌──────────────────┬──────────────────┐
+        │ xxhash           ┆ pchash           │
+        │ ---              ┆ ---              │
+        │ str              ┆ str              │
+        ╞══════════════════╪══════════════════╡
+        │ 69b50a5f06af238c ┆ e666999999266666 │
+        │ 5a861d7a23d1afe7 ┆ e666999999266666 │
+        │ 7ffdb4990ad44ac6 ┆ e666999966666299 │
+        │ 4f0c366a3298ceac ┆ e666999999266666 │
+        │ c5519e36ac1f8839 ┆ 96e91656e91616e9 │
+        │ e7e92346159a4567 ┆ e666999999266666 │
+        │ 9a538f797a5ba8ee ┆ e666999999266666 │
+        │ 1a658bd2a1baee25 ┆ e666999999266666 │
+        └──────────────────┴──────────────────┘
+        """
+        data = {"xxhash": self.xxhash, "pchash": self.pchash}
+        schema = {"xxhash": str, "pchash": str}
+        return pl.DataFrame(data=data, schema=schema)
+
 
 @dataclass(frozen=True)
 class LabelStatsOutput(Output):
@@ -272,7 +321,7 @@ class LabelStatsOutput(Output):
     image_counts_per_class : Mapping[int, int]
         Dictionary whose keys are the different label classes and
         values are total counts of each image the class is present in
-    image_indices_per_class : Mapping[int,
+    image_indices_per_class : Mapping[int, Sequence[int]]
         Dictionary whose keys are the different label classes and
         values are lists containing the images that have that label
     image_count : int
@@ -284,10 +333,10 @@ class LabelStatsOutput(Output):
     class_names : Sequence[str]
     """
 
-    label_counts_per_class:
+    label_counts_per_class: Mapping[int, int]
     label_counts_per_image: Sequence[int]
-    image_counts_per_class:
-    image_indices_per_class:
+    image_counts_per_class: Mapping[int, int]
+    image_indices_per_class: Mapping[int, Sequence[int]]
     image_count: int
     class_count: int
     label_count: int
@@ -325,17 +374,13 @@ class LabelStatsOutput(Output):
 
         return "\n".join(table_str)
 
-    def to_dataframe(self) ->
+    def to_dataframe(self) -> pl.DataFrame:
         """
-        Exports the label statistics output results to a
-
-        Notes
-        -----
-        This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
+        Exports the label statistics output results to a polars DataFrame.
 
         Returns
         -------
-
+        pl.DataFrame
        """
         total_count = []
         image_count = []
@@ -343,7 +388,7 @@ class LabelStatsOutput(Output):
            total_count.append(self.label_counts_per_class[cls])
            image_count.append(self.image_counts_per_class[cls])
 
-        return
+        return pl.DataFrame(
             {
                 "Label": self.class_names,
                 "Total Count": total_count,
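A hedged sketch of the selection rules spelled out in `factors()` above: base attributes and non-1-D arrays are dropped, constants are dropped when requested, and the surviving mapping can then be handed to `pl.DataFrame` as `to_dataframe()` does (toy data, not the DataEval API):

```python
import numpy as np
import polars as pl

data = {
    "mean": np.array([0.1, 0.2, 0.3]),
    "histogram": np.ones((3, 256)),      # 2-D, filtered out
    "zeros": np.array([0.0, 0.0, 0.0]),  # constant, filtered out when constants are excluded
}
factors = {
    k: v for k, v in data.items()
    if isinstance(v, np.ndarray) and v.ndim == 1 and len(np.unique(v)) > 1
}
print(pl.DataFrame(factors))  # a single "mean" column with 3 rows
```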
dataeval/utils/_plot.py
CHANGED
@@ -164,9 +164,9 @@ def histogram_plot(
     rows = math.ceil(num_metrics / 3)
     cols = min(num_metrics, 3)
     fig, axs = plt.subplots(rows, 3, figsize=(cols * 3 + 1, rows * 3))
-
+    axs_flat = np.asarray(axs).flatten()
     for ax, metric in zip(
-
+        axs_flat,
         data_dict,
     ):
         # Plot the histogram for the chosen metric
@@ -177,7 +177,7 @@ def histogram_plot(
         ax.set_ylabel(ylabel)
         ax.set_xlabel(xlabel)
 
-    for ax in
+    for ax in axs_flat[num_metrics:]:
         ax.axis("off")
         ax.set_visible(False)
 
@@ -222,9 +222,9 @@ def channel_histogram_plot(
     rows = math.ceil(num_metrics / 3)
     cols = min(num_metrics, 3)
     fig, axs = plt.subplots(rows, 3, figsize=(cols * 3 + 1, rows * 3))
-
+    axs_flat = np.asarray(axs).flatten()
     for ax, metric in zip(
-
+        axs_flat,
         data_keys,
     ):
         # Plot the histogram for the chosen metric
@@ -246,7 +246,7 @@ def channel_histogram_plot(
         ax.set_ylabel(ylabel)
         ax.set_xlabel(xlabel)
 
-    for ax in
+    for ax in axs_flat[num_metrics:]:
         ax.axis("off")
         ax.set_visible(False)
 
dataeval/utils/data/_dataset.py
CHANGED
@@ -72,9 +72,8 @@ def _listify_metadata(
 
 def _find_max(arr: ArrayLike) -> Any:
     if not isinstance(arr, (bytes, str)) and isinstance(arr, (Iterable, Sequence, Array)):
-
-
-        return max(arr)
+        nested = [x for x in [_find_max(x) for x in arr] if x is not None]
+        return max(nested) if len(nested) > 0 else None
     return arr
 
 
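A standalone toy reimplementation of the `_find_max` change above: nested structures are reduced recursively and empty branches yield `None` instead of raising (the `hasattr` check stands in for the DataEval type checks):

```python
def find_max(arr):
    if not isinstance(arr, (bytes, str)) and hasattr(arr, "__iter__"):
        # reduce each branch first, then drop empty (None) branches before taking the max
        nested = [x for x in (find_max(x) for x in arr) if x is not None]
        return max(nested) if nested else None
    return arr

print(find_max([[1, 5], [], [3, [7, 2]]]))  # 7
```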
dataeval/utils/torch/_internal.py
CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import torch
 from numpy.typing import NDArray
 from torch.utils.data import DataLoader, TensorDataset
-from tqdm import tqdm
+from tqdm.auto import tqdm
 
 from dataeval.config import DeviceLike, get_device
 from dataeval.typing import Array
{dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.86.3
+Version: 0.86.5
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
{dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
-dataeval/__init__.py,sha256=
+dataeval/__init__.py,sha256=5qOVdEDEZt5O--VufuRJXGEByzQC7pJWZluFGzPuNOc,1636
 dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
-dataeval/config.py,sha256=
+dataeval/config.py,sha256=bHa8np4FCtLLv8_xlfdDC4lb1InJ_kT0vXDO5P42rvk,4082
 dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
 dataeval/data/_embeddings.py,sha256=PFjpdV9bfusCB4taTIYSzx1hP8nJb_KCkZTN8kMw-Hs,12885
-dataeval/data/_images.py,sha256=
-dataeval/data/_metadata.py,sha256=
+dataeval/data/_images.py,sha256=Rc_59CuU4zfN7Xm7an1XUx8ZghQg6a56VJWMZD9edRw,2654
+dataeval/data/_metadata.py,sha256=OTda9V7DA5Ejxip_NR16LCK2C8HMtpjWHHiFoW3LrLY,14364
 dataeval/data/_selection.py,sha256=r06xeiyK8nTWPLyItkoPQRWZI1i6LATSue_cuEbCdc4,4463
 dataeval/data/_split.py,sha256=nQABR05vxil2Qx7-uX4Fm0_DWpibskBGDJOYj_b1u3I,16737
 dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
@@ -26,24 +26,24 @@ dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie
 dataeval/detectors/drift/_nml/_base.py,sha256=o34LcCsD9p1A6u8UdQn-dxIVwC2CMr6uCpC0vq16JX0,2663
 dataeval/detectors/drift/_nml/_chunk.py,sha256=t12eouanRNiu5DJXOaYDZXUvFMqfcp1BETLOufdV79M,13567
 dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=n7Ttq5Ej7sAY9Jn2iagaGj4IIWiG8gmA3wwFizlBqes,7292
-dataeval/detectors/drift/_nml/_result.py,sha256=
+dataeval/detectors/drift/_nml/_result.py,sha256=TMK17bnlgSdL0MCRHtQZJO8YoWWe4C2kh_akESrlP1g,3269
 dataeval/detectors/drift/_nml/_thresholds.py,sha256=WGdkLei9w_EvvsRHQzWdDyFVoZHIwM78k_aB3eoh31Q,12060
 dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
 dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
 dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
 dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
-dataeval/detectors/linters/outliers.py,sha256=
+dataeval/detectors/linters/outliers.py,sha256=WO686jVbGbtDjO-8CuYVLxpeUGv8MpIK9QjADlTdd40,9596
 dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
 dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,2944
 dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
 dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
 dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
-dataeval/metadata/_distance.py,sha256=
+dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
 dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,8981
 dataeval/metadata/_utils.py,sha256=BcGoYVfA4AkAWpInY5txOc3QBpsGf6cnnUAsHOQTJAE,1210
 dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
 dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
-dataeval/metrics/bias/_balance.py,sha256=
+dataeval/metrics/bias/_balance.py,sha256=fREtoMLUZPOf_ivqNKwij6oPiKMTk02ECO5rWURf3KY,5541
 dataeval/metrics/bias/_completeness.py,sha256=BysXU2Jpw33n5dl3acJFEqF3mFGiJLsfG4n5Q2fkTaY,4608
 dataeval/metrics/bias/_coverage.py,sha256=PeUoOiaghUEdn6Ov8z2-am7-fnBVIPcFbJK7Ty5JObA,3647
 dataeval/metrics/bias/_diversity.py,sha256=25udDKmel9IjeVT5nM4dOa1apda66QdRxBc922yuUvI,5830
@@ -54,23 +54,23 @@ dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1R
 dataeval/metrics/estimators/_divergence.py,sha256=-np4nWNtRrHnvo4xdWuTzkyJJmobyjDnVDBOMjtBS1Y,4003
 dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
 dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
-dataeval/metrics/stats/_base.py,sha256
+dataeval/metrics/stats/_base.py,sha256=R-hxoEPLreZcxYxBfyjbKfdoGMMTPiqJ5g2zSO-1UYM,12541
 dataeval/metrics/stats/_boxratiostats.py,sha256=ROZrlqgbowkGfCR5PJ5TL7Og40iMOdUqJnsCtaz_Xek,6450
 dataeval/metrics/stats/_dimensionstats.py,sha256=EVO-BlxrZl8qrP09lwPbyWdrG1ZeDtgj4LiswDwEZ1I,2896
 dataeval/metrics/stats/_hashstats.py,sha256=qa1CYRgOebkxqkALfffaPM-kJ074ZbyfpWbfOfuObSs,4758
 dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
-dataeval/metrics/stats/_labelstats.py,sha256=
-dataeval/metrics/stats/_pixelstats.py,sha256=
-dataeval/metrics/stats/_visualstats.py,sha256=
+dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
+dataeval/metrics/stats/_pixelstats.py,sha256=N9e7RXuzSHtlJtWU7l5IcTTIXe2kOmWiuj6lnJpZWq0,3312
+dataeval/metrics/stats/_visualstats.py,sha256=b6jMq36_UlKduMrkwfq2i0fXNalDEcMdqPgoynXl5hI,3713
 dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
 dataeval/outputs/_base.py,sha256=-Wa0gFcBVLbfWPMZyCql7x4vGsnkLP4pecsQIeUZ2_Y,5904
 dataeval/outputs/_bias.py,sha256=1OZpKncYTryjPLRHb4d6NlhE27uPT57gCob_5jtjKDI,10456
-dataeval/outputs/_drift.py,sha256=
+dataeval/outputs/_drift.py,sha256=hXILED_soY8ppIQZgftQvmumtwDrTnABbYl-flIGEU4,4588
 dataeval/outputs/_estimators.py,sha256=mh-R08CgYtmq9ffANDMYR-V4vrZnSjOjEyOMiMDZ2Ic,3091
-dataeval/outputs/_linters.py,sha256=
+dataeval/outputs/_linters.py,sha256=k8lkd8EZ23q0m-HOD-FgqMcLQFy1UH7vws2ucLPyn08,6697
 dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
 dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
-dataeval/outputs/_stats.py,sha256=
+dataeval/outputs/_stats.py,sha256=_ItGjs9YaMHqjivkR1YBcSErD5ICfa_-iV9nq0l8bTM,17451
 dataeval/outputs/_utils.py,sha256=NfhYaGT2PZlhIs8ICKUsPWHZXjhWYDkEJqBDdqMeaOM,929
 dataeval/outputs/_workflows.py,sha256=K786mOgegxVi81diUA-qpbwGEkwa8YA7Fk4ttgjJeaY,10831
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -83,16 +83,16 @@ dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8
 dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
 dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
 dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
-dataeval/utils/_plot.py,sha256=
+dataeval/utils/_plot.py,sha256=1rnMkBRvTFLoTAHqXwF7c7GJ5_5iqlgarZKAzmYciLk,7225
 dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
-dataeval/utils/data/_dataset.py,sha256=
+dataeval/utils/data/_dataset.py,sha256=tC_vqgWnmojAoAANo5BUVfEUYXl7GzOBSeYjR9olbDk,9506
 dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
 dataeval/utils/data/metadata.py,sha256=L1c2bCiMj0aR0QCoKkjwBujIftJDEMgW_3ZbgeS8WHo,14703
 dataeval/utils/datasets/__init__.py,sha256=pAXqHX76yAoBI8XB3m6zGuW-u3s3PCoIXG5GDzxH7Zs,572
 dataeval/utils/datasets/_antiuav.py,sha256=kA_ia1fYNcJiz9SpCvh-Z8iSc7iJrdogjBI3soyaa7A,8304
 dataeval/utils/datasets/_base.py,sha256=pyfpJda3ku469M3TFRsJn9S2oAiQODOGTlLcdcoEW9U,9031
 dataeval/utils/datasets/_cifar10.py,sha256=hZc_A30yKYBbv2kvVdEkZ9egyEe6XBUnmksoIAoJ-5Y,8265
-dataeval/utils/datasets/_fileio.py,sha256=
+dataeval/utils/datasets/_fileio.py,sha256=LEoFVNdryRdi7mKpWw-9D8lA6XMa-Jaszd85bv93POo,5454
 dataeval/utils/datasets/_milco.py,sha256=iXf4C1I3Eg_3gHKUe4XPi21yFMBO51zxTIqAkGf9bYg,7869
 dataeval/utils/datasets/_mixin.py,sha256=S8iii-SoYUsFFYNXjw2thlZkpBvRLnZ4XI8wTqOKXgU,1729
 dataeval/utils/datasets/_mnist.py,sha256=uz46sE1Go3TgGjG6x2cXckSVQ0mSg2mhgk8BUvLWjb0,8149
@@ -102,12 +102,12 @@ dataeval/utils/datasets/_voc.py,sha256=pafY112O80isYkrdy7Quie9SBm_TmYhREuyl8Sxts
 dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
 dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
 dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
-dataeval/utils/torch/_internal.py,sha256=
+dataeval/utils/torch/_internal.py,sha256=HuyBB7NWFI9sUrRbOCZFxOfZjRGPdqr5iF7_DT2S0wo,4159
 dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
 dataeval/utils/torch/trainer.py,sha256=Oc2lK13uPGhmLYbmAqlPWyKxgG4YJFlnSXCqFHUZbdA,5528
 dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
 dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
-dataeval-0.86.
-dataeval-0.86.
-dataeval-0.86.
-dataeval-0.86.
+dataeval-0.86.5.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+dataeval-0.86.5.dist-info/METADATA,sha256=qx7aNDgzyAfRRKWjDXkfXojBdsBFnjMgwTVl0JsLbbw,5353
+dataeval-0.86.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.86.5.dist-info/RECORD,,
{dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/LICENSE.txt
File without changes
{dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/WHEEL
File without changes