PyPI - dataeval - Versions diffs - 0.76.1__py3-none-any.whl → 0.81.0__py3-none-any.whl - Mend

dataeval 0.76.1__py3-none-any.whl → 0.81.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

dataeval/__init__.py +3 -3
dataeval/{output.py → _output.py} +14 -0
dataeval/config.py +77 -0
dataeval/detectors/__init__.py +1 -1
dataeval/detectors/drift/__init__.py +6 -6
dataeval/detectors/drift/{base.py → _base.py} +41 -30
dataeval/detectors/drift/{cvm.py → _cvm.py} +21 -28
dataeval/detectors/drift/{ks.py → _ks.py} +20 -26
dataeval/detectors/drift/{mmd.py → _mmd.py} +33 -19
dataeval/detectors/drift/{torch.py → _torch.py} +2 -1
dataeval/detectors/drift/{uncertainty.py → _uncertainty.py} +23 -7
dataeval/detectors/drift/updates.py +1 -1
dataeval/detectors/linters/__init__.py +0 -3
dataeval/detectors/linters/duplicates.py +17 -8
dataeval/detectors/linters/outliers.py +23 -14
dataeval/detectors/ood/ae.py +29 -8
dataeval/detectors/ood/base.py +5 -4
dataeval/detectors/ood/metadata_ks_compare.py +1 -1
dataeval/detectors/ood/mixin.py +20 -5
dataeval/detectors/ood/output.py +1 -1
dataeval/detectors/ood/vae.py +73 -0
dataeval/metadata/__init__.py +5 -0
dataeval/metadata/_ood.py +238 -0
dataeval/metrics/__init__.py +1 -1
dataeval/metrics/bias/__init__.py +5 -4
dataeval/metrics/bias/{balance.py → _balance.py} +67 -17
dataeval/metrics/bias/{coverage.py → _coverage.py} +41 -35
dataeval/metrics/bias/{diversity.py → _diversity.py} +17 -12
dataeval/metrics/bias/{parity.py → _parity.py} +89 -61
dataeval/metrics/estimators/__init__.py +14 -4
dataeval/metrics/estimators/{ber.py → _ber.py} +42 -11
dataeval/metrics/estimators/_clusterer.py +104 -0
dataeval/metrics/estimators/{divergence.py → _divergence.py} +18 -13
dataeval/metrics/estimators/{uap.py → _uap.py} +4 -4
dataeval/metrics/stats/__init__.py +7 -7
dataeval/metrics/stats/{base.py → _base.py} +52 -16
dataeval/metrics/stats/{boxratiostats.py → _boxratiostats.py} +6 -9
dataeval/metrics/stats/{datasetstats.py → _datasetstats.py} +10 -14
dataeval/metrics/stats/{dimensionstats.py → _dimensionstats.py} +6 -5
dataeval/metrics/stats/{hashstats.py → _hashstats.py} +6 -6
dataeval/metrics/stats/{labelstats.py → _labelstats.py} +4 -4
dataeval/metrics/stats/{pixelstats.py → _pixelstats.py} +5 -4
dataeval/metrics/stats/{visualstats.py → _visualstats.py} +9 -8
dataeval/typing.py +54 -0
dataeval/utils/__init__.py +2 -2
dataeval/utils/_array.py +169 -0
dataeval/utils/_bin.py +199 -0
dataeval/utils/_clusterer.py +144 -0
dataeval/utils/_fast_mst.py +189 -0
dataeval/utils/{image.py → _image.py} +6 -4
dataeval/utils/_method.py +18 -0
dataeval/utils/{shared.py → _mst.py} +3 -65
dataeval/utils/{plot.py → _plot.py} +4 -4
dataeval/utils/data/__init__.py +22 -0
dataeval/utils/data/_embeddings.py +105 -0
dataeval/utils/data/_images.py +65 -0
dataeval/utils/data/_metadata.py +352 -0
dataeval/utils/data/_selection.py +119 -0
dataeval/utils/{dataset/split.py → data/_split.py} +13 -14
dataeval/utils/data/_targets.py +73 -0
dataeval/utils/data/_types.py +58 -0
dataeval/utils/data/collate.py +103 -0
dataeval/utils/data/datasets/__init__.py +17 -0
dataeval/utils/data/datasets/_base.py +254 -0
dataeval/utils/data/datasets/_cifar10.py +134 -0
dataeval/utils/data/datasets/_fileio.py +168 -0
dataeval/utils/data/datasets/_milco.py +153 -0
dataeval/utils/data/datasets/_mixin.py +56 -0
dataeval/utils/data/datasets/_mnist.py +183 -0
dataeval/utils/data/datasets/_ships.py +123 -0
dataeval/utils/data/datasets/_voc.py +352 -0
dataeval/utils/data/selections/__init__.py +15 -0
dataeval/utils/data/selections/_classfilter.py +60 -0
dataeval/utils/data/selections/_indices.py +26 -0
dataeval/utils/data/selections/_limit.py +26 -0
dataeval/utils/data/selections/_reverse.py +18 -0
dataeval/utils/data/selections/_shuffle.py +29 -0
dataeval/utils/metadata.py +51 -376
dataeval/utils/torch/{gmm.py → _gmm.py} +4 -2
dataeval/utils/torch/{internal.py → _internal.py} +21 -51
dataeval/utils/torch/models.py +43 -2
dataeval/workflows/sufficiency.py +10 -9
{dataeval-0.76.1.dist-info → dataeval-0.81.0.dist-info}/METADATA +4 -1
dataeval-0.81.0.dist-info/RECORD +94 -0
dataeval/detectors/linters/clusterer.py +0 -512
dataeval/detectors/linters/merged_stats.py +0 -49
dataeval/detectors/ood/metadata_least_likely.py +0 -119
dataeval/interop.py +0 -69
dataeval/utils/dataset/__init__.py +0 -7
dataeval/utils/dataset/datasets.py +0 -412
dataeval/utils/dataset/read.py +0 -63
dataeval-0.76.1.dist-info/RECORD +0 -67
/dataeval/{log.py → _log.py} +0 -0
/dataeval/utils/torch/{blocks.py → _blocks.py} +0 -0
{dataeval-0.76.1.dist-info → dataeval-0.81.0.dist-info}/LICENSE.txt +0 -0
{dataeval-0.76.1.dist-info → dataeval-0.81.0.dist-info}/WHEEL +0 -0

dataeval/metrics/estimators/__init__.py CHANGED Viewed

@@ -2,8 +2,18 @@
 Estimators calculate performance bounds and the statistical distance between datasets.
 """
-__all__ = ["ber", "divergence", "uap", "BEROutput", "DivergenceOutput", "UAPOutput"]
+__all__ = [
+    "ber",
+    "clusterer",
+    "divergence",
+    "uap",
+    "BEROutput",
+    "ClustererOutput",
+    "DivergenceOutput",
+    "UAPOutput",
+]
-from dataeval.metrics.estimators.ber import BEROutput, ber
-from dataeval.metrics.estimators.divergence import DivergenceOutput, divergence
-from dataeval.metrics.estimators.uap import UAPOutput, uap
+from dataeval.metrics.estimators._ber import BEROutput, ber
+from dataeval.metrics.estimators._clusterer import ClustererOutput, clusterer
+from dataeval.metrics.estimators._divergence import DivergenceOutput, divergence
+from dataeval.metrics.estimators._uap import UAPOutput, uap

dataeval/metrics/estimators/{ber.py → _ber.py} RENAMED Viewed

@@ -16,19 +16,21 @@ from dataclasses import dataclass
 from typing import Literal
 import numpy as np
-from numpy.typing import ArrayLike, NDArray
+from numpy.typing import NDArray
 from scipy.sparse import coo_matrix
 from scipy.stats import mode
-from dataeval.interop import as_numpy
-from dataeval.output import Output, set_metadata
-from dataeval.utils.shared import compute_neighbors, get_classes_counts, get_method, minimum_spanning_tree
+from dataeval._output import Output, set_metadata
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import as_numpy, ensure_embeddings
+from dataeval.utils._method import get_method
+from dataeval.utils._mst import compute_neighbors, minimum_spanning_tree
 @dataclass(frozen=True)
 class BEROutput(Output):
     """
-    Output class for :func:`ber` estimator metric.
+    Output class for :func:`.ber` estimator metric.
     Attributes
     ----------
@@ -116,18 +118,21 @@ def knn_lowerbound(value: float, classes: int, k: int) -> float:
     return ((classes - 1) / classes) * (1 - np.sqrt(max(0, 1 - ((classes / (classes - 1)) * value))))
+_BER_FN_MAP = {"KNN": ber_knn, "MST": ber_mst}
 @set_metadata
-def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
+def ber(embeddings: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
     """
     An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` \
     using FR or KNN test statistic basis.
     Parameters
     ----------
-    images : ArrayLike (N, ... )
-        Array of images or image :term:`embeddings<Embeddings>`
+    embeddings : ArrayLike (N, ... )
+        Array of image :term:`embeddings<Embeddings>`
     labels : ArrayLike (N, 1)
-        Array of labels for each image or image embedding
+        Array of labels for each image
     k : int, default 1
         Number of nearest neighbors for KNN estimator -- ignored by MST estimator
     method : Literal["KNN", "MST"], default "KNN"
@@ -152,8 +157,34 @@ def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN",
     >>> ber(images, labels)
     BEROutput(ber=0.04, ber_lower=0.020416847668728033)
     """
-    ber_fn = get_method({"KNN": ber_knn, "MST": ber_mst}, method)
-    X = as_numpy(images)
+    ber_fn = get_method(_BER_FN_MAP, method)
+    X = ensure_embeddings(embeddings, dtype=np.float64)
     y = as_numpy(labels)
     upper, lower = ber_fn(X, y, k)
     return BEROutput(upper, lower)
+def get_classes_counts(labels: NDArray[np.int_]) -> tuple[int, int]:
+    """
+    Returns the classes and counts of from an array of labels
+    Parameters
+    ----------
+    label : NDArray
+        Numpy labels array
+    Returns
+    -------
+        Classes and counts
+    Raises
+    ------
+    ValueError
+        If the number of unique classes is less than 2
+    """
+    classes, counts = np.unique(labels, return_counts=True)
+    M = len(classes)
+    if M < 2:
+        raise ValueError("Label vector contains less than 2 classes!")
+    N = int(np.sum(counts))
+    return M, N

dataeval/metrics/estimators/_clusterer.py ADDED Viewed

@@ -0,0 +1,104 @@
+from __future__ import annotations
+__all__ = []
+from dataclasses import dataclass
+import numpy as np
+from numpy.typing import NDArray
+from dataeval._output import Output
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import as_numpy
+@dataclass(frozen=True)
+class ClustererOutput(Output):
+    """
+    Output class for :func:`.clusterer`.
+    Attributes
+    ----------
+    clusters : NDArray[int]
+        Assigned clusters
+    mst : NDArray[int]
+        The minimum spanning tree of the data
+    linkage_tree : NDArray[float]
+        The linkage array of the data
+    condensed_tree : NDArray[float]
+        The condensed tree of the data
+    membership_strengths : NDArray[float]
+        The strength of the data point belonging to the assigned cluster
+    """
+    clusters: NDArray[np.int_]
+    mst: NDArray[np.double]
+    linkage_tree: NDArray[np.double]
+    condensed_tree: NDArray[np.double]
+    membership_strengths: NDArray[np.double]
+    def find_outliers(self) -> NDArray[np.int_]:
+        """
+        Retrieves Outliers based on when the sample was added to the cluster
+        and how far it was from the cluster when it was added
+        Returns
+        -------
+        NDArray[int]
+            A numpy array of the outlier indices
+        """
+        return np.nonzero(self.clusters == -1)[0]
+    def find_duplicates(self) -> tuple[list[list[int]], list[list[int]]]:
+        """
+        Finds duplicate and near duplicate data based on cluster average distance
+        Returns
+        -------
+        Tuple[List[List[int]], List[List[int]]]
+            The exact :term:`duplicates<Duplicates>` and near duplicates as lists of related indices
+        """
+        # Delay load numba compiled functions
+        from dataeval.utils._clusterer import compare_links_to_cluster_std, sorted_union_find
+        exact_indices, near_indices = compare_links_to_cluster_std(self.mst, self.clusters)
+        exact_dupes = sorted_union_find(exact_indices)
+        near_dupes = sorted_union_find(near_indices)
+        return [[int(ii) for ii in il] for il in exact_dupes], [[int(ii) for ii in il] for il in near_dupes]
+def clusterer(data: ArrayLike) -> ClustererOutput:
+    """
+    Uses hierarchical clustering on the flattened data and returns clustering
+    information.
+    Parameters
+    ----------
+    data : ArrayLike, shape - (N, ...)
+        A dataset in an ArrayLike format. Function expects the data to have 2
+        or more dimensions which will flatten to (N, P) where N number of
+        observations in a P-dimensional space.
+    Returns
+    -------
+    :class:`.ClustererOutput`
+    Note
+    ----
+    The clusterer works best when the length of the feature dimension, P, is
+    less than 500. If flattening a CxHxW image results in a dimension larger
+    than 500, then it is recommended to reduce the dimensions.
+    Example
+    -------
+    >>> clusterer(clusterer_images).clusters
+    array([ 2,  0,  0,  0,  0,  0,  4,  0,  3,  1,  1,  0,  2,  0,  0,  0,  0,
+            4,  2,  0,  0,  1,  2,  0,  1,  3,  0,  3,  3,  4,  0,  0,  3,  0,
+            3, -1,  0,  0,  2,  4,  3,  4,  0,  1,  0, -1,  3,  0,  0,  0])
+    """
+    # Delay load numba compiled functions
+    from dataeval.utils._clusterer import cluster
+    c = cluster(data)
+    return ClustererOutput(c.clusters, c.mst, c.linkage_tree, as_numpy(c.condensed_tree), c.membership_strengths)

dataeval/metrics/estimators/{divergence.py → _divergence.py} RENAMED Viewed

@@ -11,17 +11,19 @@ from dataclasses import dataclass
 from typing import Literal
 import numpy as np
-from numpy.typing import ArrayLike, NDArray
+from numpy.typing import NDArray
-from dataeval.interop import as_numpy
-from dataeval.output import Output, set_metadata
-from dataeval.utils.shared import compute_neighbors, get_method, minimum_spanning_tree
+from dataeval._output import Output, set_metadata
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import ensure_embeddings
+from dataeval.utils._method import get_method
+from dataeval.utils._mst import compute_neighbors, minimum_spanning_tree
 @dataclass(frozen=True)
 class DivergenceOutput(Output):
     """
-    Output class for :func:`divergence` estimator metric.
+    Output class for :func:`.divergence` estimator metric.
     Attributes
     ----------
@@ -78,18 +80,21 @@ def divergence_fnn(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
     return errors
+_DIVERGENCE_FN_MAP = {"FNN": divergence_fnn, "MST": divergence_mst}
 @set_metadata
-def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
+def divergence(emb_a: ArrayLike, emb_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
     """
     Calculates the :term:`divergence` and any errors between the datasets.
     Parameters
     ----------
-    data_a : ArrayLike, shape - (N, P)
-        A dataset in an ArrayLike format to compare.
+    emb_a : ArrayLike, shape - (N, P)
+        Image embeddings in an ArrayLike format to compare.
         Function expects the data to have 2 dimensions, N number of observations in a P-dimensionial space.
-    data_b : ArrayLike, shape - (N, P)
-        A dataset in an ArrayLike format to compare.
+    emb_b : ArrayLike, shape - (N, P)
+        Image embeddings in an ArrayLike format to compare.
         Function expects the data to have 2 dimensions, N number of observations in a P-dimensionial space.
     method : Literal["MST, "FNN"], default "FNN"
         Method used to estimate dataset :term:`divergence<Divergence>`
@@ -125,9 +130,9 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
     >>> divergence(datasetA, datasetB)
     DivergenceOutput(divergence=0.28, errors=36)
     """
-    div_fn = get_method({"FNN": divergence_fnn, "MST": divergence_mst}, method)
-    a = as_numpy(data_a)
-    b = as_numpy(data_b)
+    div_fn = get_method(_DIVERGENCE_FN_MAP, method)
+    a = ensure_embeddings(emb_a, dtype=np.float64)
+    b = ensure_embeddings(emb_b, dtype=np.float64)
     N = a.shape[0]
     M = b.shape[0]

dataeval/metrics/estimators/{uap.py → _uap.py} RENAMED Viewed

@@ -10,17 +10,17 @@ __all__ = []
 from dataclasses import dataclass
-from numpy.typing import ArrayLike
 from sklearn.metrics import average_precision_score
-from dataeval.interop import as_numpy
-from dataeval.output import Output, set_metadata
+from dataeval._output import Output, set_metadata
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import as_numpy
 @dataclass(frozen=True)
 class UAPOutput(Output):
     """
-    Output class for :func:`uap` estimator metric.
+    Output class for :func:`.uap` estimator metric.
     Attributes
     ----------

dataeval/metrics/stats/__init__.py CHANGED Viewed

@@ -21,15 +21,15 @@ __all__ = [
     "visualstats",
 ]
-from dataeval.metrics.stats.boxratiostats import boxratiostats
-from dataeval.metrics.stats.datasetstats import (
+from dataeval.metrics.stats._boxratiostats import boxratiostats
+from dataeval.metrics.stats._datasetstats import (
     ChannelStatsOutput,
     DatasetStatsOutput,
     channelstats,
     datasetstats,
 )
-from dataeval.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
-from dataeval.metrics.stats.hashstats import HashStatsOutput, hashstats
-from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
-from dataeval.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
-from dataeval.metrics.stats.visualstats import VisualStatsOutput, visualstats
+from dataeval.metrics.stats._dimensionstats import DimensionStatsOutput, dimensionstats
+from dataeval.metrics.stats._hashstats import HashStatsOutput, hashstats
+from dataeval.metrics.stats._labelstats import LabelStatsOutput, labelstats
+from dataeval.metrics.stats._pixelstats import PixelStatsOutput, pixelstats
+from dataeval.metrics.stats._visualstats import VisualStatsOutput, visualstats

dataeval/metrics/stats/{base.py → _base.py} RENAMED Viewed

@@ -1,32 +1,31 @@
 from __future__ import annotations
-from dataeval.utils.plot import histogram_plot
 __all__ = []
 import re
 import warnings
+from copy import deepcopy
 from dataclasses import dataclass
 from functools import partial
 from itertools import repeat
 from multiprocessing import Pool
-from typing import Any, Callable, Generic, Iterable, NamedTuple, Optional, TypeVar, Union
+from typing import Any, Callable, Generic, Iterable, Optional, Sequence, Sized, TypeVar, Union
 import numpy as np
 import tqdm
-from numpy.typing import ArrayLike, NDArray
+from numpy.typing import NDArray
-from dataeval.interop import to_numpy_iter
-from dataeval.output import Output
-from dataeval.utils.image import normalize_image_shape, rescale
+from dataeval._output import Output
+from dataeval.config import get_max_processes
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import to_numpy_iter
+from dataeval.utils._image import normalize_image_shape, rescale
+from dataeval.utils._plot import histogram_plot
 DTYPE_REGEX = re.compile(r"NDArray\[np\.(.*?)\]")
 SOURCE_INDEX = "source_index"
 BOX_COUNT = "box_count"
-# TODO: Replace with global config
-DEFAULT_PROCESSES: int | None = None
 OptionalRange = Optional[Union[int, Iterable[int]]]
@@ -49,7 +48,8 @@ def normalize_box_shape(bounding_box: NDArray[Any]) -> NDArray[Any]:
         return bounding_box
-class SourceIndex(NamedTuple):
+@dataclass
+class SourceIndex:
     """
     Attributes
     ----------
@@ -205,7 +205,8 @@ class StatsProcessor(Generic[TStatsOutput]):
         return cls.output_class(**output, source_index=source_index, box_count=np.asarray(box_count, dtype=np.uint16))
-class StatsProcessorOutput(NamedTuple):
+@dataclass
+class StatsProcessorOutput:
     results: list[dict[str, Any]]
     source_indices: list[SourceIndex]
     box_counts: list[int]
@@ -272,8 +273,6 @@ def run_stats(
         A flag which determines if the states should be evaluated on a per-channel basis or not.
     stats_processor_cls : Iterable[type[StatsProcessor]]
         An iterable of stats processor classes that calculate stats and return output classes.
-    processes : int | None, default None
-        Number of processes to use, defaults to None which uses all available CPU cores.
     Returns
     -------
@@ -297,11 +296,11 @@ def run_stats(
     bbox_iter = repeat(None) if bboxes is None else to_numpy_iter(bboxes)
     warning_list = []
-    total_for_status = getattr(images, "__len__")() if hasattr(images, "__len__") else None
+    total_for_status = len(images) if isinstance(images, Sized) else None
     stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
     # TODO: Introduce global controls for CPU job parallelism and GPU configurations
-    with Pool(processes=DEFAULT_PROCESSES) as p:
+    with Pool(processes=get_max_processes()) as p:
         for r in tqdm.tqdm(
             p.imap(
                 partial(process_stats_unpack, per_channel=per_channel, stats_processor_cls=stats_processor_cls),
@@ -330,3 +329,40 @@ def run_stats(
     outputs = [s.convert_output(output, source_index, box_count) for s in stats_processor_cls]
     return outputs
+def add_stats(a: TStatsOutput, b: TStatsOutput) -> TStatsOutput:
+    if type(a) is not type(b):
+        raise TypeError(f"Types {type(a)} and {type(b)} cannot be added.")
+    sum_dict = deepcopy(a.dict())
+    for k in sum_dict:
+        if isinstance(sum_dict[k], list):
+            sum_dict[k].extend(b.dict()[k])
+        else:
+            sum_dict[k] = np.concatenate((sum_dict[k], b.dict()[k]))
+    return type(a)(**sum_dict)
+def combine_stats(stats: Sequence[TStatsOutput]) -> tuple[TStatsOutput, list[int]]:
+    output = None
+    dataset_steps = []
+    cur_len = 0
+    for s in stats:
+        output = s if output is None else add_stats(output, s)
+        cur_len += len(s)
+        dataset_steps.append(cur_len)
+    if output is None:
+        raise TypeError("Cannot combine empty sequence of stats.")
+    return output, dataset_steps
+def get_dataset_step_from_idx(idx: int, dataset_steps: list[int]) -> tuple[int, int]:
+    last_step = 0
+    for i, step in enumerate(dataset_steps):
+        if idx < step:
+            return i, idx - last_step
+        last_step = step
+    return -1, idx

dataeval/metrics/stats/{boxratiostats.py → _boxratiostats.py} RENAMED Viewed

@@ -8,9 +8,9 @@ from typing import Any, Callable, Generic, TypeVar, cast
 import numpy as np
 from numpy.typing import NDArray
-from dataeval.metrics.stats.base import BOX_COUNT, SOURCE_INDEX, BaseStatsOutput
-from dataeval.metrics.stats.dimensionstats import DimensionStatsOutput
-from dataeval.output import set_metadata
+from dataeval._output import set_metadata
+from dataeval.metrics.stats._base import BOX_COUNT, SOURCE_INDEX, BaseStatsOutput
+from dataeval.metrics.stats._dimensionstats import DimensionStatsOutput
 TStatOutput = TypeVar("TStatOutput", bound=BaseStatsOutput, contravariant=True)
 ArraySlice = tuple[int, int]
@@ -50,7 +50,7 @@ RATIOSTATS_OVERRIDE_MAP: dict[type, dict[str, Callable[..., NDArray[Any]]]] = {
             "depth": lambda x: x.box["depth"],
             "distance": lambda x: x.box["distance"],
         }
-    )
+    ),
 }
@@ -87,11 +87,8 @@ def calculate_ratios(key: str, box_stats: BaseStatsOutput, img_stats: BaseStatsO
         stats = BoxImageStatsOutputSlice(box_stats, (box_i, box_j), img_stats, (img_i, img_j))
         out_type = type(box_stats)
         use_override = out_type in RATIOSTATS_OVERRIDE_MAP and key in RATIOSTATS_OVERRIDE_MAP[out_type]
-        ratio = (
-            RATIOSTATS_OVERRIDE_MAP[out_type][key](stats)
-            if use_override
-            else np.nan_to_num(stats.box[key] / stats.img[key])
-        )
+        with np.errstate(divide="ignore", invalid="ignore"):
+            ratio = RATIOSTATS_OVERRIDE_MAP[out_type][key](stats) if use_override else stats.box[key] / stats.img[key]
         out_stats[box_i:box_j] = ratio.reshape(-1, *out_stats[box_i].shape)
     return out_stats

dataeval/metrics/stats/{datasetstats.py → _datasetstats.py} RENAMED Viewed

@@ -5,24 +5,20 @@ __all__ = []
 from dataclasses import dataclass
 from typing import Any, Iterable
-from numpy.typing import ArrayLike
-from dataeval.metrics.stats.base import BaseStatsOutput, HistogramPlotMixin, _is_plottable, run_stats
-from dataeval.metrics.stats.dimensionstats import (
-    DimensionStatsOutput,
-    DimensionStatsProcessor,
-)
-from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
-from dataeval.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
-from dataeval.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
-from dataeval.output import Output, set_metadata
-from dataeval.utils.plot import channel_histogram_plot
+from dataeval._output import Output, set_metadata
+from dataeval.metrics.stats._base import BaseStatsOutput, HistogramPlotMixin, _is_plottable, run_stats
+from dataeval.metrics.stats._dimensionstats import DimensionStatsOutput, DimensionStatsProcessor
+from dataeval.metrics.stats._labelstats import LabelStatsOutput, labelstats
+from dataeval.metrics.stats._pixelstats import PixelStatsOutput, PixelStatsProcessor
+from dataeval.metrics.stats._visualstats import VisualStatsOutput, VisualStatsProcessor
+from dataeval.typing import ArrayLike
+from dataeval.utils._plot import channel_histogram_plot
 @dataclass(frozen=True)
 class DatasetStatsOutput(Output, HistogramPlotMixin):
     """
-    Output class for :func:`datasetstats` stats metric.
+    Output class for :func:`.datasetstats` stats metric.
     This class represents the outputs of various stats functions against a single
     dataset, such that each index across all stat outputs are representative of
@@ -82,7 +78,7 @@ def _get_channels(cls, channel_limit: int | None = None, channel_index: int | It
 @dataclass(frozen=True)
 class ChannelStatsOutput(Output):
     """
-    Output class for :func:`channelstats` stats metric.
+    Output class for :func:`.channelstats` stats metric.
     This class represents the outputs of various per-channel stats functions against
     a single dataset, such that each index across all stat outputs are representative

dataeval/metrics/stats/{dimensionstats.py → _dimensionstats.py} RENAMED Viewed

@@ -6,17 +6,18 @@ from dataclasses import dataclass
 from typing import Any, Callable, Iterable
 import numpy as np
-from numpy.typing import ArrayLike, NDArray
+from numpy.typing import NDArray
-from dataeval.metrics.stats.base import BaseStatsOutput, HistogramPlotMixin, StatsProcessor, run_stats
-from dataeval.output import set_metadata
-from dataeval.utils.image import get_bitdepth
+from dataeval._output import set_metadata
+from dataeval.metrics.stats._base import BaseStatsOutput, HistogramPlotMixin, StatsProcessor, run_stats
+from dataeval.typing import ArrayLike
+from dataeval.utils._image import get_bitdepth
 @dataclass(frozen=True)
 class DimensionStatsOutput(BaseStatsOutput, HistogramPlotMixin):
     """
-    Output class for :func:`dimensionstats` stats metric.
+    Output class for :func:`.dimensionstats` stats metric.
     Attributes
     ----------

dataeval/metrics/stats/{hashstats.py → _hashstats.py} RENAMED Viewed

@@ -9,14 +9,14 @@ from typing import Callable, Iterable
 import numpy as np
 import xxhash as xxh
-from numpy.typing import ArrayLike
 from PIL import Image
 from scipy.fftpack import dct
-from dataeval.interop import as_numpy
-from dataeval.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
-from dataeval.output import set_metadata
-from dataeval.utils.image import normalize_image_shape, rescale
+from dataeval._output import set_metadata
+from dataeval.metrics.stats._base import BaseStatsOutput, StatsProcessor, run_stats
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import as_numpy
+from dataeval.utils._image import normalize_image_shape, rescale
 HASH_SIZE = 8
 MAX_FACTOR = 4
@@ -25,7 +25,7 @@ MAX_FACTOR = 4
 @dataclass(frozen=True)
 class HashStatsOutput(BaseStatsOutput):
     """
-    Output class for :func:`hashstats` stats metric.
+    Output class for :func:`.hashstats` stats metric.
     Attributes
     ----------

dataeval/metrics/stats/{labelstats.py → _labelstats.py} RENAMED Viewed

@@ -8,10 +8,10 @@ from dataclasses import dataclass
 from typing import Any, Iterable, Mapping, TypeVar
 import numpy as np
-from numpy.typing import ArrayLike
-from dataeval.interop import as_numpy
-from dataeval.output import Output, set_metadata
+from dataeval._output import Output, set_metadata
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import as_numpy
 with contextlib.suppress(ImportError):
     import pandas as pd
@@ -20,7 +20,7 @@ with contextlib.suppress(ImportError):
 @dataclass(frozen=True)
 class LabelStatsOutput(Output):
     """
-    Output class for :func:`labelstats` stats metric.
+    Output class for :func:`.labelstats` stats metric.
     Attributes
     ----------

dataeval/metrics/stats/{pixelstats.py → _pixelstats.py} RENAMED Viewed

@@ -6,17 +6,18 @@ from dataclasses import dataclass
 from typing import Any, Callable, Iterable
 import numpy as np
-from numpy.typing import ArrayLike, NDArray
+from numpy.typing import NDArray
 from scipy.stats import entropy, kurtosis, skew
-from dataeval.metrics.stats.base import BaseStatsOutput, HistogramPlotMixin, StatsProcessor, run_stats
-from dataeval.output import set_metadata
+from dataeval._output import set_metadata
+from dataeval.metrics.stats._base import BaseStatsOutput, HistogramPlotMixin, StatsProcessor, run_stats
+from dataeval.typing import ArrayLike
 @dataclass(frozen=True)
 class PixelStatsOutput(BaseStatsOutput, HistogramPlotMixin):
     """
-    Output class for :func:`pixelstats` stats metric.
+    Output class for :func:`.pixelstats` stats metric.
     Attributes
     ----------

dataeval 0.76.1__py3-none-any.whl → 0.81.0__py3-none-any.whl

dataeval 0.76.1__py3-none-any.whl → 0.81.0__py3-none-any.whl