dataeval 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. dataeval/__init__.py +4 -4
  2. dataeval/detectors/__init__.py +4 -3
  3. dataeval/detectors/drift/__init__.py +10 -11
  4. dataeval/{_internal/detectors → detectors}/drift/base.py +51 -102
  5. dataeval/{_internal/detectors → detectors}/drift/cvm.py +9 -8
  6. dataeval/{_internal/detectors → detectors}/drift/ks.py +11 -10
  7. dataeval/{_internal/detectors → detectors}/drift/mmd.py +33 -34
  8. dataeval/{_internal/detectors → detectors}/drift/torch.py +15 -13
  9. dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +12 -9
  10. dataeval/detectors/drift/updates.py +61 -0
  11. dataeval/detectors/linters/__init__.py +3 -3
  12. dataeval/{_internal/detectors → detectors/linters}/clusterer.py +47 -45
  13. dataeval/{_internal/detectors → detectors/linters}/duplicates.py +20 -10
  14. dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
  15. dataeval/{_internal/detectors → detectors/linters}/outliers.py +19 -26
  16. dataeval/detectors/ood/__init__.py +8 -16
  17. dataeval/{_internal/detectors → detectors}/ood/ae.py +9 -9
  18. dataeval/{_internal/detectors → detectors}/ood/aegmm.py +10 -30
  19. dataeval/{_internal/detectors → detectors}/ood/base.py +27 -21
  20. dataeval/{_internal/detectors → detectors}/ood/llr.py +27 -23
  21. dataeval/detectors/ood/metadata_ks_compare.py +99 -0
  22. dataeval/detectors/ood/metadata_least_likely.py +119 -0
  23. dataeval/detectors/ood/metadata_ood_mi.py +92 -0
  24. dataeval/{_internal/detectors → detectors}/ood/vae.py +11 -13
  25. dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
  26. dataeval/{_internal/interop.py → interop.py} +12 -7
  27. dataeval/metrics/__init__.py +1 -1
  28. dataeval/metrics/bias/__init__.py +4 -4
  29. dataeval/{_internal/metrics → metrics/bias}/balance.py +70 -4
  30. dataeval/{_internal/metrics → metrics/bias}/coverage.py +10 -8
  31. dataeval/{_internal/metrics → metrics/bias}/diversity.py +54 -20
  32. dataeval/metrics/bias/metadata.py +275 -0
  33. dataeval/{_internal/metrics → metrics/bias}/parity.py +21 -17
  34. dataeval/metrics/estimators/__init__.py +3 -3
  35. dataeval/{_internal/metrics → metrics/estimators}/ber.py +31 -28
  36. dataeval/{_internal/metrics → metrics/estimators}/divergence.py +15 -16
  37. dataeval/{_internal/metrics → metrics/estimators}/uap.py +8 -6
  38. dataeval/metrics/stats/__init__.py +7 -7
  39. dataeval/{_internal/metrics → metrics}/stats/base.py +66 -40
  40. dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +19 -15
  41. dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +19 -17
  42. dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +12 -10
  43. dataeval/metrics/stats/hashstats.py +156 -0
  44. dataeval/{_internal/metrics → metrics}/stats/labelstats.py +8 -6
  45. dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +12 -11
  46. dataeval/{_internal/metrics → metrics}/stats/visualstats.py +14 -13
  47. dataeval/{_internal/output.py → output.py} +26 -6
  48. dataeval/utils/__init__.py +8 -4
  49. dataeval/utils/image.py +71 -0
  50. dataeval/utils/shared.py +151 -0
  51. dataeval/utils/split_dataset.py +486 -0
  52. dataeval/utils/tensorflow/__init__.py +9 -7
  53. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +64 -68
  54. dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +10 -9
  55. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +18 -22
  56. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
  57. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +18 -18
  58. dataeval/utils/tensorflow/loss/__init__.py +6 -2
  59. dataeval/utils/torch/__init__.py +7 -3
  60. dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
  61. dataeval/{_internal → utils/torch}/datasets.py +49 -43
  62. dataeval/utils/torch/models.py +138 -0
  63. dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +12 -141
  64. dataeval/{_internal → utils/torch}/utils.py +3 -1
  65. dataeval/workflows/__init__.py +1 -1
  66. dataeval/{_internal/workflows → workflows}/sufficiency.py +42 -37
  67. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/METADATA +7 -5
  68. dataeval-0.72.2.dist-info/RECORD +72 -0
  69. dataeval/_internal/detectors/__init__.py +0 -0
  70. dataeval/_internal/detectors/drift/__init__.py +0 -0
  71. dataeval/_internal/detectors/ood/__init__.py +0 -0
  72. dataeval/_internal/metrics/__init__.py +0 -0
  73. dataeval/_internal/metrics/stats/hashstats.py +0 -75
  74. dataeval/_internal/metrics/utils.py +0 -447
  75. dataeval/_internal/models/__init__.py +0 -0
  76. dataeval/_internal/models/pytorch/__init__.py +0 -0
  77. dataeval/_internal/models/pytorch/utils.py +0 -67
  78. dataeval/_internal/models/tensorflow/__init__.py +0 -0
  79. dataeval/_internal/workflows/__init__.py +0 -0
  80. dataeval/detectors/drift/kernels/__init__.py +0 -10
  81. dataeval/detectors/drift/updates/__init__.py +0 -7
  82. dataeval/utils/tensorflow/models/__init__.py +0 -9
  83. dataeval/utils/tensorflow/recon/__init__.py +0 -3
  84. dataeval/utils/torch/datasets/__init__.py +0 -12
  85. dataeval/utils/torch/models/__init__.py +0 -11
  86. dataeval/utils/torch/trainer/__init__.py +0 -7
  87. dataeval-0.72.0.dist-info/RECORD +0 -80
  88. /dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
  89. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
  90. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0
dataeval/{_internal/metrics → metrics/estimators}/ber.py
@@ -1,7 +1,7 @@
 """
 This module contains the implementation of the
 FR Test Statistic based estimate and the
-KNN based estimate for the Bayes Error Rate
+KNN based estimate for the :term:`Bayes error rate<Bayes Error Rate (BER)>`
 
 Learning to Bound the Multi-class Bayes Error (Th. 3 and Th. 4)
 https://arxiv.org/abs/1811.06419
@@ -9,6 +9,8 @@ https://arxiv.org/abs/1811.06419
 
 from __future__ import annotations
 
+__all__ = ["BEROutput", "ber"]
+
 from dataclasses import dataclass
 from typing import Literal
 
@@ -17,9 +19,9 @@ from numpy.typing import ArrayLike, NDArray
 from scipy.sparse import coo_matrix
 from scipy.stats import mode
 
-from dataeval._internal.interop import as_numpy
-from dataeval._internal.metrics.utils import compute_neighbors, get_classes_counts, get_method, minimum_spanning_tree
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.interop import as_numpy
+from dataeval.output import OutputMetadata, set_metadata
+from dataeval.utils.shared import compute_neighbors, get_classes_counts, get_method, minimum_spanning_tree
 
 
 @dataclass(frozen=True)
@@ -30,7 +32,7 @@ class BEROutput(OutputMetadata):
     Attributes
     ----------
     ber : float
-        The upper bounds of the Bayes Error Rate
+        The upper bounds of the :term:`Bayes error rate<Bayes Error Rate (BER)>`
     ber_lower : float
         The lower bounds of the Bayes Error Rate
     """
@@ -39,51 +41,55 @@ class BEROutput(OutputMetadata):
     ber_lower: float
 
 
-def ber_mst(X: NDArray, y: NDArray) -> tuple[float, float]:
-    """Calculates the Bayes Error Rate using a minimum spanning tree
+def ber_mst(images: NDArray[np.float64], labels: NDArray[np.int_], k: int = 1) -> tuple[float, float]:
+    """Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree
 
     Parameters
     ----------
-    X : NDArray, shape - (N, ... )
+    images : NDArray, shape - (N, ... )
         n_samples containing n_features
-    y : NDArray, shape - (N, 1)
+    labels : NDArray, shape - (N, 1)
         Labels corresponding to each sample
+    k : int
+        Unused
 
     Returns
     -------
    Tuple[float, float]
        The upper and lower bounds of the bayes error rate
    """
-    M, N = get_classes_counts(y)
+    M, N = get_classes_counts(labels)
 
-    tree = coo_matrix(minimum_spanning_tree(X))
-    matches = np.sum([y[tree.row[i]] != y[tree.col[i]] for i in range(N - 1)])
+    tree = coo_matrix(minimum_spanning_tree(images))
+    matches = np.sum([labels[tree.row[i]] != labels[tree.col[i]] for i in range(N - 1)])
     deltas = matches / (2 * N)
     upper = 2 * deltas
     lower = ((M - 1) / (M)) * (1 - max(1 - 2 * ((M) / (M - 1)) * deltas, 0) ** 0.5)
     return upper, lower
 
 
-def ber_knn(X: NDArray, y: NDArray, k: int) -> tuple[float, float]:
-    """Calculates the Bayes Error Rate using K-nearest neighbors
+def ber_knn(images: NDArray[np.float64], labels: NDArray[np.int_], k: int) -> tuple[float, float]:
+    """Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using K-nearest neighbors
 
     Parameters
     ----------
-    X : NDArray, shape - (N, ... )
+    images : NDArray, shape - (N, ... )
         n_samples containing n_features
-    y : NDArray, shape - (N, 1)
+    labels : NDArray, shape - (N, 1)
        Labels corresponding to each sample
+    k : int
+        The number of neighbors to find
 
     Returns
    -------
    Tuple[float, float]
        The upper and lower bounds of the bayes error rate
    """
-    M, N = get_classes_counts(y)
-    nn_indices = compute_neighbors(X, X, k=k)
+    M, N = get_classes_counts(labels)
+    nn_indices = compute_neighbors(images, images, k=k)
     nn_indices = np.expand_dims(nn_indices, axis=1) if nn_indices.ndim == 1 else nn_indices
-    modal_class = mode(y[nn_indices], axis=1, keepdims=True).mode.squeeze()
-    upper = float(np.count_nonzero(modal_class - y) / N)
+    modal_class = mode(labels[nn_indices], axis=1, keepdims=True).mode.squeeze()
+    upper = float(np.count_nonzero(modal_class - labels) / N)
     lower = knn_lowerbound(upper, M, k)
     return upper, lower
 
@@ -108,18 +114,15 @@ def knn_lowerbound(value: float, classes: int, k: int) -> float:
     return ((classes - 1) / classes) * (1 - np.sqrt(max(0, 1 - ((classes / (classes - 1)) * value))))
 
 
-BER_FN_MAP = {"KNN": ber_knn, "MST": ber_mst}
-
-
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
     """
-    An estimator for Multi-class Bayes Error Rate using FR or KNN test statistic basis
+    An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using FR or KNN test statistic basis
 
     Parameters
     ----------
     images : ArrayLike (N, ... )
-        Array of images or image embeddings
+        Array of images or image :term:`embeddings<Embeddings>`
     labels : ArrayLike (N, 1)
        Array of labels for each image or image embedding
     k : int, default 1
@@ -146,8 +149,8 @@ def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN",
     >>> ber(images, labels)
     BEROutput(ber=0.04, ber_lower=0.020416847668728033)
     """
-    ber_fn = get_method(BER_FN_MAP, method)
+    ber_fn = get_method({"KNN": ber_knn, "MST": ber_mst}, method)
     X = as_numpy(images)
     y = as_numpy(labels)
-    upper, lower = ber_fn(X, y, k) if method == "KNN" else ber_fn(X, y)
+    upper, lower = ber_fn(X, y, k)
     return BEROutput(upper, lower)
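A note on the API change above: `ber` now always forwards `k` to the selected estimator, and `ber_mst` simply ignores it. A minimal usage sketch, assuming `ber` is re-exported from `dataeval.metrics.estimators` as the file move suggests (the synthetic embeddings and labels below are illustrative, not from the package's doctests):

    import numpy as np
    from dataeval.metrics.estimators import ber

    rng = np.random.default_rng(0)
    embeddings = rng.normal(size=(100, 16))  # (N, ...) image embeddings
    labels = rng.integers(0, 2, size=100)    # (N,) class labels

    # Both methods accept k after this change; the MST estimator ignores it.
    print(ber(embeddings, labels, k=1, method="KNN"))
    print(ber(embeddings, labels, method="MST"))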
dataeval/{_internal/metrics → metrics/estimators}/divergence.py
@@ -1,19 +1,21 @@
 """
-This module contains the implementation of HP Divergence
+This module contains the implementation of HP :term:`divergence<Divergence>`
 using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
 """
 
 from __future__ import annotations
 
+__all__ = ["DivergenceOutput", "divergence"]
+
 from dataclasses import dataclass
 from typing import Literal
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 
-from dataeval._internal.interop import as_numpy
-from dataeval._internal.metrics.utils import compute_neighbors, get_method, minimum_spanning_tree
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.interop import as_numpy
+from dataeval.output import OutputMetadata, set_metadata
+from dataeval.utils.shared import compute_neighbors, get_method, minimum_spanning_tree
 
 
 @dataclass(frozen=True)
@@ -24,7 +26,7 @@ class DivergenceOutput(OutputMetadata):
     Attributes
     ----------
     divergence : float
-        Divergence value calculated between 2 datasets ranging between 0.0 and 1.0
+        :term:`Divergence` value calculated between 2 datasets ranging between 0.0 and 1.0
     errors : int
         The number of differing edges between the datasets
     """
@@ -33,7 +35,7 @@ class DivergenceOutput(OutputMetadata):
     errors: int
 
 
-def divergence_mst(data: NDArray, labels: NDArray) -> int:
+def divergence_mst(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
     """
     Calculates the estimated label errors based on the minimum spanning tree
 
@@ -55,7 +57,7 @@ def divergence_mst(data: NDArray, labels: NDArray) -> int:
     return errors
 
 
-def divergence_fnn(data: NDArray, labels: NDArray) -> int:
+def divergence_fnn(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
     """
     Calculates the estimated label errors based on their nearest neighbors
 
@@ -76,13 +78,10 @@ def divergence_fnn(data: NDArray, labels: NDArray) -> int:
     return errors
 
 
-DIVERGENCE_FN_MAP = {"FNN": divergence_fnn, "MST": divergence_mst}
-
-
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
     """
-    Calculates the divergence and any errors between the datasets
+    Calculates the :term`divergence` and any errors between the datasets
 
     Parameters
     ----------
@@ -93,7 +92,7 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
        A dataset in an ArrayLike format to compare.
        Function expects the data to have 2 dimensions, N number of observations in a P-dimensionial space.
     method : Literal["MST, "FNN"], default "FNN"
-        Method used to estimate dataset divergence
+        Method used to estimate dataset :term:`divergence<Divergence>`
 
     Returns
     -------
@@ -124,16 +123,16 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
     Evaluate the datasets:
 
     >>> divergence(datasetA, datasetB)
-    DivergenceOutput(divergence=0.28, errors=36.0)
+    DivergenceOutput(divergence=0.28, errors=36)
     """
-    div_fn = get_method(DIVERGENCE_FN_MAP, method)
+    div_fn = get_method({"FNN": divergence_fnn, "MST": divergence_mst}, method)
     a = as_numpy(data_a)
     b = as_numpy(data_b)
    N = a.shape[0]
    M = b.shape[0]
 
     stacked_data = np.vstack((a, b))
-    labels = np.vstack([np.zeros([N, 1]), np.ones([M, 1])])
+    labels = np.vstack([np.zeros([N, 1], dtype=np.int_), np.ones([M, 1], dtype=np.int_)])
 
     errors = div_fn(stacked_data, labels)
     dp = max(0.0, 1 - ((M + N) / (2 * M * N)) * errors)
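The closing line of this hunk is the HP (Henze-Penrose) divergence estimate itself: with datasets of size N and M and `errors` cross-dataset edges, the statistic is dp = max(0, 1 - (M + N) / (2 * M * N) * errors). A quick arithmetic check against the doctest value above (the dataset sizes are an assumption, since the doctest inputs are not shown in this hunk):

    # Reproduce the DivergenceOutput(divergence=0.28, errors=36) doctest value.
    N, M = 50, 50   # assumed sizes of datasetA and datasetB
    errors = 36     # cross-dataset edges counted by divergence_fnn / divergence_mst
    dp = max(0.0, 1 - ((M + N) / (2 * M * N)) * errors)
    print(dp)       # 0.28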
dataeval/{_internal/metrics → metrics/estimators}/uap.py
@@ -1,18 +1,20 @@
 """
 This module contains the implementation of the
-FR Test Statistic based estimate for the upperbound
-average precision using empirical mean precision
+FR Test Statistic based estimate for the :term:`upper-bound
+average precision<Upper-Bound Average Precision (UAP)>` using empirical mean precision
 """
 
 from __future__ import annotations
 
+__all__ = ["UAPOutput", "uap"]
+
 from dataclasses import dataclass
 
 from numpy.typing import ArrayLike
 from sklearn.metrics import average_precision_score
 
-from dataeval._internal.interop import as_numpy
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.interop import as_numpy
+from dataeval.output import OutputMetadata, set_metadata
 
 
 @dataclass(frozen=True)
@@ -29,7 +31,7 @@ class UAPOutput(OutputMetadata):
     uap: float
 
 
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
     """
     FR Test Statistic based estimate of the empirical mean precision for
@@ -38,7 +40,7 @@ def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
     Parameters
     ----------
     labels : ArrayLike
-        A numpy array of n_samples of class labels with M unique classes.
+        A term:`NumPy` array of n_samples of class labels with M unique classes.
     scores : ArrayLike
        A 2D array of class probabilities per image
 
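For completeness, a hedged usage sketch of the relocated `uap` metric (assuming it is re-exported from `dataeval.metrics.estimators` after the move; the synthetic labels and per-class probability scores below follow the docstring shapes, not a package doctest):

    import numpy as np
    from dataeval.metrics.estimators import uap

    rng = np.random.default_rng(0)
    labels = rng.integers(0, 3, size=100)          # n_samples class labels, M=3 classes
    scores = rng.dirichlet(np.ones(3), size=100)   # 2D array of class probabilities per image
    print(uap(labels, scores).uap)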
dataeval/metrics/stats/__init__.py
@@ -3,18 +3,18 @@ Statistics metrics calculate a variety of image properties and pixel statistics
 and label statistics against the images and labels of a dataset.
 """
 
-from dataeval._internal.metrics.stats.boxratiostats import boxratiostats
-from dataeval._internal.metrics.stats.datasetstats import (
+from dataeval.metrics.stats.boxratiostats import boxratiostats
+from dataeval.metrics.stats.datasetstats import (
     ChannelStatsOutput,
     DatasetStatsOutput,
     channelstats,
     datasetstats,
 )
-from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
-from dataeval._internal.metrics.stats.hashstats import HashStatsOutput, hashstats
-from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
-from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
-from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, visualstats
+from dataeval.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
+from dataeval.metrics.stats.hashstats import HashStatsOutput, hashstats
+from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
+from dataeval.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
+from dataeval.metrics.stats.visualstats import VisualStatsOutput, visualstats
 
 __all__ = [
     "boxratiostats",
dataeval/{_internal/metrics → metrics}/stats/base.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+__all__ = []
+
 import re
 import warnings
 from dataclasses import dataclass
@@ -12,14 +14,17 @@ import numpy as np
 import tqdm
 from numpy.typing import ArrayLike, NDArray
 
-from dataeval._internal.interop import to_numpy_iter
-from dataeval._internal.metrics.utils import normalize_box_shape, normalize_image_shape, rescale
-from dataeval._internal.output import OutputMetadata
+from dataeval.interop import to_numpy_iter
+from dataeval.output import OutputMetadata
+from dataeval.utils.image import normalize_image_shape, rescale
 
 DTYPE_REGEX = re.compile(r"NDArray\[np\.(.*?)\]")
 SOURCE_INDEX = "source_index"
 BOX_COUNT = "box_count"
 
+# TODO: Replace with global config
+DEFAULT_PROCESSES: int | None = None
+
 OptionalRange = Optional[Union[int, Iterable[int]]]
 
 
@@ -29,6 +34,19 @@ def matches(index: int | None, opt_range: OptionalRange) -> bool:
     return index in opt_range if isinstance(opt_range, Iterable) else index == opt_range
 
 
+def normalize_box_shape(bounding_box: NDArray[Any]) -> NDArray[Any]:
+    """
+    Normalizes the bounding box shape into (N,4).
+    """
+    ndim = bounding_box.ndim
+    if ndim == 1:
+        return np.expand_dims(bounding_box, axis=0)
+    elif ndim > 2:
+        raise ValueError("Bounding boxes must have 2 dimensions: (# of boxes in an image, [X,Y,W,H]) -> (N,4)")
+    else:
+        return bounding_box
+
+
 class SourceIndex(NamedTuple):
     """
     Attributes
@@ -101,39 +119,39 @@ TStatsOutput = TypeVar("TStatsOutput", bound=BaseStatsOutput, covariant=True)
 class StatsProcessor(Generic[TStatsOutput]):
     output_class: type[TStatsOutput]
     cache_keys: list[str] = []
-    image_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
-    channel_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
+    image_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
+    channel_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
 
-    def __init__(self, image: NDArray, box: NDArray | None, per_channel: bool):
+    def __init__(self, image: NDArray[Any], box: NDArray[Any] | None, per_channel: bool) -> None:
         self.raw = image
-        self.width = image.shape[-1]
-        self.height = image.shape[-2]
-        self.box = np.array([0, 0, self.width, self.height]) if box is None else box
-        self.per_channel = per_channel
+        self.width: int = image.shape[-1]
+        self.height: int = image.shape[-2]
+        self.box: NDArray[Any] = np.array([0, 0, self.width, self.height]) if box is None else box
+        self._per_channel = per_channel
         self._image = None
         self._shape = None
         self._scaled = None
-        self.cache = {}
-        self.fn_map = self.channel_function_map if per_channel else self.image_function_map
-        self.is_valid_slice = box is None or bool(
+        self._cache = {}
+        self._fn_map = self.channel_function_map if per_channel else self.image_function_map
+        self._is_valid_slice = box is None or bool(
            box[0] >= 0 and box[1] >= 0 and box[2] <= image.shape[-1] and box[3] <= image.shape[-2]
        )
 
-    def get(self, fn_key: str) -> NDArray:
+    def get(self, fn_key: str) -> NDArray[Any]:
         if fn_key in self.cache_keys:
-            if fn_key not in self.cache:
-                self.cache[fn_key] = self.fn_map[fn_key](self)
-            return self.cache[fn_key]
+            if fn_key not in self._cache:
+                self._cache[fn_key] = self._fn_map[fn_key](self)
+            return self._cache[fn_key]
         else:
-            return self.fn_map[fn_key](self)
+            return self._fn_map[fn_key](self)
 
-    def process(self) -> dict:
-        return {k: self.fn_map[k](self) for k in self.fn_map}
+    def process(self) -> dict[str, Any]:
+        return {k: self._fn_map[k](self) for k in self._fn_map}
 
     @property
-    def image(self) -> NDArray:
+    def image(self) -> NDArray[Any]:
         if self._image is None:
-            if self.is_valid_slice:
+            if self._is_valid_slice:
                 norm = normalize_image_shape(self.raw)
                 self._image = norm[:, self.box[1] : self.box[3], self.box[0] : self.box[2]]
             else:
@@ -141,16 +159,16 @@ class StatsProcessor(Generic[TStatsOutput]):
         return self._image
 
     @property
-    def shape(self) -> tuple:
+    def shape(self) -> tuple[int, ...]:
         if self._shape is None:
             self._shape = self.image.shape
         return self._shape
 
     @property
-    def scaled(self) -> NDArray:
+    def scaled(self) -> NDArray[Any]:
         if self._scaled is None:
             self._scaled = rescale(self.image)
-            if self.per_channel:
+            if self._per_channel:
                 self._scaled = self._scaled.reshape(self.image.shape[0], -1)
         return self._scaled
 
@@ -175,25 +193,25 @@ class StatsProcessorOutput(NamedTuple):
     results: list[dict[str, Any]]
     source_indices: list[SourceIndex]
     box_counts: list[int]
-    warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]]
+    warnings_list: list[tuple[int, int, NDArray[np.float64], tuple[int, ...]]]
 
 
 def process_stats(
     i: int,
-    image_boxes: tuple[NDArray, NDArray | None],
+    image_boxes: tuple[NDArray[Any], NDArray[Any] | None],
     per_channel: bool,
-    stats_processor_cls: Iterable[type[StatsProcessor]],
+    stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
 ) -> StatsProcessorOutput:
     image, boxes = image_boxes
     results_list: list[dict[str, Any]] = []
     source_indices: list[SourceIndex] = []
     box_counts: list[int] = []
-    warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]] = []
+    warnings_list: list[tuple[int, int, NDArray[np.float64], tuple[int, ...]]] = []
     nboxes = [None] if boxes is None else normalize_box_shape(boxes)
     for i_b, box in enumerate(nboxes):
         i_b = None if box is None else i_b
         processor_list = [p(image, box, per_channel) for p in stats_processor_cls]
-        if any(not p.is_valid_slice for p in processor_list) and i_b is not None and box is not None:
+        if any(not p._is_valid_slice for p in processor_list) and i_b is not None and box is not None:
             warnings_list.append((i, i_b, box, image.shape))
         results_list.append({k: v for p in processor_list for k, v in p.process().items()})
         if per_channel:
@@ -204,7 +222,11 @@ def process_stats(
     return StatsProcessorOutput(results_list, source_indices, box_counts, warnings_list)
 
 
-def process_stats_unpack(args, per_channel: bool, stats_processor_cls: Iterable[type[StatsProcessor]]):
+def process_stats_unpack(
+    args: tuple[int, tuple[NDArray[Any], NDArray[Any] | None]],
+    per_channel: bool,
+    stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
+) -> StatsProcessorOutput:
     return process_stats(*args, per_channel=per_channel, stats_processor_cls=stats_processor_cls)
 
 
@@ -215,7 +237,7 @@ def run_stats(
     stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
 ) -> list[TStatsOutput]:
     """
-    Compute specified statistics on a set of images.
+    Compute specified :term:`statistics<Statistics>` on a set of images.
 
     This function applies a set of statistical operations to each image in the input iterable,
     based on the specified output class. The function determines which statistics to apply
@@ -225,7 +247,7 @@ def run_stats(
     ----------
     images : Iterable[ArrayLike]
         An iterable of images (e.g., list of arrays), where each image is represented as an
-        array-like structure (e.g., NumPy arrays).
+        array-like structure (e.g., :term:`NumPy` arrays).
     bboxes : Iterable[ArrayLike]
        An iterable of bounding boxes (e.g. list of arrays) where each bounding box is represented
        as an array-like structure in the format of (X0, Y0, X1, Y1). The length of the bounding boxes
@@ -234,24 +256,28 @@ def run_stats(
        A flag which determines if the states should be evaluated on a per-channel basis or not.
     stats_processor_cls : Iterable[type[StatsProcessor]]
        An iterable of stats processor classes that calculate stats and return output classes.
+    processes : int | None, default None
+        Number of processes to use, defaults to None which uses all available CPU cores.
 
     Returns
     -------
-    list[TStatsOutput]
-        A list of output classes corresponding to the input processor types.
+    dict[str, NDArray]]
+        A dictionary containing the computed statistics for each image.
+        The dictionary keys correspond to the names of the statistics, and the values are :term:`NumPy` arrays
+        with the results of the computations.
 
     Note
     ----
     - The function performs image normalization (rescaling the image values)
      before applying some of the statistics.
-    - Pixel-level statistics (e.g., brightness, entropy) are computed after
+    - Pixel-level statistics (e.g., :term:`brightness<Brightness>`, entropy) are computed after
      rescaling and, optionally, flattening the images.
    - For statistics like histograms and entropy, intermediate results may
      be reused to avoid redundant computation.
    """
-    results_list: list[dict[str, NDArray]] = []
-    source_index = []
-    box_count = []
+    results_list: list[dict[str, NDArray[np.float64]]] = []
+    source_index: list[SourceIndex] = []
+    box_count: list[int] = []
     bbox_iter = repeat(None) if bboxes is None else to_numpy_iter(bboxes)
 
     warning_list = []
@@ -259,7 +285,7 @@ def run_stats(
     stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
 
     # TODO: Introduce global controls for CPU job parallelism and GPU configurations
-    with Pool(16) as p:
+    with Pool(processes=DEFAULT_PROCESSES) as p:
         for r in tqdm.tqdm(
             p.imap(
                 partial(process_stats_unpack, per_channel=per_channel, stats_processor_cls=stats_processor_cls),
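The `Pool(16)` to `Pool(processes=DEFAULT_PROCESSES)` change leans on standard-library behavior: `multiprocessing.Pool(processes=None)` sizes the pool to `os.cpu_count()`, so the worker count now tracks the host instead of being hard-coded at 16. A minimal standalone illustration of that default (not DataEval code):

    import os
    from multiprocessing import Pool

    def square(x: int) -> int:
        return x * x

    if __name__ == "__main__":
        with Pool(processes=None) as p:  # None -> os.cpu_count() workers
            print(os.cpu_count(), p.map(square, range(8)))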
dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py
@@ -1,14 +1,16 @@
 from __future__ import annotations
 
+__all__ = ["boxratiostats"]
+
 import copy
-from typing import Callable, Generic, TypeVar, cast
+from typing import Any, Callable, Generic, TypeVar, cast
 
 import numpy as np
 from numpy.typing import NDArray
 
-from dataeval._internal.metrics.stats.base import BOX_COUNT, SOURCE_INDEX, BaseStatsOutput
-from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput
-from dataeval._internal.output import set_metadata
+from dataeval.metrics.stats.base import BOX_COUNT, SOURCE_INDEX, BaseStatsOutput
+from dataeval.metrics.stats.dimensionstats import DimensionStatsOutput
+from dataeval.output import set_metadata
 
 TStatOutput = TypeVar("TStatOutput", bound=BaseStatsOutput, contravariant=True)
 ArraySlice = tuple[int, int]
@@ -39,14 +41,16 @@ class BoxImageStatsOutputSlice(Generic[TStatOutput]):
         self.img = self.StatSlicer(img_stats, img_slice)
 
 
-RATIOSTATS_OVERRIDE_MAP: dict[type, dict[str, Callable[[BoxImageStatsOutputSlice], NDArray]]] = {
-    DimensionStatsOutput: {
-        "left": lambda x: x.box["left"] / x.img["width"],
-        "top": lambda x: x.box["top"] / x.img["height"],
-        "channels": lambda x: x.box["channels"],
-        "depth": lambda x: x.box["depth"],
-        "distance": lambda x: x.box["distance"],
-    }
+RATIOSTATS_OVERRIDE_MAP: dict[type, dict[str, Callable[..., NDArray[Any]]]] = {
+    DimensionStatsOutput: dict[str, Callable[[BoxImageStatsOutputSlice[DimensionStatsOutput]], NDArray[Any]]](
+        {
+            "left": lambda x: x.box["left"] / x.img["width"],
+            "top": lambda x: x.box["top"] / x.img["height"],
+            "channels": lambda x: x.box["channels"],
+            "depth": lambda x: x.box["depth"],
+            "distance": lambda x: x.box["distance"],
+        }
+    )
 }
 
 
@@ -60,7 +64,7 @@ def get_index_map(stats: BaseStatsOutput) -> list[int]:
     return index_map
 
 
-def calculate_ratios(key: str, box_stats: BaseStatsOutput, img_stats: BaseStatsOutput) -> NDArray:
+def calculate_ratios(key: str, box_stats: BaseStatsOutput, img_stats: BaseStatsOutput) -> NDArray[np.float64]:
     if not hasattr(box_stats, key) or not hasattr(img_stats, key):
         raise KeyError("Invalid key for provided stats output object.")
 
@@ -92,13 +96,13 @@ def calculate_ratios(key: str, box_stats: BaseStatsOutput, img_stats: BaseStatsO
     return out_stats
 
 
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def boxratiostats(
     boxstats: TStatOutput,
     imgstats: TStatOutput,
 ) -> TStatOutput:
     """
-    Calculates ratio statistics of box outputs over image outputs
+    Calculates ratio :term:`statistics<Statistics>` of box outputs over image outputs
 
     Parameters
     ----------