dataeval 0.70.0__py3-none-any.whl → 0.71.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. dataeval/__init__.py +6 -6
  2. dataeval/_internal/datasets.py +235 -131
  3. dataeval/_internal/detectors/clusterer.py +2 -0
  4. dataeval/_internal/detectors/drift/base.py +2 -2
  5. dataeval/_internal/detectors/drift/mmd.py +1 -1
  6. dataeval/_internal/detectors/duplicates.py +2 -0
  7. dataeval/_internal/detectors/ood/ae.py +5 -3
  8. dataeval/_internal/detectors/ood/aegmm.py +6 -4
  9. dataeval/_internal/detectors/ood/base.py +12 -7
  10. dataeval/_internal/detectors/ood/llr.py +6 -4
  11. dataeval/_internal/detectors/ood/vae.py +5 -3
  12. dataeval/_internal/detectors/ood/vaegmm.py +6 -4
  13. dataeval/_internal/detectors/outliers.py +6 -9
  14. dataeval/_internal/metrics/balance.py +4 -2
  15. dataeval/_internal/metrics/ber.py +2 -0
  16. dataeval/_internal/metrics/coverage.py +4 -0
  17. dataeval/_internal/metrics/divergence.py +6 -2
  18. dataeval/_internal/metrics/diversity.py +8 -6
  19. dataeval/_internal/metrics/parity.py +8 -6
  20. dataeval/_internal/metrics/stats/base.py +105 -46
  21. dataeval/_internal/metrics/stats/datasetstats.py +96 -22
  22. dataeval/_internal/metrics/stats/dimensionstats.py +22 -20
  23. dataeval/_internal/metrics/stats/hashstats.py +11 -9
  24. dataeval/_internal/metrics/stats/labelstats.py +1 -1
  25. dataeval/_internal/metrics/stats/pixelstats.py +28 -26
  26. dataeval/_internal/metrics/stats/visualstats.py +37 -35
  27. dataeval/_internal/metrics/uap.py +6 -2
  28. dataeval/_internal/metrics/utils.py +2 -2
  29. dataeval/_internal/models/pytorch/autoencoder.py +5 -5
  30. dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
  31. dataeval/_internal/utils.py +11 -16
  32. dataeval/_internal/workflows/sufficiency.py +44 -33
  33. dataeval/detectors/__init__.py +4 -0
  34. dataeval/detectors/drift/__init__.py +8 -3
  35. dataeval/detectors/drift/kernels/__init__.py +4 -0
  36. dataeval/detectors/drift/updates/__init__.py +4 -0
  37. dataeval/detectors/linters/__init__.py +15 -4
  38. dataeval/detectors/ood/__init__.py +14 -2
  39. dataeval/metrics/__init__.py +5 -0
  40. dataeval/metrics/bias/__init__.py +13 -4
  41. dataeval/metrics/estimators/__init__.py +8 -8
  42. dataeval/metrics/stats/__init__.py +24 -6
  43. dataeval/utils/__init__.py +16 -3
  44. dataeval/utils/tensorflow/__init__.py +11 -0
  45. dataeval/utils/torch/__init__.py +12 -0
  46. dataeval/utils/torch/datasets/__init__.py +7 -0
  47. dataeval/workflows/__init__.py +4 -0
  48. {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/METADATA +11 -2
  49. dataeval-0.71.0.dist-info/RECORD +80 -0
  50. dataeval/tensorflow/__init__.py +0 -3
  51. dataeval/torch/__init__.py +0 -3
  52. dataeval-0.70.0.dist-info/RECORD +0 -79
  53. /dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
  54. /dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
  55. /dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
  56. /dataeval/{torch → utils/torch}/models/__init__.py +0 -0
  57. /dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
  58. {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/LICENSE.txt +0 -0
  59. {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/WHEEL +0 -0
--- a/dataeval/_internal/detectors/ood/base.py
+++ b/dataeval/_internal/detectors/ood/base.py
@@ -10,7 +10,7 @@ from __future__ import annotations
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Callable, Literal, NamedTuple, cast
+from typing import Callable, Literal, cast
 
 import keras
 import numpy as np
@@ -26,6 +26,9 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class OODOutput(OutputMetadata):
     """
+    Output class for predictions from :class:`OOD_AE`, :class:`OOD_AEGMM`, :class:`OOD_LLR`,
+    :class:`OOD_VAE`, and :class:`OOD_VAEGMM` out-of-distribution detectors
+
     Attributes
     ----------
     is_ood : NDArray
@@ -41,9 +44,11 @@ class OODOutput(OutputMetadata):
     feature_score: NDArray[np.float32] | None
 
 
-class OODScore(NamedTuple):
+@dataclass(frozen=True)
+class OODScoreOutput(OutputMetadata):
     """
-    NamedTuple containing the instance and (optionally) feature score.
+    Output class for instance and feature scores from :class:`OOD_AE`, :class:`OOD_AEGMM`,
+    :class:`OOD_LLR`, :class:`OOD_VAE`, and :class:`OOD_VAEGMM` out-of-distribution detectors
 
     Parameters
     ----------
@@ -76,7 +81,7 @@ class OODBase(ABC):
     def __init__(self, model: keras.Model) -> None:
         self.model = model
 
-        self._ref_score: OODScore
+        self._ref_score: OODScoreOutput
         self._threshold_perc: float
         self._data_info: tuple[tuple, type] | None = None
 
@@ -102,7 +107,7 @@ class OODBase(ABC):
         self._validate(X)
 
     @abstractmethod
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         """
         Compute the out-of-distribution (OOD) scores for a given dataset.
 
@@ -116,7 +121,7 @@ class OODBase(ABC):
 
         Returns
        -------
-        OODScore
+        OODScoreOutput
            An object containing the instance-level and feature-level OOD scores.
        """
 
@@ -197,7 +202,7 @@ class OODBase(ABC):
         # compute outlier scores
         score = self.score(X, batch_size=batch_size)
         ood_pred = score.get(ood_type) > self._threshold_score(ood_type)
-        return OODOutput(is_ood=ood_pred, **score._asdict())
+        return OODOutput(is_ood=ood_pred, **score.dict())
 
 
 class OODGMMBase(OODBase):
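
The OODScore NamedTuple is now a frozen OODScoreOutput dataclass built on OutputMetadata, so consumers swap score._asdict() for score.dict(), as predict now does above. A minimal sketch of the new shape, assuming the fields are named instance_score and feature_score to match the OODOutput attribute list:

import numpy as np

from dataeval._internal.detectors.ood.base import OODScoreOutput

# Field names here are assumed from the OODOutput attribute docs above.
score = OODScoreOutput(
    instance_score=np.array([0.2, 4.7], dtype=np.float32),
    feature_score=None,  # detectors such as OOD_VAEGMM produce instance scores only
)

# The NamedTuple-era score._asdict() becomes score.dict(), which predict()
# splats into OODOutput(is_ood=..., **score.dict()).
kwargs = score.dict()
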
--- a/dataeval/_internal/detectors/ood/llr.py
+++ b/dataeval/_internal/detectors/ood/llr.py
@@ -18,11 +18,12 @@ from keras.layers import Input
 from keras.models import Model
 from numpy.typing import ArrayLike, NDArray
 
-from dataeval._internal.detectors.ood.base import OODBase, OODScore
+from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
 from dataeval._internal.models.tensorflow.trainer import trainer
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata
 
 
 def build_model(
@@ -124,7 +125,7 @@ class OOD_LLR(OODBase):
         self.sequential = sequential
         self.log_prob = log_prob
 
-        self._ref_score: OODScore
+        self._ref_score: OODScoreOutput
         self._threshold_perc: float
         self._data_info: tuple[tuple, type] | None = None
 
@@ -279,12 +280,13 @@ class OOD_LLR(OODBase):
         logp_b = logp_fn(self.dist_b, X, return_per_feature=return_per_feature, batch_size=batch_size)
         return logp_s - logp_b
 
+    @set_metadata("dataeval.detectors")
     def score(
         self,
         X: ArrayLike,
         batch_size: int = int(1e10),
-    ) -> OODScore:
+    ) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
         fscore = -self._llr(X, True, batch_size=batch_size)
         iscore = -self._llr(X, False, batch_size=batch_size)
-        return OODScore(iscore, fscore)
+        return OODScoreOutput(iscore, fscore)
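
The score methods also gain a @set_metadata("dataeval.detectors") decorator imported from dataeval._internal.output. Its implementation is not part of this diff; the sketch below only illustrates the pattern, a wrapper that stamps provenance onto the returned OutputMetadata object, and is not dataeval's actual code:

from functools import wraps

def set_metadata(module: str):
    # Illustrative stand-in for dataeval._internal.output.set_metadata.
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            output = fn(*args, **kwargs)
            # The outputs are frozen dataclasses, so any hypothetical extra
            # state has to bypass the frozen __setattr__.
            object.__setattr__(output, "_meta", {"module": module, "function": fn.__qualname__})
            return output
        return wrapper
    return decorator
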
--- a/dataeval/_internal/detectors/ood/vae.py
+++ b/dataeval/_internal/detectors/ood/vae.py
@@ -15,11 +15,12 @@ import numpy as np
 import tensorflow as tf
 from numpy.typing import ArrayLike
 
-from dataeval._internal.detectors.ood.base import OODBase, OODScore
+from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.autoencoder import VAE
 from dataeval._internal.models.tensorflow.losses import Elbo
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata
 
 
 class OOD_VAE(OODBase):
@@ -67,7 +68,8 @@ class OOD_VAE(OODBase):
         loss_fn = Elbo(0.05)
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
 
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:
+    @set_metadata("dataeval.detectors")
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
 
         # sample reconstructed instances
@@ -86,4 +88,4 @@ class OOD_VAE(OODBase):
         sorted_fscore_perc = sorted_fscore[:, -n_score_features:]
         iscore = np.mean(sorted_fscore_perc, axis=1)
 
-        return OODScore(iscore, fscore)
+        return OODScoreOutput(iscore, fscore)
--- a/dataeval/_internal/detectors/ood/vaegmm.py
+++ b/dataeval/_internal/detectors/ood/vaegmm.py
@@ -15,12 +15,13 @@ import numpy as np
 import tensorflow as tf
 from numpy.typing import ArrayLike
 
-from dataeval._internal.detectors.ood.base import OODGMMBase, OODScore
+from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
 from dataeval._internal.models.tensorflow.autoencoder import VAEGMM
 from dataeval._internal.models.tensorflow.gmm import gmm_energy
 from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
 from dataeval._internal.models.tensorflow.utils import predict_batch
+from dataeval._internal.output import set_metadata
 
 
 class OOD_VAEGMM(OODGMMBase):
@@ -53,7 +54,8 @@ class OOD_VAEGMM(OODGMMBase):
         loss_fn = LossGMM(elbo=Elbo(0.05))
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
 
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScore:
+    @set_metadata("dataeval.detectors")
+    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         """
         Compute the out-of-distribution (OOD) score for a given dataset.
 
@@ -67,7 +69,7 @@ class OOD_VAEGMM(OODGMMBase):
 
         Returns
         -------
-        OODScore
+        OODScoreOutput
            An object containing the instance-level OOD score.
 
         Note
@@ -84,4 +86,4 @@ class OOD_VAEGMM(OODGMMBase):
         energy, _ = gmm_energy(z, self.gmm_params, return_mean=False)
         energy_samples = energy.numpy().reshape((-1, self.samples))  # type: ignore
         iscore = np.mean(energy_samples, axis=-1)
-        return OODScore(iscore)
+        return OODScoreOutput(iscore)
--- a/dataeval/_internal/detectors/outliers.py
+++ b/dataeval/_internal/detectors/outliers.py
@@ -22,6 +22,8 @@ TIndexIssueMap = TypeVar("TIndexIssueMap", IndexIssueMap, list[IndexIssueMap])
 @dataclass(frozen=True)
 class OutliersOutput(Generic[TIndexIssueMap], OutputMetadata):
     """
+    Output class for :class:`Outliers` lint detector
+
     Attributes
     ----------
     issues : dict[int, dict[str, float]] | list[dict[int, dict[str, float]]]
@@ -86,8 +88,8 @@ class Outliers:
     --------
     Duplicates
 
-    Notes
-    ------
+    Note
+    ----
     There are 3 different statistical methods:
 
     - zscore
@@ -259,11 +261,6 @@ class Outliers:
         >>> results.issues[10]
         {'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128, 'contrast': 1.25, 'zeros': 0.05493}
         """
-        self.stats = datasetstats(
-            images=data,
-            use_dimension=self.use_dimension,
-            use_pixel=self.use_pixel,
-            use_visual=self.use_visual,
-        )
-        outliers = self._get_outliers({k: v for o in self.stats.outputs() for k, v in o.dict().items()})
+        self.stats = datasetstats(images=data)
+        outliers = self._get_outliers(self.stats.dict())
         return OutliersOutput(outliers)
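
With this change the detector always runs the dimension, pixel, and visual stats through a single datasetstats call and flattens the combined output with stats.dict(). A hypothetical end-to-end call, assuming Outliers is re-exported from dataeval.detectors.linters (consistent with the __init__.py changes listed above) and that its entry point is evaluate(), matching the docstring example:

import numpy as np

from dataeval.detectors.linters import Outliers

rng = np.random.default_rng(0)
images = rng.random((16, 3, 64, 64))  # synthetic NCHW stand-in data

outliers = Outliers()
results = outliers.evaluate(images)  # datasetstats now always computes all three stat groups
print(results.issues)                # e.g. {10: {'skew': -3.906, 'kurtosis': 13.266, ...}}
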
--- a/dataeval/_internal/metrics/balance.py
+++ b/dataeval/_internal/metrics/balance.py
@@ -15,6 +15,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class BalanceOutput(OutputMetadata):
     """
+    Output class for :func:`balance` bias metric
+
     Attributes
     ----------
     balance : NDArray[np.float64]
@@ -71,8 +73,8 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
     (num_factors+1) x (num_factors+1) estimate of mutual information
     between num_factors metadata factors and class label. Symmetry is enforced.
 
-    Notes
-    -----
+    Note
+    ----
     We use `mutual_info_classif` from sklearn since class label is categorical.
     `mutual_info_classif` outputs are consistent up to O(1e-4) and depend on a random
     seed. MI is computed differently for categorical and continuous variables, and
--- a/dataeval/_internal/metrics/ber.py
+++ b/dataeval/_internal/metrics/ber.py
@@ -25,6 +25,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class BEROutput(OutputMetadata):
     """
+    Output class for :func:`ber` estimator metric
+
     Attributes
     ----------
     ber : float
--- a/dataeval/_internal/metrics/coverage.py
+++ b/dataeval/_internal/metrics/coverage.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import math
 from dataclasses import dataclass
 from typing import Literal
@@ -14,6 +16,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class CoverageOutput(OutputMetadata):
     """
+    Output class for :func:`coverage` bias metric
+
     Attributes
     ----------
     indices : NDArray
--- a/dataeval/_internal/metrics/divergence.py
+++ b/dataeval/_internal/metrics/divergence.py
@@ -3,6 +3,8 @@ This module contains the implementation of HP Divergence
 using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
 """
 
+from __future__ import annotations
+
 from dataclasses import dataclass
 from typing import Literal
 
@@ -17,6 +19,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DivergenceOutput(OutputMetadata):
     """
+    Output class for :func:`divergence` estimator metric
+
     Attributes
     ----------
     divergence : float
@@ -96,8 +100,8 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
     DivergenceOutput
         The divergence value (0.0..1.0) and the number of differing edges between the datasets
 
-    Notes
-    -----
+    Note
+    ----
     The divergence value indicates how similar the 2 datasets are
     with 0 indicating approximately identical data distributions.
 
--- a/dataeval/_internal/metrics/diversity.py
+++ b/dataeval/_internal/metrics/diversity.py
@@ -13,6 +13,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
 @dataclass(frozen=True)
 class DiversityOutput(OutputMetadata):
     """
+    Output class for :func:`diversity` bias metric
+
     Attributes
     ----------
     diversity_index : NDArray[np.float64]
@@ -52,8 +54,8 @@ def diversity_shannon(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
 
-    Notes
-    -----
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically. See `numpy.histogram` for details.
 
     Returns
@@ -103,8 +105,8 @@ def diversity_simpson(
     subset_mask: NDArray[np.bool_] | None
         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
 
-    Notes
-    -----
+    Note
+    ----
     For continuous variables, histogram bins are chosen automatically. See
     numpy.histogram for details.
     If there is only one category, the diversity index takes a value of 0.
@@ -162,8 +164,8 @@ def diversity(
     method: Literal["shannon", "simpson"], default "simpson"
         Indicates which diversity index should be computed
 
-    Notes
-    -----
+    Note
+    ----
     - For continuous variables, histogram bins are chosen automatically. See numpy.histogram for details.
     - The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
     - If there is only one category, the diversity index takes a value of 1 = 1/N = 1/1. Entropy will take a value of 0.
--- a/dataeval/_internal/metrics/parity.py
+++ b/dataeval/_internal/metrics/parity.py
@@ -17,6 +17,8 @@ TData = TypeVar("TData", np.float64, NDArray[np.float64])
 @dataclass(frozen=True)
 class ParityOutput(Generic[TData], OutputMetadata):
     """
+    Output class for :func:`parity` and :func:`label_parity` bias metrics
+
     Attributes
     ----------
     score : np.float64 | NDArray[np.float64]
@@ -137,8 +139,8 @@ def normalize_expected_dist(expected_dist: NDArray, observed_dist: NDArray) -> N
     ValueError
         If the expected distribution is all zeros.
 
-    Notes
-    -----
+    Note
+    ----
     The function ensures that the total number of labels in the expected distribution matches the total
     number of labels in the observed distribution by scaling the expected distribution.
     """
@@ -224,8 +226,8 @@ def label_parity(
     of unique classes between the observed and expected distributions.
 
 
-    Notes
-    -----
+    Note
+    ----
     - Providing ``num_classes`` can be helpful if there are classes with zero instances in one of the distributions.
     - The function first validates the observed distribution and normalizes the expected distribution so that it
       has the same total number of labels as the observed distribution.
@@ -317,8 +319,8 @@ def parity(
     factor values either 0 times or at least 5 times. Alternatively, continuous-valued factors can be digitized
     into fewer bins.
 
-    Notes
-    -----
+    Note
+    ----
     - Each key of the ``continuous_factor_bincounts`` dictionary must occur as a key in data_factors.
     - A high score with a low p-value suggests that a metadata factor is strongly correlated with a class label.
     - The function creates a contingency matrix for each factor, where each entry represents the frequency of a
--- a/dataeval/_internal/metrics/stats/base.py
+++ b/dataeval/_internal/metrics/stats/base.py
@@ -3,9 +3,13 @@ from __future__ import annotations
 import re
 import warnings
 from dataclasses import dataclass
-from typing import Any, Callable, Iterable, NamedTuple, Optional, Union
+from functools import partial
+from itertools import repeat
+from multiprocessing import Pool
+from typing import Any, Callable, Generic, Iterable, NamedTuple, Optional, TypeVar, Union
 
 import numpy as np
+import tqdm
 from numpy.typing import ArrayLike, NDArray
 
 from dataeval._internal.interop import to_numpy_iter
@@ -91,7 +95,11 @@ class BaseStatsOutput(OutputMetadata):
         return len(self.source_index)
 
 
-class StatsProcessor:
+TStatsOutput = TypeVar("TStatsOutput", bound=BaseStatsOutput, covariant=True)
+
+
+class StatsProcessor(Generic[TStatsOutput]):
+    output_class: type[TStatsOutput]
     cache_keys: list[str] = []
     image_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
     channel_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
@@ -119,6 +127,9 @@ class StatsProcessor:
         else:
             return self.fn_map[fn_key](self)
 
+    def process(self) -> dict:
+        return {k: self.fn_map[k](self) for k in self.fn_map}
+
     @property
     def image(self) -> NDArray:
         if self._image is None:
@@ -143,14 +154,66 @@ class StatsProcessor:
             self._scaled = self._scaled.reshape(self.image.shape[0], -1)
         return self._scaled
 
+    @classmethod
+    def convert_output(
+        cls, source: dict[str, Any], source_index: list[SourceIndex], box_count: list[int]
+    ) -> TStatsOutput:
+        output = {}
+        for key in source:
+            if key not in cls.output_class.__annotations__:
+                continue
+            stat_type: str = cls.output_class.__annotations__[key]
+            dtype_match = re.match(DTYPE_REGEX, stat_type)
+            if dtype_match is not None:
+                output[key] = np.asarray(source[key], dtype=np.dtype(dtype_match.group(1)))
+            else:
+                output[key] = source[key]
+        return cls.output_class(**output, source_index=source_index, box_count=np.asarray(box_count, dtype=np.uint16))
+
+
+class StatsProcessorOutput(NamedTuple):
+    results: list[dict[str, Any]]
+    source_indices: list[SourceIndex]
+    box_counts: list[int]
+    warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]]
+
+
+def process_stats(
+    i: int,
+    image_boxes: tuple[NDArray, NDArray | None],
+    per_channel: bool,
+    stats_processor_cls: Iterable[type[StatsProcessor]],
+) -> StatsProcessorOutput:
+    image, boxes = image_boxes
+    results_list: list[dict[str, Any]] = []
+    source_indices: list[SourceIndex] = []
+    box_counts: list[int] = []
+    warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]] = []
+    nboxes = [None] if boxes is None else normalize_box_shape(boxes)
+    for i_b, box in enumerate(nboxes):
+        i_b = None if box is None else i_b
+        processor_list = [p(image, box, per_channel) for p in stats_processor_cls]
+        if any(not p.is_valid_slice for p in processor_list) and i_b is not None and box is not None:
+            warnings_list.append((i, i_b, box, image.shape))
+        results_list.append({k: v for p in processor_list for k, v in p.process().items()})
+        if per_channel:
+            source_indices.extend([SourceIndex(i, i_b, c) for c in range(image_boxes[0].shape[-3])])
+        else:
+            source_indices.append(SourceIndex(i, i_b, None))
+    box_counts.append(0 if boxes is None else len(boxes))
+    return StatsProcessorOutput(results_list, source_indices, box_counts, warnings_list)
+
+
+def process_stats_unpack(args, per_channel: bool, stats_processor_cls: Iterable[type[StatsProcessor]]):
+    return process_stats(*args, per_channel=per_channel, stats_processor_cls=stats_processor_cls)
+
 
 def run_stats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None,
     per_channel: bool,
-    stats_processor_cls: type,
-    output_cls: type,
-) -> dict:
+    stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
+) -> list[TStatsOutput]:
     """
     Compute specified statistics on a set of images.
 
@@ -169,18 +232,16 @@ def run_stats(
         iterable should match the length of the input images.
     per_channel : bool
         A flag which determines if the states should be evaluated on a per-channel basis or not.
-    output_cls : type
-        The output class for which stats values will be calculated.
+    stats_processor_cls : Iterable[type[StatsProcessor]]
+        An iterable of stats processor classes that calculate stats and return output classes.
 
     Returns
     -------
-    dict[str, NDArray]]
-        A dictionary containing the computed statistics for each image.
-        The dictionary keys correspond to the names of the statistics, and the values are NumPy arrays
-        with the results of the computations.
+    list[TStatsOutput]
+        A list of output classes corresponding to the input processor types.
 
-    Notes
-    -----
+    Note
+    ----
     - The function performs image normalization (rescaling the image values)
       before applying some of the statistics.
     - Pixel-level statistics (e.g., brightness, entropy) are computed after
@@ -189,43 +250,41 @@ def run_stats(
       be reused to avoid redundant computation.
     """
     results_list: list[dict[str, NDArray]] = []
-    output_list = list(output_cls.__annotations__)
     source_index = []
     box_count = []
-    bbox_iter = (None for _ in images) if bboxes is None else to_numpy_iter(bboxes)
-
-    for i, (boxes, image) in enumerate(zip(bbox_iter, to_numpy_iter(images))):
-        nboxes = [None] if boxes is None else normalize_box_shape(boxes)
-        for i_b, box in enumerate(nboxes):
-            i_b = None if box is None else i_b
-            processor: StatsProcessor = stats_processor_cls(image, box, per_channel)
-            if not processor.is_valid_slice:
-                warnings.warn(f"Bounding box {i_b}: {box} is out of bounds of image {i}: {image.shape}.")
-            results_list.append({stat: processor.get(stat) for stat in output_list})
-            if per_channel:
-                source_index.extend([SourceIndex(i, i_b, c) for c in range(image.shape[-3])])
-            else:
-                source_index.append(SourceIndex(i, i_b, None))
-        box_count.append(0 if boxes is None else len(boxes))
+    bbox_iter = repeat(None) if bboxes is None else to_numpy_iter(bboxes)
+
+    warning_list = []
+    total_for_status = getattr(images, "__len__")() if hasattr(images, "__len__") else None
+    stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
+
+    # TODO: Introduce global controls for CPU job parallelism and GPU configurations
+    with Pool(16) as p:
+        for r in tqdm.tqdm(
+            p.imap(
+                partial(process_stats_unpack, per_channel=per_channel, stats_processor_cls=stats_processor_cls),
+                enumerate(zip(to_numpy_iter(images), bbox_iter)),
+            ),
+            total=total_for_status,
+        ):
+            results_list.extend(r.results)
+            source_index.extend(r.source_indices)
+            box_count.extend(r.box_counts)
+            warning_list.extend(r.warnings_list)
+        p.close()
+        p.join()
+
+    # warnings are not emitted while in multiprocessing pools so we emit after gathering all warnings
+    for w in warning_list:
+        warnings.warn(f"Bounding box [{w[0]}][{w[1]}]: {w[2]} is out of bounds of {w[3]}.", UserWarning)
 
     output = {}
-    if per_channel:
-        for i, results in enumerate(results_list):
-            for stat, result in results.items():
+    for results in results_list:
+        for stat, result in results.items():
+            if per_channel:
                 output.setdefault(stat, []).extend(result.tolist())
-    else:
-        for results in results_list:
-            for stat, result in results.items():
+            else:
                 output.setdefault(stat, []).append(result.tolist() if isinstance(result, np.ndarray) else result)
 
-    for stat in output:
-        stat_type: str = output_cls.__annotations__[stat]
-
-        dtype_match = re.match(DTYPE_REGEX, stat_type)
-        if dtype_match is not None:
-            output[stat] = np.asarray(output[stat], dtype=np.dtype(dtype_match.group(1)))
-
-    output[SOURCE_INDEX] = source_index
-    output[BOX_COUNT] = np.asarray(box_count, dtype=np.uint16)
-
-    return output
+    outputs = [s.convert_output(output, source_index, box_count) for s in stats_processor_cls]
+    return outputs
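
run_stats now accepts an iterable of StatsProcessor subclasses, fans images out to a 16-worker multiprocessing pool with a tqdm progress bar, and returns one typed output per processor via convert_output. A hedged sketch of the new call shape; the concrete processor class names below are assumptions based on the stats modules listed in this diff:

import numpy as np

from dataeval._internal.metrics.stats.base import run_stats
# Hypothetical processor names, assumed from the dimensionstats and pixelstats modules.
from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsProcessor
from dataeval._internal.metrics.stats.pixelstats import PixelStatsProcessor

images = np.zeros((8, 3, 32, 32))  # synthetic stand-in dataset

# One pass over the images drives every processor; the result is a list with
# one typed output per processor, in the order the classes were given.
dimension_stats, pixel_stats = run_stats(
    images,
    bboxes=None,  # or an iterable of per-image bounding boxes
    per_channel=False,
    stats_processor_cls=[DimensionStatsProcessor, PixelStatsProcessor],
)

Because the pool comes from multiprocessing, callers on spawn-based platforms would also need the usual if __name__ == "__main__": guard around the entry point.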