PyPI - dataeval - Versions diffs - 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl - Mend

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

dataeval/__init__.py +1 -1
dataeval/config.py +68 -11
dataeval/detectors/drift/__init__.py +2 -2
dataeval/detectors/drift/_base.py +8 -64
dataeval/detectors/drift/_mmd.py +12 -38
dataeval/detectors/drift/_torch.py +7 -7
dataeval/detectors/drift/_uncertainty.py +6 -5
dataeval/detectors/drift/updates.py +20 -3
dataeval/detectors/linters/__init__.py +3 -2
dataeval/detectors/linters/duplicates.py +14 -46
dataeval/detectors/linters/outliers.py +25 -159
dataeval/detectors/ood/__init__.py +1 -1
dataeval/detectors/ood/ae.py +6 -5
dataeval/detectors/ood/base.py +2 -2
dataeval/detectors/ood/metadata_ood_mi.py +4 -6
dataeval/detectors/ood/mixin.py +3 -4
dataeval/detectors/ood/vae.py +3 -2
dataeval/metadata/__init__.py +2 -1
dataeval/metadata/_distance.py +134 -0
dataeval/metadata/_ood.py +30 -49
dataeval/metadata/_utils.py +44 -0
dataeval/metrics/bias/__init__.py +5 -4
dataeval/metrics/bias/_balance.py +17 -149
dataeval/metrics/bias/_coverage.py +4 -106
dataeval/metrics/bias/_diversity.py +12 -107
dataeval/metrics/bias/_parity.py +7 -71
dataeval/metrics/estimators/__init__.py +5 -4
dataeval/metrics/estimators/_ber.py +2 -20
dataeval/metrics/estimators/_clusterer.py +1 -61
dataeval/metrics/estimators/_divergence.py +2 -19
dataeval/metrics/estimators/_uap.py +2 -16
dataeval/metrics/stats/__init__.py +15 -12
dataeval/metrics/stats/_base.py +41 -128
dataeval/metrics/stats/_boxratiostats.py +13 -13
dataeval/metrics/stats/_dimensionstats.py +17 -58
dataeval/metrics/stats/_hashstats.py +19 -35
dataeval/metrics/stats/_imagestats.py +94 -0
dataeval/metrics/stats/_labelstats.py +42 -121
dataeval/metrics/stats/_pixelstats.py +19 -51
dataeval/metrics/stats/_visualstats.py +19 -51
dataeval/outputs/__init__.py +57 -0
dataeval/outputs/_base.py +182 -0
dataeval/outputs/_bias.py +381 -0
dataeval/outputs/_drift.py +83 -0
dataeval/outputs/_estimators.py +114 -0
dataeval/outputs/_linters.py +186 -0
dataeval/outputs/_metadata.py +54 -0
dataeval/{detectors/ood/output.py → outputs/_ood.py} +22 -22
dataeval/outputs/_stats.py +393 -0
dataeval/outputs/_utils.py +44 -0
dataeval/outputs/_workflows.py +364 -0
dataeval/typing.py +187 -7
dataeval/utils/_method.py +1 -5
dataeval/utils/_plot.py +2 -2
dataeval/utils/data/__init__.py +5 -1
dataeval/utils/data/_dataset.py +217 -0
dataeval/utils/data/_embeddings.py +12 -14
dataeval/utils/data/_images.py +30 -27
dataeval/utils/data/_metadata.py +28 -11
dataeval/utils/data/_selection.py +25 -22
dataeval/utils/data/_split.py +5 -29
dataeval/utils/data/_targets.py +14 -2
dataeval/utils/data/datasets/_base.py +5 -5
dataeval/utils/data/datasets/_cifar10.py +1 -1
dataeval/utils/data/datasets/_milco.py +1 -1
dataeval/utils/data/datasets/_mnist.py +1 -1
dataeval/utils/data/datasets/_ships.py +1 -1
dataeval/utils/data/{_types.py → datasets/_types.py} +10 -16
dataeval/utils/data/datasets/_voc.py +1 -1
dataeval/utils/data/selections/_classfilter.py +4 -5
dataeval/utils/data/selections/_indices.py +2 -2
dataeval/utils/data/selections/_limit.py +2 -2
dataeval/utils/data/selections/_reverse.py +2 -2
dataeval/utils/data/selections/_shuffle.py +2 -2
dataeval/utils/torch/_internal.py +5 -5
dataeval/utils/torch/trainer.py +8 -8
dataeval/workflows/__init__.py +2 -1
dataeval/workflows/sufficiency.py +6 -342
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/METADATA +2 -2
dataeval-0.82.1.dist-info/RECORD +105 -0
dataeval/_output.py +0 -137
dataeval/detectors/ood/metadata_ks_compare.py +0 -129
dataeval/metrics/stats/_datasetstats.py +0 -198
dataeval-0.81.0.dist-info/RECORD +0 -94
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/LICENSE.txt +0 -0
{dataeval-0.81.0.dist-info → dataeval-0.82.1.dist-info}/WHEEL +0 -0

dataeval/metrics/stats/_labelstats.py CHANGED Viewed

@@ -2,108 +2,21 @@ from __future__ import annotations
 __all__ = []
-import contextlib
 from collections import Counter, defaultdict
-from dataclasses import dataclass
-from typing import Any, Iterable, Mapping, TypeVar
+from typing import Any, Mapping, TypeVar
 import numpy as np
-from dataeval._output import Output, set_metadata
-from dataeval.typing import ArrayLike
+from dataeval.outputs import LabelStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import AnnotatedDataset, ArrayLike
 from dataeval.utils._array import as_numpy
+from dataeval.utils.data._metadata import Metadata
-with contextlib.suppress(ImportError):
-    import pandas as pd
+TValue = TypeVar("TValue")
-@dataclass(frozen=True)
-class LabelStatsOutput(Output):
-    """
-    Output class for :func:`.labelstats` stats metric.
-    Attributes
-    ----------
-    label_counts_per_class : dict[str | int, int]
-        Dictionary whose keys are the different label classes and
-        values are total counts of each class
-    label_counts_per_image : list[int]
-        Number of labels per image
-    image_counts_per_label : dict[str | int, int]
-        Dictionary whose keys are the different label classes and
-        values are total counts of each image the class is present in
-    image_indices_per_label : dict[str | int, list]
-        Dictionary whose keys are the different label classes and
-        values are lists containing the images that have that label
-    image_count : int
-        Total number of images present
-    class_count : int
-        Total number of classes present
-    label_count : int
-        Total number of labels present
-    """
-    label_counts_per_class: dict[str | int, int]
-    label_counts_per_image: list[int]
-    image_counts_per_label: dict[str | int, int]
-    image_indices_per_label: dict[str | int, list[int]]
-    image_count: int
-    class_count: int
-    label_count: int
-    def to_table(self) -> str:
-        max_char = max(len(key) if isinstance(key, str) else key // 10 + 1 for key in self.label_counts_per_class)
-        max_char = max(max_char, 5)
-        max_label = max(list(self.label_counts_per_class.values()))
-        max_img = max(list(self.image_counts_per_label.values()))
-        max_num = int(np.ceil(np.log10(max(max_label, max_img))))
-        max_num = max(max_num, 11)
-        # Display basic counts
-        table_str = f"Class Count: {self.class_count}\n"
-        table_str += f"Label Count: {self.label_count}\n"
-        table_str += f"Average # Labels per Image: {round(np.mean(self.label_counts_per_image), 2)}\n"
-        table_str += "--------------------------------------\n"
-        # Display counts per class
-        table_str += f"{'Label':>{max_char}}: Total Count - Image Count\n"
-        for cls in self.label_counts_per_class:
-            table_str += f"{cls:>{max_char}}: {self.label_counts_per_class[cls]:^{max_num}} "
-            table_str += f"- {self.image_counts_per_label[cls]:^{max_num}}\n"
-        return table_str
-    def to_dataframe(self) -> pd.DataFrame:
-        import pandas as pd
-        class_list = []
-        total_count = []
-        image_count = []
-        for cls in self.label_counts_per_class:
-            class_list.append(cls)
-            total_count.append(self.label_counts_per_class[cls])
-            image_count.append(self.image_counts_per_label[cls])
-        return pd.DataFrame(
-            {
-                "Label": class_list,
-                "Total Count": total_count,
-                "Image Count": image_count,
-            }
-        )
-TKey = TypeVar("TKey", int, str)
-def sort(d: Mapping[TKey, Any]) -> dict[TKey, Any]:
-    """
-    Sort mappings by key in increasing order
-    """
-    return dict(sorted(d.items(), key=lambda x: x[0]))
-def _ensure_2d(labels: Iterable[ArrayLike]) -> Iterable[ArrayLike]:
+def _ensure_2d(labels: ArrayLike) -> ArrayLike:
     if isinstance(labels, np.ndarray):
         return labels[:, None]
     else:
@@ -116,7 +29,7 @@ def _get_list_depth(lst):
     return 0
-def _check_labels_dimension(labels: Iterable[ArrayLike]) -> Iterable[ArrayLike]:
+def _check_labels_dimension(labels: ArrayLike) -> ArrayLike:
     # Check for nested lists beyond 2 levels
     if isinstance(labels, np.ndarray):
@@ -138,10 +51,12 @@ def _check_labels_dimension(labels: Iterable[ArrayLike]) -> Iterable[ArrayLike]:
         raise TypeError("Labels must be either a NumPy array or a list.")
+def _sort_to_list(d: Mapping[int, TValue]) -> list[TValue]:
+    return [v for _, v in sorted(d.items())]
 @set_metadata
-def labelstats(
-    labels: Iterable[ArrayLike],
-) -> LabelStatsOutput:
+def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
     """
     Calculates :term:`statistics<Statistics>` for data labels.
@@ -150,40 +65,45 @@ def labelstats(
     Parameters
     ----------
-    labels : ArrayLike, shape - [label] | [[label]] or (N,M) | (N,)
-        Lists or :term:`NumPy` array of labels.
-        A set of lists where each list contains all labels per image -
-        (e.g. [[label1, label2], [label2], [label1, label3]] or [label1, label2, label1, label3]).
-        If a numpy array, N is the number of images, M is the number of labels per image.
+    dataset : Metadata or ImageClassificationDataset or ObjectDetect
     Returns
     -------
     LabelStatsOutput
-        A dictionary-like object containing the computed counting metrics for the labels.
+        A dataclass containing the computed counting metrics for the labels.
     Examples
     --------
-    Calculating the :term:`statistics<Statistics>` on labels for a set of data
-    >>> stats = labelstats(labels)
-    >>> stats.label_counts_per_class
-    {'chicken': 12, 'cow': 5, 'horse': 4, 'pig': 7, 'sheep': 4}
-    >>> stats.label_counts_per_image
-    [3, 3, 5, 3, 2, 5, 5, 2, 2, 2]
-    >>> stats.image_counts_per_label
-    {'chicken': 8, 'cow': 4, 'horse': 4, 'pig': 7, 'sheep': 4}
-    >>> (stats.image_count, stats.class_count, stats.label_count)
-    (10, 5, 32)
+    Calculate basic :term:`statistics<Statistics>` on labels for a dataset.
+    >>> from dataeval.utils.data import Metadata
+    >>> stats = labelstats(Metadata(dataset))
+    >>> print(stats.to_table())
+    Class Count: 5
+    Label Count: 15
+    Average # Labels per Image: 1.88
+    --------------------------------------
+      Label: Total Count - Image Count
+      horse:      2      -      2
+        cow:      4      -      3
+      sheep:      2      -      2
+        pig:      2      -      2
+    chicken:      5      -      5
     """
-    label_counts = Counter()
-    image_counts = Counter()
+    dataset = Metadata(dataset) if isinstance(dataset, AnnotatedDataset) else dataset
+    label_counts: Counter[int] = Counter()
+    image_counts: Counter[int] = Counter()
     index_location = defaultdict(list[int])
     label_per_image: list[int] = []
+    index2label = dict(enumerate(dataset.class_names))
+    labels = [target.labels.tolist() for target in dataset.targets]
     labels_2d = _check_labels_dimension(labels)
     for i, group in enumerate(labels_2d):
-        group = as_numpy(group)
+        group = as_numpy(group).tolist()
         # Count occurrences of each label in all sublists
         label_counts.update(group)
@@ -200,11 +120,12 @@ def labelstats(
             index_location[item].append(i)
     return LabelStatsOutput(
-        label_counts_per_class=sort(label_counts),
+        label_counts_per_class=_sort_to_list(label_counts),
         label_counts_per_image=label_per_image,
-        image_counts_per_label=sort(image_counts),
-        image_indices_per_label=sort(index_location),
+        image_counts_per_class=_sort_to_list(image_counts),
+        image_indices_per_class=_sort_to_list(index_location),
         image_count=len(label_per_image),
         class_count=len(label_counts),
         label_count=sum(label_counts.values()),
+        class_names=list(index2label.values()),
     )

dataeval/metrics/stats/_pixelstats.py CHANGED Viewed

@@ -2,50 +2,15 @@ from __future__ import annotations
 __all__ = []
-from dataclasses import dataclass
-from typing import Any, Callable, Iterable
+from typing import Any, Callable
 import numpy as np
-from numpy.typing import NDArray
 from scipy.stats import entropy, kurtosis, skew
-from dataeval._output import set_metadata
-from dataeval.metrics.stats._base import BaseStatsOutput, HistogramPlotMixin, StatsProcessor, run_stats
-from dataeval.typing import ArrayLike
-@dataclass(frozen=True)
-class PixelStatsOutput(BaseStatsOutput, HistogramPlotMixin):
-    """
-    Output class for :func:`.pixelstats` stats metric.
-    Attributes
-    ----------
-    mean : NDArray[np.float16]
-        Mean of the pixel values of the images
-    std : NDArray[np.float16]
-        Standard deviation of the pixel values of the images
-    var : NDArray[np.float16]
-        :term:`Variance` of the pixel values of the images
-    skew : NDArray[np.float16]
-        Skew of the pixel values of the images
-    kurtosis : NDArray[np.float16]
-        Kurtosis of the pixel values of the images
-    histogram : NDArray[np.uint32]
-        Histogram of the pixel values of the images across 256 bins scaled between 0 and 1
-    entropy : NDArray[np.float16]
-        Entropy of the pixel values of the images
-    """
-    mean: NDArray[np.float16]
-    std: NDArray[np.float16]
-    var: NDArray[np.float16]
-    skew: NDArray[np.float16]
-    kurtosis: NDArray[np.float16]
-    histogram: NDArray[np.uint32]
-    entropy: NDArray[np.float16]
-    _excluded_keys = ["histogram"]
+from dataeval.metrics.stats._base import StatsProcessor, run_stats
+from dataeval.outputs import PixelStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import ArrayLike, Dataset
 class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
@@ -72,8 +37,9 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
 @set_metadata
 def pixelstats(
-    images: Iterable[ArrayLike],
-    bboxes: Iterable[ArrayLike] | None = None,
+    dataset: Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
+    *,
+    per_box: bool = False,
     per_channel: bool = False,
 ) -> PixelStatsOutput:
     """
@@ -84,10 +50,12 @@ def pixelstats(
     Parameters
     ----------
-    images : Iterable[ArrayLike]
-        Images to perform calculations on
-    bboxes : Iterable[ArrayLike] or None
-        Bounding boxes in `xyxy` format for each image to perform calculations
+    dataset : Dataset
+        Dataset to perform calculations on.
+    per_box : bool, default False
+        If True, perform calculations on each bounding box.
+    per_channel : bool, default False
+        If True, perform calculations on each channel.
     Returns
     -------
@@ -107,12 +75,12 @@ def pixelstats(
     Examples
     --------
-    Calculating the statistics on the images, whose shape is (C, H, W)
+    Calculate the pixel statistics of a dataset of 8 images, whose shape is (C, H, W).
-    >>> results = pixelstats(stats_images)
+    >>> results = pixelstats(dataset)
     >>> print(results.mean)
-    [0.2903 0.2108 0.397  0.596  0.743 ]
+    [0.181 0.132 0.248 0.373 0.464 0.613 0.734 0.854]
     >>> print(results.entropy)
-    [4.99  2.371 1.179 2.406 0.668]
+    [4.527 1.883 0.811 1.883 0.298 1.883 1.883 1.883]
     """
-    return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]
+    return run_stats(dataset, per_box, per_channel, [PixelStatsProcessor])[0]

dataeval/metrics/stats/_visualstats.py CHANGED Viewed

@@ -2,54 +2,19 @@ from __future__ import annotations
 __all__ = []
-from dataclasses import dataclass
-from typing import Any, Callable, Iterable
+from typing import Any, Callable
 import numpy as np
-from numpy.typing import NDArray
-from dataeval._output import set_metadata
-from dataeval.metrics.stats._base import BaseStatsOutput, HistogramPlotMixin, StatsProcessor, run_stats
-from dataeval.typing import ArrayLike
+from dataeval.metrics.stats._base import StatsProcessor, run_stats
+from dataeval.outputs import VisualStatsOutput
+from dataeval.outputs._base import set_metadata
+from dataeval.typing import ArrayLike, Dataset
 from dataeval.utils._image import edge_filter
 QUARTILES = (0, 25, 50, 75, 100)
-@dataclass(frozen=True)
-class VisualStatsOutput(BaseStatsOutput, HistogramPlotMixin):
-    """
-    Output class for :func:`.visualstats` stats metric.
-    Attributes
-    ----------
-    brightness : NDArray[np.float16]
-        Brightness of the images
-    contrast : NDArray[np.float16]
-        Image contrast ratio
-    darkness : NDArray[np.float16]
-        Darkness of the images
-    missing : NDArray[np.float16]
-        Percentage of the images with missing pixels
-    sharpness : NDArray[np.float16]
-        Sharpness of the images
-    zeros : NDArray[np.float16]
-        Percentage of the images with zero value pixels
-    percentiles : NDArray[np.float16]
-        Percentiles of the pixel values of the images with quartiles of (0, 25, 50, 75, 100)
-    """
-    brightness: NDArray[np.float16]
-    contrast: NDArray[np.float16]
-    darkness: NDArray[np.float16]
-    missing: NDArray[np.float16]
-    sharpness: NDArray[np.float16]
-    zeros: NDArray[np.float16]
-    percentiles: NDArray[np.float16]
-    _excluded_keys = ["percentiles"]
 class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
     output_class: type = VisualStatsOutput
     image_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
@@ -79,8 +44,9 @@ class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
 @set_metadata
 def visualstats(
-    images: Iterable[ArrayLike],
-    bboxes: Iterable[ArrayLike] | None = None,
+    dataset: Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
+    *,
+    per_box: bool = False,
     per_channel: bool = False,
 ) -> VisualStatsOutput:
     """
@@ -91,10 +57,12 @@ def visualstats(
     Parameters
     ----------
-    images : Iterable[ArrayLike]
-        Images to perform calculations on
-    bboxes : Iterable[ArrayLike] or None
-        Bounding boxes in `xyxy` format for each image to perform calculations on
+    dataset : Dataset
+        Dataset to perform calculations on.
+    per_box : bool, default False
+        If True, perform calculations on each bounding box.
+    per_channel : bool, default False
+        If True, perform calculations on each channel.
     Returns
     -------
@@ -113,12 +81,12 @@ def visualstats(
     Examples
     --------
-    Calculating the :term:`statistics<Statistics>` on the images, whose shape is (C, H, W)
+    Calculate the visual statistics of a dataset of 8 images, whose shape is (C, H, W).
-    >>> results = visualstats(stats_images)
+    >>> results = visualstats(dataset)
     >>> print(results.brightness)
-    [0.1353 0.2085 0.4143 0.6084 0.8135]
+    [0.084 0.13  0.259 0.38  0.508 0.63  0.755 0.88 ]
     >>> print(results.contrast)
-    [2.04  1.331 1.261 1.279 1.253]
+    [2.04  1.331 1.261 1.279 1.253 1.268 1.265 1.263]
     """
-    return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]
+    return run_stats(dataset, per_box, per_channel, [VisualStatsProcessor])[0]

dataeval/outputs/__init__.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""
+Output classes for DataEval to store function and method outputs
+as well as runtime metadata for reproducibility and logging.
+"""
+from ._base import ExecutionMetadata
+from ._bias import BalanceOutput, CoverageOutput, DiversityOutput, LabelParityOutput, ParityOutput
+from ._drift import DriftMMDOutput, DriftOutput
+from ._estimators import BEROutput, ClustererOutput, DivergenceOutput, UAPOutput
+from ._linters import DuplicatesOutput, OutliersOutput
+from ._metadata import MetadataDistanceOutput, MetadataDistanceValues, MostDeviatedFactorsOutput
+from ._ood import OODOutput, OODScoreOutput
+from ._stats import (
+    ChannelStatsOutput,
+    DimensionStatsOutput,
+    HashStatsOutput,
+    ImageStatsOutput,
+    LabelStatsOutput,
+    PixelStatsOutput,
+    SourceIndex,
+    VisualStatsOutput,
+)
+from ._utils import SplitDatasetOutput, TrainValSplit
+from ._workflows import SufficiencyOutput
+__all__ = [
+    "BEROutput",
+    "BalanceOutput",
+    "ChannelStatsOutput",
+    "ClustererOutput",
+    "CoverageOutput",
+    "DimensionStatsOutput",
+    "DivergenceOutput",
+    "DiversityOutput",
+    "DriftMMDOutput",
+    "DriftOutput",
+    "DuplicatesOutput",
+    "ExecutionMetadata",
+    "HashStatsOutput",
+    "ImageStatsOutput",
+    "LabelParityOutput",
+    "LabelStatsOutput",
+    "MetadataDistanceOutput",
+    "MetadataDistanceValues",
+    "MostDeviatedFactorsOutput",
+    "OODOutput",
+    "OODScoreOutput",
+    "OutliersOutput",
+    "ParityOutput",
+    "PixelStatsOutput",
+    "SourceIndex",
+    "SplitDatasetOutput",
+    "SufficiencyOutput",
+    "TrainValSplit",
+    "UAPOutput",
+    "VisualStatsOutput",
+]

dataeval/outputs/_base.py ADDED Viewed

@@ -0,0 +1,182 @@
+from __future__ import annotations
+__all__ = []
+import inspect
+import logging
+from collections.abc import Collection, Mapping, Sequence
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from functools import partial, wraps
+from typing import Any, Callable, Generic, Iterator, TypeVar, overload
+import numpy as np
+from typing_extensions import ParamSpec
+from dataeval import __version__
+@dataclass(frozen=True)
+class ExecutionMetadata:
+    """
+    Metadata about the execution of the function or method for the Output class.
+    Attributes
+    ----------
+    name: str
+        Name of the function or method
+    execution_time: datetime
+        Time of execution
+    execution_duration: float
+        Duration of execution in seconds
+    arguments: dict[str, Any]
+        Arguments passed to the function or method
+    state: dict[str, Any]
+        State attributes of the executing class
+    version: str
+        Version of DataEval
+    """
+    name: str
+    execution_time: datetime
+    execution_duration: float
+    arguments: dict[str, Any]
+    state: dict[str, Any]
+    version: str
+    @classmethod
+    def empty(cls) -> ExecutionMetadata:
+        return ExecutionMetadata(
+            name="",
+            execution_time=datetime.min,
+            execution_duration=0.0,
+            arguments={},
+            state={},
+            version=__version__,
+        )
+T = TypeVar("T", covariant=True)
+class GenericOutput(Generic[T]):
+    _meta: ExecutionMetadata | None = None
+    def data(self) -> T: ...
+    def meta(self) -> ExecutionMetadata:
+        """
+        Metadata about the execution of the function or method for the Output class.
+        """
+        return self._meta or ExecutionMetadata.empty()
+class Output(GenericOutput[dict[str, Any]]):
+    def data(self) -> dict[str, Any]:
+        return {k: v for k, v in self.__dict__.items() if k != "_meta"}
+    def __repr__(self) -> str:
+        return str(self)
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}({', '.join([f'{k}={v}' for k, v in self.data().items()])})"
+class BaseCollectionMixin(Collection[Any]):
+    __slots__ = ["_data"]
+    def data(self) -> Any:
+        return self._data
+    def __len__(self) -> int:
+        return len(self._data)
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({repr(self._data)})"
+    def __str__(self) -> str:
+        return str(self._data)
+TKey = TypeVar("TKey", str, int, float, set)
+TValue = TypeVar("TValue")
+class MappingOutput(Mapping[TKey, TValue], BaseCollectionMixin, GenericOutput[Mapping[TKey, TValue]]):
+    def __init__(self, data: Mapping[TKey, TValue]):
+        self._data = data
+    def __getitem__(self, key: TKey) -> TValue:
+        return self._data[key]
+    def __iter__(self) -> Iterator[TKey]:
+        return iter(self._data)
+class SequenceOutput(Sequence[TValue], BaseCollectionMixin, GenericOutput[Sequence[TValue]]):
+    def __init__(self, data: Sequence[TValue]):
+        self._data = data
+    @overload
+    def __getitem__(self, index: int) -> TValue: ...
+    @overload
+    def __getitem__(self, index: slice) -> Sequence[TValue]: ...
+    def __getitem__(self, index: int | slice) -> TValue | Sequence[TValue]:
+        return self._data[index]
+    def __iter__(self) -> Iterator[TValue]:
+        return iter(self._data)
+P = ParamSpec("P")
+R = TypeVar("R", bound=GenericOutput)
+def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
+    """Decorator to stamp Output classes with runtime metadata"""
+    if fn is None:
+        return partial(set_metadata, state=state)  # type: ignore
+    @wraps(fn)
+    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+        def fmt(v):
+            if np.isscalar(v):
+                return v
+            if hasattr(v, "shape"):
+                return f"{v.__class__.__name__}: shape={getattr(v, 'shape')}"
+            if hasattr(v, "__len__"):
+                return f"{v.__class__.__name__}: len={len(v)}"
+            return f"{v.__class__.__name__}"
+        # Collect function metadata
+        # set all params with defaults then update params with mapped arguments and explicit keyword args
+        fn_params = inspect.signature(fn).parameters
+        arguments = {k: None if v.default is inspect.Parameter.empty else v.default for k, v in fn_params.items()}
+        arguments.update(zip(fn_params, args))
+        arguments.update(kwargs)
+        arguments = {k: fmt(v) for k, v in arguments.items()}
+        is_method = "self" in arguments
+        state_attrs = {k: fmt(getattr(args[0], k)) for k in state or []} if is_method else {}
+        module = args[0].__class__.__module__ if is_method else fn.__module__.removeprefix("src.")
+        class_prefix = f".{args[0].__class__.__name__}." if is_method else "."
+        name = f"{module}{class_prefix}{fn.__name__}"
+        arguments = {k: v for k, v in arguments.items() if k != "self"}
+        _logger = logging.getLogger(module)
+        time = datetime.now(timezone.utc)
+        _logger.log(logging.INFO, f">>> Executing '{name}': args={arguments} state={state} <<<")
+        ##### EXECUTE FUNCTION #####
+        result = fn(*args, **kwargs)
+        ############################
+        duration = (datetime.now(timezone.utc) - time).total_seconds()
+        _logger.log(logging.INFO, f">>> Completed '{name}': args={arguments} state={state} duration={duration} <<<")
+        # Update output with recorded metadata
+        metadata = ExecutionMetadata(name, time, duration, arguments, state_attrs, __version__)
+        object.__setattr__(result, "_meta", metadata)
+        return result
+    return wrapper

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl

dataeval 0.81.0__py3-none-any.whl → 0.82.1__py3-none-any.whl