dataeval 0.74.0__py3-none-any.whl → 0.74.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +23 -10
- dataeval/detectors/__init__.py +2 -10
- dataeval/detectors/drift/base.py +3 -3
- dataeval/detectors/drift/mmd.py +1 -1
- dataeval/detectors/linters/clusterer.py +3 -3
- dataeval/detectors/linters/duplicates.py +4 -4
- dataeval/detectors/linters/outliers.py +4 -4
- dataeval/detectors/ood/__init__.py +5 -12
- dataeval/detectors/ood/base.py +5 -5
- dataeval/detectors/ood/metadata_ks_compare.py +12 -13
- dataeval/interop.py +15 -3
- dataeval/logging.py +16 -0
- dataeval/metrics/bias/balance.py +3 -3
- dataeval/metrics/bias/coverage.py +3 -3
- dataeval/metrics/bias/diversity.py +3 -3
- dataeval/metrics/bias/metadata_preprocessing.py +3 -3
- dataeval/metrics/bias/parity.py +4 -4
- dataeval/metrics/estimators/ber.py +3 -3
- dataeval/metrics/estimators/divergence.py +3 -3
- dataeval/metrics/estimators/uap.py +3 -3
- dataeval/metrics/stats/base.py +2 -2
- dataeval/metrics/stats/boxratiostats.py +1 -1
- dataeval/metrics/stats/datasetstats.py +6 -6
- dataeval/metrics/stats/dimensionstats.py +1 -1
- dataeval/metrics/stats/hashstats.py +1 -1
- dataeval/metrics/stats/labelstats.py +3 -3
- dataeval/metrics/stats/pixelstats.py +1 -1
- dataeval/metrics/stats/visualstats.py +1 -1
- dataeval/output.py +81 -57
- dataeval/utils/__init__.py +1 -7
- dataeval/utils/split_dataset.py +306 -279
- dataeval/workflows/sufficiency.py +4 -4
- {dataeval-0.74.0.dist-info → dataeval-0.74.2.dist-info}/METADATA +3 -8
- dataeval-0.74.2.dist-info/RECORD +66 -0
- dataeval/detectors/ood/ae.py +0 -76
- dataeval/detectors/ood/aegmm.py +0 -67
- dataeval/detectors/ood/base_tf.py +0 -109
- dataeval/detectors/ood/llr.py +0 -302
- dataeval/detectors/ood/vae.py +0 -98
- dataeval/detectors/ood/vaegmm.py +0 -76
- dataeval/utils/lazy.py +0 -26
- dataeval/utils/tensorflow/__init__.py +0 -19
- dataeval/utils/tensorflow/_internal/gmm.py +0 -103
- dataeval/utils/tensorflow/_internal/loss.py +0 -121
- dataeval/utils/tensorflow/_internal/models.py +0 -1394
- dataeval/utils/tensorflow/_internal/trainer.py +0 -114
- dataeval/utils/tensorflow/_internal/utils.py +0 -256
- dataeval/utils/tensorflow/loss/__init__.py +0 -11
- dataeval-0.74.0.dist-info/RECORD +0 -79
- {dataeval-0.74.0.dist-info → dataeval-0.74.2.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.74.0.dist-info → dataeval-0.74.2.dist-info}/WHEEL +0 -0
@@ -15,11 +15,11 @@ from dataeval.metrics.stats.dimensionstats import (
|
|
15
15
|
from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
|
16
16
|
from dataeval.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
|
17
17
|
from dataeval.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
|
18
|
-
from dataeval.output import
|
18
|
+
from dataeval.output import Output, set_metadata
|
19
19
|
|
20
20
|
|
21
21
|
@dataclass(frozen=True)
|
22
|
-
class DatasetStatsOutput(
|
22
|
+
class DatasetStatsOutput(Output):
|
23
23
|
"""
|
24
24
|
Output class for :func:`datasetstats` stats metric
|
25
25
|
|
@@ -41,7 +41,7 @@ class DatasetStatsOutput(OutputMetadata):
|
|
41
41
|
visualstats: VisualStatsOutput
|
42
42
|
labelstats: LabelStatsOutput | None = None
|
43
43
|
|
44
|
-
def _outputs(self) -> list[
|
44
|
+
def _outputs(self) -> list[Output]:
|
45
45
|
return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
|
46
46
|
|
47
47
|
def dict(self) -> dict[str, Any]:
|
@@ -54,7 +54,7 @@ class DatasetStatsOutput(OutputMetadata):
|
|
54
54
|
|
55
55
|
|
56
56
|
@dataclass(frozen=True)
|
57
|
-
class ChannelStatsOutput(
|
57
|
+
class ChannelStatsOutput(Output):
|
58
58
|
"""
|
59
59
|
Output class for :func:`channelstats` stats metric
|
60
60
|
|
@@ -84,7 +84,7 @@ class ChannelStatsOutput(OutputMetadata):
|
|
84
84
|
raise ValueError("All StatsOutput classes must contain the same number of image sources.")
|
85
85
|
|
86
86
|
|
87
|
-
@set_metadata
|
87
|
+
@set_metadata
|
88
88
|
def datasetstats(
|
89
89
|
images: Iterable[ArrayLike],
|
90
90
|
bboxes: Iterable[ArrayLike] | None = None,
|
@@ -131,7 +131,7 @@ def datasetstats(
|
|
131
131
|
return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None) # type: ignore
|
132
132
|
|
133
133
|
|
134
|
-
@set_metadata
|
134
|
+
@set_metadata
|
135
135
|
def channelstats(
|
136
136
|
images: Iterable[ArrayLike],
|
137
137
|
bboxes: Iterable[ArrayLike] | None = None,
|
@@ -9,11 +9,11 @@ from typing import Any, Iterable, Mapping, TypeVar
|
|
9
9
|
from numpy.typing import ArrayLike
|
10
10
|
|
11
11
|
from dataeval.interop import to_numpy
|
12
|
-
from dataeval.output import
|
12
|
+
from dataeval.output import Output, set_metadata
|
13
13
|
|
14
14
|
|
15
15
|
@dataclass(frozen=True)
|
16
|
-
class LabelStatsOutput(
|
16
|
+
class LabelStatsOutput(Output):
|
17
17
|
"""
|
18
18
|
Output class for :func:`labelstats` stats metric
|
19
19
|
|
@@ -57,7 +57,7 @@ def sort(d: Mapping[TKey, Any]) -> dict[TKey, Any]:
|
|
57
57
|
return dict(sorted(d.items(), key=lambda x: x[0]))
|
58
58
|
|
59
59
|
|
60
|
-
@set_metadata
|
60
|
+
@set_metadata
|
61
61
|
def labelstats(
|
62
62
|
labels: Iterable[ArrayLike],
|
63
63
|
) -> LabelStatsOutput:
|
dataeval/output.py
CHANGED
@@ -4,9 +4,10 @@ __all__ = []
|
|
4
4
|
|
5
5
|
import inspect
|
6
6
|
import sys
|
7
|
+
from collections.abc import Mapping
|
7
8
|
from datetime import datetime, timezone
|
8
|
-
from functools import wraps
|
9
|
-
from typing import Any, Callable,
|
9
|
+
from functools import partial, wraps
|
10
|
+
from typing import Any, Callable, Iterator, TypeVar
|
10
11
|
|
11
12
|
import numpy as np
|
12
13
|
|
@@ -18,7 +19,7 @@ else:
|
|
18
19
|
from dataeval import __version__
|
19
20
|
|
20
21
|
|
21
|
-
class
|
22
|
+
class Output:
|
22
23
|
_name: str
|
23
24
|
_execution_time: datetime
|
24
25
|
_execution_duration: float
|
@@ -26,6 +27,9 @@ class OutputMetadata:
|
|
26
27
|
_state: dict[str, str]
|
27
28
|
_version: str
|
28
29
|
|
30
|
+
def __str__(self) -> str:
|
31
|
+
return f"{self.__class__.__name__}: {str(self.dict())}"
|
32
|
+
|
29
33
|
def dict(self) -> dict[str, Any]:
|
30
34
|
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
|
31
35
|
|
@@ -33,58 +37,78 @@ class OutputMetadata:
|
|
33
37
|
return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
|
34
38
|
|
35
39
|
|
40
|
+
TKey = TypeVar("TKey", str, int, float, set)
|
41
|
+
TValue = TypeVar("TValue")
|
42
|
+
|
43
|
+
|
44
|
+
class MappingOutput(Mapping[TKey, TValue], Output):
|
45
|
+
__slots__ = ["_data"]
|
46
|
+
|
47
|
+
def __init__(self, data: Mapping[TKey, TValue]):
|
48
|
+
self._data = data
|
49
|
+
|
50
|
+
def __getitem__(self, key: TKey) -> TValue:
|
51
|
+
return self._data.__getitem__(key)
|
52
|
+
|
53
|
+
def __iter__(self) -> Iterator[TKey]:
|
54
|
+
return self._data.__iter__()
|
55
|
+
|
56
|
+
def __len__(self) -> int:
|
57
|
+
return self._data.__len__()
|
58
|
+
|
59
|
+
def dict(self) -> dict[str, TValue]:
|
60
|
+
return {str(k): v for k, v in self._data.items()}
|
61
|
+
|
62
|
+
|
36
63
|
P = ParamSpec("P")
|
37
|
-
R = TypeVar("R", bound=
|
38
|
-
|
39
|
-
|
40
|
-
def set_metadata(
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
return wrapper
|
89
|
-
|
90
|
-
return decorator
|
64
|
+
R = TypeVar("R", bound=Output)
|
65
|
+
|
66
|
+
|
67
|
+
def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
|
68
|
+
"""Decorator to stamp Output classes with runtime metadata"""
|
69
|
+
|
70
|
+
if fn is None:
|
71
|
+
return partial(set_metadata, state=state) # type: ignore
|
72
|
+
|
73
|
+
@wraps(fn)
|
74
|
+
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
|
75
|
+
def fmt(v):
|
76
|
+
if np.isscalar(v):
|
77
|
+
return v
|
78
|
+
if hasattr(v, "shape"):
|
79
|
+
return f"{v.__class__.__name__}: shape={getattr(v, 'shape')}"
|
80
|
+
if hasattr(v, "__len__"):
|
81
|
+
return f"{v.__class__.__name__}: len={len(v)}"
|
82
|
+
return f"{v.__class__.__name__}"
|
83
|
+
|
84
|
+
time = datetime.now(timezone.utc)
|
85
|
+
result = fn(*args, **kwargs)
|
86
|
+
duration = (datetime.now(timezone.utc) - time).total_seconds()
|
87
|
+
fn_params = inspect.signature(fn).parameters
|
88
|
+
|
89
|
+
# set all params with defaults then update params with mapped arguments and explicit keyword args
|
90
|
+
arguments = {k: None if v.default is inspect.Parameter.empty else v.default for k, v in fn_params.items()}
|
91
|
+
arguments.update(zip(fn_params, args))
|
92
|
+
arguments.update(kwargs)
|
93
|
+
arguments = {k: fmt(v) for k, v in arguments.items()}
|
94
|
+
state_attrs = (
|
95
|
+
{k: fmt(getattr(args[0], k)) for k in state if "self" in arguments} if "self" in arguments and state else {}
|
96
|
+
)
|
97
|
+
name = (
|
98
|
+
f"{args[0].__class__.__module__}.{args[0].__class__.__name__}.{fn.__name__}"
|
99
|
+
if "self" in arguments
|
100
|
+
else f"{fn.__module__}.{fn.__qualname__}"
|
101
|
+
)
|
102
|
+
metadata = {
|
103
|
+
"_name": name,
|
104
|
+
"_execution_time": time,
|
105
|
+
"_execution_duration": duration,
|
106
|
+
"_arguments": {k: v for k, v in arguments.items() if k != "self"},
|
107
|
+
"_state": state_attrs,
|
108
|
+
"_version": __version__,
|
109
|
+
}
|
110
|
+
for k, v in metadata.items():
|
111
|
+
object.__setattr__(result, k, v)
|
112
|
+
return result
|
113
|
+
|
114
|
+
return wrapper
|
dataeval/utils/__init__.py
CHANGED
@@ -4,7 +4,7 @@ in setting up architectures that are guaranteed to work with applicable DataEval
|
|
4
4
|
metrics. Currently DataEval supports both :term:`TensorFlow` and PyTorch backends.
|
5
5
|
"""
|
6
6
|
|
7
|
-
from dataeval import
|
7
|
+
from dataeval import _IS_TORCH_AVAILABLE
|
8
8
|
from dataeval.utils.metadata import merge_metadata
|
9
9
|
from dataeval.utils.split_dataset import split_dataset
|
10
10
|
|
@@ -15,10 +15,4 @@ if _IS_TORCH_AVAILABLE:
|
|
15
15
|
|
16
16
|
__all__ += ["torch"]
|
17
17
|
|
18
|
-
if _IS_TENSORFLOW_AVAILABLE:
|
19
|
-
from dataeval.utils import tensorflow
|
20
|
-
|
21
|
-
__all__ += ["tensorflow"]
|
22
|
-
|
23
|
-
del _IS_TENSORFLOW_AVAILABLE
|
24
18
|
del _IS_TORCH_AVAILABLE
|