dataeval 0.74.0__py3-none-any.whl → 0.74.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. dataeval/__init__.py +23 -10
  2. dataeval/detectors/__init__.py +2 -10
  3. dataeval/detectors/drift/base.py +3 -3
  4. dataeval/detectors/drift/mmd.py +1 -1
  5. dataeval/detectors/linters/clusterer.py +3 -3
  6. dataeval/detectors/linters/duplicates.py +4 -4
  7. dataeval/detectors/linters/outliers.py +4 -4
  8. dataeval/detectors/ood/__init__.py +5 -12
  9. dataeval/detectors/ood/base.py +5 -5
  10. dataeval/detectors/ood/metadata_ks_compare.py +12 -13
  11. dataeval/interop.py +15 -3
  12. dataeval/logging.py +16 -0
  13. dataeval/metrics/bias/balance.py +3 -3
  14. dataeval/metrics/bias/coverage.py +3 -3
  15. dataeval/metrics/bias/diversity.py +3 -3
  16. dataeval/metrics/bias/metadata_preprocessing.py +3 -3
  17. dataeval/metrics/bias/parity.py +4 -4
  18. dataeval/metrics/estimators/ber.py +3 -3
  19. dataeval/metrics/estimators/divergence.py +3 -3
  20. dataeval/metrics/estimators/uap.py +3 -3
  21. dataeval/metrics/stats/base.py +2 -2
  22. dataeval/metrics/stats/boxratiostats.py +1 -1
  23. dataeval/metrics/stats/datasetstats.py +6 -6
  24. dataeval/metrics/stats/dimensionstats.py +1 -1
  25. dataeval/metrics/stats/hashstats.py +1 -1
  26. dataeval/metrics/stats/labelstats.py +3 -3
  27. dataeval/metrics/stats/pixelstats.py +1 -1
  28. dataeval/metrics/stats/visualstats.py +1 -1
  29. dataeval/output.py +81 -57
  30. dataeval/utils/__init__.py +1 -7
  31. dataeval/utils/split_dataset.py +306 -279
  32. dataeval/workflows/sufficiency.py +4 -4
  33. {dataeval-0.74.0.dist-info → dataeval-0.74.2.dist-info}/METADATA +3 -8
  34. dataeval-0.74.2.dist-info/RECORD +66 -0
  35. dataeval/detectors/ood/ae.py +0 -76
  36. dataeval/detectors/ood/aegmm.py +0 -67
  37. dataeval/detectors/ood/base_tf.py +0 -109
  38. dataeval/detectors/ood/llr.py +0 -302
  39. dataeval/detectors/ood/vae.py +0 -98
  40. dataeval/detectors/ood/vaegmm.py +0 -76
  41. dataeval/utils/lazy.py +0 -26
  42. dataeval/utils/tensorflow/__init__.py +0 -19
  43. dataeval/utils/tensorflow/_internal/gmm.py +0 -103
  44. dataeval/utils/tensorflow/_internal/loss.py +0 -121
  45. dataeval/utils/tensorflow/_internal/models.py +0 -1394
  46. dataeval/utils/tensorflow/_internal/trainer.py +0 -114
  47. dataeval/utils/tensorflow/_internal/utils.py +0 -256
  48. dataeval/utils/tensorflow/loss/__init__.py +0 -11
  49. dataeval-0.74.0.dist-info/RECORD +0 -79
  50. {dataeval-0.74.0.dist-info → dataeval-0.74.2.dist-info}/LICENSE.txt +0 -0
  51. {dataeval-0.74.0.dist-info → dataeval-0.74.2.dist-info}/WHEEL +0 -0
@@ -15,11 +15,11 @@ from dataeval.metrics.stats.dimensionstats import (
15
15
  from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
16
16
  from dataeval.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
17
17
  from dataeval.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
18
- from dataeval.output import OutputMetadata, set_metadata
18
+ from dataeval.output import Output, set_metadata
19
19
 
20
20
 
21
21
  @dataclass(frozen=True)
22
- class DatasetStatsOutput(OutputMetadata):
22
+ class DatasetStatsOutput(Output):
23
23
  """
24
24
  Output class for :func:`datasetstats` stats metric
25
25
 
@@ -41,7 +41,7 @@ class DatasetStatsOutput(OutputMetadata):
41
41
  visualstats: VisualStatsOutput
42
42
  labelstats: LabelStatsOutput | None = None
43
43
 
44
- def _outputs(self) -> list[OutputMetadata]:
44
+ def _outputs(self) -> list[Output]:
45
45
  return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
46
46
 
47
47
  def dict(self) -> dict[str, Any]:
@@ -54,7 +54,7 @@ class DatasetStatsOutput(OutputMetadata):
54
54
 
55
55
 
56
56
  @dataclass(frozen=True)
57
- class ChannelStatsOutput(OutputMetadata):
57
+ class ChannelStatsOutput(Output):
58
58
  """
59
59
  Output class for :func:`channelstats` stats metric
60
60
 
@@ -84,7 +84,7 @@ class ChannelStatsOutput(OutputMetadata):
84
84
  raise ValueError("All StatsOutput classes must contain the same number of image sources.")
85
85
 
86
86
 
87
- @set_metadata()
87
+ @set_metadata
88
88
  def datasetstats(
89
89
  images: Iterable[ArrayLike],
90
90
  bboxes: Iterable[ArrayLike] | None = None,
@@ -131,7 +131,7 @@ def datasetstats(
131
131
  return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None) # type: ignore
132
132
 
133
133
 
134
- @set_metadata()
134
+ @set_metadata
135
135
  def channelstats(
136
136
  images: Iterable[ArrayLike],
137
137
  bboxes: Iterable[ArrayLike] | None = None,
@@ -73,7 +73,7 @@ class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
73
73
  }
74
74
 
75
75
 
76
- @set_metadata()
76
+ @set_metadata
77
77
  def dimensionstats(
78
78
  images: Iterable[ArrayLike],
79
79
  bboxes: Iterable[ArrayLike] | None = None,
@@ -116,7 +116,7 @@ class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
116
116
  }
117
117
 
118
118
 
119
- @set_metadata()
119
+ @set_metadata
120
120
  def hashstats(
121
121
  images: Iterable[ArrayLike],
122
122
  bboxes: Iterable[ArrayLike] | None = None,
@@ -9,11 +9,11 @@ from typing import Any, Iterable, Mapping, TypeVar
9
9
  from numpy.typing import ArrayLike
10
10
 
11
11
  from dataeval.interop import to_numpy
12
- from dataeval.output import OutputMetadata, set_metadata
12
+ from dataeval.output import Output, set_metadata
13
13
 
14
14
 
15
15
  @dataclass(frozen=True)
16
- class LabelStatsOutput(OutputMetadata):
16
+ class LabelStatsOutput(Output):
17
17
  """
18
18
  Output class for :func:`labelstats` stats metric
19
19
 
@@ -57,7 +57,7 @@ def sort(d: Mapping[TKey, Any]) -> dict[TKey, Any]:
57
57
  return dict(sorted(d.items(), key=lambda x: x[0]))
58
58
 
59
59
 
60
- @set_metadata()
60
+ @set_metadata
61
61
  def labelstats(
62
62
  labels: Iterable[ArrayLike],
63
63
  ) -> LabelStatsOutput:
@@ -67,7 +67,7 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
67
67
  }
68
68
 
69
69
 
70
- @set_metadata()
70
+ @set_metadata
71
71
  def pixelstats(
72
72
  images: Iterable[ArrayLike],
73
73
  bboxes: Iterable[ArrayLike] | None = None,
@@ -74,7 +74,7 @@ class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
74
74
  }
75
75
 
76
76
 
77
- @set_metadata()
77
+ @set_metadata
78
78
  def visualstats(
79
79
  images: Iterable[ArrayLike],
80
80
  bboxes: Iterable[ArrayLike] | None = None,
dataeval/output.py CHANGED
@@ -4,9 +4,10 @@ __all__ = []
4
4
 
5
5
  import inspect
6
6
  import sys
7
+ from collections.abc import Mapping
7
8
  from datetime import datetime, timezone
8
- from functools import wraps
9
- from typing import Any, Callable, Iterable, TypeVar
9
+ from functools import partial, wraps
10
+ from typing import Any, Callable, Iterator, TypeVar
10
11
 
11
12
  import numpy as np
12
13
 
@@ -18,7 +19,7 @@ else:
18
19
  from dataeval import __version__
19
20
 
20
21
 
21
- class OutputMetadata:
22
+ class Output:
22
23
  _name: str
23
24
  _execution_time: datetime
24
25
  _execution_duration: float
@@ -26,6 +27,9 @@ class OutputMetadata:
26
27
  _state: dict[str, str]
27
28
  _version: str
28
29
 
30
+ def __str__(self) -> str:
31
+ return f"{self.__class__.__name__}: {str(self.dict())}"
32
+
29
33
  def dict(self) -> dict[str, Any]:
30
34
  return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
31
35
 
@@ -33,58 +37,78 @@ class OutputMetadata:
33
37
  return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
34
38
 
35
39
 
40
+ TKey = TypeVar("TKey", str, int, float, set)
41
+ TValue = TypeVar("TValue")
42
+
43
+
44
+ class MappingOutput(Mapping[TKey, TValue], Output):
45
+ __slots__ = ["_data"]
46
+
47
+ def __init__(self, data: Mapping[TKey, TValue]):
48
+ self._data = data
49
+
50
+ def __getitem__(self, key: TKey) -> TValue:
51
+ return self._data.__getitem__(key)
52
+
53
+ def __iter__(self) -> Iterator[TKey]:
54
+ return self._data.__iter__()
55
+
56
+ def __len__(self) -> int:
57
+ return self._data.__len__()
58
+
59
+ def dict(self) -> dict[str, TValue]:
60
+ return {str(k): v for k, v in self._data.items()}
61
+
62
+
36
63
  P = ParamSpec("P")
37
- R = TypeVar("R", bound=OutputMetadata)
38
-
39
-
40
- def set_metadata(
41
- state_attr: Iterable[str] | None = None,
42
- ) -> Callable[[Callable[P, R]], Callable[P, R]]:
43
- """Decorator to stamp OutputMetadata classes with runtime metadata"""
44
-
45
- def decorator(fn: Callable[P, R]) -> Callable[P, R]:
46
- @wraps(fn)
47
- def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
48
- def fmt(v):
49
- if np.isscalar(v):
50
- return v
51
- if hasattr(v, "shape"):
52
- return f"{v.__class__.__name__}: shape={getattr(v, 'shape')}"
53
- if hasattr(v, "__len__"):
54
- return f"{v.__class__.__name__}: len={len(v)}"
55
- return f"{v.__class__.__name__}"
56
-
57
- time = datetime.now(timezone.utc)
58
- result = fn(*args, **kwargs)
59
- duration = (datetime.now(timezone.utc) - time).total_seconds()
60
- fn_params = inspect.signature(fn).parameters
61
- # set all params with defaults then update params with mapped arguments and explicit keyword args
62
- arguments = {k: None if v.default is inspect.Parameter.empty else v.default for k, v in fn_params.items()}
63
- arguments.update(zip(fn_params, args))
64
- arguments.update(kwargs)
65
- arguments = {k: fmt(v) for k, v in arguments.items()}
66
- state = (
67
- {k: fmt(getattr(args[0], k)) for k in state_attr if "self" in arguments}
68
- if "self" in arguments and state_attr
69
- else {}
70
- )
71
- name = (
72
- f"{args[0].__class__.__module__}.{args[0].__class__.__name__}.{fn.__name__}"
73
- if "self" in arguments
74
- else f"{fn.__module__}.{fn.__qualname__}"
75
- )
76
- metadata = {
77
- "_name": name,
78
- "_execution_time": time,
79
- "_execution_duration": duration,
80
- "_arguments": {k: v for k, v in arguments.items() if k != "self"},
81
- "_state": state,
82
- "_version": __version__,
83
- }
84
- for k, v in metadata.items():
85
- object.__setattr__(result, k, v)
86
- return result
87
-
88
- return wrapper
89
-
90
- return decorator
64
+ R = TypeVar("R", bound=Output)
65
+
66
+
67
+ def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
68
+ """Decorator to stamp Output classes with runtime metadata"""
69
+
70
+ if fn is None:
71
+ return partial(set_metadata, state=state) # type: ignore
72
+
73
+ @wraps(fn)
74
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
75
+ def fmt(v):
76
+ if np.isscalar(v):
77
+ return v
78
+ if hasattr(v, "shape"):
79
+ return f"{v.__class__.__name__}: shape={getattr(v, 'shape')}"
80
+ if hasattr(v, "__len__"):
81
+ return f"{v.__class__.__name__}: len={len(v)}"
82
+ return f"{v.__class__.__name__}"
83
+
84
+ time = datetime.now(timezone.utc)
85
+ result = fn(*args, **kwargs)
86
+ duration = (datetime.now(timezone.utc) - time).total_seconds()
87
+ fn_params = inspect.signature(fn).parameters
88
+
89
+ # set all params with defaults then update params with mapped arguments and explicit keyword args
90
+ arguments = {k: None if v.default is inspect.Parameter.empty else v.default for k, v in fn_params.items()}
91
+ arguments.update(zip(fn_params, args))
92
+ arguments.update(kwargs)
93
+ arguments = {k: fmt(v) for k, v in arguments.items()}
94
+ state_attrs = (
95
+ {k: fmt(getattr(args[0], k)) for k in state if "self" in arguments} if "self" in arguments and state else {}
96
+ )
97
+ name = (
98
+ f"{args[0].__class__.__module__}.{args[0].__class__.__name__}.{fn.__name__}"
99
+ if "self" in arguments
100
+ else f"{fn.__module__}.{fn.__qualname__}"
101
+ )
102
+ metadata = {
103
+ "_name": name,
104
+ "_execution_time": time,
105
+ "_execution_duration": duration,
106
+ "_arguments": {k: v for k, v in arguments.items() if k != "self"},
107
+ "_state": state_attrs,
108
+ "_version": __version__,
109
+ }
110
+ for k, v in metadata.items():
111
+ object.__setattr__(result, k, v)
112
+ return result
113
+
114
+ return wrapper
@@ -4,7 +4,7 @@ in setting up architectures that are guaranteed to work with applicable DataEval
4
4
  metrics. Currently DataEval supports both :term:`TensorFlow` and PyTorch backends.
5
5
  """
6
6
 
7
- from dataeval import _IS_TENSORFLOW_AVAILABLE, _IS_TORCH_AVAILABLE
7
+ from dataeval import _IS_TORCH_AVAILABLE
8
8
  from dataeval.utils.metadata import merge_metadata
9
9
  from dataeval.utils.split_dataset import split_dataset
10
10
 
@@ -15,10 +15,4 @@ if _IS_TORCH_AVAILABLE:
15
15
 
16
16
  __all__ += ["torch"]
17
17
 
18
- if _IS_TENSORFLOW_AVAILABLE:
19
- from dataeval.utils import tensorflow
20
-
21
- __all__ += ["tensorflow"]
22
-
23
- del _IS_TENSORFLOW_AVAILABLE
24
18
  del _IS_TORCH_AVAILABLE