dataeval-0.70.0-py3-none-any.whl → dataeval-0.71.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. dataeval/__init__.py +6 -6
  2. dataeval/_internal/datasets.py +235 -131
  3. dataeval/_internal/detectors/clusterer.py +2 -0
  4. dataeval/_internal/detectors/drift/base.py +2 -2
  5. dataeval/_internal/detectors/drift/mmd.py +1 -1
  6. dataeval/_internal/detectors/duplicates.py +2 -0
  7. dataeval/_internal/detectors/ood/ae.py +5 -3
  8. dataeval/_internal/detectors/ood/aegmm.py +6 -4
  9. dataeval/_internal/detectors/ood/base.py +12 -7
  10. dataeval/_internal/detectors/ood/llr.py +6 -4
  11. dataeval/_internal/detectors/ood/vae.py +5 -3
  12. dataeval/_internal/detectors/ood/vaegmm.py +6 -4
  13. dataeval/_internal/detectors/outliers.py +6 -9
  14. dataeval/_internal/metrics/balance.py +4 -2
  15. dataeval/_internal/metrics/ber.py +2 -0
  16. dataeval/_internal/metrics/coverage.py +4 -0
  17. dataeval/_internal/metrics/divergence.py +6 -2
  18. dataeval/_internal/metrics/diversity.py +8 -6
  19. dataeval/_internal/metrics/parity.py +8 -6
  20. dataeval/_internal/metrics/stats/base.py +105 -46
  21. dataeval/_internal/metrics/stats/datasetstats.py +96 -22
  22. dataeval/_internal/metrics/stats/dimensionstats.py +22 -20
  23. dataeval/_internal/metrics/stats/hashstats.py +11 -9
  24. dataeval/_internal/metrics/stats/labelstats.py +1 -1
  25. dataeval/_internal/metrics/stats/pixelstats.py +28 -26
  26. dataeval/_internal/metrics/stats/visualstats.py +37 -35
  27. dataeval/_internal/metrics/uap.py +6 -2
  28. dataeval/_internal/metrics/utils.py +2 -2
  29. dataeval/_internal/models/pytorch/autoencoder.py +5 -5
  30. dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
  31. dataeval/_internal/utils.py +11 -16
  32. dataeval/_internal/workflows/sufficiency.py +44 -33
  33. dataeval/detectors/__init__.py +4 -0
  34. dataeval/detectors/drift/__init__.py +8 -3
  35. dataeval/detectors/drift/kernels/__init__.py +4 -0
  36. dataeval/detectors/drift/updates/__init__.py +4 -0
  37. dataeval/detectors/linters/__init__.py +15 -4
  38. dataeval/detectors/ood/__init__.py +14 -2
  39. dataeval/metrics/__init__.py +5 -0
  40. dataeval/metrics/bias/__init__.py +13 -4
  41. dataeval/metrics/estimators/__init__.py +8 -8
  42. dataeval/metrics/stats/__init__.py +24 -6
  43. dataeval/utils/__init__.py +16 -3
  44. dataeval/utils/tensorflow/__init__.py +11 -0
  45. dataeval/utils/torch/__init__.py +12 -0
  46. dataeval/utils/torch/datasets/__init__.py +7 -0
  47. dataeval/workflows/__init__.py +4 -0
  48. {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/METADATA +11 -2
  49. dataeval-0.71.0.dist-info/RECORD +80 -0
  50. dataeval/tensorflow/__init__.py +0 -3
  51. dataeval/torch/__init__.py +0 -3
  52. dataeval-0.70.0.dist-info/RECORD +0 -79
  53. /dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
  54. /dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
  55. /dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
  56. /dataeval/{torch → utils/torch}/models/__init__.py +0 -0
  57. /dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
  58. {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/LICENSE.txt +0 -0
  59. {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/WHEEL +0 -0
@@ -1,21 +1,26 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from dataclasses import dataclass
4
- from typing import Iterable
4
+ from typing import Any, Iterable
5
5
 
6
6
  from numpy.typing import ArrayLike
7
7
 
8
- from dataeval._internal.metrics.stats.base import BaseStatsOutput
9
- from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
8
+ from dataeval._internal.metrics.stats.base import BaseStatsOutput, run_stats
9
+ from dataeval._internal.metrics.stats.dimensionstats import (
10
+ DimensionStatsOutput,
11
+ DimensionStatsProcessor,
12
+ )
10
13
  from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
11
- from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
12
- from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, visualstats
14
+ from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
15
+ from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
13
16
  from dataeval._internal.output import OutputMetadata, set_metadata
14
17
 
15
18
 
16
19
  @dataclass(frozen=True)
17
20
  class DatasetStatsOutput(OutputMetadata):
18
21
  """
22
+ Output class for :func:`datasetstats` stats metric
23
+
19
24
  This class represents the outputs of various stats functions against a single
20
25
  dataset, such that each index across all stat outputs are representative of
21
26
  the same source image. Modifying or mixing outputs will result in inaccurate
@@ -23,19 +28,53 @@ class DatasetStatsOutput(OutputMetadata):
23
28
 
24
29
  Attributes
25
30
  ----------
26
- dimensionstats : DimensionStatsOutput or None
27
- pixelstats: PixelStatsOutput or None
28
- visualstats: VisualStatsOutput or None
29
- labelstats: LabelStatsOutput or None, default None
31
+ dimensionstats : DimensionStatsOutput
32
+ pixelstats: PixelStatsOutput
33
+ visualstats: VisualStatsOutput
34
+ labelstats: LabelStatsOutput or None
30
35
  """
31
36
 
32
- dimensionstats: DimensionStatsOutput | None
33
- pixelstats: PixelStatsOutput | None
34
- visualstats: VisualStatsOutput | None
37
+ dimensionstats: DimensionStatsOutput
38
+ pixelstats: PixelStatsOutput
39
+ visualstats: VisualStatsOutput
35
40
  labelstats: LabelStatsOutput | None = None
36
41
 
42
+ def outputs(self) -> list[OutputMetadata]:
43
+ return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
44
+
45
+ def dict(self) -> dict[str, Any]:
46
+ return {k: v for o in self.outputs() for k, v in o.dict().items()}
47
+
48
+ def __post_init__(self):
49
+ lengths = [len(s) for s in self.outputs() if isinstance(s, BaseStatsOutput)]
50
+ if not all(length == lengths[0] for length in lengths):
51
+ raise ValueError("All StatsOutput classes must contain the same number of image sources.")
52
+
53
+
54
+ @dataclass(frozen=True)
55
+ class ChannelStatsOutput(OutputMetadata):
56
+ """
57
+ Output class for :func:`channelstats` stats metric
58
+
59
+ This class represents the outputs of various per-channel stats functions against
60
+ a single dataset, such that each index across all stat outputs are representative
61
+ of the same source image. Modifying or mixing outputs will result in inaccurate
62
+ outlier calculations if not created correctly.
63
+
64
+ Attributes
65
+ ----------
66
+ pixelstats: PixelStatsOutput
67
+ visualstats: VisualStatsOutput
68
+ """
69
+
70
+ pixelstats: PixelStatsOutput
71
+ visualstats: VisualStatsOutput
72
+
37
73
  def outputs(self) -> list[BaseStatsOutput]:
38
- return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats) if s is not None]
74
+ return [self.pixelstats, self.visualstats]
75
+
76
+ def dict(self) -> dict[str, Any]:
77
+ return {**self.pixelstats.dict(), **self.visualstats.dict()}
39
78
 
40
79
  def __post_init__(self):
41
80
  lengths = [len(s) for s in self.outputs()]
@@ -48,9 +87,6 @@ def datasetstats(
48
87
  images: Iterable[ArrayLike],
49
88
  bboxes: Iterable[ArrayLike] | None = None,
50
89
  labels: Iterable[ArrayLike] | None = None,
51
- use_dimension: bool = True,
52
- use_pixel: bool = True,
53
- use_visual: bool = True,
54
90
  ) -> DatasetStatsOutput:
55
91
  """
56
92
  Calculates various statistics for each image
@@ -89,9 +125,47 @@ def datasetstats(
89
125
  [1.744 1.946 0.1164 0.0635 0.0633 0.06274 0.0429 0.0317 0.0317
90
126
  0.02576 0.02081 0.02171 0.01915 0.01767 0.01799 0.01595 0.01433 0.01478]
91
127
  """
92
- return DatasetStatsOutput(
93
- dimensionstats(images, bboxes) if use_dimension else None,
94
- pixelstats(images, bboxes) if use_pixel else None,
95
- visualstats(images, bboxes) if use_visual else None,
96
- labelstats(labels) if labels else None,
97
- )
128
+ outputs = run_stats(images, bboxes, False, [DimensionStatsProcessor, PixelStatsProcessor, VisualStatsProcessor])
129
+ return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None) # type: ignore
130
+
131
+
132
+ @set_metadata("dataeval.metrics")
133
+ def channelstats(
134
+ images: Iterable[ArrayLike],
135
+ bboxes: Iterable[ArrayLike] | None = None,
136
+ ) -> ChannelStatsOutput:
137
+ """
138
+ Calculates various per-channel statistics for each image
139
+
140
+ This function computes pixel and visual metrics on the images
141
+ or individual bounding boxes for each image.
142
+
143
+ Parameters
144
+ ----------
145
+ images : Iterable[ArrayLike]
146
+ Images to perform calculations on
147
+ bboxes : Iterable[ArrayLike] or None
148
+ Bounding boxes in `xyxy` format for each image to perform calculations on
149
+
150
+ Returns
151
+ -------
152
+ ChannelStatsOutput
153
+ Output class containing the per-channel outputs of various stats functions
154
+
155
+ See Also
156
+ --------
157
+ pixelstats, visualstats
158
+
159
+ Examples
160
+ --------
161
+ Calculating the per-channel pixel and visual stats for a dataset
162
+
163
+ >>> stats = channelstats(images)
164
+ >>> print(stats.visualstats.darkness)
165
+ [0.02124 0.1213 0.2212 0.1013 0.1076 0.11383 0.2013 0.2076 0.2139
166
+ 0.3013 0.3076 0.3137 0.4014 0.4075 0.4138 0.5015 0.508 0.5137
167
+ 0.6016 0.6074 0.614 0.701 0.7075 0.714 0.8013 0.8076 0.814
168
+ 0.9014 0.9077 0.914 ]
169
+ """
170
+ outputs = run_stats(images, bboxes, True, [PixelStatsProcessor, VisualStatsProcessor])
171
+ return ChannelStatsOutput(*outputs) # type: ignore
@@ -11,27 +11,11 @@ from dataeval._internal.metrics.utils import get_bitdepth
11
11
  from dataeval._internal.output import set_metadata
12
12
 
13
13
 
14
- class DimensionStatsProcessor(StatsProcessor):
15
- image_function_map = {
16
- "left": lambda x: x.box[0],
17
- "top": lambda x: x.box[1],
18
- "width": lambda x: x.shape[-1],
19
- "height": lambda x: x.shape[-2],
20
- "channels": lambda x: x.shape[-3],
21
- "size": lambda x: np.prod(x.shape[-2:]),
22
- "aspect_ratio": lambda x: x.shape[-1] / x.shape[-2],
23
- "depth": lambda x: get_bitdepth(x.image).depth,
24
- "center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
25
- "distance": lambda x: np.sqrt(
26
- np.square(((x.box[0] + x.box[2]) / 2) - (x.width / 2))
27
- + np.square(((x.box[1] + x.box[3]) / 2) - (x.height / 2))
28
- ),
29
- }
30
-
31
-
32
14
  @dataclass(frozen=True)
33
15
  class DimensionStatsOutput(BaseStatsOutput):
34
16
  """
17
+ Output class for :func:`dimensionstats` stats metric
18
+
35
19
  Attributes
36
20
  ----------
37
21
  left : NDArray[np.int32]
@@ -68,6 +52,25 @@ class DimensionStatsOutput(BaseStatsOutput):
68
52
  distance: NDArray[np.float16]
69
53
 
70
54
 
55
+ class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
56
+ output_class = DimensionStatsOutput
57
+ image_function_map = {
58
+ "left": lambda x: x.box[0],
59
+ "top": lambda x: x.box[1],
60
+ "width": lambda x: x.shape[-1],
61
+ "height": lambda x: x.shape[-2],
62
+ "channels": lambda x: x.shape[-3],
63
+ "size": lambda x: np.prod(x.shape[-2:]),
64
+ "aspect_ratio": lambda x: x.shape[-1] / x.shape[-2],
65
+ "depth": lambda x: get_bitdepth(x.image).depth,
66
+ "center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
67
+ "distance": lambda x: np.sqrt(
68
+ np.square(((x.box[0] + x.box[2]) / 2) - (x.width / 2))
69
+ + np.square(((x.box[1] + x.box[3]) / 2) - (x.height / 2))
70
+ ),
71
+ }
72
+
73
+
71
74
  @set_metadata("dataeval.metrics")
72
75
  def dimensionstats(
73
76
  images: Iterable[ArrayLike],
@@ -107,5 +110,4 @@ def dimensionstats(
107
110
  >>> print(results.channels)
108
111
  [1 1 1 1 1 1 3 1 1 3]
109
112
  """
110
- output = run_stats(images, bboxes, False, DimensionStatsProcessor, DimensionStatsOutput)
111
- return DimensionStatsOutput(**output)
113
+ return run_stats(images, bboxes, False, [DimensionStatsProcessor])[0]
@@ -10,16 +10,11 @@ from dataeval._internal.metrics.utils import pchash, xxhash
10
10
  from dataeval._internal.output import set_metadata
11
11
 
12
12
 
13
- class HashStatsProcessor(StatsProcessor):
14
- image_function_map = {
15
- "xxhash": lambda x: xxhash(x.image),
16
- "pchash": lambda x: pchash(x.image),
17
- }
18
-
19
-
20
13
  @dataclass(frozen=True)
21
14
  class HashStatsOutput(BaseStatsOutput):
22
15
  """
16
+ Output class for :func:`hashstats` stats metric
17
+
23
18
  Attributes
24
19
  ----------
25
20
  xxhash : List[str]
@@ -32,6 +27,14 @@ class HashStatsOutput(BaseStatsOutput):
32
27
  pchash: list[str]
33
28
 
34
29
 
30
+ class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
31
+ output_class = HashStatsOutput
32
+ image_function_map = {
33
+ "xxhash": lambda x: xxhash(x.image),
34
+ "pchash": lambda x: pchash(x.image),
35
+ }
36
+
37
+
35
38
  @set_metadata("dataeval.metrics")
36
39
  def hashstats(
37
40
  images: Iterable[ArrayLike],
@@ -69,5 +72,4 @@ def hashstats(
69
72
  >>> print(results.pchash)
70
73
  ['8f25506af46a7c6a', '8000808000008080', '8e71f18e0ef18e0e', 'a956d6a956d6a928']
71
74
  """
72
- output = run_stats(images, bboxes, False, HashStatsProcessor, HashStatsOutput)
73
- return HashStatsOutput(**output)
75
+ return run_stats(images, bboxes, False, [HashStatsProcessor])[0]
@@ -13,7 +13,7 @@ from dataeval._internal.output import OutputMetadata, set_metadata
13
13
  @dataclass(frozen=True)
14
14
  class LabelStatsOutput(OutputMetadata):
15
15
  """
16
- Output class for `labelstats` metrics function
16
+ Output class for :func:`labelstats` stats metric
17
17
 
18
18
  Attributes
19
19
  ----------
@@ -11,31 +11,11 @@ from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcesso
11
11
  from dataeval._internal.output import set_metadata
12
12
 
13
13
 
14
- class PixelStatsProcessor(StatsProcessor):
15
- cache_keys = ["histogram"]
16
- image_function_map = {
17
- "mean": lambda self: np.mean(self.scaled),
18
- "std": lambda x: np.std(x.scaled),
19
- "var": lambda x: np.var(x.scaled),
20
- "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
21
- "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
22
- "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
23
- "entropy": lambda x: entropy(x.get("histogram")),
24
- }
25
- channel_function_map = {
26
- "mean": lambda x: np.mean(x.scaled, axis=1),
27
- "std": lambda x: np.std(x.scaled, axis=1),
28
- "var": lambda x: np.var(x.scaled, axis=1),
29
- "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
30
- "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
31
- "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
32
- "entropy": lambda x: entropy(x.get("histogram"), axis=1),
33
- }
34
-
35
-
36
14
  @dataclass(frozen=True)
37
15
  class PixelStatsOutput(BaseStatsOutput):
38
16
  """
17
+ Output class for :func:`pixelstats` stats metric
18
+
39
19
  Attributes
40
20
  ----------
41
21
  mean : NDArray[np.float16]
@@ -63,6 +43,29 @@ class PixelStatsOutput(BaseStatsOutput):
63
43
  entropy: NDArray[np.float16]
64
44
 
65
45
 
46
+ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
47
+ output_class = PixelStatsOutput
48
+ cache_keys = ["histogram"]
49
+ image_function_map = {
50
+ "mean": lambda self: np.mean(self.scaled),
51
+ "std": lambda x: np.std(x.scaled),
52
+ "var": lambda x: np.var(x.scaled),
53
+ "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
54
+ "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
55
+ "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
56
+ "entropy": lambda x: entropy(x.get("histogram")),
57
+ }
58
+ channel_function_map = {
59
+ "mean": lambda x: np.mean(x.scaled, axis=1),
60
+ "std": lambda x: np.std(x.scaled, axis=1),
61
+ "var": lambda x: np.var(x.scaled, axis=1),
62
+ "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
63
+ "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
64
+ "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
65
+ "entropy": lambda x: entropy(x.get("histogram"), axis=1),
66
+ }
67
+
68
+
66
69
  @set_metadata("dataeval.metrics")
67
70
  def pixelstats(
68
71
  images: Iterable[ArrayLike],
@@ -93,8 +96,8 @@ def pixelstats(
93
96
  --------
94
97
  dimensionstats, visualstats, Outliers
95
98
 
96
- Notes
97
- -----
99
+ Note
100
+ ----
98
101
  - All metrics are scaled based on the perceived bit depth (which is derived from the largest pixel value)
99
102
  to allow for better comparison between images stored in different formats and different resolutions.
100
103
 
@@ -113,5 +116,4 @@ def pixelstats(
113
116
  0.812 0.9883 0.795 0.9243 0.9243 0.795 0.9907 0.8125 1.028 0.8223
114
117
  1.046 0.8247 1.041 0.8203 1.012 0.812 0.9883 0.795 0.9243 0.9243]
115
118
  """
116
- output = run_stats(images, bboxes, per_channel, PixelStatsProcessor, PixelStatsOutput)
117
- return PixelStatsOutput(**output)
119
+ return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]
@@ -13,41 +13,16 @@ from dataeval._internal.output import set_metadata
13
13
  QUARTILES = (0, 25, 50, 75, 100)
14
14
 
15
15
 
16
- class VisualStatsProcessor(StatsProcessor):
17
- cache_keys = ["percentiles"]
18
- image_function_map = {
19
- "brightness": lambda x: x.get("percentiles")[-2],
20
- "blurriness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
21
- "contrast": lambda x: np.nan_to_num(
22
- (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
23
- ),
24
- "darkness": lambda x: x.get("percentiles")[1],
25
- "missing": lambda x: np.sum(np.isnan(x.image)) / np.prod(x.shape[-2:]),
26
- "zeros": lambda x: np.count_nonzero(x.image == 0) / np.prod(x.shape[-2:]),
27
- "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
28
- }
29
- channel_function_map = {
30
- "brightness": lambda x: x.get("percentiles")[:, -2],
31
- "blurriness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
32
- "contrast": lambda x: np.nan_to_num(
33
- (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
34
- / np.mean(x.get("percentiles"), axis=1)
35
- ),
36
- "darkness": lambda x: x.get("percentiles")[:, 1],
37
- "missing": lambda x: np.sum(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
38
- "zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
39
- "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
40
- }
41
-
42
-
43
16
  @dataclass(frozen=True)
44
17
  class VisualStatsOutput(BaseStatsOutput):
45
18
  """
19
+ Output class for :func:`visualstats` stats metric
20
+
46
21
  Attributes
47
22
  ----------
48
23
  brightness : NDArray[np.float16]
49
24
  Brightness of the images
50
- blurriness : NDArray[np.float16]
25
+ sharpness : NDArray[np.float16]
51
26
  Blurriness of the images
52
27
  contrast : NDArray[np.float16]
53
28
  Image contrast ratio
@@ -62,7 +37,7 @@ class VisualStatsOutput(BaseStatsOutput):
62
37
  """
63
38
 
64
39
  brightness: NDArray[np.float16]
65
- blurriness: NDArray[np.float16]
40
+ sharpness: NDArray[np.float16]
66
41
  contrast: NDArray[np.float16]
67
42
  darkness: NDArray[np.float16]
68
43
  missing: NDArray[np.float16]
@@ -70,6 +45,34 @@ class VisualStatsOutput(BaseStatsOutput):
70
45
  percentiles: NDArray[np.float16]
71
46
 
72
47
 
48
+ class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
49
+ output_class = VisualStatsOutput
50
+ cache_keys = ["percentiles"]
51
+ image_function_map = {
52
+ "brightness": lambda x: x.get("percentiles")[-2],
53
+ "sharpness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
54
+ "contrast": lambda x: np.nan_to_num(
55
+ (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
56
+ ),
57
+ "darkness": lambda x: x.get("percentiles")[1],
58
+ "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
59
+ "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
60
+ "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
61
+ }
62
+ channel_function_map = {
63
+ "brightness": lambda x: x.get("percentiles")[:, -2],
64
+ "sharpness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
65
+ "contrast": lambda x: np.nan_to_num(
66
+ (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
67
+ / np.mean(x.get("percentiles"), axis=1)
68
+ ),
69
+ "darkness": lambda x: x.get("percentiles")[:, 1],
70
+ "missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
71
+ "zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
72
+ "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
73
+ }
74
+
75
+
73
76
  @set_metadata("dataeval.metrics")
74
77
  def visualstats(
75
78
  images: Iterable[ArrayLike],
@@ -79,7 +82,7 @@ def visualstats(
79
82
  """
80
83
  Calculates visual statistics for each image
81
84
 
82
- This function computes various visual metrics (e.g., brightness, darkness, contrast, blurriness)
85
+ This function computes various visual metrics (e.g., brightness, darkness, contrast, sharpness)
83
86
  on the images as a whole.
84
87
 
85
88
  Parameters
@@ -93,15 +96,15 @@ def visualstats(
93
96
  -------
94
97
  VisualStatsOutput
95
98
  A dictionary-like object containing the computed visual statistics for each image. The keys correspond
96
- to the names of the statistics (e.g., 'brightness', 'blurriness'), and the values are lists of results for
99
+ to the names of the statistics (e.g., 'brightness', 'sharpness'), and the values are lists of results for
97
100
  each image or numpy arrays when the results are multi-dimensional.
98
101
 
99
102
  See Also
100
103
  --------
101
104
  dimensionstats, pixelstats, Outliers
102
105
 
103
- Notes
104
- -----
106
+ Note
107
+ ----
105
108
  - `zeros` and `missing` are presented as a percentage of total pixel counts
106
109
 
107
110
  Examples
@@ -118,5 +121,4 @@ def visualstats(
118
121
  1.258 1.257 1.257 1.256 1.256 1.255 1.255 1.255 1.255 1.254 1.254 1.254
119
122
  1.254 1.254 1.254 1.253 1.253 1.253]
120
123
  """
121
- output = run_stats(images, bboxes, per_channel, VisualStatsProcessor, VisualStatsOutput)
122
- return VisualStatsOutput(**output)
124
+ return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]
@@ -4,6 +4,8 @@ FR Test Statistic based estimate for the upperbound
4
4
  average precision using empirical mean precision
5
5
  """
6
6
 
7
+ from __future__ import annotations
8
+
7
9
  from dataclasses import dataclass
8
10
 
9
11
  from numpy.typing import ArrayLike
@@ -16,6 +18,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
16
18
  @dataclass(frozen=True)
17
19
  class UAPOutput(OutputMetadata):
18
20
  """
21
+ Output class for :func:`uap` estimator metric
22
+
19
23
  Attributes
20
24
  ----------
21
25
  uap : float
@@ -48,8 +52,8 @@ def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
48
52
  ValueError
49
53
  If unique classes M < 2
50
54
 
51
- Notes
52
- -----
55
+ Note
56
+ ----
53
57
  This function calculates the empirical mean precision using the
54
58
  ``average_precision_score`` from scikit-learn, weighted by the class distribution.
55
59
 
@@ -91,8 +91,8 @@ def entropy(
91
91
  subset_mask: NDArray[np.bool_] | None
92
92
  Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
93
93
 
94
- Notes
95
- -----
94
+ Note
95
+ ----
96
96
  For continuous variables, histogram bins are chosen automatically. See
97
97
  numpy.histogram for details.
98
98
 
@@ -61,9 +61,9 @@ class AETrainer:
61
61
  List[float]
62
62
  A list of average loss values for each epoch.
63
63
 
64
- Notes
64
+ Note
65
65
  ----
66
- To replace this function with a custom function, do
66
+ To replace this function with a custom function, do:
67
67
  AETrainer.train = custom_function
68
68
  """
69
69
  # Setup training
@@ -120,7 +120,7 @@ class AETrainer:
120
120
 
121
121
  Note
122
122
  ----
123
- To replace this function with a custom function, do
123
+ To replace this function with a custom function, do:
124
124
  AETrainer.eval = custom_function
125
125
  """
126
126
  self.model.eval()
@@ -155,8 +155,8 @@ class AETrainer:
155
155
  torch.Tensor
156
156
  Data encoded by the model
157
157
 
158
- Notes
159
- -----
158
+ Note
159
+ ----
160
160
  This function should be run after the model has been trained and evaluated.
161
161
  """
162
162
  self.model.eval()
@@ -272,8 +272,6 @@ class PixelCNN(distribution.Distribution):
272
272
  The minimum value of the input data.
273
273
  dtype : tensorflow dtype, default tf.float32
274
274
  Data type of the `Distribution`.
275
- name : str, default "PixelCNN"
276
- The name of the `Distribution`.
277
275
  """
278
276
 
279
277
  def __init__(
@@ -293,10 +291,9 @@ class PixelCNN(distribution.Distribution):
293
291
  high: int = 255,
294
292
  low: int = 0,
295
293
  dtype=tf.float32,
296
- name: str = "PixelCNN",
297
294
  ) -> None:
298
295
  parameters = dict(locals())
299
- with tf.name_scope(name) as name:
296
+ with tf.name_scope("PixelCNN") as name:
300
297
  super().__init__(
301
298
  dtype=dtype,
302
299
  reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
@@ -8,7 +8,7 @@ from torch.utils.data import Dataset
8
8
 
9
9
  def read_dataset(dataset: Dataset) -> list[list[Any]]:
10
10
  """
11
- Extract information from a dataset at each index into a individual lists of each information position
11
+ Extract information from a dataset at each index into individual lists of each information position
12
12
 
13
13
  Parameters
14
14
  ----------
@@ -31,36 +31,31 @@ def read_dataset(dataset: Dataset) -> list[list[Any]]:
31
31
  Examples
32
32
  --------
33
33
  >>> import numpy as np
34
-
35
- >>> data = np.ones((10, 3, 3))
34
+ >>> data = np.ones((10, 1, 3, 3))
36
35
  >>> labels = np.ones((10,))
37
36
  >>> class ICDataset:
38
37
  ... def __init__(self, data, labels):
39
38
  ... self.data = data
40
39
  ... self.labels = labels
41
-
40
+ ...
42
41
  ... def __getitem__(self, idx):
43
42
  ... return self.data[idx], self.labels[idx]
44
43
 
45
44
  >>> ds = ICDataset(data, labels)
46
45
 
47
46
  >>> result = read_dataset(ds)
48
- >>> assert len(result) == 2
49
- True
50
- >>> assert result[0].shape == (10, 3, 3) # 10 3x3 images
51
- True
52
- >>> assert result[1].shape == (10,) # 10 labels
53
- True
47
+ >>> len(result) # images and labels
48
+ 2
49
+ >>> np.asarray(result[0]).shape # images
50
+ (10, 1, 3, 3)
51
+ >>> np.asarray(result[1]).shape # labels
52
+ (10,)
54
53
  """
55
54
 
56
- ddict: dict[int, list] = defaultdict(list)
55
+ ddict: dict[int, list[Any]] = defaultdict(list[Any])
57
56
 
58
57
  for data in dataset:
59
- # Convert to tuple if single return (e.g. images only)
60
- if not isinstance(data, tuple):
61
- data = (data,)
62
-
63
- for i, d in enumerate(data):
58
+ for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
64
59
  ddict[i].append(d)
65
60
 
66
61
  return list(ddict.values())