dataeval 0.70.0__py3-none-any.whl → 0.71.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +6 -6
- dataeval/_internal/datasets.py +235 -131
- dataeval/_internal/detectors/clusterer.py +2 -0
- dataeval/_internal/detectors/drift/base.py +2 -2
- dataeval/_internal/detectors/drift/mmd.py +1 -1
- dataeval/_internal/detectors/duplicates.py +2 -0
- dataeval/_internal/detectors/ood/ae.py +5 -3
- dataeval/_internal/detectors/ood/aegmm.py +6 -4
- dataeval/_internal/detectors/ood/base.py +12 -7
- dataeval/_internal/detectors/ood/llr.py +6 -4
- dataeval/_internal/detectors/ood/vae.py +5 -3
- dataeval/_internal/detectors/ood/vaegmm.py +6 -4
- dataeval/_internal/detectors/outliers.py +6 -9
- dataeval/_internal/metrics/balance.py +4 -2
- dataeval/_internal/metrics/ber.py +2 -0
- dataeval/_internal/metrics/coverage.py +4 -0
- dataeval/_internal/metrics/divergence.py +6 -2
- dataeval/_internal/metrics/diversity.py +8 -6
- dataeval/_internal/metrics/parity.py +8 -6
- dataeval/_internal/metrics/stats/base.py +105 -46
- dataeval/_internal/metrics/stats/datasetstats.py +96 -22
- dataeval/_internal/metrics/stats/dimensionstats.py +22 -20
- dataeval/_internal/metrics/stats/hashstats.py +11 -9
- dataeval/_internal/metrics/stats/labelstats.py +1 -1
- dataeval/_internal/metrics/stats/pixelstats.py +28 -26
- dataeval/_internal/metrics/stats/visualstats.py +37 -35
- dataeval/_internal/metrics/uap.py +6 -2
- dataeval/_internal/metrics/utils.py +2 -2
- dataeval/_internal/models/pytorch/autoencoder.py +5 -5
- dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
- dataeval/_internal/utils.py +11 -16
- dataeval/_internal/workflows/sufficiency.py +44 -33
- dataeval/detectors/__init__.py +4 -0
- dataeval/detectors/drift/__init__.py +8 -3
- dataeval/detectors/drift/kernels/__init__.py +4 -0
- dataeval/detectors/drift/updates/__init__.py +4 -0
- dataeval/detectors/linters/__init__.py +15 -4
- dataeval/detectors/ood/__init__.py +14 -2
- dataeval/metrics/__init__.py +5 -0
- dataeval/metrics/bias/__init__.py +13 -4
- dataeval/metrics/estimators/__init__.py +8 -8
- dataeval/metrics/stats/__init__.py +24 -6
- dataeval/utils/__init__.py +16 -3
- dataeval/utils/tensorflow/__init__.py +11 -0
- dataeval/utils/torch/__init__.py +12 -0
- dataeval/utils/torch/datasets/__init__.py +7 -0
- dataeval/workflows/__init__.py +4 -0
- {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/METADATA +11 -2
- dataeval-0.71.0.dist-info/RECORD +80 -0
- dataeval/tensorflow/__init__.py +0 -3
- dataeval/torch/__init__.py +0 -3
- dataeval-0.70.0.dist-info/RECORD +0 -79
- /dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/models/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
- {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.70.0.dist-info → dataeval-0.71.0.dist-info}/WHEEL +0 -0
@@ -1,21 +1,26 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from dataclasses import dataclass
|
4
|
-
from typing import Iterable
|
4
|
+
from typing import Any, Iterable
|
5
5
|
|
6
6
|
from numpy.typing import ArrayLike
|
7
7
|
|
8
|
-
from dataeval._internal.metrics.stats.base import BaseStatsOutput
|
9
|
-
from dataeval._internal.metrics.stats.dimensionstats import
|
8
|
+
from dataeval._internal.metrics.stats.base import BaseStatsOutput, run_stats
|
9
|
+
from dataeval._internal.metrics.stats.dimensionstats import (
|
10
|
+
DimensionStatsOutput,
|
11
|
+
DimensionStatsProcessor,
|
12
|
+
)
|
10
13
|
from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
|
11
|
-
from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput,
|
12
|
-
from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput,
|
14
|
+
from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
|
15
|
+
from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
|
13
16
|
from dataeval._internal.output import OutputMetadata, set_metadata
|
14
17
|
|
15
18
|
|
16
19
|
@dataclass(frozen=True)
|
17
20
|
class DatasetStatsOutput(OutputMetadata):
|
18
21
|
"""
|
22
|
+
Output class for :func:`datasetstats` stats metric
|
23
|
+
|
19
24
|
This class represents the outputs of various stats functions against a single
|
20
25
|
dataset, such that each index across all stat outputs are representative of
|
21
26
|
the same source image. Modifying or mixing outputs will result in inaccurate
|
@@ -23,19 +28,53 @@ class DatasetStatsOutput(OutputMetadata):
|
|
23
28
|
|
24
29
|
Attributes
|
25
30
|
----------
|
26
|
-
dimensionstats : DimensionStatsOutput
|
27
|
-
pixelstats: PixelStatsOutput
|
28
|
-
visualstats: VisualStatsOutput
|
29
|
-
labelstats: LabelStatsOutput or None
|
31
|
+
dimensionstats : DimensionStatsOutput
|
32
|
+
pixelstats: PixelStatsOutput
|
33
|
+
visualstats: VisualStatsOutput
|
34
|
+
labelstats: LabelStatsOutput or None
|
30
35
|
"""
|
31
36
|
|
32
|
-
dimensionstats: DimensionStatsOutput
|
33
|
-
pixelstats: PixelStatsOutput
|
34
|
-
visualstats: VisualStatsOutput
|
37
|
+
dimensionstats: DimensionStatsOutput
|
38
|
+
pixelstats: PixelStatsOutput
|
39
|
+
visualstats: VisualStatsOutput
|
35
40
|
labelstats: LabelStatsOutput | None = None
|
36
41
|
|
42
|
+
def outputs(self) -> list[OutputMetadata]:
|
43
|
+
return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
|
44
|
+
|
45
|
+
def dict(self) -> dict[str, Any]:
|
46
|
+
return {k: v for o in self.outputs() for k, v in o.dict().items()}
|
47
|
+
|
48
|
+
def __post_init__(self):
|
49
|
+
lengths = [len(s) for s in self.outputs() if isinstance(s, BaseStatsOutput)]
|
50
|
+
if not all(length == lengths[0] for length in lengths):
|
51
|
+
raise ValueError("All StatsOutput classes must contain the same number of image sources.")
|
52
|
+
|
53
|
+
|
54
|
+
@dataclass(frozen=True)
|
55
|
+
class ChannelStatsOutput(OutputMetadata):
|
56
|
+
"""
|
57
|
+
Output class for :func:`channelstats` stats metric
|
58
|
+
|
59
|
+
This class represents the outputs of various per-channel stats functions against
|
60
|
+
a single dataset, such that each index across all stat outputs are representative
|
61
|
+
of the same source image. Modifying or mixing outputs will result in inaccurate
|
62
|
+
outlier calculations if not created correctly.
|
63
|
+
|
64
|
+
Attributes
|
65
|
+
----------
|
66
|
+
pixelstats: PixelStatsOutput
|
67
|
+
visualstats: VisualStatsOutput
|
68
|
+
"""
|
69
|
+
|
70
|
+
pixelstats: PixelStatsOutput
|
71
|
+
visualstats: VisualStatsOutput
|
72
|
+
|
37
73
|
def outputs(self) -> list[BaseStatsOutput]:
|
38
|
-
return [
|
74
|
+
return [self.pixelstats, self.visualstats]
|
75
|
+
|
76
|
+
def dict(self) -> dict[str, Any]:
|
77
|
+
return {**self.pixelstats.dict(), **self.visualstats.dict()}
|
39
78
|
|
40
79
|
def __post_init__(self):
|
41
80
|
lengths = [len(s) for s in self.outputs()]
|
@@ -48,9 +87,6 @@ def datasetstats(
|
|
48
87
|
images: Iterable[ArrayLike],
|
49
88
|
bboxes: Iterable[ArrayLike] | None = None,
|
50
89
|
labels: Iterable[ArrayLike] | None = None,
|
51
|
-
use_dimension: bool = True,
|
52
|
-
use_pixel: bool = True,
|
53
|
-
use_visual: bool = True,
|
54
90
|
) -> DatasetStatsOutput:
|
55
91
|
"""
|
56
92
|
Calculates various statistics for each image
|
@@ -89,9 +125,47 @@ def datasetstats(
|
|
89
125
|
[1.744 1.946 0.1164 0.0635 0.0633 0.06274 0.0429 0.0317 0.0317
|
90
126
|
0.02576 0.02081 0.02171 0.01915 0.01767 0.01799 0.01595 0.01433 0.01478]
|
91
127
|
"""
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
128
|
+
outputs = run_stats(images, bboxes, False, [DimensionStatsProcessor, PixelStatsProcessor, VisualStatsProcessor])
|
129
|
+
return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None) # type: ignore
|
130
|
+
|
131
|
+
|
132
|
+
@set_metadata("dataeval.metrics")
|
133
|
+
def channelstats(
|
134
|
+
images: Iterable[ArrayLike],
|
135
|
+
bboxes: Iterable[ArrayLike] | None = None,
|
136
|
+
) -> ChannelStatsOutput:
|
137
|
+
"""
|
138
|
+
Calculates various per-channel statistics for each image
|
139
|
+
|
140
|
+
This function computes pixel and visual metrics on the images
|
141
|
+
or individual bounding boxes for each image.
|
142
|
+
|
143
|
+
Parameters
|
144
|
+
----------
|
145
|
+
images : Iterable[ArrayLike]
|
146
|
+
Images to perform calculations on
|
147
|
+
bboxes : Iterable[ArrayLike] or None
|
148
|
+
Bounding boxes in `xyxy` format for each image to perform calculations on
|
149
|
+
|
150
|
+
Returns
|
151
|
+
-------
|
152
|
+
ChannelStatsOutput
|
153
|
+
Output class containing the per-channel outputs of various stats functions
|
154
|
+
|
155
|
+
See Also
|
156
|
+
--------
|
157
|
+
pixelstats, visualstats
|
158
|
+
|
159
|
+
Examples
|
160
|
+
--------
|
161
|
+
Calculating the per-channel pixel and visual stats for a dataset
|
162
|
+
|
163
|
+
>>> stats = channelstats(images)
|
164
|
+
>>> print(stats.visualstats.darkness)
|
165
|
+
[0.02124 0.1213 0.2212 0.1013 0.1076 0.11383 0.2013 0.2076 0.2139
|
166
|
+
0.3013 0.3076 0.3137 0.4014 0.4075 0.4138 0.5015 0.508 0.5137
|
167
|
+
0.6016 0.6074 0.614 0.701 0.7075 0.714 0.8013 0.8076 0.814
|
168
|
+
0.9014 0.9077 0.914 ]
|
169
|
+
"""
|
170
|
+
outputs = run_stats(images, bboxes, True, [PixelStatsProcessor, VisualStatsProcessor])
|
171
|
+
return ChannelStatsOutput(*outputs) # type: ignore
|
@@ -11,27 +11,11 @@ from dataeval._internal.metrics.utils import get_bitdepth
|
|
11
11
|
from dataeval._internal.output import set_metadata
|
12
12
|
|
13
13
|
|
14
|
-
class DimensionStatsProcessor(StatsProcessor):
|
15
|
-
image_function_map = {
|
16
|
-
"left": lambda x: x.box[0],
|
17
|
-
"top": lambda x: x.box[1],
|
18
|
-
"width": lambda x: x.shape[-1],
|
19
|
-
"height": lambda x: x.shape[-2],
|
20
|
-
"channels": lambda x: x.shape[-3],
|
21
|
-
"size": lambda x: np.prod(x.shape[-2:]),
|
22
|
-
"aspect_ratio": lambda x: x.shape[-1] / x.shape[-2],
|
23
|
-
"depth": lambda x: get_bitdepth(x.image).depth,
|
24
|
-
"center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
|
25
|
-
"distance": lambda x: np.sqrt(
|
26
|
-
np.square(((x.box[0] + x.box[2]) / 2) - (x.width / 2))
|
27
|
-
+ np.square(((x.box[1] + x.box[3]) / 2) - (x.height / 2))
|
28
|
-
),
|
29
|
-
}
|
30
|
-
|
31
|
-
|
32
14
|
@dataclass(frozen=True)
|
33
15
|
class DimensionStatsOutput(BaseStatsOutput):
|
34
16
|
"""
|
17
|
+
Output class for :func:`dimensionstats` stats metric
|
18
|
+
|
35
19
|
Attributes
|
36
20
|
----------
|
37
21
|
left : NDArray[np.int32]
|
@@ -68,6 +52,25 @@ class DimensionStatsOutput(BaseStatsOutput):
|
|
68
52
|
distance: NDArray[np.float16]
|
69
53
|
|
70
54
|
|
55
|
+
class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
|
56
|
+
output_class = DimensionStatsOutput
|
57
|
+
image_function_map = {
|
58
|
+
"left": lambda x: x.box[0],
|
59
|
+
"top": lambda x: x.box[1],
|
60
|
+
"width": lambda x: x.shape[-1],
|
61
|
+
"height": lambda x: x.shape[-2],
|
62
|
+
"channels": lambda x: x.shape[-3],
|
63
|
+
"size": lambda x: np.prod(x.shape[-2:]),
|
64
|
+
"aspect_ratio": lambda x: x.shape[-1] / x.shape[-2],
|
65
|
+
"depth": lambda x: get_bitdepth(x.image).depth,
|
66
|
+
"center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
|
67
|
+
"distance": lambda x: np.sqrt(
|
68
|
+
np.square(((x.box[0] + x.box[2]) / 2) - (x.width / 2))
|
69
|
+
+ np.square(((x.box[1] + x.box[3]) / 2) - (x.height / 2))
|
70
|
+
),
|
71
|
+
}
|
72
|
+
|
73
|
+
|
71
74
|
@set_metadata("dataeval.metrics")
|
72
75
|
def dimensionstats(
|
73
76
|
images: Iterable[ArrayLike],
|
@@ -107,5 +110,4 @@ def dimensionstats(
|
|
107
110
|
>>> print(results.channels)
|
108
111
|
[1 1 1 1 1 1 3 1 1 3]
|
109
112
|
"""
|
110
|
-
|
111
|
-
return DimensionStatsOutput(**output)
|
113
|
+
return run_stats(images, bboxes, False, [DimensionStatsProcessor])[0]
|
@@ -10,16 +10,11 @@ from dataeval._internal.metrics.utils import pchash, xxhash
|
|
10
10
|
from dataeval._internal.output import set_metadata
|
11
11
|
|
12
12
|
|
13
|
-
class HashStatsProcessor(StatsProcessor):
|
14
|
-
image_function_map = {
|
15
|
-
"xxhash": lambda x: xxhash(x.image),
|
16
|
-
"pchash": lambda x: pchash(x.image),
|
17
|
-
}
|
18
|
-
|
19
|
-
|
20
13
|
@dataclass(frozen=True)
|
21
14
|
class HashStatsOutput(BaseStatsOutput):
|
22
15
|
"""
|
16
|
+
Output class for :func:`hashstats` stats metric
|
17
|
+
|
23
18
|
Attributes
|
24
19
|
----------
|
25
20
|
xxhash : List[str]
|
@@ -32,6 +27,14 @@ class HashStatsOutput(BaseStatsOutput):
|
|
32
27
|
pchash: list[str]
|
33
28
|
|
34
29
|
|
30
|
+
class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
|
31
|
+
output_class = HashStatsOutput
|
32
|
+
image_function_map = {
|
33
|
+
"xxhash": lambda x: xxhash(x.image),
|
34
|
+
"pchash": lambda x: pchash(x.image),
|
35
|
+
}
|
36
|
+
|
37
|
+
|
35
38
|
@set_metadata("dataeval.metrics")
|
36
39
|
def hashstats(
|
37
40
|
images: Iterable[ArrayLike],
|
@@ -69,5 +72,4 @@ def hashstats(
|
|
69
72
|
>>> print(results.pchash)
|
70
73
|
['8f25506af46a7c6a', '8000808000008080', '8e71f18e0ef18e0e', 'a956d6a956d6a928']
|
71
74
|
"""
|
72
|
-
|
73
|
-
return HashStatsOutput(**output)
|
75
|
+
return run_stats(images, bboxes, False, [HashStatsProcessor])[0]
|
@@ -13,7 +13,7 @@ from dataeval._internal.output import OutputMetadata, set_metadata
|
|
13
13
|
@dataclass(frozen=True)
|
14
14
|
class LabelStatsOutput(OutputMetadata):
|
15
15
|
"""
|
16
|
-
Output class for
|
16
|
+
Output class for :func:`labelstats` stats metric
|
17
17
|
|
18
18
|
Attributes
|
19
19
|
----------
|
@@ -11,31 +11,11 @@ from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcesso
|
|
11
11
|
from dataeval._internal.output import set_metadata
|
12
12
|
|
13
13
|
|
14
|
-
class PixelStatsProcessor(StatsProcessor):
|
15
|
-
cache_keys = ["histogram"]
|
16
|
-
image_function_map = {
|
17
|
-
"mean": lambda self: np.mean(self.scaled),
|
18
|
-
"std": lambda x: np.std(x.scaled),
|
19
|
-
"var": lambda x: np.var(x.scaled),
|
20
|
-
"skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
|
21
|
-
"kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
|
22
|
-
"histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
|
23
|
-
"entropy": lambda x: entropy(x.get("histogram")),
|
24
|
-
}
|
25
|
-
channel_function_map = {
|
26
|
-
"mean": lambda x: np.mean(x.scaled, axis=1),
|
27
|
-
"std": lambda x: np.std(x.scaled, axis=1),
|
28
|
-
"var": lambda x: np.var(x.scaled, axis=1),
|
29
|
-
"skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
|
30
|
-
"kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
|
31
|
-
"histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
|
32
|
-
"entropy": lambda x: entropy(x.get("histogram"), axis=1),
|
33
|
-
}
|
34
|
-
|
35
|
-
|
36
14
|
@dataclass(frozen=True)
|
37
15
|
class PixelStatsOutput(BaseStatsOutput):
|
38
16
|
"""
|
17
|
+
Output class for :func:`pixelstats` stats metric
|
18
|
+
|
39
19
|
Attributes
|
40
20
|
----------
|
41
21
|
mean : NDArray[np.float16]
|
@@ -63,6 +43,29 @@ class PixelStatsOutput(BaseStatsOutput):
|
|
63
43
|
entropy: NDArray[np.float16]
|
64
44
|
|
65
45
|
|
46
|
+
class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
|
47
|
+
output_class = PixelStatsOutput
|
48
|
+
cache_keys = ["histogram"]
|
49
|
+
image_function_map = {
|
50
|
+
"mean": lambda self: np.mean(self.scaled),
|
51
|
+
"std": lambda x: np.std(x.scaled),
|
52
|
+
"var": lambda x: np.var(x.scaled),
|
53
|
+
"skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
|
54
|
+
"kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
|
55
|
+
"histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
|
56
|
+
"entropy": lambda x: entropy(x.get("histogram")),
|
57
|
+
}
|
58
|
+
channel_function_map = {
|
59
|
+
"mean": lambda x: np.mean(x.scaled, axis=1),
|
60
|
+
"std": lambda x: np.std(x.scaled, axis=1),
|
61
|
+
"var": lambda x: np.var(x.scaled, axis=1),
|
62
|
+
"skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
|
63
|
+
"kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
|
64
|
+
"histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
|
65
|
+
"entropy": lambda x: entropy(x.get("histogram"), axis=1),
|
66
|
+
}
|
67
|
+
|
68
|
+
|
66
69
|
@set_metadata("dataeval.metrics")
|
67
70
|
def pixelstats(
|
68
71
|
images: Iterable[ArrayLike],
|
@@ -93,8 +96,8 @@ def pixelstats(
|
|
93
96
|
--------
|
94
97
|
dimensionstats, visualstats, Outliers
|
95
98
|
|
96
|
-
|
97
|
-
|
99
|
+
Note
|
100
|
+
----
|
98
101
|
- All metrics are scaled based on the perceived bit depth (which is derived from the largest pixel value)
|
99
102
|
to allow for better comparison between images stored in different formats and different resolutions.
|
100
103
|
|
@@ -113,5 +116,4 @@ def pixelstats(
|
|
113
116
|
0.812 0.9883 0.795 0.9243 0.9243 0.795 0.9907 0.8125 1.028 0.8223
|
114
117
|
1.046 0.8247 1.041 0.8203 1.012 0.812 0.9883 0.795 0.9243 0.9243]
|
115
118
|
"""
|
116
|
-
|
117
|
-
return PixelStatsOutput(**output)
|
119
|
+
return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]
|
@@ -13,41 +13,16 @@ from dataeval._internal.output import set_metadata
|
|
13
13
|
QUARTILES = (0, 25, 50, 75, 100)
|
14
14
|
|
15
15
|
|
16
|
-
class VisualStatsProcessor(StatsProcessor):
|
17
|
-
cache_keys = ["percentiles"]
|
18
|
-
image_function_map = {
|
19
|
-
"brightness": lambda x: x.get("percentiles")[-2],
|
20
|
-
"blurriness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
|
21
|
-
"contrast": lambda x: np.nan_to_num(
|
22
|
-
(np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
|
23
|
-
),
|
24
|
-
"darkness": lambda x: x.get("percentiles")[1],
|
25
|
-
"missing": lambda x: np.sum(np.isnan(x.image)) / np.prod(x.shape[-2:]),
|
26
|
-
"zeros": lambda x: np.count_nonzero(x.image == 0) / np.prod(x.shape[-2:]),
|
27
|
-
"percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
|
28
|
-
}
|
29
|
-
channel_function_map = {
|
30
|
-
"brightness": lambda x: x.get("percentiles")[:, -2],
|
31
|
-
"blurriness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
|
32
|
-
"contrast": lambda x: np.nan_to_num(
|
33
|
-
(np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
|
34
|
-
/ np.mean(x.get("percentiles"), axis=1)
|
35
|
-
),
|
36
|
-
"darkness": lambda x: x.get("percentiles")[:, 1],
|
37
|
-
"missing": lambda x: np.sum(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
|
38
|
-
"zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
|
39
|
-
"percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
|
40
|
-
}
|
41
|
-
|
42
|
-
|
43
16
|
@dataclass(frozen=True)
|
44
17
|
class VisualStatsOutput(BaseStatsOutput):
|
45
18
|
"""
|
19
|
+
Output class for :func:`visualstats` stats metric
|
20
|
+
|
46
21
|
Attributes
|
47
22
|
----------
|
48
23
|
brightness : NDArray[np.float16]
|
49
24
|
Brightness of the images
|
50
|
-
|
25
|
+
sharpness : NDArray[np.float16]
|
51
26
|
Blurriness of the images
|
52
27
|
contrast : NDArray[np.float16]
|
53
28
|
Image contrast ratio
|
@@ -62,7 +37,7 @@ class VisualStatsOutput(BaseStatsOutput):
|
|
62
37
|
"""
|
63
38
|
|
64
39
|
brightness: NDArray[np.float16]
|
65
|
-
|
40
|
+
sharpness: NDArray[np.float16]
|
66
41
|
contrast: NDArray[np.float16]
|
67
42
|
darkness: NDArray[np.float16]
|
68
43
|
missing: NDArray[np.float16]
|
@@ -70,6 +45,34 @@ class VisualStatsOutput(BaseStatsOutput):
|
|
70
45
|
percentiles: NDArray[np.float16]
|
71
46
|
|
72
47
|
|
48
|
+
class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
|
49
|
+
output_class = VisualStatsOutput
|
50
|
+
cache_keys = ["percentiles"]
|
51
|
+
image_function_map = {
|
52
|
+
"brightness": lambda x: x.get("percentiles")[-2],
|
53
|
+
"sharpness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
|
54
|
+
"contrast": lambda x: np.nan_to_num(
|
55
|
+
(np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
|
56
|
+
),
|
57
|
+
"darkness": lambda x: x.get("percentiles")[1],
|
58
|
+
"missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
|
59
|
+
"zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
|
60
|
+
"percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
|
61
|
+
}
|
62
|
+
channel_function_map = {
|
63
|
+
"brightness": lambda x: x.get("percentiles")[:, -2],
|
64
|
+
"sharpness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
|
65
|
+
"contrast": lambda x: np.nan_to_num(
|
66
|
+
(np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
|
67
|
+
/ np.mean(x.get("percentiles"), axis=1)
|
68
|
+
),
|
69
|
+
"darkness": lambda x: x.get("percentiles")[:, 1],
|
70
|
+
"missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
|
71
|
+
"zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
|
72
|
+
"percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
|
73
|
+
}
|
74
|
+
|
75
|
+
|
73
76
|
@set_metadata("dataeval.metrics")
|
74
77
|
def visualstats(
|
75
78
|
images: Iterable[ArrayLike],
|
@@ -79,7 +82,7 @@ def visualstats(
|
|
79
82
|
"""
|
80
83
|
Calculates visual statistics for each image
|
81
84
|
|
82
|
-
This function computes various visual metrics (e.g., brightness, darkness, contrast,
|
85
|
+
This function computes various visual metrics (e.g., brightness, darkness, contrast, sharpness)
|
83
86
|
on the images as a whole.
|
84
87
|
|
85
88
|
Parameters
|
@@ -93,15 +96,15 @@ def visualstats(
|
|
93
96
|
-------
|
94
97
|
VisualStatsOutput
|
95
98
|
A dictionary-like object containing the computed visual statistics for each image. The keys correspond
|
96
|
-
to the names of the statistics (e.g., 'brightness', '
|
99
|
+
to the names of the statistics (e.g., 'brightness', 'sharpness'), and the values are lists of results for
|
97
100
|
each image or numpy arrays when the results are multi-dimensional.
|
98
101
|
|
99
102
|
See Also
|
100
103
|
--------
|
101
104
|
dimensionstats, pixelstats, Outliers
|
102
105
|
|
103
|
-
|
104
|
-
|
106
|
+
Note
|
107
|
+
----
|
105
108
|
- `zeros` and `missing` are presented as a percentage of total pixel counts
|
106
109
|
|
107
110
|
Examples
|
@@ -118,5 +121,4 @@ def visualstats(
|
|
118
121
|
1.258 1.257 1.257 1.256 1.256 1.255 1.255 1.255 1.255 1.254 1.254 1.254
|
119
122
|
1.254 1.254 1.254 1.253 1.253 1.253]
|
120
123
|
"""
|
121
|
-
|
122
|
-
return VisualStatsOutput(**output)
|
124
|
+
return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]
|
@@ -4,6 +4,8 @@ FR Test Statistic based estimate for the upperbound
|
|
4
4
|
average precision using empirical mean precision
|
5
5
|
"""
|
6
6
|
|
7
|
+
from __future__ import annotations
|
8
|
+
|
7
9
|
from dataclasses import dataclass
|
8
10
|
|
9
11
|
from numpy.typing import ArrayLike
|
@@ -16,6 +18,8 @@ from dataeval._internal.output import OutputMetadata, set_metadata
|
|
16
18
|
@dataclass(frozen=True)
|
17
19
|
class UAPOutput(OutputMetadata):
|
18
20
|
"""
|
21
|
+
Output class for :func:`uap` estimator metric
|
22
|
+
|
19
23
|
Attributes
|
20
24
|
----------
|
21
25
|
uap : float
|
@@ -48,8 +52,8 @@ def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
|
|
48
52
|
ValueError
|
49
53
|
If unique classes M < 2
|
50
54
|
|
51
|
-
|
52
|
-
|
55
|
+
Note
|
56
|
+
----
|
53
57
|
This function calculates the empirical mean precision using the
|
54
58
|
``average_precision_score`` from scikit-learn, weighted by the class distribution.
|
55
59
|
|
@@ -91,8 +91,8 @@ def entropy(
|
|
91
91
|
subset_mask: NDArray[np.bool_] | None
|
92
92
|
Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
|
93
93
|
|
94
|
-
|
95
|
-
|
94
|
+
Note
|
95
|
+
----
|
96
96
|
For continuous variables, histogram bins are chosen automatically. See
|
97
97
|
numpy.histogram for details.
|
98
98
|
|
@@ -61,9 +61,9 @@ class AETrainer:
|
|
61
61
|
List[float]
|
62
62
|
A list of average loss values for each epoch.
|
63
63
|
|
64
|
-
|
64
|
+
Note
|
65
65
|
----
|
66
|
-
To replace this function with a custom function, do
|
66
|
+
To replace this function with a custom function, do:
|
67
67
|
AETrainer.train = custom_function
|
68
68
|
"""
|
69
69
|
# Setup training
|
@@ -120,7 +120,7 @@ class AETrainer:
|
|
120
120
|
|
121
121
|
Note
|
122
122
|
----
|
123
|
-
To replace this function with a custom function, do
|
123
|
+
To replace this function with a custom function, do:
|
124
124
|
AETrainer.eval = custom_function
|
125
125
|
"""
|
126
126
|
self.model.eval()
|
@@ -155,8 +155,8 @@ class AETrainer:
|
|
155
155
|
torch.Tensor
|
156
156
|
Data encoded by the model
|
157
157
|
|
158
|
-
|
159
|
-
|
158
|
+
Note
|
159
|
+
----
|
160
160
|
This function should be run after the model has been trained and evaluated.
|
161
161
|
"""
|
162
162
|
self.model.eval()
|
@@ -272,8 +272,6 @@ class PixelCNN(distribution.Distribution):
|
|
272
272
|
The minimum value of the input data.
|
273
273
|
dtype : tensorflow dtype, default tf.float32
|
274
274
|
Data type of the `Distribution`.
|
275
|
-
name : str, default "PixelCNN"
|
276
|
-
The name of the `Distribution`.
|
277
275
|
"""
|
278
276
|
|
279
277
|
def __init__(
|
@@ -293,10 +291,9 @@ class PixelCNN(distribution.Distribution):
|
|
293
291
|
high: int = 255,
|
294
292
|
low: int = 0,
|
295
293
|
dtype=tf.float32,
|
296
|
-
name: str = "PixelCNN",
|
297
294
|
) -> None:
|
298
295
|
parameters = dict(locals())
|
299
|
-
with tf.name_scope(
|
296
|
+
with tf.name_scope("PixelCNN") as name:
|
300
297
|
super().__init__(
|
301
298
|
dtype=dtype,
|
302
299
|
reparameterization_type=reparameterization.NOT_REPARAMETERIZED,
|
dataeval/_internal/utils.py
CHANGED
@@ -8,7 +8,7 @@ from torch.utils.data import Dataset
|
|
8
8
|
|
9
9
|
def read_dataset(dataset: Dataset) -> list[list[Any]]:
|
10
10
|
"""
|
11
|
-
Extract information from a dataset at each index into
|
11
|
+
Extract information from a dataset at each index into individual lists of each information position
|
12
12
|
|
13
13
|
Parameters
|
14
14
|
----------
|
@@ -31,36 +31,31 @@ def read_dataset(dataset: Dataset) -> list[list[Any]]:
|
|
31
31
|
Examples
|
32
32
|
--------
|
33
33
|
>>> import numpy as np
|
34
|
-
|
35
|
-
>>> data = np.ones((10, 3, 3))
|
34
|
+
>>> data = np.ones((10, 1, 3, 3))
|
36
35
|
>>> labels = np.ones((10,))
|
37
36
|
>>> class ICDataset:
|
38
37
|
... def __init__(self, data, labels):
|
39
38
|
... self.data = data
|
40
39
|
... self.labels = labels
|
41
|
-
|
40
|
+
...
|
42
41
|
... def __getitem__(self, idx):
|
43
42
|
... return self.data[idx], self.labels[idx]
|
44
43
|
|
45
44
|
>>> ds = ICDataset(data, labels)
|
46
45
|
|
47
46
|
>>> result = read_dataset(ds)
|
48
|
-
>>>
|
49
|
-
|
50
|
-
>>>
|
51
|
-
|
52
|
-
>>>
|
53
|
-
|
47
|
+
>>> len(result) # images and labels
|
48
|
+
2
|
49
|
+
>>> np.asarray(result[0]).shape # images
|
50
|
+
(10, 1, 3, 3)
|
51
|
+
>>> np.asarray(result[1]).shape # labels
|
52
|
+
(10,)
|
54
53
|
"""
|
55
54
|
|
56
|
-
ddict: dict[int, list] = defaultdict(list)
|
55
|
+
ddict: dict[int, list[Any]] = defaultdict(list[Any])
|
57
56
|
|
58
57
|
for data in dataset:
|
59
|
-
|
60
|
-
if not isinstance(data, tuple):
|
61
|
-
data = (data,)
|
62
|
-
|
63
|
-
for i, d in enumerate(data):
|
58
|
+
for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
|
64
59
|
ddict[i].append(d)
|
65
60
|
|
66
61
|
return list(ddict.values())
|