dataeval 0.70.1__py3-none-any.whl → 0.71.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +2 -1
- dataeval/_internal/detectors/ood/ae.py +1 -1
- dataeval/_internal/detectors/ood/aegmm.py +1 -1
- dataeval/_internal/detectors/ood/base.py +1 -1
- dataeval/_internal/detectors/ood/llr.py +3 -3
- dataeval/_internal/detectors/ood/vae.py +1 -1
- dataeval/_internal/detectors/ood/vaegmm.py +1 -1
- dataeval/_internal/detectors/outliers.py +3 -8
- dataeval/_internal/metrics/stats/base.py +103 -44
- dataeval/_internal/metrics/stats/datasetstats.py +94 -22
- dataeval/_internal/metrics/stats/dimensionstats.py +20 -20
- dataeval/_internal/metrics/stats/hashstats.py +9 -9
- dataeval/_internal/metrics/stats/pixelstats.py +24 -24
- dataeval/_internal/metrics/stats/visualstats.py +38 -37
- dataeval/_internal/models/tensorflow/autoencoder.py +2 -2
- dataeval/_internal/models/tensorflow/losses.py +1 -1
- dataeval/_internal/models/tensorflow/pixelcnn.py +1 -1
- dataeval/_internal/models/tensorflow/trainer.py +1 -1
- dataeval/_internal/models/tensorflow/utils.py +5 -5
- dataeval/metrics/stats/__init__.py +8 -1
- dataeval/utils/tensorflow/__init__.py +7 -1
- dataeval/utils/tensorflow/loss/__init__.py +5 -1
- dataeval/utils/tensorflow/models/__init__.py +5 -1
- dataeval/utils/torch/__init__.py +11 -2
- dataeval/utils/torch/datasets/__init__.py +7 -2
- dataeval/utils/torch/models/__init__.py +5 -1
- dataeval/utils/torch/trainer/__init__.py +5 -1
- {dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/METADATA +5 -3
- {dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/RECORD +31 -31
- {dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/WHEEL +0 -0
dataeval/__init__.py
CHANGED
@@ -1,8 +1,9 @@
-__version__ = "0.70.1"
+__version__ = "0.71.1"
 
 from importlib.util import find_spec
 
 _IS_TORCH_AVAILABLE = find_spec("torch") is not None
+_IS_TORCHVISION_AVAILABLE = find_spec("torchvision") is not None
 _IS_TENSORFLOW_AVAILABLE = find_spec("tensorflow") is not None and find_spec("tensorflow_probability") is not None
 
 del find_spec
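The new `_IS_TORCHVISION_AVAILABLE` flag joins the existing probes that gate the optional subpackages further down in this diff. A minimal, self-contained sketch of the gating pattern (the `__all__` entries are illustrative; `find_spec` only checks that a module is importable without importing it):

    # Sketch of the optional-dependency gating used across this release.
    from importlib.util import find_spec

    _IS_TORCH_AVAILABLE = find_spec("torch") is not None
    _IS_TORCHVISION_AVAILABLE = find_spec("torchvision") is not None

    __all__: list[str] = []
    if _IS_TORCH_AVAILABLE:
        __all__ += ["models", "trainer"]  # submodules exported only when torch exists
    if _IS_TORCHVISION_AVAILABLE:
        __all__ += ["datasets"]           # torchvision additionally gates the datasets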
dataeval/_internal/detectors/ood/ae.py
CHANGED
@@ -10,9 +10,9 @@ from __future__ import annotations
 
 from typing import Callable
 
-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike
 
 from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
dataeval/_internal/detectors/ood/aegmm.py
CHANGED
@@ -10,8 +10,8 @@ from __future__ import annotations
 
 from typing import Callable
 
-import keras
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike
 
 from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
dataeval/_internal/detectors/ood/base.py
CHANGED
@@ -12,9 +12,9 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Callable, Literal, cast
 
-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike, NDArray
 
 from dataeval._internal.interop import to_numpy
dataeval/_internal/detectors/ood/llr.py
CHANGED
@@ -11,12 +11,12 @@ from __future__ import annotations
 from functools import partial
 from typing import Callable
 
-import keras
 import numpy as np
 import tensorflow as tf
-
-from keras.models import Model
+import tf_keras as keras
 from numpy.typing import ArrayLike, NDArray
+from tf_keras.layers import Input
+from tf_keras.models import Model
 
 from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
dataeval/_internal/detectors/ood/vae.py
CHANGED
@@ -10,9 +10,9 @@ from __future__ import annotations
 
 from typing import Callable
 
-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike
 
 from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
dataeval/_internal/detectors/ood/vaegmm.py
CHANGED
@@ -10,9 +10,9 @@ from __future__ import annotations
 
 from typing import Callable
 
-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike
 
 from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
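Across the OOD detectors the recurring change is `import keras` → `import tf_keras as keras`: tf-keras packages the legacy Keras 2 API, which TensorFlow 2.16+ no longer bundles (its default `keras` is Keras 3). dataeval itself imports `tf_keras` unconditionally; the fallback below is only a hedged sketch for downstream code that must run against both generations:

    # Sketch: resolving a Keras-2-compatible namespace. Requires that either
    # tf-keras or a pre-2.16 Keras 2 installation is present.
    try:
        import tf_keras as keras  # Keras 2 compatibility package for TF >= 2.16
    except ImportError:
        import keras              # older TensorFlow still ships the Keras 2 API

    model = keras.Sequential([keras.layers.Dense(8, activation="relu")])
    print(type(model).__module__)  # shows which package actually provided the API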
dataeval/_internal/detectors/outliers.py
CHANGED
@@ -147,7 +147,7 @@ class Outliers:
         mask = _get_outlier_mask(values.astype(np.float64), self.outlier_method, self.outlier_threshold)
         indices = np.flatnonzero(mask)
         for i, value in zip(indices, values[mask]):
-            flagged_images.setdefault(i, {}).update({stat: value})
+            flagged_images.setdefault(int(i), {}).update({stat: value})
 
         return dict(sorted(flagged_images.items()))
 
@@ -261,11 +261,6 @@ class Outliers:
         >>> results.issues[10]
         {'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128, 'contrast': 1.25, 'zeros': 0.05493}
         """
-        self.stats = datasetstats(
-
-            use_dimension=self.use_dimension,
-            use_pixel=self.use_pixel,
-            use_visual=self.use_visual,
-        )
-        outliers = self._get_outliers({k: v for o in self.stats.outputs() for k, v in o.dict().items()})
+        self.stats = datasetstats(images=data)
+        outliers = self._get_outliers(self.stats.dict())
         return OutliersOutput(outliers)
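The `evaluate` path now always computes all three stat groups and feeds `DatasetStatsOutput.dict()` straight into outlier detection. A hedged sketch of what that flattening amounts to (illustrative data; `merge_outputs` is a stand-in, not dataeval API):

    # Each sub-output contributes its statistics to one flat {stat: values} map.
    from typing import Any

    def merge_outputs(*outputs: dict[str, Any]) -> dict[str, Any]:
        """Union several per-metric dictionaries; later keys win on collision."""
        merged: dict[str, Any] = {}
        for o in outputs:
            merged.update(o)
        return merged

    dimension = {"width": [32, 64], "height": [32, 64]}
    pixel = {"mean": [0.5, 0.7], "std": [0.1, 0.2]}
    print(merge_outputs(dimension, pixel))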
dataeval/_internal/metrics/stats/base.py
CHANGED
@@ -3,9 +3,13 @@ from __future__ import annotations
 import re
 import warnings
 from dataclasses import dataclass
-from
+from functools import partial
+from itertools import repeat
+from multiprocessing import Pool
+from typing import Any, Callable, Generic, Iterable, NamedTuple, Optional, TypeVar, Union
 
 import numpy as np
+import tqdm
 from numpy.typing import ArrayLike, NDArray
 
 from dataeval._internal.interop import to_numpy_iter
@@ -91,7 +95,11 @@ class BaseStatsOutput(OutputMetadata):
         return len(self.source_index)
 
 
-class StatsProcessor:
+TStatsOutput = TypeVar("TStatsOutput", bound=BaseStatsOutput, covariant=True)
+
+
+class StatsProcessor(Generic[TStatsOutput]):
+    output_class: type[TStatsOutput]
     cache_keys: list[str] = []
     image_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
     channel_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
@@ -119,6 +127,9 @@ class StatsProcessor:
         else:
             return self.fn_map[fn_key](self)
 
+    def process(self) -> dict:
+        return {k: self.fn_map[k](self) for k in self.fn_map}
+
     @property
     def image(self) -> NDArray:
         if self._image is None:
@@ -143,14 +154,66 @@ class StatsProcessor:
             self._scaled = self._scaled.reshape(self.image.shape[0], -1)
         return self._scaled
 
+    @classmethod
+    def convert_output(
+        cls, source: dict[str, Any], source_index: list[SourceIndex], box_count: list[int]
+    ) -> TStatsOutput:
+        output = {}
+        for key in source:
+            if key not in cls.output_class.__annotations__:
+                continue
+            stat_type: str = cls.output_class.__annotations__[key]
+            dtype_match = re.match(DTYPE_REGEX, stat_type)
+            if dtype_match is not None:
+                output[key] = np.asarray(source[key], dtype=np.dtype(dtype_match.group(1)))
+            else:
+                output[key] = source[key]
+        return cls.output_class(**output, source_index=source_index, box_count=np.asarray(box_count, dtype=np.uint16))
+
+
+class StatsProcessorOutput(NamedTuple):
+    results: list[dict[str, Any]]
+    source_indices: list[SourceIndex]
+    box_counts: list[int]
+    warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]]
+
+
+def process_stats(
+    i: int,
+    image_boxes: tuple[NDArray, NDArray | None],
+    per_channel: bool,
+    stats_processor_cls: Iterable[type[StatsProcessor]],
+) -> StatsProcessorOutput:
+    image, boxes = image_boxes
+    results_list: list[dict[str, Any]] = []
+    source_indices: list[SourceIndex] = []
+    box_counts: list[int] = []
+    warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]] = []
+    nboxes = [None] if boxes is None else normalize_box_shape(boxes)
+    for i_b, box in enumerate(nboxes):
+        i_b = None if box is None else i_b
+        processor_list = [p(image, box, per_channel) for p in stats_processor_cls]
+        if any(not p.is_valid_slice for p in processor_list) and i_b is not None and box is not None:
+            warnings_list.append((i, i_b, box, image.shape))
+        results_list.append({k: v for p in processor_list for k, v in p.process().items()})
+        if per_channel:
+            source_indices.extend([SourceIndex(i, i_b, c) for c in range(image_boxes[0].shape[-3])])
+        else:
+            source_indices.append(SourceIndex(i, i_b, None))
+    box_counts.append(0 if boxes is None else len(boxes))
+    return StatsProcessorOutput(results_list, source_indices, box_counts, warnings_list)
+
+
+def process_stats_unpack(args, per_channel: bool, stats_processor_cls: Iterable[type[StatsProcessor]]):
+    return process_stats(*args, per_channel=per_channel, stats_processor_cls=stats_processor_cls)
+
 
 def run_stats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None,
     per_channel: bool,
-    stats_processor_cls: type,
-
-) -> dict:
+    stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
+) -> list[TStatsOutput]:
     """
     Compute specified statistics on a set of images.
 
@@ -169,15 +232,13 @@ def run_stats(
         iterable should match the length of the input images.
     per_channel : bool
         A flag which determines if the states should be evaluated on a per-channel basis or not.
-
-
+    stats_processor_cls : Iterable[type[StatsProcessor]]
+        An iterable of stats processor classes that calculate stats and return output classes.
 
     Returns
     -------
-
-    A
-    The dictionary keys correspond to the names of the statistics, and the values are NumPy arrays
-    with the results of the computations.
+    list[TStatsOutput]
+        A list of output classes corresponding to the input processor types.
 
     Note
     ----
@@ -189,43 +250,41 @@ def run_stats(
    be reused to avoid redundant computation.
    """
    results_list: list[dict[str, NDArray]] = []
-    output_list = list(output_cls.__annotations__)
    source_index = []
    box_count = []
-    bbox_iter = (None
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    bbox_iter = repeat(None) if bboxes is None else to_numpy_iter(bboxes)
+
+    warning_list = []
+    total_for_status = getattr(images, "__len__")() if hasattr(images, "__len__") else None
+    stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
+
+    # TODO: Introduce global controls for CPU job parallelism and GPU configurations
+    with Pool(16) as p:
+        for r in tqdm.tqdm(
+            p.imap(
+                partial(process_stats_unpack, per_channel=per_channel, stats_processor_cls=stats_processor_cls),
+                enumerate(zip(to_numpy_iter(images), bbox_iter)),
+            ),
+            total=total_for_status,
+        ):
+            results_list.extend(r.results)
+            source_index.extend(r.source_indices)
+            box_count.extend(r.box_counts)
+            warning_list.extend(r.warnings_list)
+        p.close()
+        p.join()
+
+    # warnings are not emitted while in multiprocessing pools so we emit after gathering all warnings
+    for w in warning_list:
+        warnings.warn(f"Bounding box [{w[0]}][{w[1]}]: {w[2]} is out of bounds of {w[3]}.", UserWarning)
 
    output = {}
-
-    for
-
+    for results in results_list:
+        for stat, result in results.items():
+            if per_channel:
                output.setdefault(stat, []).extend(result.tolist())
-
-    for results in results_list:
-        for stat, result in results.items():
+            else:
                output.setdefault(stat, []).append(result.tolist() if isinstance(result, np.ndarray) else result)
 
-    for
-
-
-        dtype_match = re.match(DTYPE_REGEX, stat_type)
-        if dtype_match is not None:
-            output[stat] = np.asarray(output[stat], dtype=np.dtype(dtype_match.group(1)))
-
-    output[SOURCE_INDEX] = source_index
-    output[BOX_COUNT] = np.asarray(box_count, dtype=np.uint16)
-
-    return output
+    outputs = [s.convert_output(output, source_index, box_count) for s in stats_processor_cls]
+    return outputs
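`run_stats` now fans each image out to a fixed pool of worker processes and stitches the per-image result dictionaries back together, deferring warnings until the pool is drained (warnings raised inside workers would otherwise be lost). A self-contained sketch of the same `Pool`/`imap`/`partial`/`tqdm` pattern with a stand-in worker, not dataeval's actual processors:

    # Fixed-size process pool, imap for streaming ordered results, partial to
    # bind per-call configuration, tqdm for progress. Names are illustrative.
    from functools import partial
    from multiprocessing import Pool

    import tqdm

    def compute(item: tuple[int, float], scale: float) -> float:
        index, value = item
        return value * scale  # stand-in for a per-image stat computation

    if __name__ == "__main__":  # required guard for spawn-based multiprocessing
        work = list(enumerate([0.1, 0.2, 0.3, 0.4]))
        results = []
        with Pool(4) as p:
            for r in tqdm.tqdm(p.imap(partial(compute, scale=10.0), work), total=len(work)):
                results.append(r)
        print(results)  # -> [1.0, 2.0, 3.0, 4.0]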
dataeval/_internal/metrics/stats/datasetstats.py
CHANGED
@@ -1,15 +1,18 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Iterable
+from typing import Any, Iterable
 
 from numpy.typing import ArrayLike
 
-from dataeval._internal.metrics.stats.base import BaseStatsOutput
-from dataeval._internal.metrics.stats.dimensionstats import
+from dataeval._internal.metrics.stats.base import BaseStatsOutput, run_stats
+from dataeval._internal.metrics.stats.dimensionstats import (
+    DimensionStatsOutput,
+    DimensionStatsProcessor,
+)
 from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
-from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput,
-from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput,
+from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
+from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
 from dataeval._internal.output import OutputMetadata, set_metadata
 
 
@@ -25,19 +28,53 @@ class DatasetStatsOutput(OutputMetadata):
 
     Attributes
     ----------
-    dimensionstats : DimensionStatsOutput
-    pixelstats: PixelStatsOutput
-    visualstats: VisualStatsOutput
-    labelstats: LabelStatsOutput or None
+    dimensionstats : DimensionStatsOutput
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
+    labelstats: LabelStatsOutput or None
     """
 
-    dimensionstats: DimensionStatsOutput
-    pixelstats: PixelStatsOutput
-    visualstats: VisualStatsOutput
+    dimensionstats: DimensionStatsOutput
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
     labelstats: LabelStatsOutput | None = None
 
+    def outputs(self) -> list[OutputMetadata]:
+        return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
+
+    def dict(self) -> dict[str, Any]:
+        return {k: v for o in self.outputs() for k, v in o.dict().items()}
+
+    def __post_init__(self):
+        lengths = [len(s) for s in self.outputs() if isinstance(s, BaseStatsOutput)]
+        if not all(length == lengths[0] for length in lengths):
+            raise ValueError("All StatsOutput classes must contain the same number of image sources.")
+
+
+@dataclass(frozen=True)
+class ChannelStatsOutput(OutputMetadata):
+    """
+    Output class for :func:`channelstats` stats metric
+
+    This class represents the outputs of various per-channel stats functions against
+    a single dataset, such that each index across all stat outputs are representative
+    of the same source image. Modifying or mixing outputs will result in inaccurate
+    outlier calculations if not created correctly.
+
+    Attributes
+    ----------
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
+    """
+
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
+
     def outputs(self) -> list[BaseStatsOutput]:
-        return [
+        return [self.pixelstats, self.visualstats]
+
+    def dict(self) -> dict[str, Any]:
+        return {**self.pixelstats.dict(), **self.visualstats.dict()}
 
     def __post_init__(self):
         lengths = [len(s) for s in self.outputs()]
@@ -50,9 +87,6 @@ def datasetstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
     labels: Iterable[ArrayLike] | None = None,
-    use_dimension: bool = True,
-    use_pixel: bool = True,
-    use_visual: bool = True,
 ) -> DatasetStatsOutput:
     """
     Calculates various statistics for each image
@@ -91,9 +125,47 @@ def datasetstats(
     [1.744 1.946 0.1164 0.0635 0.0633 0.06274 0.0429 0.0317 0.0317
      0.02576 0.02081 0.02171 0.01915 0.01767 0.01799 0.01595 0.01433 0.01478]
     """
-
-
-
-
-
-
+    outputs = run_stats(images, bboxes, False, [DimensionStatsProcessor, PixelStatsProcessor, VisualStatsProcessor])
+    return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None)  # type: ignore
+
+
+@set_metadata("dataeval.metrics")
+def channelstats(
+    images: Iterable[ArrayLike],
+    bboxes: Iterable[ArrayLike] | None = None,
+) -> ChannelStatsOutput:
+    """
+    Calculates various per-channel statistics for each image
+
+    This function computes pixel and visual metrics on the images
+    or individual bounding boxes for each image.
+
+    Parameters
+    ----------
+    images : Iterable[ArrayLike]
+        Images to perform calculations on
+    bboxes : Iterable[ArrayLike] or None
+        Bounding boxes in `xyxy` format for each image to perform calculations on
+
+    Returns
+    -------
+    ChannelStatsOutput
+        Output class containing the per-channel outputs of various stats functions
+
+    See Also
+    --------
+    pixelstats, visualstats
+
+    Examples
+    --------
+    Calculating the per-channel pixel and visual stats for a dataset
+
+    >>> stats = channelstats(images)
+    >>> print(stats.visualstats.darkness)
+    [0.07495 0.1748  0.275   0.1047  0.11096 0.1172  0.2047  0.2109  0.2172
+     0.3047  0.311   0.3171  0.4048  0.411   0.4172  0.505   0.5107  0.517
+     0.6045  0.611   0.617   0.7046  0.711   0.7173  0.8047  0.811   0.8174
+     0.905   0.911   0.917  ]
+    """
+    outputs = run_stats(images, bboxes, True, [PixelStatsProcessor, VisualStatsProcessor])
+    return ChannelStatsOutput(*outputs)  # type: ignore
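`channelstats` joins `datasetstats` as a preset wrapper around `run_stats`. A hedged usage sketch, assuming dataeval 0.71.1 is installed and using random CHW arrays as stand-in images:

    import numpy as np
    from dataeval.metrics.stats import channelstats, datasetstats

    rng = np.random.default_rng(0)
    images = [rng.random((3, 32, 32)) for _ in range(8)]  # 8 RGB images, CHW layout

    dataset = datasetstats(images)   # dimension + pixel + visual stats per image
    channels = channelstats(images)  # pixel + visual stats per channel

    print(dataset.dimensionstats.channels)  # channel count per image
    print(channels.pixelstats.mean.shape)   # one mean per (image, channel) entry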
dataeval/_internal/metrics/stats/dimensionstats.py
CHANGED
@@ -11,24 +11,6 @@ from dataeval._internal.metrics.utils import get_bitdepth
 from dataeval._internal.output import set_metadata
 
 
-class DimensionStatsProcessor(StatsProcessor):
-    image_function_map = {
-        "left": lambda x: x.box[0],
-        "top": lambda x: x.box[1],
-        "width": lambda x: x.shape[-1],
-        "height": lambda x: x.shape[-2],
-        "channels": lambda x: x.shape[-3],
-        "size": lambda x: np.prod(x.shape[-2:]),
-        "aspect_ratio": lambda x: x.shape[-1] / x.shape[-2],
-        "depth": lambda x: get_bitdepth(x.image).depth,
-        "center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
-        "distance": lambda x: np.sqrt(
-            np.square(((x.box[0] + x.box[2]) / 2) - (x.width / 2))
-            + np.square(((x.box[1] + x.box[3]) / 2) - (x.height / 2))
-        ),
-    }
-
-
 @dataclass(frozen=True)
 class DimensionStatsOutput(BaseStatsOutput):
     """
@@ -70,6 +52,25 @@ class DimensionStatsOutput(BaseStatsOutput):
     distance: NDArray[np.float16]
 
 
+class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
+    output_class = DimensionStatsOutput
+    image_function_map = {
+        "left": lambda x: x.box[0],
+        "top": lambda x: x.box[1],
+        "width": lambda x: x.box[2] - x.box[0],
+        "height": lambda x: x.box[3] - x.box[1],
+        "channels": lambda x: x.shape[-3],
+        "size": lambda x: (x.box[2] - x.box[0]) * (x.box[3] - x.box[1]),
+        "aspect_ratio": lambda x: (x.box[2] - x.box[0]) / (x.box[3] - x.box[1]),
+        "depth": lambda x: get_bitdepth(x.image).depth,
+        "center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
+        "distance": lambda x: np.sqrt(
+            np.square(((x.box[0] + x.box[2]) / 2) - (x.shape[-1] / 2))
+            + np.square(((x.box[1] + x.box[3]) / 2) - (x.shape[-2] / 2))
+        ),
+    }
+
+
 @set_metadata("dataeval.metrics")
 def dimensionstats(
     images: Iterable[ArrayLike],
@@ -109,5 +110,4 @@ def dimensionstats(
     >>> print(results.channels)
     [1 1 1 1 1 1 3 1 1 3]
     """
-
-    return DimensionStatsOutput(**output)
+    return run_stats(images, bboxes, False, [DimensionStatsProcessor])[0]
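Note the semantic change in the relocated processor: `width`, `height`, `size`, and `aspect_ratio` are now computed from the bounding-box coordinates rather than the cropped array's shape, and `distance` measures from the image center via `x.shape` instead of the removed `x.width`/`x.height` helpers. A small worked check of the box arithmetic under that reading (pure NumPy, illustrative values):

    # For a box (x0, y0, x1, y1) the dimensions come from box coordinates,
    # not from the cropped array shape, even when the crop is clipped.
    import numpy as np

    box = np.array([10, 20, 50, 44])  # x0, y0, x1, y1
    width = box[2] - box[0]           # 40
    height = box[3] - box[1]          # 24
    size = width * height             # 960
    aspect_ratio = width / height     # ~1.667
    center = np.asarray([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2])  # [30., 32.]
    print(width, height, size, round(aspect_ratio, 3), center)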
dataeval/_internal/metrics/stats/hashstats.py
CHANGED
@@ -10,13 +10,6 @@ from dataeval._internal.metrics.utils import pchash, xxhash
 from dataeval._internal.output import set_metadata
 
 
-class HashStatsProcessor(StatsProcessor):
-    image_function_map = {
-        "xxhash": lambda x: xxhash(x.image),
-        "pchash": lambda x: pchash(x.image),
-    }
-
-
 @dataclass(frozen=True)
 class HashStatsOutput(BaseStatsOutput):
     """
@@ -34,6 +27,14 @@ class HashStatsOutput(BaseStatsOutput):
     pchash: list[str]
 
 
+class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
+    output_class = HashStatsOutput
+    image_function_map = {
+        "xxhash": lambda x: xxhash(x.image),
+        "pchash": lambda x: pchash(x.image),
+    }
+
+
 @set_metadata("dataeval.metrics")
 def hashstats(
     images: Iterable[ArrayLike],
@@ -71,5 +72,4 @@ def hashstats(
     >>> print(results.pchash)
     ['8f25506af46a7c6a', '8000808000008080', '8e71f18e0ef18e0e', 'a956d6a956d6a928']
     """
-
-    return HashStatsOutput(**output)
+    return run_stats(images, bboxes, False, [HashStatsProcessor])[0]
dataeval/_internal/metrics/stats/pixelstats.py
CHANGED
@@ -11,28 +11,6 @@ from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcesso
 from dataeval._internal.output import set_metadata
 
 
-class PixelStatsProcessor(StatsProcessor):
-    cache_keys = ["histogram"]
-    image_function_map = {
-        "mean": lambda self: np.mean(self.scaled),
-        "std": lambda x: np.std(x.scaled),
-        "var": lambda x: np.var(x.scaled),
-        "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
-        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
-        "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
-        "entropy": lambda x: entropy(x.get("histogram")),
-    }
-    channel_function_map = {
-        "mean": lambda x: np.mean(x.scaled, axis=1),
-        "std": lambda x: np.std(x.scaled, axis=1),
-        "var": lambda x: np.var(x.scaled, axis=1),
-        "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
-        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
-        "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
-        "entropy": lambda x: entropy(x.get("histogram"), axis=1),
-    }
-
-
 @dataclass(frozen=True)
 class PixelStatsOutput(BaseStatsOutput):
     """
@@ -65,6 +43,29 @@ class PixelStatsOutput(BaseStatsOutput):
     entropy: NDArray[np.float16]
 
 
+class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
+    output_class = PixelStatsOutput
+    cache_keys = ["histogram"]
+    image_function_map = {
+        "mean": lambda self: np.mean(self.scaled),
+        "std": lambda x: np.std(x.scaled),
+        "var": lambda x: np.var(x.scaled),
+        "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
+        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
+        "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
+        "entropy": lambda x: entropy(x.get("histogram")),
+    }
+    channel_function_map = {
+        "mean": lambda x: np.mean(x.scaled, axis=1),
+        "std": lambda x: np.std(x.scaled, axis=1),
+        "var": lambda x: np.var(x.scaled, axis=1),
+        "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
+        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
+        "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
+        "entropy": lambda x: entropy(x.get("histogram"), axis=1),
+    }
+
+
 @set_metadata("dataeval.metrics")
 def pixelstats(
     images: Iterable[ArrayLike],
@@ -115,5 +116,4 @@ def pixelstats(
     0.812 0.9883 0.795 0.9243 0.9243 0.795 0.9907 0.8125 1.028 0.8223
     1.046 0.8247 1.041 0.8203 1.012 0.812 0.9883 0.795 0.9243 0.9243]
     """
-
-    return PixelStatsOutput(**output)
+    return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]
dataeval/_internal/metrics/stats/visualstats.py
CHANGED
@@ -13,33 +13,6 @@ from dataeval._internal.output import set_metadata
 QUARTILES = (0, 25, 50, 75, 100)
 
 
-class VisualStatsProcessor(StatsProcessor):
-    cache_keys = ["percentiles"]
-    image_function_map = {
-        "brightness": lambda x: x.get("percentiles")[-2],
-        "blurriness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
-        "contrast": lambda x: np.nan_to_num(
-            (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
-        ),
-        "darkness": lambda x: x.get("percentiles")[1],
-        "missing": lambda x: np.sum(np.isnan(x.image)) / np.prod(x.shape[-2:]),
-        "zeros": lambda x: np.count_nonzero(x.image == 0) / np.prod(x.shape[-2:]),
-        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
-    }
-    channel_function_map = {
-        "brightness": lambda x: x.get("percentiles")[:, -2],
-        "blurriness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
-        "contrast": lambda x: np.nan_to_num(
-            (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
-            / np.mean(x.get("percentiles"), axis=1)
-        ),
-        "darkness": lambda x: x.get("percentiles")[:, 1],
-        "missing": lambda x: np.sum(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
-        "zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
-        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
-    }
-
-
 @dataclass(frozen=True)
 class VisualStatsOutput(BaseStatsOutput):
     """
@@ -49,14 +22,14 @@ class VisualStatsOutput(BaseStatsOutput):
     ----------
     brightness : NDArray[np.float16]
         Brightness of the images
-    blurriness : NDArray[np.float16]
-        Blurriness of the images
     contrast : NDArray[np.float16]
         Image contrast ratio
     darkness : NDArray[np.float16]
         Darkness of the images
     missing : NDArray[np.float16]
         Percentage of the images with missing pixels
+    sharpness : NDArray[np.float16]
+        Sharpness of the images
     zeros : NDArray[np.float16]
         Percentage of the images with zero value pixels
     percentiles : NDArray[np.float16]
@@ -64,14 +37,42 @@ class VisualStatsOutput(BaseStatsOutput):
     """
 
     brightness: NDArray[np.float16]
-    blurriness: NDArray[np.float16]
     contrast: NDArray[np.float16]
     darkness: NDArray[np.float16]
     missing: NDArray[np.float16]
+    sharpness: NDArray[np.float16]
     zeros: NDArray[np.float16]
     percentiles: NDArray[np.float16]
 
 
+class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
+    output_class = VisualStatsOutput
+    cache_keys = ["percentiles"]
+    image_function_map = {
+        "brightness": lambda x: x.get("percentiles")[1],
+        "contrast": lambda x: np.nan_to_num(
+            (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
+        ),
+        "darkness": lambda x: x.get("percentiles")[-2],
+        "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
+        "sharpness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
+        "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
+        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
+    }
+    channel_function_map = {
+        "brightness": lambda x: x.get("percentiles")[:, 1],
+        "contrast": lambda x: np.nan_to_num(
+            (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
+            / np.mean(x.get("percentiles"), axis=1)
+        ),
+        "darkness": lambda x: x.get("percentiles")[:, -2],
+        "missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
+        "sharpness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
+        "zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
+        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
+    }
+
+
 @set_metadata("dataeval.metrics")
 def visualstats(
     images: Iterable[ArrayLike],
@@ -81,7 +82,7 @@ def visualstats(
     """
     Calculates visual statistics for each image
 
-    This function computes various visual metrics (e.g., brightness, darkness, contrast,
+    This function computes various visual metrics (e.g., brightness, darkness, contrast, sharpness)
     on the images as a whole.
 
     Parameters
@@ -95,7 +96,7 @@ def visualstats(
     -------
     VisualStatsOutput
         A dictionary-like object containing the computed visual statistics for each image. The keys correspond
-        to the names of the statistics (e.g., 'brightness', '
+        to the names of the statistics (e.g., 'brightness', 'sharpness'), and the values are lists of results for
         each image or numpy arrays when the results are multi-dimensional.
 
     See Also
@@ -112,13 +113,13 @@ def visualstats(
 
     >>> results = visualstats(images)
     >>> print(results.brightness)
-    [0.
-     0.
-     0.
+    [0.02246 0.5557  0.06805 0.1014  0.1348  0.1681  0.2014  0.2347  0.268
+     0.3015  0.3347  0.3682  0.4014  0.4348  0.468   0.5015  0.5347  0.568
+     0.6016  0.635   0.668   0.701   0.735   0.768   0.8013  0.835   0.868
+     0.9014  0.9346  0.9683 ]
     >>> print(results.contrast)
     [2.041 1.332 1.293 1.279 1.272 1.268 1.265 1.263 1.261 1.26  1.259 1.258
     1.258 1.257 1.257 1.256 1.256 1.255 1.255 1.255 1.255 1.254 1.254 1.254
     1.254 1.254 1.254 1.253 1.253 1.253]
     """
-
-    return VisualStatsOutput(**output)
+    return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]
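The `blurriness` statistic is renamed `sharpness` here (same edge-filter computation), and `brightness`/`darkness` swap which percentile they read. A hedged shim for downstream code that only needs to survive the rename (`stats` is a `VisualStatsOutput` from either version):

    def get_sharpness(stats):
        """Return the edge-filter statistic under either its new or old name."""
        # dataeval 0.71.x exposes `sharpness`; 0.70.x called the same metric `blurriness`
        return stats.sharpness if hasattr(stats, "sharpness") else stats.blurriness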
dataeval/_internal/models/tensorflow/losses.py
CHANGED
@@ -11,11 +11,11 @@ from __future__ import annotations
 from typing import Literal, cast
 
 import tensorflow as tf
-from keras.layers import Flatten
 from numpy.typing import NDArray
 from tensorflow_probability.python.distributions.mvn_diag import MultivariateNormalDiag
 from tensorflow_probability.python.distributions.mvn_tril import MultivariateNormalTriL
 from tensorflow_probability.python.stats import covariance
+from tf_keras.layers import Flatten
 
 from dataeval._internal.models.tensorflow.gmm import gmm_energy, gmm_params
 
dataeval/_internal/models/tensorflow/pixelcnn.py
CHANGED
@@ -13,9 +13,9 @@ from __future__ import annotations
 import functools
 import warnings
 
-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from tensorflow_probability.python.bijectors import bijector
 from tensorflow_probability.python.distributions import (
     categorical,
dataeval/_internal/models/tensorflow/utils.py
CHANGED
@@ -11,11 +11,13 @@ from __future__ import annotations
 import math
 from typing import Callable, Union, cast
 
-import keras as keras
 import numpy as np
 import tensorflow as tf
-
-from keras.layers import (
+import tf_keras as keras
+from numpy.typing import NDArray
+from tensorflow._api.v2.nn import relu, softmax, tanh
+from tf_keras import Sequential
+from tf_keras.layers import (
     Conv2D,
     Conv2DTranspose,
     Dense,
@@ -23,8 +25,6 @@ from keras.layers import (
     InputLayer,
     Reshape,
 )
-from numpy.typing import NDArray
-from tensorflow._api.v2.nn import relu, softmax, tanh
 
 from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
 from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
dataeval/metrics/stats/__init__.py
CHANGED
@@ -4,7 +4,12 @@ and label statistics against the images and labels of a dataset.
 """
 
 from dataeval._internal.metrics.stats.boxratiostats import boxratiostats
-from dataeval._internal.metrics.stats.datasetstats import
+from dataeval._internal.metrics.stats.datasetstats import (
+    ChannelStatsOutput,
+    DatasetStatsOutput,
+    channelstats,
+    datasetstats,
+)
 from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
 from dataeval._internal.metrics.stats.hashstats import HashStatsOutput, hashstats
 from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
@@ -13,12 +18,14 @@ from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, visu
 
 __all__ = [
     "boxratiostats",
+    "channelstats",
     "datasetstats",
     "dimensionstats",
     "hashstats",
     "labelstats",
     "pixelstats",
     "visualstats",
+    "ChannelStatsOutput",
     "DatasetStatsOutput",
     "DimensionStatsOutput",
     "HashStatsOutput",
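With these re-exports, the per-channel entry point is importable from the public package. A quick hedged smoke test, assuming dataeval 0.71.1 is installed:

    from dataeval.metrics.stats import ChannelStatsOutput, channelstats

    print(channelstats.__module__, ChannelStatsOutput.__name__)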
dataeval/utils/tensorflow/__init__.py
CHANGED
@@ -6,6 +6,12 @@ as well as constructors which allow for customization of the encoder, decoder an
 layers used by the model.
 """
 
+from dataeval import _IS_TENSORFLOW_AVAILABLE
+
 from . import loss, models, recon
 
-__all__ = [
+__all__ = []
+
+
+if _IS_TENSORFLOW_AVAILABLE:
+    __all__ = ["loss", "models", "recon"]
dataeval/utils/tensorflow/models/__init__.py
CHANGED
@@ -1,5 +1,9 @@
+from dataeval import _IS_TENSORFLOW_AVAILABLE
 from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
 from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
 from dataeval._internal.models.tensorflow.utils import create_model
 
-__all__ = [
+__all__ = []
+
+if _IS_TENSORFLOW_AVAILABLE:
+    __all__ += ["create_model", "AE", "AEGMM", "PixelCNN", "VAE", "VAEGMM"]
dataeval/utils/torch/__init__.py
CHANGED
@@ -5,8 +5,17 @@ While these metrics can take in custom models, DataEval provides utility classes
 to create a seamless integration between custom models and DataEval's metrics.
 """
 
+from dataeval import _IS_TORCH_AVAILABLE, _IS_TORCHVISION_AVAILABLE
 from dataeval._internal.utils import read_dataset
 
-
+__all__ = []
 
-
+if _IS_TORCH_AVAILABLE:
+    from . import models, trainer
+
+    __all__ += ["read_dataset", "models", "trainer"]
+
+if _IS_TORCHVISION_AVAILABLE:
+    from . import datasets
+
+    __all__ += ["datasets"]
dataeval/utils/torch/datasets/__init__.py
CHANGED
@@ -2,6 +2,11 @@
 Provide access to common Torch datasets used for computer vision
 """
 
-from dataeval
+from dataeval import _IS_TORCHVISION_AVAILABLE
 
-__all__ = [
+__all__ = []
+
+if _IS_TORCHVISION_AVAILABLE:
+    from dataeval._internal.datasets import CIFAR10, MNIST, VOCDetection
+
+    __all__ += ["CIFAR10", "MNIST", "VOCDetection"]
dataeval/utils/torch/models/__init__.py
CHANGED
@@ -1,7 +1,11 @@
+from dataeval import _IS_TORCH_AVAILABLE
 from dataeval._internal.models.pytorch.autoencoder import (
     AriaAutoencoder,
     Decoder,
     Encoder,
 )
 
-__all__ = [
+__all__ = []
+
+if _IS_TORCH_AVAILABLE:
+    __all__ += ["AriaAutoencoder", "Decoder", "Encoder"]
{dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.70.1
+Version: 0.71.1
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
@@ -29,10 +29,12 @@ Requires-Dist: nvidia-cudnn-cu11 (>=8.6.0.163) ; extra == "tensorflow" or extra
 Requires-Dist: pillow (>=10.3.0)
 Requires-Dist: scikit-learn (>=1.5.0)
 Requires-Dist: scipy (>=1.10)
-Requires-Dist: tensorflow (>=2.
-Requires-Dist: tensorflow_probability (>=0.
+Requires-Dist: tensorflow (>=2.16) ; extra == "tensorflow" or extra == "all"
+Requires-Dist: tensorflow_probability (>=0.24) ; extra == "tensorflow" or extra == "all"
+Requires-Dist: tf-keras (>2.16) ; extra == "tensorflow" or extra == "all"
 Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
 Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
+Requires-Dist: tqdm
 Requires-Dist: xxhash (>=3.3)
 Project-URL: Documentation, https://dataeval.readthedocs.io/
 Project-URL: Repository, https://github.com/aria-ml/dataeval/
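The TensorFlow extra now requires TF >= 2.16 with tensorflow_probability >= 0.24 plus the new tf-keras shim, and tqdm becomes an unconditional dependency. A hedged upgrade check (install commands shown as comments; extras names come from the metadata above):

    # pip install "dataeval==0.71.1"              # core, now pulls in tqdm
    # pip install "dataeval[tensorflow]==0.71.1"  # adds tensorflow>=2.16, tfp>=0.24, tf-keras
    # pip install "dataeval[all]==0.71.1"         # tensorflow + torch extras together
    import importlib.metadata

    print(importlib.metadata.version("dataeval"))  # expect "0.71.1" after upgrading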
{dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-dataeval/__init__.py,sha256=
+dataeval/__init__.py,sha256=Qm1rDTX_NyCAtZl2ilQ49v0j_zqnWhhVwIhe0cvrKjk,620
 dataeval/_internal/datasets.py,sha256=KbXSR-vOAzFamfXHRnI9mhhqUzEPyGpK47fZsirQn1I,14638
 dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/_internal/detectors/clusterer.py,sha256=srqTHzh9kIy7Ty4VYaptwuQlBh8emFeiEAeS_mYFKro,20750
@@ -12,13 +12,13 @@ dataeval/_internal/detectors/drift/uncertainty.py,sha256=Ot8L42AnFbkij4J3Tis7VzX
 dataeval/_internal/detectors/duplicates.py,sha256=wggaIl3uFxihNBQhPv5JcreZbhVaFKoMAJMv_9-aaHU,5324
 dataeval/_internal/detectors/merged_stats.py,sha256=okXGrqAgsqfANMxfIjiUQlZWlaIh5TVvIB9UPsOJZ7k,1351
 dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/_internal/detectors/ood/ae.py,sha256=
-dataeval/_internal/detectors/ood/aegmm.py,sha256=
-dataeval/_internal/detectors/ood/base.py,sha256=
-dataeval/_internal/detectors/ood/llr.py,sha256=
-dataeval/_internal/detectors/ood/vae.py,sha256=
-dataeval/_internal/detectors/ood/vaegmm.py,sha256=
-dataeval/_internal/detectors/outliers.py,sha256=
+dataeval/_internal/detectors/ood/ae.py,sha256=OTcfvoiCdSFGaAAkejBKwwiHaHKB6sa01aW5fVBI1Bk,2152
+dataeval/_internal/detectors/ood/aegmm.py,sha256=7fRcTXfyUgYfcZOaa9GpGNNxAAp4sQ9zYowfs4s4420,2530
+dataeval/_internal/detectors/ood/base.py,sha256=jMMObJgPUZc2Vbql_UYNXvQAFO305TRhdVxk0YqfzJo,8573
+dataeval/_internal/detectors/ood/llr.py,sha256=wzWOeyqbD0WdXHRa1Qf-_3TbJYEfT6OuTQfcepPsbTM,10235
+dataeval/_internal/detectors/ood/vae.py,sha256=y_HP3tk7Clo3YG9hl-gke9_tJ4XW8x8sQlrC9ZtbVLw,3042
+dataeval/_internal/detectors/ood/vaegmm.py,sha256=SvdUKC8cVyEWfEGcczRmyA4SGJhbol0eDSDry1mZxII,2959
+dataeval/_internal/detectors/outliers.py,sha256=C7Iu66ze5KCCRQNc1TsqkFVKDFGfP4qjGMUv6RUpk-E,10206
 dataeval/_internal/interop.py,sha256=FLXJY-5hwJcKCtruyvaarqynXCMfcLbQSFvGnrWQDPo,1338
 dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/_internal/metrics/balance.py,sha256=8KwuR5HvytJtS1YW9KkNrCu2dPn_gP4FSbXrxQ-9kK8,6182
@@ -27,14 +27,14 @@ dataeval/_internal/metrics/coverage.py,sha256=jxvzWpVQrfmN3S9rpXvyb35vpRn1ovV2Id
 dataeval/_internal/metrics/divergence.py,sha256=gKQt9rxmhW8RnODCoLgFSPnCUWEMjmNIPlCV2w6E6oU,4211
 dataeval/_internal/metrics/diversity.py,sha256=ZSlq1KBvkMRVAvlznils2QmlPC73TTpHs1ux7PoFrio,7664
 dataeval/_internal/metrics/parity.py,sha256=eTjltNBJOTFH6T_t9V9-1EFr_U0vqlU642o3x2RWgz0,16527
-dataeval/_internal/metrics/stats/base.py,sha256=
+dataeval/_internal/metrics/stats/base.py,sha256=DRTaaFVtbH1M-wLO2NrtuAAXx699vlEjP9d2no72pM4,11066
 dataeval/_internal/metrics/stats/boxratiostats.py,sha256=Ac6nB41q43xHCJRDEXHNgsJF80VE8MpH8_kySxA84BE,6342
-dataeval/_internal/metrics/stats/datasetstats.py,sha256=
-dataeval/_internal/metrics/stats/dimensionstats.py,sha256=
-dataeval/_internal/metrics/stats/hashstats.py,sha256=
+dataeval/_internal/metrics/stats/datasetstats.py,sha256=1H8Njtr27oNO8Hn3pwizAlOFkVe3QpbJb-RYk4dLKkY,6201
+dataeval/_internal/metrics/stats/dimensionstats.py,sha256=EIXrRia7OyB147WgAW2tqEwPMcCNWmSQidx5uQukSqQ,3915
+dataeval/_internal/metrics/stats/hashstats.py,sha256=I-aX-R0Rlvjwo7A5bjq3Bqs7-utTapnXB87z9TyC12w,2088
 dataeval/_internal/metrics/stats/labelstats.py,sha256=BNxI2flvKhSps2o4-TPbN9nf52ctatI2SuDZ07hah5E,4058
-dataeval/_internal/metrics/stats/pixelstats.py,sha256=
-dataeval/_internal/metrics/stats/visualstats.py,sha256=
+dataeval/_internal/metrics/stats/pixelstats.py,sha256=_b0TdjHZwe2yj5Cdmz2IhbQP4LTnHI1qFlDgPV8fuCs,4420
+dataeval/_internal/metrics/stats/visualstats.py,sha256=TdPwiehv0dY5HJmOOQk4_omfMd725NqOPG21A-q_t0I,4788
 dataeval/_internal/metrics/uap.py,sha256=RumSQey6vNoz9CtOG2_Inb-TurKJrAHqwhkyWBirxhk,2128
 dataeval/_internal/metrics/utils.py,sha256=vW3mQHjF0AvYlml27X5dZgd0YBk3zyBvvztLEfdRkvI,13475
 dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -43,12 +43,12 @@ dataeval/_internal/models/pytorch/autoencoder.py,sha256=nPyLjLZrPNla-lMnym3fUW-O
 dataeval/_internal/models/pytorch/blocks.py,sha256=pm2xwsDZjZJYXrhhiz8husvh2vHmrkFMSYEn-EDUD5Q,1354
 dataeval/_internal/models/pytorch/utils.py,sha256=Qgwym1PxGuwxbXCKUT-8r6Iyrxqm7x94oj45Vf5_CjE,1675
 dataeval/_internal/models/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/_internal/models/tensorflow/autoencoder.py,sha256=
+dataeval/_internal/models/tensorflow/autoencoder.py,sha256=l-3utb7Rwq6_KiqOPBHnYxR7ngYhpIDFvbvNZc5UvNI,10369
 dataeval/_internal/models/tensorflow/gmm.py,sha256=QoEgbeax1GETqRmUF7A2ih9uFOZfFAjGzgH2ljExlAc,3669
-dataeval/_internal/models/tensorflow/losses.py,sha256=
-dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=
-dataeval/_internal/models/tensorflow/trainer.py,sha256=
-dataeval/_internal/models/tensorflow/utils.py,sha256=
+dataeval/_internal/models/tensorflow/losses.py,sha256=LavFmi9AWfE_HO4YxQ54kV8LZG5-UeCOhOlcPpxo-ic,3979
+dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=ru4KF8CZHKbOpp-ZgDxuRdbcv_nTCs1i1H2lTMamL7Y,48331
+dataeval/_internal/models/tensorflow/trainer.py,sha256=LJ3t6Ud95cofKN-cgb5o5nDrYSFse7LSDOYIBkMgDJk,4094
+dataeval/_internal/models/tensorflow/utils.py,sha256=Uq6eUTEeUHGopL1_VBH656-Ue18v6WgiEUjmk8SMsc8,8741
 dataeval/_internal/output.py,sha256=qVbOi41dvfQICQ4uxysHPWBRKo1XR61kXHPL_vKOPm0,2545
 dataeval/_internal/utils.py,sha256=jo6bGJZAgyuZqRpAAC4gwhAHYE12316na19ZuFwMqes,1504
 dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -62,19 +62,19 @@ dataeval/detectors/ood/__init__.py,sha256=K5QrSJg2QePs6Pa3Cg80ZwXu7BELLrSlbEpTdx
 dataeval/metrics/__init__.py,sha256=U0sRw5eiqeeDLbLPxT_rznZsvtNwONVxKVwfC0qVOgo,223
 dataeval/metrics/bias/__init__.py,sha256=Wn1Ui_g-9cR4c4IS7RFKJ6UH5DLXKjEBoXTuEYPXSBc,619
 dataeval/metrics/estimators/__init__.py,sha256=4VFMKLPsJdaWiflf84bXGQ2k8ertFQ4WEPhyWqjFFvE,377
-dataeval/metrics/stats/__init__.py,sha256=
+dataeval/metrics/stats/__init__.py,sha256=AKlNelORMOM2OA9XIvwZ9nOn6dK6k-r-69ldEAuqgLA,1156
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/utils/__init__.py,sha256=cW_5-DIZG2OFRs3FVLOz0uCv4JWdaoVO7C9rOlR7ZEA,526
-dataeval/utils/tensorflow/__init__.py,sha256=
-dataeval/utils/tensorflow/loss/__init__.py,sha256=
-dataeval/utils/tensorflow/models/__init__.py,sha256=
+dataeval/utils/tensorflow/__init__.py,sha256=sKRG3b_MLQUrAftkRZ17JyNZt6gjEguvTK83hO_IwRQ,530
+dataeval/utils/tensorflow/loss/__init__.py,sha256=s7tD_5dYWcNDmntGiEHhG7bVDsMAY1UO8FpQFe9cUns,195
+dataeval/utils/tensorflow/models/__init__.py,sha256=1R9Oi5DOYwT0W3JSEfoMsPOvhYFaKqKilwkrUifNnig,385
 dataeval/utils/tensorflow/recon/__init__.py,sha256=xe6gAQqK9tyAoDQTtaJAxIPK1humt5QzsG_9NPsqx58,116
-dataeval/utils/torch/__init__.py,sha256=
-dataeval/utils/torch/datasets/__init__.py,sha256=
-dataeval/utils/torch/models/__init__.py,sha256=
-dataeval/utils/torch/trainer/__init__.py,sha256=
+dataeval/utils/torch/__init__.py,sha256=430fNKbqLByuGSeNhnoIJy3g9Z94ckZsAKWUZ15MVP4,575
+dataeval/utils/torch/datasets/__init__.py,sha256=94k7fMQdxYlQXDYouAHUgrQJ2oBwnvq4koFJpyhlUVA,292
+dataeval/utils/torch/models/__init__.py,sha256=q1BzoLHWA0uBXzT2glWJDrxVA1BN7xnkT2r_d-7Dlyw,246
+dataeval/utils/torch/trainer/__init__.py,sha256=hpcrlCCXPzb8b7FOzEAKqFy6Z7Zl4V_cx3yA7n3L1L4,177
 dataeval/workflows/__init__.py,sha256=VFeJyMhZxvj8WnU5Un32mwO8lNfBQOBjD9IdOqexnAE,320
-dataeval-0.
-dataeval-0.
-dataeval-0.
-dataeval-0.
+dataeval-0.71.1.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+dataeval-0.71.1.dist-info/METADATA,sha256=j1HnzvkOSLR-D6debnFFLlp2vgue8ueFQvjYlrX75cw,4580
+dataeval-0.71.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.71.1.dist-info/RECORD,,
{dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/LICENSE.txt
File without changes
{dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/WHEEL
File without changes