dataeval 0.70.1__py3-none-any.whl → 0.71.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. dataeval/__init__.py +2 -1
  2. dataeval/_internal/detectors/ood/ae.py +1 -1
  3. dataeval/_internal/detectors/ood/aegmm.py +1 -1
  4. dataeval/_internal/detectors/ood/base.py +1 -1
  5. dataeval/_internal/detectors/ood/llr.py +3 -3
  6. dataeval/_internal/detectors/ood/vae.py +1 -1
  7. dataeval/_internal/detectors/ood/vaegmm.py +1 -1
  8. dataeval/_internal/detectors/outliers.py +3 -8
  9. dataeval/_internal/metrics/stats/base.py +103 -44
  10. dataeval/_internal/metrics/stats/datasetstats.py +94 -22
  11. dataeval/_internal/metrics/stats/dimensionstats.py +20 -20
  12. dataeval/_internal/metrics/stats/hashstats.py +9 -9
  13. dataeval/_internal/metrics/stats/pixelstats.py +24 -24
  14. dataeval/_internal/metrics/stats/visualstats.py +38 -37
  15. dataeval/_internal/models/tensorflow/autoencoder.py +2 -2
  16. dataeval/_internal/models/tensorflow/losses.py +1 -1
  17. dataeval/_internal/models/tensorflow/pixelcnn.py +1 -1
  18. dataeval/_internal/models/tensorflow/trainer.py +1 -1
  19. dataeval/_internal/models/tensorflow/utils.py +5 -5
  20. dataeval/metrics/stats/__init__.py +8 -1
  21. dataeval/utils/tensorflow/__init__.py +7 -1
  22. dataeval/utils/tensorflow/loss/__init__.py +5 -1
  23. dataeval/utils/tensorflow/models/__init__.py +5 -1
  24. dataeval/utils/torch/__init__.py +11 -2
  25. dataeval/utils/torch/datasets/__init__.py +7 -2
  26. dataeval/utils/torch/models/__init__.py +5 -1
  27. dataeval/utils/torch/trainer/__init__.py +5 -1
  28. {dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/METADATA +5 -3
  29. {dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/RECORD +31 -31
  30. {dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/LICENSE.txt +0 -0
  31. {dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/WHEEL +0 -0
dataeval/__init__.py CHANGED
@@ -1,8 +1,9 @@
-__version__ = "0.70.1"
+__version__ = "0.71.1"

 from importlib.util import find_spec

 _IS_TORCH_AVAILABLE = find_spec("torch") is not None
+_IS_TORCHVISION_AVAILABLE = find_spec("torchvision") is not None
 _IS_TENSORFLOW_AVAILABLE = find_spec("tensorflow") is not None and find_spec("tensorflow_probability") is not None

 del find_spec
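The new _IS_TORCHVISION_AVAILABLE flag uses the same importlib.util.find_spec probe as the existing torch and tensorflow flags: it checks whether a distribution is installed without actually importing it. A minimal standalone sketch of the pattern (not DataEval code; the guarded import is illustrative):

    # Probe for an optional dependency without importing it.
    from importlib.util import find_spec

    _IS_TORCHVISION_AVAILABLE = find_spec("torchvision") is not None

    if _IS_TORCHVISION_AVAILABLE:
        # The import only runs once the spec is known to resolve.
        from torchvision import datasets  # noqa: F401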
dataeval/_internal/detectors/ood/ae.py CHANGED
@@ -10,9 +10,9 @@ from __future__ import annotations

 from typing import Callable

-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike

 from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
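The same import keras → import tf_keras as keras swap repeats through the remaining OOD detectors and TensorFlow model modules below. It tracks the Keras 3 split in TensorFlow 2.16+: the tf-keras package carries the legacy Keras 2 API, and aliasing it to keras leaves every downstream reference unchanged. A hedged sketch of an equivalent compatibility shim (illustrative only; assumes one of the two packages is installed):

    try:
        import tf_keras as keras  # Keras 2 API on TensorFlow >= 2.16
    except ImportError:
        import keras  # older TensorFlow, where this still resolves to Keras 2

    # Downstream code is unchanged either way.
    model = keras.Sequential([keras.layers.Dense(8, activation="relu")])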
dataeval/_internal/detectors/ood/aegmm.py CHANGED
@@ -10,8 +10,8 @@ from __future__ import annotations

 from typing import Callable

-import keras
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike

 from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
dataeval/_internal/detectors/ood/base.py CHANGED
@@ -12,9 +12,9 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Callable, Literal, cast

-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike, NDArray

 from dataeval._internal.interop import to_numpy
dataeval/_internal/detectors/ood/llr.py CHANGED
@@ -11,12 +11,12 @@ from __future__ import annotations
 from functools import partial
 from typing import Callable

-import keras
 import numpy as np
 import tensorflow as tf
-from keras.layers import Input
-from keras.models import Model
+import tf_keras as keras
 from numpy.typing import ArrayLike, NDArray
+from tf_keras.layers import Input
+from tf_keras.models import Model

 from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
 from dataeval._internal.interop import to_numpy
dataeval/_internal/detectors/ood/vae.py CHANGED
@@ -10,9 +10,9 @@ from __future__ import annotations

 from typing import Callable

-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike

 from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
dataeval/_internal/detectors/ood/vaegmm.py CHANGED
@@ -10,9 +10,9 @@ from __future__ import annotations

 from typing import Callable

-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import ArrayLike

 from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
dataeval/_internal/detectors/outliers.py CHANGED
@@ -147,7 +147,7 @@ class Outliers:
             mask = _get_outlier_mask(values.astype(np.float64), self.outlier_method, self.outlier_threshold)
             indices = np.flatnonzero(mask)
             for i, value in zip(indices, values[mask]):
-                flagged_images.setdefault(i, {}).update({stat: value})
+                flagged_images.setdefault(int(i), {}).update({stat: value})

         return dict(sorted(flagged_images.items()))

@@ -261,11 +261,6 @@ class Outliers:
         >>> results.issues[10]
         {'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128, 'contrast': 1.25, 'zeros': 0.05493}
         """
-        self.stats = datasetstats(
-            images=data,
-            use_dimension=self.use_dimension,
-            use_pixel=self.use_pixel,
-            use_visual=self.use_visual,
-        )
-        outliers = self._get_outliers({k: v for o in self.stats.outputs() for k, v in o.dict().items()})
+        self.stats = datasetstats(images=data)
+        outliers = self._get_outliers(self.stats.dict())
         return OutliersOutput(outliers)
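The int(i) cast above is a small correctness fix: np.flatnonzero yields NumPy integer scalars, and using those directly as dictionary keys produces output that compares oddly and does not JSON-serialize. A standalone sketch of the behavior (not DataEval code):

    import json
    import numpy as np

    values = np.array([0.1, 9.9, 0.2])
    indices = np.flatnonzero(values > 1.0)  # dtype is np.intp, not Python int

    flagged = {int(i): {"stat": float(values[i])} for i in indices}
    print(json.dumps(flagged))  # works; raw np.intp keys would raise TypeError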
dataeval/_internal/metrics/stats/base.py CHANGED
@@ -3,9 +3,13 @@ from __future__ import annotations
 import re
 import warnings
 from dataclasses import dataclass
-from typing import Any, Callable, Iterable, NamedTuple, Optional, Union
+from functools import partial
+from itertools import repeat
+from multiprocessing import Pool
+from typing import Any, Callable, Generic, Iterable, NamedTuple, Optional, TypeVar, Union

 import numpy as np
+import tqdm
 from numpy.typing import ArrayLike, NDArray

 from dataeval._internal.interop import to_numpy_iter
@@ -91,7 +95,11 @@ class BaseStatsOutput(OutputMetadata):
         return len(self.source_index)


-class StatsProcessor:
+TStatsOutput = TypeVar("TStatsOutput", bound=BaseStatsOutput, covariant=True)
+
+
+class StatsProcessor(Generic[TStatsOutput]):
+    output_class: type[TStatsOutput]
     cache_keys: list[str] = []
     image_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
     channel_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
@@ -119,6 +127,9 @@ class StatsProcessor:
         else:
             return self.fn_map[fn_key](self)

+    def process(self) -> dict:
+        return {k: self.fn_map[k](self) for k in self.fn_map}
+
     @property
     def image(self) -> NDArray:
         if self._image is None:
@@ -143,14 +154,66 @@ class StatsProcessor:
             self._scaled = self._scaled.reshape(self.image.shape[0], -1)
         return self._scaled

+    @classmethod
+    def convert_output(
+        cls, source: dict[str, Any], source_index: list[SourceIndex], box_count: list[int]
+    ) -> TStatsOutput:
+        output = {}
+        for key in source:
+            if key not in cls.output_class.__annotations__:
+                continue
+            stat_type: str = cls.output_class.__annotations__[key]
+            dtype_match = re.match(DTYPE_REGEX, stat_type)
+            if dtype_match is not None:
+                output[key] = np.asarray(source[key], dtype=np.dtype(dtype_match.group(1)))
+            else:
+                output[key] = source[key]
+        return cls.output_class(**output, source_index=source_index, box_count=np.asarray(box_count, dtype=np.uint16))
+
+
+class StatsProcessorOutput(NamedTuple):
+    results: list[dict[str, Any]]
+    source_indices: list[SourceIndex]
+    box_counts: list[int]
+    warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]]
+
+
+def process_stats(
+    i: int,
+    image_boxes: tuple[NDArray, NDArray | None],
+    per_channel: bool,
+    stats_processor_cls: Iterable[type[StatsProcessor]],
+) -> StatsProcessorOutput:
+    image, boxes = image_boxes
+    results_list: list[dict[str, Any]] = []
+    source_indices: list[SourceIndex] = []
+    box_counts: list[int] = []
+    warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]] = []
+    nboxes = [None] if boxes is None else normalize_box_shape(boxes)
+    for i_b, box in enumerate(nboxes):
+        i_b = None if box is None else i_b
+        processor_list = [p(image, box, per_channel) for p in stats_processor_cls]
+        if any(not p.is_valid_slice for p in processor_list) and i_b is not None and box is not None:
+            warnings_list.append((i, i_b, box, image.shape))
+        results_list.append({k: v for p in processor_list for k, v in p.process().items()})
+        if per_channel:
+            source_indices.extend([SourceIndex(i, i_b, c) for c in range(image_boxes[0].shape[-3])])
+        else:
+            source_indices.append(SourceIndex(i, i_b, None))
+    box_counts.append(0 if boxes is None else len(boxes))
+    return StatsProcessorOutput(results_list, source_indices, box_counts, warnings_list)
+
+
+def process_stats_unpack(args, per_channel: bool, stats_processor_cls: Iterable[type[StatsProcessor]]):
+    return process_stats(*args, per_channel=per_channel, stats_processor_cls=stats_processor_cls)
+

 def run_stats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None,
     per_channel: bool,
-    stats_processor_cls: type,
-    output_cls: type,
-) -> dict:
+    stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
+) -> list[TStatsOutput]:
     """
     Compute specified statistics on a set of images.

@@ -169,15 +232,13 @@ def run_stats(
         iterable should match the length of the input images.
     per_channel : bool
         A flag which determines if the states should be evaluated on a per-channel basis or not.
-    output_cls : type
-        The output class for which stats values will be calculated.
+    stats_processor_cls : Iterable[type[StatsProcessor]]
+        An iterable of stats processor classes that calculate stats and return output classes.

     Returns
     -------
-    dict[str, NDArray]]
-        A dictionary containing the computed statistics for each image.
-        The dictionary keys correspond to the names of the statistics, and the values are NumPy arrays
-        with the results of the computations.
+    list[TStatsOutput]
+        A list of output classes corresponding to the input processor types.

     Note
    ----
@@ -189,43 +250,41 @@ def run_stats(
     be reused to avoid redundant computation.
     """
     results_list: list[dict[str, NDArray]] = []
-    output_list = list(output_cls.__annotations__)
     source_index = []
     box_count = []
-    bbox_iter = (None for _ in images) if bboxes is None else to_numpy_iter(bboxes)
-
-    for i, (boxes, image) in enumerate(zip(bbox_iter, to_numpy_iter(images))):
-        nboxes = [None] if boxes is None else normalize_box_shape(boxes)
-        for i_b, box in enumerate(nboxes):
-            i_b = None if box is None else i_b
-            processor: StatsProcessor = stats_processor_cls(image, box, per_channel)
-            if not processor.is_valid_slice:
-                warnings.warn(f"Bounding box {i_b}: {box} is out of bounds of image {i}: {image.shape}.")
-            results_list.append({stat: processor.get(stat) for stat in output_list})
-            if per_channel:
-                source_index.extend([SourceIndex(i, i_b, c) for c in range(image.shape[-3])])
-            else:
-                source_index.append(SourceIndex(i, i_b, None))
-        box_count.append(0 if boxes is None else len(boxes))
+    bbox_iter = repeat(None) if bboxes is None else to_numpy_iter(bboxes)
+
+    warning_list = []
+    total_for_status = getattr(images, "__len__")() if hasattr(images, "__len__") else None
+    stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
+
+    # TODO: Introduce global controls for CPU job parallelism and GPU configurations
+    with Pool(16) as p:
+        for r in tqdm.tqdm(
+            p.imap(
+                partial(process_stats_unpack, per_channel=per_channel, stats_processor_cls=stats_processor_cls),
+                enumerate(zip(to_numpy_iter(images), bbox_iter)),
+            ),
+            total=total_for_status,
+        ):
+            results_list.extend(r.results)
+            source_index.extend(r.source_indices)
+            box_count.extend(r.box_counts)
+            warning_list.extend(r.warnings_list)
+        p.close()
+        p.join()
+
+    # warnings are not emitted while in multiprocessing pools so we emit after gathering all warnings
+    for w in warning_list:
+        warnings.warn(f"Bounding box [{w[0]}][{w[1]}]: {w[2]} is out of bounds of {w[3]}.", UserWarning)

     output = {}
-    if per_channel:
-        for i, results in enumerate(results_list):
-            for stat, result in results.items():
+    for results in results_list:
+        for stat, result in results.items():
+            if per_channel:
                 output.setdefault(stat, []).extend(result.tolist())
-    else:
-        for results in results_list:
-            for stat, result in results.items():
+            else:
                 output.setdefault(stat, []).append(result.tolist() if isinstance(result, np.ndarray) else result)

-    for stat in output:
-        stat_type: str = output_cls.__annotations__[stat]
-
-        dtype_match = re.match(DTYPE_REGEX, stat_type)
-        if dtype_match is not None:
-            output[stat] = np.asarray(output[stat], dtype=np.dtype(dtype_match.group(1)))
-
-    output[SOURCE_INDEX] = source_index
-    output[BOX_COUNT] = np.asarray(box_count, dtype=np.uint16)
-
-    return output
+    outputs = [s.convert_output(output, source_index, box_count) for s in stats_processor_cls]
+    return outputs
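The rewritten run_stats fans images out to a multiprocessing.Pool, streams ordered results back through imap with a tqdm progress bar, and defers warnings until the pool is drained, since warnings raised inside worker processes never reach the parent. A minimal standalone sketch of that dispatch pattern, with a hypothetical work function standing in for process_stats_unpack:

    from functools import partial
    from multiprocessing import Pool

    import tqdm

    def work(args, scale):  # hypothetical stand-in for process_stats_unpack
        i, value = args
        return i, value * scale

    if __name__ == "__main__":  # required on spawn-based platforms
        items = [1.0, 2.0, 3.0]
        with Pool(4) as p:
            # imap yields results in input order as workers finish them
            results = list(
                tqdm.tqdm(
                    p.imap(partial(work, scale=10.0), enumerate(items)),
                    total=len(items),
                )
            )
        print(results)  # [(0, 10.0), (1, 20.0), (2, 30.0)]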
dataeval/_internal/metrics/stats/datasetstats.py CHANGED
@@ -1,15 +1,18 @@
 from __future__ import annotations

 from dataclasses import dataclass
-from typing import Iterable
+from typing import Any, Iterable

 from numpy.typing import ArrayLike

-from dataeval._internal.metrics.stats.base import BaseStatsOutput
-from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
+from dataeval._internal.metrics.stats.base import BaseStatsOutput, run_stats
+from dataeval._internal.metrics.stats.dimensionstats import (
+    DimensionStatsOutput,
+    DimensionStatsProcessor,
+)
 from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
-from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
-from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, visualstats
+from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
+from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
 from dataeval._internal.output import OutputMetadata, set_metadata


@@ -25,19 +28,53 @@ class DatasetStatsOutput(OutputMetadata):

     Attributes
     ----------
-    dimensionstats : DimensionStatsOutput or None
-    pixelstats: PixelStatsOutput or None
-    visualstats: VisualStatsOutput or None
-    labelstats: LabelStatsOutput or None, default None
+    dimensionstats : DimensionStatsOutput
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
+    labelstats: LabelStatsOutput or None
     """

-    dimensionstats: DimensionStatsOutput | None
-    pixelstats: PixelStatsOutput | None
-    visualstats: VisualStatsOutput | None
+    dimensionstats: DimensionStatsOutput
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
     labelstats: LabelStatsOutput | None = None

+    def outputs(self) -> list[OutputMetadata]:
+        return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
+
+    def dict(self) -> dict[str, Any]:
+        return {k: v for o in self.outputs() for k, v in o.dict().items()}
+
+    def __post_init__(self):
+        lengths = [len(s) for s in self.outputs() if isinstance(s, BaseStatsOutput)]
+        if not all(length == lengths[0] for length in lengths):
+            raise ValueError("All StatsOutput classes must contain the same number of image sources.")
+
+
+@dataclass(frozen=True)
+class ChannelStatsOutput(OutputMetadata):
+    """
+    Output class for :func:`channelstats` stats metric
+
+    This class represents the outputs of various per-channel stats functions against
+    a single dataset, such that each index across all stat outputs are representative
+    of the same source image. Modifying or mixing outputs will result in inaccurate
+    outlier calculations if not created correctly.
+
+    Attributes
+    ----------
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
+    """
+
+    pixelstats: PixelStatsOutput
+    visualstats: VisualStatsOutput
+
     def outputs(self) -> list[BaseStatsOutput]:
-        return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats) if s is not None]
+        return [self.pixelstats, self.visualstats]
+
+    def dict(self) -> dict[str, Any]:
+        return {**self.pixelstats.dict(), **self.visualstats.dict()}

     def __post_init__(self):
         lengths = [len(s) for s in self.outputs()]
@@ -50,9 +87,6 @@ def datasetstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
     labels: Iterable[ArrayLike] | None = None,
-    use_dimension: bool = True,
-    use_pixel: bool = True,
-    use_visual: bool = True,
 ) -> DatasetStatsOutput:
     """
     Calculates various statistics for each image
@@ -91,9 +125,47 @@ def datasetstats(
     [1.744 1.946 0.1164 0.0635 0.0633 0.06274 0.0429 0.0317 0.0317
      0.02576 0.02081 0.02171 0.01915 0.01767 0.01799 0.01595 0.01433 0.01478]
     """
-    return DatasetStatsOutput(
-        dimensionstats(images, bboxes) if use_dimension else None,
-        pixelstats(images, bboxes) if use_pixel else None,
-        visualstats(images, bboxes) if use_visual else None,
-        labelstats(labels) if labels else None,
-    )
+    outputs = run_stats(images, bboxes, False, [DimensionStatsProcessor, PixelStatsProcessor, VisualStatsProcessor])
+    return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None)  # type: ignore
+
+
+@set_metadata("dataeval.metrics")
+def channelstats(
+    images: Iterable[ArrayLike],
+    bboxes: Iterable[ArrayLike] | None = None,
+) -> ChannelStatsOutput:
+    """
+    Calculates various per-channel statistics for each image
+
+    This function computes pixel and visual metrics on the images
+    or individual bounding boxes for each image.
+
+    Parameters
+    ----------
+    images : Iterable[ArrayLike]
+        Images to perform calculations on
+    bboxes : Iterable[ArrayLike] or None
+        Bounding boxes in `xyxy` format for each image to perform calculations on
+
+    Returns
+    -------
+    ChannelStatsOutput
+        Output class containing the per-channel outputs of various stats functions
+
+    See Also
+    --------
+    pixelstats, visualstats
+
+    Examples
+    --------
+    Calculating the per-channel pixel and visual stats for a dataset
+
+    >>> stats = channelstats(images)
+    >>> print(stats.visualstats.darkness)
+    [0.07495 0.1748 0.275 0.1047 0.11096 0.1172 0.2047 0.2109 0.2172
+     0.3047 0.311 0.3171 0.4048 0.411 0.4172 0.505 0.5107 0.517
+     0.6045 0.611 0.617 0.7046 0.711 0.7173 0.8047 0.811 0.8174
+     0.905 0.911 0.917 ]
+    """
+    outputs = run_stats(images, bboxes, True, [PixelStatsProcessor, VisualStatsProcessor])
+    return ChannelStatsOutput(*outputs)  # type: ignore
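With processors passed as a list, datasetstats and the new channelstats traverse the dataset once and fan the results out into one output object per processor, instead of re-reading the images for each metric family. A hedged usage sketch (assumes dataeval 0.71.1 with the API shown in this diff; the multiprocessing pool inside run_stats makes the main-module guard advisable):

    import numpy as np
    from dataeval.metrics.stats import channelstats, datasetstats

    if __name__ == "__main__":
        rng = np.random.default_rng(0)
        images = [rng.random((3, 16, 16)) for _ in range(4)]  # CxHxW arrays

        ds = datasetstats(images)   # dimension + pixel + visual in one pass
        ch = channelstats(images)   # pixel + visual, per channel

        print(ds.dimensionstats.channels)  # one entry per image
        print(ch.pixelstats.mean.shape)    # one entry per image-channel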
dataeval/_internal/metrics/stats/dimensionstats.py CHANGED
@@ -11,24 +11,6 @@ from dataeval._internal.metrics.utils import get_bitdepth
 from dataeval._internal.output import set_metadata


-class DimensionStatsProcessor(StatsProcessor):
-    image_function_map = {
-        "left": lambda x: x.box[0],
-        "top": lambda x: x.box[1],
-        "width": lambda x: x.shape[-1],
-        "height": lambda x: x.shape[-2],
-        "channels": lambda x: x.shape[-3],
-        "size": lambda x: np.prod(x.shape[-2:]),
-        "aspect_ratio": lambda x: x.shape[-1] / x.shape[-2],
-        "depth": lambda x: get_bitdepth(x.image).depth,
-        "center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
-        "distance": lambda x: np.sqrt(
-            np.square(((x.box[0] + x.box[2]) / 2) - (x.width / 2))
-            + np.square(((x.box[1] + x.box[3]) / 2) - (x.height / 2))
-        ),
-    }
-
-
 @dataclass(frozen=True)
 class DimensionStatsOutput(BaseStatsOutput):
     """
@@ -70,6 +52,25 @@ class DimensionStatsOutput(BaseStatsOutput):
     distance: NDArray[np.float16]


+class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
+    output_class = DimensionStatsOutput
+    image_function_map = {
+        "left": lambda x: x.box[0],
+        "top": lambda x: x.box[1],
+        "width": lambda x: x.box[2] - x.box[0],
+        "height": lambda x: x.box[3] - x.box[1],
+        "channels": lambda x: x.shape[-3],
+        "size": lambda x: (x.box[2] - x.box[0]) * (x.box[3] - x.box[1]),
+        "aspect_ratio": lambda x: (x.box[2] - x.box[0]) / (x.box[3] - x.box[1]),
+        "depth": lambda x: get_bitdepth(x.image).depth,
+        "center": lambda x: np.asarray([(x.box[0] + x.box[2]) / 2, (x.box[1] + x.box[3]) / 2]),
+        "distance": lambda x: np.sqrt(
+            np.square(((x.box[0] + x.box[2]) / 2) - (x.shape[-1] / 2))
+            + np.square(((x.box[1] + x.box[3]) / 2) - (x.shape[-2] / 2))
+        ),
+    }
+
+
 @set_metadata("dataeval.metrics")
 def dimensionstats(
     images: Iterable[ArrayLike],
@@ -109,5 +110,4 @@ def dimensionstats(
     >>> print(results.channels)
     [1 1 1 1 1 1 3 1 1 3]
     """
-    output = run_stats(images, bboxes, False, DimensionStatsProcessor, DimensionStatsOutput)
-    return DimensionStatsOutput(**output)
+    return run_stats(images, bboxes, False, [DimensionStatsProcessor])[0]
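Note the behavioral fix that rides along with the relocation: width, height, size, and aspect_ratio are now derived from the xyxy box coordinates rather than the image array's shape, so per-box dimension stats no longer report whole-image values. The arithmetic, as a standalone check:

    import numpy as np

    box = np.array([10, 20, 110, 70])  # xyxy: left, top, right, bottom
    width, height = box[2] - box[0], box[3] - box[1]

    assert (width, height) == (100, 50)  # 0.70.1 returned the image W/H here
    assert width * height == 5000        # "size"
    assert width / height == 2.0         # "aspect_ratio"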
dataeval/_internal/metrics/stats/hashstats.py CHANGED
@@ -10,13 +10,6 @@ from dataeval._internal.metrics.utils import pchash, xxhash
 from dataeval._internal.output import set_metadata


-class HashStatsProcessor(StatsProcessor):
-    image_function_map = {
-        "xxhash": lambda x: xxhash(x.image),
-        "pchash": lambda x: pchash(x.image),
-    }
-
-
 @dataclass(frozen=True)
 class HashStatsOutput(BaseStatsOutput):
     """
@@ -34,6 +27,14 @@ class HashStatsOutput(BaseStatsOutput):
     pchash: list[str]


+class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
+    output_class = HashStatsOutput
+    image_function_map = {
+        "xxhash": lambda x: xxhash(x.image),
+        "pchash": lambda x: pchash(x.image),
+    }
+
+
 @set_metadata("dataeval.metrics")
 def hashstats(
     images: Iterable[ArrayLike],
@@ -71,5 +72,4 @@ def hashstats(
     >>> print(results.pchash)
     ['8f25506af46a7c6a', '8000808000008080', '8e71f18e0ef18e0e', 'a956d6a956d6a928']
     """
-    output = run_stats(images, bboxes, False, HashStatsProcessor, HashStatsOutput)
-    return HashStatsOutput(**output)
+    return run_stats(images, bboxes, False, [HashStatsProcessor])[0]
dataeval/_internal/metrics/stats/pixelstats.py CHANGED
@@ -11,28 +11,6 @@ from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcesso
 from dataeval._internal.output import set_metadata


-class PixelStatsProcessor(StatsProcessor):
-    cache_keys = ["histogram"]
-    image_function_map = {
-        "mean": lambda self: np.mean(self.scaled),
-        "std": lambda x: np.std(x.scaled),
-        "var": lambda x: np.var(x.scaled),
-        "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
-        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
-        "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
-        "entropy": lambda x: entropy(x.get("histogram")),
-    }
-    channel_function_map = {
-        "mean": lambda x: np.mean(x.scaled, axis=1),
-        "std": lambda x: np.std(x.scaled, axis=1),
-        "var": lambda x: np.var(x.scaled, axis=1),
-        "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
-        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
-        "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
-        "entropy": lambda x: entropy(x.get("histogram"), axis=1),
-    }
-
-
 @dataclass(frozen=True)
 class PixelStatsOutput(BaseStatsOutput):
     """
@@ -65,6 +43,29 @@ class PixelStatsOutput(BaseStatsOutput):
     entropy: NDArray[np.float16]


+class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
+    output_class = PixelStatsOutput
+    cache_keys = ["histogram"]
+    image_function_map = {
+        "mean": lambda self: np.mean(self.scaled),
+        "std": lambda x: np.std(x.scaled),
+        "var": lambda x: np.var(x.scaled),
+        "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
+        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
+        "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
+        "entropy": lambda x: entropy(x.get("histogram")),
+    }
+    channel_function_map = {
+        "mean": lambda x: np.mean(x.scaled, axis=1),
+        "std": lambda x: np.std(x.scaled, axis=1),
+        "var": lambda x: np.var(x.scaled, axis=1),
+        "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
+        "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
+        "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
+        "entropy": lambda x: entropy(x.get("histogram"), axis=1),
+    }
+
+
 @set_metadata("dataeval.metrics")
 def pixelstats(
     images: Iterable[ArrayLike],
@@ -115,5 +116,4 @@ def pixelstats(
     0.812 0.9883 0.795 0.9243 0.9243 0.795 0.9907 0.8125 1.028 0.8223
     1.046 0.8247 1.041 0.8203 1.012 0.812 0.9883 0.795 0.9243 0.9243]
     """
-    output = run_stats(images, bboxes, per_channel, PixelStatsProcessor, PixelStatsOutput)
-    return PixelStatsOutput(**output)
+    return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]
dataeval/_internal/metrics/stats/visualstats.py CHANGED
@@ -13,33 +13,6 @@ from dataeval._internal.output import set_metadata
 QUARTILES = (0, 25, 50, 75, 100)


-class VisualStatsProcessor(StatsProcessor):
-    cache_keys = ["percentiles"]
-    image_function_map = {
-        "brightness": lambda x: x.get("percentiles")[-2],
-        "blurriness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
-        "contrast": lambda x: np.nan_to_num(
-            (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
-        ),
-        "darkness": lambda x: x.get("percentiles")[1],
-        "missing": lambda x: np.sum(np.isnan(x.image)) / np.prod(x.shape[-2:]),
-        "zeros": lambda x: np.count_nonzero(x.image == 0) / np.prod(x.shape[-2:]),
-        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
-    }
-    channel_function_map = {
-        "brightness": lambda x: x.get("percentiles")[:, -2],
-        "blurriness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
-        "contrast": lambda x: np.nan_to_num(
-            (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
-            / np.mean(x.get("percentiles"), axis=1)
-        ),
-        "darkness": lambda x: x.get("percentiles")[:, 1],
-        "missing": lambda x: np.sum(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
-        "zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
-        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
-    }
-
-
 @dataclass(frozen=True)
 class VisualStatsOutput(BaseStatsOutput):
     """
@@ -49,14 +22,14 @@ class VisualStatsOutput(BaseStatsOutput):
     ----------
     brightness : NDArray[np.float16]
         Brightness of the images
-    blurriness : NDArray[np.float16]
-        Blurriness of the images
     contrast : NDArray[np.float16]
         Image contrast ratio
     darkness : NDArray[np.float16]
         Darkness of the images
     missing : NDArray[np.float16]
         Percentage of the images with missing pixels
+    sharpness : NDArray[np.float16]
+        Sharpness of the images
     zeros : NDArray[np.float16]
         Percentage of the images with zero value pixels
     percentiles : NDArray[np.float16]
@@ -64,14 +37,42 @@ class VisualStatsOutput(BaseStatsOutput):
     """

     brightness: NDArray[np.float16]
-    blurriness: NDArray[np.float16]
     contrast: NDArray[np.float16]
     darkness: NDArray[np.float16]
     missing: NDArray[np.float16]
+    sharpness: NDArray[np.float16]
     zeros: NDArray[np.float16]
     percentiles: NDArray[np.float16]


+class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
+    output_class = VisualStatsOutput
+    cache_keys = ["percentiles"]
+    image_function_map = {
+        "brightness": lambda x: x.get("percentiles")[1],
+        "contrast": lambda x: np.nan_to_num(
+            (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles"))
+        ),
+        "darkness": lambda x: x.get("percentiles")[-2],
+        "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
+        "sharpness": lambda x: np.std(edge_filter(np.mean(x.image, axis=0))),
+        "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
+        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
+    }
+    channel_function_map = {
+        "brightness": lambda x: x.get("percentiles")[:, 1],
+        "contrast": lambda x: np.nan_to_num(
+            (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
+            / np.mean(x.get("percentiles"), axis=1)
+        ),
+        "darkness": lambda x: x.get("percentiles")[:, -2],
+        "missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
+        "sharpness": lambda x: np.std(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
+        "zeros": lambda x: np.count_nonzero(x.image == 0, axis=(1, 2)) / np.prod(x.shape[-2:]),
+        "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES, axis=1).T,
+    }
+
+
 @set_metadata("dataeval.metrics")
 def visualstats(
     images: Iterable[ArrayLike],
@@ -81,7 +82,7 @@ def visualstats(
     """
     Calculates visual statistics for each image

-    This function computes various visual metrics (e.g., brightness, darkness, contrast, blurriness)
+    This function computes various visual metrics (e.g., brightness, darkness, contrast, sharpness)
     on the images as a whole.

     Parameters
@@ -95,7 +96,7 @@ def visualstats(
     -------
     VisualStatsOutput
         A dictionary-like object containing the computed visual statistics for each image. The keys correspond
-        to the names of the statistics (e.g., 'brightness', 'blurriness'), and the values are lists of results for
+        to the names of the statistics (e.g., 'brightness', 'sharpness'), and the values are lists of results for
         each image or numpy arrays when the results are multi-dimensional.

     See Also
@@ -112,13 +113,13 @@ def visualstats(

     >>> results = visualstats(images)
     >>> print(results.brightness)
-    [0.0737 0.607 0.0713 0.1046 0.138 0.1713 0.2046 0.2379 0.2712 0.3047
-     0.338 0.3713 0.4045 0.438 0.4712 0.5044 0.538 0.5713 0.6045 0.638
-     0.6714 0.7046 0.738 0.7715 0.8047 0.838 0.871 0.905 0.938 0.971 ]
+    [0.02246 0.5557 0.06805 0.1014 0.1348 0.1681 0.2014 0.2347 0.268
+     0.3015 0.3347 0.3682 0.4014 0.4348 0.468 0.5015 0.5347 0.568
+     0.6016 0.635 0.668 0.701 0.735 0.768 0.8013 0.835 0.868
+     0.9014 0.9346 0.9683 ]
     >>> print(results.contrast)
     [2.041 1.332 1.293 1.279 1.272 1.268 1.265 1.263 1.261 1.26 1.259 1.258
      1.258 1.257 1.257 1.256 1.256 1.255 1.255 1.255 1.255 1.254 1.254 1.254
      1.254 1.254 1.254 1.253 1.253 1.253]
     """
-    output = run_stats(images, bboxes, per_channel, VisualStatsProcessor, VisualStatsOutput)
-    return VisualStatsOutput(**output)
+    return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]
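Besides renaming blurriness to sharpness, the processor swaps which cached percentile feeds each stat: with QUARTILES = (0, 25, 50, 75, 100), brightness now reads index 1 (the 25th percentile) and darkness index -2 (the 75th), the reverse of 0.70.1, which is why the example brightness values above changed. A quick standalone check of what those indices select:

    import numpy as np

    QUARTILES = (0, 25, 50, 75, 100)
    scaled = np.linspace(0.0, 1.0, 101)  # stand-in for a scaled image
    percentiles = np.nanpercentile(scaled, q=QUARTILES)

    print(percentiles[1])   # 0.25 -> "brightness" in 0.71.1
    print(percentiles[-2])  # 0.75 -> "darkness" in 0.71.1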
dataeval/_internal/models/tensorflow/autoencoder.py CHANGED
@@ -12,9 +12,9 @@ from __future__ import annotations

 from typing import Callable, cast

-import keras
 import tensorflow as tf
-from keras.layers import (
+import tf_keras as keras
+from tf_keras.layers import (
     Dense,
     Flatten,
     Layer,
dataeval/_internal/models/tensorflow/losses.py CHANGED
@@ -11,11 +11,11 @@ from __future__ import annotations
 from typing import Literal, cast

 import tensorflow as tf
-from keras.layers import Flatten
 from numpy.typing import NDArray
 from tensorflow_probability.python.distributions.mvn_diag import MultivariateNormalDiag
 from tensorflow_probability.python.distributions.mvn_tril import MultivariateNormalTriL
 from tensorflow_probability.python.stats import covariance
+from tf_keras.layers import Flatten

 from dataeval._internal.models.tensorflow.gmm import gmm_energy, gmm_params

dataeval/_internal/models/tensorflow/pixelcnn.py CHANGED
@@ -13,9 +13,9 @@ from __future__ import annotations
 import functools
 import warnings

-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from tensorflow_probability.python.bijectors import bijector
 from tensorflow_probability.python.distributions import (
     categorical,
dataeval/_internal/models/tensorflow/trainer.py CHANGED
@@ -10,9 +10,9 @@ from __future__ import annotations

 from typing import Callable, Iterable, cast

-import keras
 import numpy as np
 import tensorflow as tf
+import tf_keras as keras
 from numpy.typing import NDArray

dataeval/_internal/models/tensorflow/utils.py CHANGED
@@ -11,11 +11,13 @@ from __future__ import annotations
 import math
 from typing import Callable, Union, cast

-import keras as keras
 import numpy as np
 import tensorflow as tf
-from keras import Sequential
-from keras.layers import (
+import tf_keras as keras
+from numpy.typing import NDArray
+from tensorflow._api.v2.nn import relu, softmax, tanh
+from tf_keras import Sequential
+from tf_keras.layers import (
     Conv2D,
     Conv2DTranspose,
     Dense,
@@ -23,8 +25,6 @@ from keras.layers import (
     InputLayer,
     Reshape,
 )
-from numpy.typing import NDArray
-from tensorflow._api.v2.nn import relu, softmax, tanh

 from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
 from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
dataeval/metrics/stats/__init__.py CHANGED
@@ -4,7 +4,12 @@ and label statistics against the images and labels of a dataset.
 """

 from dataeval._internal.metrics.stats.boxratiostats import boxratiostats
-from dataeval._internal.metrics.stats.datasetstats import DatasetStatsOutput, datasetstats
+from dataeval._internal.metrics.stats.datasetstats import (
+    ChannelStatsOutput,
+    DatasetStatsOutput,
+    channelstats,
+    datasetstats,
+)
 from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
 from dataeval._internal.metrics.stats.hashstats import HashStatsOutput, hashstats
 from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
@@ -13,12 +18,14 @@ from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, visu

 __all__ = [
     "boxratiostats",
+    "channelstats",
     "datasetstats",
     "dimensionstats",
     "hashstats",
     "labelstats",
     "pixelstats",
     "visualstats",
+    "ChannelStatsOutput",
     "DatasetStatsOutput",
     "DimensionStatsOutput",
     "HashStatsOutput",
dataeval/utils/tensorflow/__init__.py CHANGED
@@ -6,6 +6,12 @@ as well as constructors which allow for customization of the encoder, decoder an
 layers used by the model.
 """

+from dataeval import _IS_TENSORFLOW_AVAILABLE
+
 from . import loss, models, recon

-__all__ = ["loss", "models", "recon"]
+__all__ = []
+
+
+if _IS_TENSORFLOW_AVAILABLE:
+    __all__ = ["loss", "models", "recon"]
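This guarded-export pattern repeats across the remaining utils.tensorflow and utils.torch packages below: the availability flags computed in dataeval/__init__.py gate what each subpackage advertises, so a star-import exposes nothing when the optional extra is missing. A minimal sketch of the idea with a hypothetical optional dependency named foo:

    from importlib.util import find_spec

    _IS_FOO_AVAILABLE = find_spec("foo") is not None  # "foo" is a placeholder

    __all__ = []

    if _IS_FOO_AVAILABLE:
        from foo import bar  # noqa: F401  (only runs when foo is installed)

        __all__ += ["bar"]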
dataeval/utils/tensorflow/loss/__init__.py CHANGED
@@ -1,3 +1,7 @@
+from dataeval import _IS_TENSORFLOW_AVAILABLE
 from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM

-__all__ = ["Elbo", "LossGMM"]
+__all__ = []
+
+if _IS_TENSORFLOW_AVAILABLE:
+    __all__ += ["Elbo", "LossGMM"]
dataeval/utils/tensorflow/models/__init__.py CHANGED
@@ -1,5 +1,9 @@
+from dataeval import _IS_TENSORFLOW_AVAILABLE
 from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
 from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
 from dataeval._internal.models.tensorflow.utils import create_model

-__all__ = ["create_model", "AE", "AEGMM", "PixelCNN", "VAE", "VAEGMM"]
+__all__ = []
+
+if _IS_TENSORFLOW_AVAILABLE:
+    __all__ += ["create_model", "AE", "AEGMM", "PixelCNN", "VAE", "VAEGMM"]
dataeval/utils/torch/__init__.py CHANGED
@@ -5,8 +5,17 @@ While these metrics can take in custom models, DataEval provides utility classes
 to create a seamless integration between custom models and DataEval's metrics.
 """

+from dataeval import _IS_TORCH_AVAILABLE, _IS_TORCHVISION_AVAILABLE
 from dataeval._internal.utils import read_dataset

-from . import models, trainer
+__all__ = []

-__all__ = ["read_dataset", "models", "trainer"]
+if _IS_TORCH_AVAILABLE:
+    from . import models, trainer
+
+    __all__ += ["read_dataset", "models", "trainer"]
+
+if _IS_TORCHVISION_AVAILABLE:
+    from . import datasets
+
+    __all__ += ["datasets"]
dataeval/utils/torch/datasets/__init__.py CHANGED
@@ -2,6 +2,11 @@
 Provide access to common Torch datasets used for computer vision
 """

-from dataeval._internal.datasets import CIFAR10, MNIST, VOCDetection
+from dataeval import _IS_TORCHVISION_AVAILABLE

-__all__ = ["CIFAR10", "MNIST", "VOCDetection"]
+__all__ = []
+
+if _IS_TORCHVISION_AVAILABLE:
+    from dataeval._internal.datasets import CIFAR10, MNIST, VOCDetection
+
+    __all__ += ["CIFAR10", "MNIST", "VOCDetection"]
dataeval/utils/torch/models/__init__.py CHANGED
@@ -1,7 +1,11 @@
+from dataeval import _IS_TORCH_AVAILABLE
 from dataeval._internal.models.pytorch.autoencoder import (
     AriaAutoencoder,
     Decoder,
     Encoder,
 )

-__all__ = ["AriaAutoencoder", "Decoder", "Encoder"]
+__all__ = []
+
+if _IS_TORCH_AVAILABLE:
+    __all__ += ["AriaAutoencoder", "Decoder", "Encoder"]
dataeval/utils/torch/trainer/__init__.py CHANGED
@@ -1,3 +1,7 @@
+from dataeval import _IS_TORCH_AVAILABLE
 from dataeval._internal.models.pytorch.autoencoder import AETrainer

-__all__ = ["AETrainer"]
+__all__ = []
+
+if _IS_TORCH_AVAILABLE:
+    __all__ += ["AETrainer"]
{dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.70.1
+Version: 0.71.1
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
@@ -29,10 +29,12 @@ Requires-Dist: nvidia-cudnn-cu11 (>=8.6.0.163) ; extra == "tensorflow" or extra
 Requires-Dist: pillow (>=10.3.0)
 Requires-Dist: scikit-learn (>=1.5.0)
 Requires-Dist: scipy (>=1.10)
-Requires-Dist: tensorflow (>=2.14.1,<2.16) ; extra == "tensorflow" or extra == "all"
-Requires-Dist: tensorflow_probability (>=0.22.1,<0.24) ; extra == "tensorflow" or extra == "all"
+Requires-Dist: tensorflow (>=2.16) ; extra == "tensorflow" or extra == "all"
+Requires-Dist: tensorflow_probability (>=0.24) ; extra == "tensorflow" or extra == "all"
+Requires-Dist: tf-keras (>2.16) ; extra == "tensorflow" or extra == "all"
 Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
 Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
+Requires-Dist: tqdm
 Requires-Dist: xxhash (>=3.3)
 Project-URL: Documentation, https://dataeval.readthedocs.io/
 Project-URL: Repository, https://github.com/aria-ml/dataeval/
{dataeval-0.70.1.dist-info → dataeval-0.71.1.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
-dataeval/__init__.py,sha256=AIHxRS7PYlqg4s7fZJTPKuTtyWFWoVROw4knVoSBH6E,555
+dataeval/__init__.py,sha256=Qm1rDTX_NyCAtZl2ilQ49v0j_zqnWhhVwIhe0cvrKjk,620
 dataeval/_internal/datasets.py,sha256=KbXSR-vOAzFamfXHRnI9mhhqUzEPyGpK47fZsirQn1I,14638
 dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/_internal/detectors/clusterer.py,sha256=srqTHzh9kIy7Ty4VYaptwuQlBh8emFeiEAeS_mYFKro,20750
@@ -12,13 +12,13 @@ dataeval/_internal/detectors/drift/uncertainty.py,sha256=Ot8L42AnFbkij4J3Tis7VzX
 dataeval/_internal/detectors/duplicates.py,sha256=wggaIl3uFxihNBQhPv5JcreZbhVaFKoMAJMv_9-aaHU,5324
 dataeval/_internal/detectors/merged_stats.py,sha256=okXGrqAgsqfANMxfIjiUQlZWlaIh5TVvIB9UPsOJZ7k,1351
 dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/_internal/detectors/ood/ae.py,sha256=AIffh11UKZX-3oocDDp8-t-uDUm8aQKvrV0nIE3BLQo,2140
-dataeval/_internal/detectors/ood/aegmm.py,sha256=q2kRXZM5X0PoA10mRsi8Gh-W5FdFzEsnM1yDq3GFzn0,2518
-dataeval/_internal/detectors/ood/base.py,sha256=qYSmcN74x5-qL0_I7uNo4eQ8X8pr7M4cwjs2qvkJt5g,8561
-dataeval/_internal/detectors/ood/llr.py,sha256=VgZtMrMgI8zDVUzsqm2l4tqsULFIhdQeDd4lzdo_G7U,10217
-dataeval/_internal/detectors/ood/vae.py,sha256=iXEltu5pATWr42-28hZ3ARZavJrptLwUM5P9c8omA_s,3030
-dataeval/_internal/detectors/ood/vaegmm.py,sha256=ujp6UN0wpZcmPDPkVfTHZxgka5kuTOSzgXMmbKdmK2U,2947
-dataeval/_internal/detectors/outliers.py,sha256=JmAXoMO0Od7tc9RVFGJsDyOnByciPFG5FdS54Iu0BII,10396
+dataeval/_internal/detectors/ood/ae.py,sha256=OTcfvoiCdSFGaAAkejBKwwiHaHKB6sa01aW5fVBI1Bk,2152
+dataeval/_internal/detectors/ood/aegmm.py,sha256=7fRcTXfyUgYfcZOaa9GpGNNxAAp4sQ9zYowfs4s4420,2530
+dataeval/_internal/detectors/ood/base.py,sha256=jMMObJgPUZc2Vbql_UYNXvQAFO305TRhdVxk0YqfzJo,8573
+dataeval/_internal/detectors/ood/llr.py,sha256=wzWOeyqbD0WdXHRa1Qf-_3TbJYEfT6OuTQfcepPsbTM,10235
+dataeval/_internal/detectors/ood/vae.py,sha256=y_HP3tk7Clo3YG9hl-gke9_tJ4XW8x8sQlrC9ZtbVLw,3042
+dataeval/_internal/detectors/ood/vaegmm.py,sha256=SvdUKC8cVyEWfEGcczRmyA4SGJhbol0eDSDry1mZxII,2959
+dataeval/_internal/detectors/outliers.py,sha256=C7Iu66ze5KCCRQNc1TsqkFVKDFGfP4qjGMUv6RUpk-E,10206
 dataeval/_internal/interop.py,sha256=FLXJY-5hwJcKCtruyvaarqynXCMfcLbQSFvGnrWQDPo,1338
 dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/_internal/metrics/balance.py,sha256=8KwuR5HvytJtS1YW9KkNrCu2dPn_gP4FSbXrxQ-9kK8,6182
@@ -27,14 +27,14 @@ dataeval/_internal/metrics/coverage.py,sha256=jxvzWpVQrfmN3S9rpXvyb35vpRn1ovV2Id
 dataeval/_internal/metrics/divergence.py,sha256=gKQt9rxmhW8RnODCoLgFSPnCUWEMjmNIPlCV2w6E6oU,4211
 dataeval/_internal/metrics/diversity.py,sha256=ZSlq1KBvkMRVAvlznils2QmlPC73TTpHs1ux7PoFrio,7664
 dataeval/_internal/metrics/parity.py,sha256=eTjltNBJOTFH6T_t9V9-1EFr_U0vqlU642o3x2RWgz0,16527
-dataeval/_internal/metrics/stats/base.py,sha256=HyjgHTQZqgkkCWDzOF-aNZBr88IAjnao8VSbHC5ZtbI,8554
+dataeval/_internal/metrics/stats/base.py,sha256=DRTaaFVtbH1M-wLO2NrtuAAXx699vlEjP9d2no72pM4,11066
 dataeval/_internal/metrics/stats/boxratiostats.py,sha256=Ac6nB41q43xHCJRDEXHNgsJF80VE8MpH8_kySxA84BE,6342
-dataeval/_internal/metrics/stats/datasetstats.py,sha256=6DFl3TE7t2ggDD8WBVgPH7F2bRvae7NR2PVoEWL92dw,3759
-dataeval/_internal/metrics/stats/dimensionstats.py,sha256=MUQJgrWmRoQFap7gPf8vTFXJ_z7G7bAQpZ7kCPRtNkA,3847
-dataeval/_internal/metrics/stats/hashstats.py,sha256=xH0k_wOeGO5UC7-0fhAIg4WV2fO8fnF0Jdn18gYhW88,2087
+dataeval/_internal/metrics/stats/datasetstats.py,sha256=1H8Njtr27oNO8Hn3pwizAlOFkVe3QpbJb-RYk4dLKkY,6201
+dataeval/_internal/metrics/stats/dimensionstats.py,sha256=EIXrRia7OyB147WgAW2tqEwPMcCNWmSQidx5uQukSqQ,3915
+dataeval/_internal/metrics/stats/hashstats.py,sha256=I-aX-R0Rlvjwo7A5bjq3Bqs7-utTapnXB87z9TyC12w,2088
 dataeval/_internal/metrics/stats/labelstats.py,sha256=BNxI2flvKhSps2o4-TPbN9nf52ctatI2SuDZ07hah5E,4058
-dataeval/_internal/metrics/stats/pixelstats.py,sha256=LxoDQ6afsNuzB0WnOgmzkEUV7s534MrAYkzS6Be7PPQ,4419
-dataeval/_internal/metrics/stats/visualstats.py,sha256=3uET0N3WgV5dcxst8Xb9DhcATiNfAXsx1OKbPz2mU4Q,4712
+dataeval/_internal/metrics/stats/pixelstats.py,sha256=_b0TdjHZwe2yj5Cdmz2IhbQP4LTnHI1qFlDgPV8fuCs,4420
+dataeval/_internal/metrics/stats/visualstats.py,sha256=TdPwiehv0dY5HJmOOQk4_omfMd725NqOPG21A-q_t0I,4788
 dataeval/_internal/metrics/uap.py,sha256=RumSQey6vNoz9CtOG2_Inb-TurKJrAHqwhkyWBirxhk,2128
 dataeval/_internal/metrics/utils.py,sha256=vW3mQHjF0AvYlml27X5dZgd0YBk3zyBvvztLEfdRkvI,13475
 dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -43,12 +43,12 @@ dataeval/_internal/models/pytorch/autoencoder.py,sha256=nPyLjLZrPNla-lMnym3fUW-O
 dataeval/_internal/models/pytorch/blocks.py,sha256=pm2xwsDZjZJYXrhhiz8husvh2vHmrkFMSYEn-EDUD5Q,1354
 dataeval/_internal/models/pytorch/utils.py,sha256=Qgwym1PxGuwxbXCKUT-8r6Iyrxqm7x94oj45Vf5_CjE,1675
 dataeval/_internal/models/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/_internal/models/tensorflow/autoencoder.py,sha256=Ryn11jDbpZJOM5De-kMGPdbJBQEdwip6B20ajS8HqpE,10354
+dataeval/_internal/models/tensorflow/autoencoder.py,sha256=l-3utb7Rwq6_KiqOPBHnYxR7ngYhpIDFvbvNZc5UvNI,10369
 dataeval/_internal/models/tensorflow/gmm.py,sha256=QoEgbeax1GETqRmUF7A2ih9uFOZfFAjGzgH2ljExlAc,3669
-dataeval/_internal/models/tensorflow/losses.py,sha256=pZH5RnlM9R0RrBde9Lgq32muwAp7_PWc56Mu4u8RVvo,3976
-dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=keI1gTNjBk18YD91Cp4exfuGYWU9lt-wapvhSazhcVs,48319
-dataeval/_internal/models/tensorflow/trainer.py,sha256=xNY0Iw7Qa1TnCuy9N1b77_VduFoW_BhbZjfQCxOVby4,4082
-dataeval/_internal/models/tensorflow/utils.py,sha256=l6jXKMWyQAEI4LpAONq95Xwr7CPgrs408ypf9TuNxkY,8732
+dataeval/_internal/models/tensorflow/losses.py,sha256=LavFmi9AWfE_HO4YxQ54kV8LZG5-UeCOhOlcPpxo-ic,3979
+dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=ru4KF8CZHKbOpp-ZgDxuRdbcv_nTCs1i1H2lTMamL7Y,48331
+dataeval/_internal/models/tensorflow/trainer.py,sha256=LJ3t6Ud95cofKN-cgb5o5nDrYSFse7LSDOYIBkMgDJk,4094
+dataeval/_internal/models/tensorflow/utils.py,sha256=Uq6eUTEeUHGopL1_VBH656-Ue18v6WgiEUjmk8SMsc8,8741
 dataeval/_internal/output.py,sha256=qVbOi41dvfQICQ4uxysHPWBRKo1XR61kXHPL_vKOPm0,2545
 dataeval/_internal/utils.py,sha256=jo6bGJZAgyuZqRpAAC4gwhAHYE12316na19ZuFwMqes,1504
 dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -62,19 +62,19 @@ dataeval/detectors/ood/__init__.py,sha256=K5QrSJg2QePs6Pa3Cg80ZwXu7BELLrSlbEpTdx
 dataeval/metrics/__init__.py,sha256=U0sRw5eiqeeDLbLPxT_rznZsvtNwONVxKVwfC0qVOgo,223
 dataeval/metrics/bias/__init__.py,sha256=Wn1Ui_g-9cR4c4IS7RFKJ6UH5DLXKjEBoXTuEYPXSBc,619
 dataeval/metrics/estimators/__init__.py,sha256=4VFMKLPsJdaWiflf84bXGQ2k8ertFQ4WEPhyWqjFFvE,377
-dataeval/metrics/stats/__init__.py,sha256=UcD41gFwFhcQMtqwWkPQlg6cFA2_gdj6yGRCDrKYXM8,1055
+dataeval/metrics/stats/__init__.py,sha256=AKlNelORMOM2OA9XIvwZ9nOn6dK6k-r-69ldEAuqgLA,1156
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/utils/__init__.py,sha256=cW_5-DIZG2OFRs3FVLOz0uCv4JWdaoVO7C9rOlR7ZEA,526
-dataeval/utils/tensorflow/__init__.py,sha256=XgjqrMtI00ERPPpdokbO1lDyc_H3CZ1TTqUXIj0B6PI,435
-dataeval/utils/tensorflow/loss/__init__.py,sha256=E9eB87LNh0o5nUCqssB027EXBsOfEayNHPcNW0QGFdA,101
-dataeval/utils/tensorflow/models/__init__.py,sha256=OVpmHF8itDcgOlfw6N9jr7IphZPbMJoiu7OdqYhU9fs,291
+dataeval/utils/tensorflow/__init__.py,sha256=sKRG3b_MLQUrAftkRZ17JyNZt6gjEguvTK83hO_IwRQ,530
+dataeval/utils/tensorflow/loss/__init__.py,sha256=s7tD_5dYWcNDmntGiEHhG7bVDsMAY1UO8FpQFe9cUns,195
+dataeval/utils/tensorflow/models/__init__.py,sha256=1R9Oi5DOYwT0W3JSEfoMsPOvhYFaKqKilwkrUifNnig,385
 dataeval/utils/tensorflow/recon/__init__.py,sha256=xe6gAQqK9tyAoDQTtaJAxIPK1humt5QzsG_9NPsqx58,116
-dataeval/utils/torch/__init__.py,sha256=bYUm-nNlNVU3bqDz7dQHFmaRWgLy3lLrD4cSDumDlxQ,373
-dataeval/utils/torch/datasets/__init__.py,sha256=S6C4OaxEjJJaIpHSZcZfkl4U5iS5YtZ9N5GYHqvbzvM,191
-dataeval/utils/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uTB09E,162
-dataeval/utils/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
+dataeval/utils/torch/__init__.py,sha256=430fNKbqLByuGSeNhnoIJy3g9Z94ckZsAKWUZ15MVP4,575
+dataeval/utils/torch/datasets/__init__.py,sha256=94k7fMQdxYlQXDYouAHUgrQJ2oBwnvq4koFJpyhlUVA,292
+dataeval/utils/torch/models/__init__.py,sha256=q1BzoLHWA0uBXzT2glWJDrxVA1BN7xnkT2r_d-7Dlyw,246
+dataeval/utils/torch/trainer/__init__.py,sha256=hpcrlCCXPzb8b7FOzEAKqFy6Z7Zl4V_cx3yA7n3L1L4,177
 dataeval/workflows/__init__.py,sha256=VFeJyMhZxvj8WnU5Un32mwO8lNfBQOBjD9IdOqexnAE,320
-dataeval-0.70.1.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
-dataeval-0.70.1.dist-info/METADATA,sha256=B2slR1eY_xRR4QcUTpV8EJh5Z_plWmHFqTT5j4r2Vvk,4502
-dataeval-0.70.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-dataeval-0.70.1.dist-info/RECORD,,
+dataeval-0.71.1.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+dataeval-0.71.1.dist-info/METADATA,sha256=j1HnzvkOSLR-D6debnFFLlp2vgue8ueFQvjYlrX75cw,4580
+dataeval-0.71.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.71.1.dist-info/RECORD,,