dataeval 0.74.1__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. dataeval/__init__.py +33 -10
  2. dataeval/detectors/__init__.py +2 -2
  3. dataeval/detectors/drift/__init__.py +14 -12
  4. dataeval/detectors/drift/base.py +1 -1
  5. dataeval/detectors/drift/cvm.py +1 -1
  6. dataeval/detectors/drift/ks.py +1 -1
  7. dataeval/detectors/drift/mmd.py +6 -5
  8. dataeval/detectors/drift/torch.py +12 -12
  9. dataeval/detectors/drift/uncertainty.py +3 -2
  10. dataeval/detectors/linters/__init__.py +4 -4
  11. dataeval/detectors/linters/clusterer.py +2 -7
  12. dataeval/detectors/linters/duplicates.py +6 -10
  13. dataeval/detectors/linters/outliers.py +4 -2
  14. dataeval/detectors/ood/__init__.py +3 -10
  15. dataeval/detectors/ood/{ae_torch.py → ae.py} +6 -4
  16. dataeval/detectors/ood/base.py +64 -161
  17. dataeval/detectors/ood/metadata_ks_compare.py +34 -42
  18. dataeval/detectors/ood/metadata_least_likely.py +3 -3
  19. dataeval/detectors/ood/metadata_ood_mi.py +6 -5
  20. dataeval/detectors/ood/mixin.py +146 -0
  21. dataeval/detectors/ood/output.py +63 -0
  22. dataeval/interop.py +16 -3
  23. dataeval/log.py +18 -0
  24. dataeval/metrics/__init__.py +2 -2
  25. dataeval/metrics/bias/__init__.py +9 -12
  26. dataeval/metrics/bias/balance.py +10 -8
  27. dataeval/metrics/bias/coverage.py +52 -4
  28. dataeval/metrics/bias/diversity.py +42 -14
  29. dataeval/metrics/bias/parity.py +15 -12
  30. dataeval/metrics/estimators/__init__.py +2 -2
  31. dataeval/metrics/estimators/ber.py +3 -1
  32. dataeval/metrics/estimators/divergence.py +1 -1
  33. dataeval/metrics/estimators/uap.py +1 -1
  34. dataeval/metrics/stats/__init__.py +18 -18
  35. dataeval/metrics/stats/base.py +4 -4
  36. dataeval/metrics/stats/boxratiostats.py +8 -9
  37. dataeval/metrics/stats/datasetstats.py +10 -14
  38. dataeval/metrics/stats/dimensionstats.py +4 -4
  39. dataeval/metrics/stats/hashstats.py +12 -8
  40. dataeval/metrics/stats/labelstats.py +5 -5
  41. dataeval/metrics/stats/pixelstats.py +4 -9
  42. dataeval/metrics/stats/visualstats.py +4 -9
  43. dataeval/output.py +1 -1
  44. dataeval/utils/__init__.py +4 -13
  45. dataeval/utils/dataset/__init__.py +7 -0
  46. dataeval/utils/{torch → dataset}/datasets.py +2 -0
  47. dataeval/utils/dataset/read.py +63 -0
  48. dataeval/utils/dataset/split.py +527 -0
  49. dataeval/utils/image.py +2 -2
  50. dataeval/utils/metadata.py +310 -5
  51. dataeval/{metrics/bias/metadata_utils.py → utils/plot.py} +1 -104
  52. dataeval/utils/torch/__init__.py +2 -17
  53. dataeval/utils/torch/gmm.py +29 -6
  54. dataeval/utils/torch/{utils.py → internal.py} +82 -58
  55. dataeval/utils/torch/models.py +10 -8
  56. dataeval/utils/torch/trainer.py +6 -85
  57. dataeval/workflows/__init__.py +2 -5
  58. dataeval/workflows/sufficiency.py +16 -6
  59. dataeval-0.75.0.dist-info/METADATA +136 -0
  60. dataeval-0.75.0.dist-info/RECORD +67 -0
  61. dataeval/detectors/ood/base_torch.py +0 -109
  62. dataeval/metrics/bias/metadata_preprocessing.py +0 -285
  63. dataeval/utils/gmm.py +0 -26
  64. dataeval/utils/split_dataset.py +0 -492
  65. dataeval-0.74.1.dist-info/METADATA +0 -120
  66. dataeval-0.74.1.dist-info/RECORD +0 -65
  67. {dataeval-0.74.1.dist-info → dataeval-0.75.0.dist-info}/LICENSE.txt +0 -0
  68. {dataeval-0.74.1.dist-info → dataeval-0.75.0.dist-info}/WHEEL +0 -0
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- __all__ = ["LabelStatsOutput", "labelstats"]
3
+ __all__ = []
4
4
 
5
5
  from collections import Counter, defaultdict
6
6
  from dataclasses import dataclass
@@ -86,13 +86,13 @@ def labelstats(
86
86
 
87
87
  >>> stats = labelstats(labels)
88
88
  >>> stats.label_counts_per_class
89
- {'chicken': 3, 'cow': 8, 'horse': 9, 'pig': 7, 'sheep': 7}
89
+ {'chicken': 12, 'cow': 5, 'horse': 4, 'pig': 7, 'sheep': 4}
90
90
  >>> stats.label_counts_per_image
91
- [3, 2, 3, 4, 1, 5, 4, 4, 4, 4]
91
+ [3, 3, 5, 3, 2, 5, 5, 2, 2, 2]
92
92
  >>> stats.image_counts_per_label
93
- {'chicken': 2, 'cow': 6, 'horse': 7, 'pig': 5, 'sheep': 7}
93
+ {'chicken': 8, 'cow': 4, 'horse': 4, 'pig': 7, 'sheep': 4}
94
94
  >>> (stats.image_count, stats.class_count, stats.label_count)
95
- (10, 5, 34)
95
+ (10, 5, 32)
96
96
  """
97
97
  label_counts = Counter()
98
98
  image_counts = Counter()
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- __all__ = ["PixelStatsOutput", "pixelstats"]
3
+ __all__ = []
4
4
 
5
5
  from dataclasses import dataclass
6
6
  from typing import Any, Callable, Iterable
@@ -106,15 +106,10 @@ def pixelstats(
106
106
  --------
107
107
  Calculating the statistics on the images, whose shape is (C, H, W)
108
108
 
109
- >>> results = pixelstats(images)
109
+ >>> results = pixelstats(stats_images)
110
110
  >>> print(results.mean)
111
- [0.04828 0.562 0.06726 0.09937 0.1315 0.1636 0.1957 0.2278 0.26
112
- 0.292 0.3242 0.3562 0.3884 0.4204 0.4526 0.4846 0.5166 0.549
113
- 0.581 0.6133 0.6455 0.6772 0.7095 0.7417 0.774 0.8057 0.838
114
- 0.87 0.9023 0.934 ]
111
+ [0.2903 0.2108 0.397 0.596 0.743 ]
115
112
  >>> print(results.entropy)
116
- [3.238 3.303 0.8125 1.028 0.8223 1.046 0.8247 1.041 0.8203 1.012
117
- 0.812 0.9883 0.795 0.9243 0.9243 0.795 0.9907 0.8125 1.028 0.8223
118
- 1.046 0.8247 1.041 0.8203 1.012 0.812 0.9883 0.795 0.9243 0.9243]
113
+ [4.99 2.371 1.179 2.406 0.668]
119
114
  """
120
115
  return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- __all__ = ["VisualStatsOutput", "visualstats"]
3
+ __all__ = []
4
4
 
5
5
  from dataclasses import dataclass
6
6
  from typing import Any, Callable, Iterable
@@ -112,15 +112,10 @@ def visualstats(
112
112
  --------
113
113
  Calculating the :term:`statistics<Statistics>` on the images, whose shape is (C, H, W)
114
114
 
115
- >>> results = visualstats(images)
115
+ >>> results = visualstats(stats_images)
116
116
  >>> print(results.brightness)
117
- [0.02246 0.5557 0.06805 0.1014 0.1348 0.1681 0.2014 0.2347 0.268
118
- 0.3015 0.3347 0.3682 0.4014 0.4348 0.468 0.5015 0.5347 0.568
119
- 0.6016 0.635 0.668 0.701 0.735 0.768 0.8013 0.835 0.868
120
- 0.9014 0.9346 0.9683 ]
117
+ [0.1353 0.2085 0.4143 0.6084 0.8135]
121
118
  >>> print(results.contrast)
122
- [2.041 1.332 1.293 1.279 1.272 1.268 1.265 1.263 1.261 1.26 1.259 1.258
123
- 1.258 1.257 1.257 1.256 1.256 1.255 1.255 1.255 1.255 1.254 1.254 1.254
124
- 1.254 1.254 1.254 1.253 1.253 1.253]
119
+ [2.04 1.331 1.261 1.279 1.253]
125
120
  """
126
121
  return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]
dataeval/output.py CHANGED
@@ -65,7 +65,7 @@ R = TypeVar("R", bound=Output)
65
65
 
66
66
 
67
67
  def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
68
- """Decorator to stamp OutputMetadata classes with runtime metadata"""
68
+ """Decorator to stamp Output classes with runtime metadata"""
69
69
 
70
70
  if fn is None:
71
71
  return partial(set_metadata, state=state) # type: ignore
@@ -1,18 +1,9 @@
1
1
  """
2
2
  The utility classes and functions are provided by DataEval to assist users
3
- in setting up architectures that are guaranteed to work with applicable DataEval
4
- metrics. Currently DataEval supports both :term:`TensorFlow` and PyTorch backends.
3
+ in setting up data and architectures that are guaranteed to work with applicable
4
+ DataEval metrics.
5
5
  """
6
6
 
7
- from dataeval import _IS_TORCH_AVAILABLE
8
- from dataeval.utils.metadata import merge_metadata
9
- from dataeval.utils.split_dataset import split_dataset
7
+ __all__ = ["dataset", "metadata", "torch"]
10
8
 
11
- __all__ = ["split_dataset", "merge_metadata"]
12
-
13
- if _IS_TORCH_AVAILABLE:
14
- from dataeval.utils import torch
15
-
16
- __all__ += ["torch"]
17
-
18
- del _IS_TORCH_AVAILABLE
9
+ from dataeval.utils import dataset, metadata, torch
@@ -0,0 +1,7 @@
1
+ """Provides utility functions for interacting with Computer Vision datasets."""
2
+
3
+ __all__ = ["datasets", "read_dataset", "SplitDatasetOutput", "split_dataset"]
4
+
5
+ from dataeval.utils.dataset import datasets
6
+ from dataeval.utils.dataset.read import read_dataset
7
+ from dataeval.utils.dataset.split import SplitDatasetOutput, split_dataset
@@ -1,3 +1,5 @@
1
+ """Provides access to common Computer Vision datasets."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
5
  __all__ = ["MNIST", "CIFAR10", "VOCDetection"]
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ __all__ = []
4
+
5
+ from collections import defaultdict
6
+ from typing import Any
7
+
8
+ from torch.utils.data import Dataset
9
+
10
+
11
+ def read_dataset(dataset: Dataset[Any]) -> list[list[Any]]:
12
+ """
13
+ Extract information from a dataset at each index into individual lists of each information position
14
+
15
+ Parameters
16
+ ----------
17
+ dataset : torch.utils.data.Dataset
18
+ Input dataset
19
+
20
+ Returns
21
+ -------
22
+ List[List[Any]]
23
+ All objects in individual lists based on return position from dataset
24
+
25
+ Warning
26
+ -------
27
+ No type checking is done between lists or data inside lists
28
+
29
+ See Also
30
+ --------
31
+ torch.utils.data.Dataset
32
+
33
+ Examples
34
+ --------
35
+ >>> import numpy as np
36
+ >>> data = np.ones((10, 1, 3, 3))
37
+ >>> labels = np.ones((10,))
38
+ >>> class ICDataset:
39
+ ... def __init__(self, data, labels):
40
+ ... self.data = data
41
+ ... self.labels = labels
42
+ ...
43
+ ... def __getitem__(self, idx):
44
+ ... return self.data[idx], self.labels[idx]
45
+
46
+ >>> ds = ICDataset(data, labels)
47
+
48
+ >>> result = read_dataset(ds)
49
+ >>> len(result) # images and labels
50
+ 2
51
+ >>> np.asarray(result[0]).shape # images
52
+ (10, 1, 3, 3)
53
+ >>> np.asarray(result[1]).shape # labels
54
+ (10,)
55
+ """
56
+
57
+ ddict: dict[int, list[Any]] = defaultdict(list[Any])
58
+
59
+ for data in dataset:
60
+ for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
61
+ ddict[i].append(d)
62
+
63
+ return list(ddict.values())