dataeval 0.74.1__py3-none-any.whl → 0.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +33 -10
- dataeval/detectors/__init__.py +2 -2
- dataeval/detectors/drift/__init__.py +14 -12
- dataeval/detectors/drift/base.py +1 -1
- dataeval/detectors/drift/cvm.py +1 -1
- dataeval/detectors/drift/ks.py +1 -1
- dataeval/detectors/drift/mmd.py +6 -5
- dataeval/detectors/drift/torch.py +12 -12
- dataeval/detectors/drift/uncertainty.py +3 -2
- dataeval/detectors/linters/__init__.py +4 -4
- dataeval/detectors/linters/clusterer.py +2 -7
- dataeval/detectors/linters/duplicates.py +6 -10
- dataeval/detectors/linters/outliers.py +4 -2
- dataeval/detectors/ood/__init__.py +3 -10
- dataeval/detectors/ood/{ae_torch.py → ae.py} +6 -4
- dataeval/detectors/ood/base.py +64 -161
- dataeval/detectors/ood/metadata_ks_compare.py +34 -42
- dataeval/detectors/ood/metadata_least_likely.py +3 -3
- dataeval/detectors/ood/metadata_ood_mi.py +6 -5
- dataeval/detectors/ood/mixin.py +146 -0
- dataeval/detectors/ood/output.py +63 -0
- dataeval/interop.py +16 -3
- dataeval/log.py +18 -0
- dataeval/metrics/__init__.py +2 -2
- dataeval/metrics/bias/__init__.py +9 -12
- dataeval/metrics/bias/balance.py +10 -8
- dataeval/metrics/bias/coverage.py +52 -4
- dataeval/metrics/bias/diversity.py +42 -14
- dataeval/metrics/bias/parity.py +15 -12
- dataeval/metrics/estimators/__init__.py +2 -2
- dataeval/metrics/estimators/ber.py +3 -1
- dataeval/metrics/estimators/divergence.py +1 -1
- dataeval/metrics/estimators/uap.py +1 -1
- dataeval/metrics/stats/__init__.py +18 -18
- dataeval/metrics/stats/base.py +4 -4
- dataeval/metrics/stats/boxratiostats.py +8 -9
- dataeval/metrics/stats/datasetstats.py +10 -14
- dataeval/metrics/stats/dimensionstats.py +4 -4
- dataeval/metrics/stats/hashstats.py +12 -8
- dataeval/metrics/stats/labelstats.py +5 -5
- dataeval/metrics/stats/pixelstats.py +4 -9
- dataeval/metrics/stats/visualstats.py +4 -9
- dataeval/output.py +1 -1
- dataeval/utils/__init__.py +4 -13
- dataeval/utils/dataset/__init__.py +7 -0
- dataeval/utils/{torch → dataset}/datasets.py +2 -0
- dataeval/utils/dataset/read.py +63 -0
- dataeval/utils/dataset/split.py +527 -0
- dataeval/utils/image.py +2 -2
- dataeval/utils/metadata.py +310 -5
- dataeval/{metrics/bias/metadata_utils.py → utils/plot.py} +1 -104
- dataeval/utils/torch/__init__.py +2 -17
- dataeval/utils/torch/gmm.py +29 -6
- dataeval/utils/torch/{utils.py → internal.py} +82 -58
- dataeval/utils/torch/models.py +10 -8
- dataeval/utils/torch/trainer.py +6 -85
- dataeval/workflows/__init__.py +2 -5
- dataeval/workflows/sufficiency.py +16 -6
- dataeval-0.75.0.dist-info/METADATA +136 -0
- dataeval-0.75.0.dist-info/RECORD +67 -0
- dataeval/detectors/ood/base_torch.py +0 -109
- dataeval/metrics/bias/metadata_preprocessing.py +0 -285
- dataeval/utils/gmm.py +0 -26
- dataeval/utils/split_dataset.py +0 -492
- dataeval-0.74.1.dist-info/METADATA +0 -120
- dataeval-0.74.1.dist-info/RECORD +0 -65
- {dataeval-0.74.1.dist-info → dataeval-0.75.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.74.1.dist-info → dataeval-0.75.0.dist-info}/WHEEL +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
__all__ = [
|
3
|
+
__all__ = []
|
4
4
|
|
5
5
|
from collections import Counter, defaultdict
|
6
6
|
from dataclasses import dataclass
|
@@ -86,13 +86,13 @@ def labelstats(
|
|
86
86
|
|
87
87
|
>>> stats = labelstats(labels)
|
88
88
|
>>> stats.label_counts_per_class
|
89
|
-
{'chicken':
|
89
|
+
{'chicken': 12, 'cow': 5, 'horse': 4, 'pig': 7, 'sheep': 4}
|
90
90
|
>>> stats.label_counts_per_image
|
91
|
-
[3,
|
91
|
+
[3, 3, 5, 3, 2, 5, 5, 2, 2, 2]
|
92
92
|
>>> stats.image_counts_per_label
|
93
|
-
{'chicken':
|
93
|
+
{'chicken': 8, 'cow': 4, 'horse': 4, 'pig': 7, 'sheep': 4}
|
94
94
|
>>> (stats.image_count, stats.class_count, stats.label_count)
|
95
|
-
(10, 5,
|
95
|
+
(10, 5, 32)
|
96
96
|
"""
|
97
97
|
label_counts = Counter()
|
98
98
|
image_counts = Counter()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
__all__ = [
|
3
|
+
__all__ = []
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
from typing import Any, Callable, Iterable
|
@@ -106,15 +106,10 @@ def pixelstats(
|
|
106
106
|
--------
|
107
107
|
Calculating the statistics on the images, whose shape is (C, H, W)
|
108
108
|
|
109
|
-
>>> results = pixelstats(
|
109
|
+
>>> results = pixelstats(stats_images)
|
110
110
|
>>> print(results.mean)
|
111
|
-
[0.
|
112
|
-
0.292 0.3242 0.3562 0.3884 0.4204 0.4526 0.4846 0.5166 0.549
|
113
|
-
0.581 0.6133 0.6455 0.6772 0.7095 0.7417 0.774 0.8057 0.838
|
114
|
-
0.87 0.9023 0.934 ]
|
111
|
+
[0.2903 0.2108 0.397 0.596 0.743 ]
|
115
112
|
>>> print(results.entropy)
|
116
|
-
[
|
117
|
-
0.812 0.9883 0.795 0.9243 0.9243 0.795 0.9907 0.8125 1.028 0.8223
|
118
|
-
1.046 0.8247 1.041 0.8203 1.012 0.812 0.9883 0.795 0.9243 0.9243]
|
113
|
+
[4.99 2.371 1.179 2.406 0.668]
|
119
114
|
"""
|
120
115
|
return run_stats(images, bboxes, per_channel, [PixelStatsProcessor])[0]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
__all__ = [
|
3
|
+
__all__ = []
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
from typing import Any, Callable, Iterable
|
@@ -112,15 +112,10 @@ def visualstats(
|
|
112
112
|
--------
|
113
113
|
Calculating the :term:`statistics<Statistics>` on the images, whose shape is (C, H, W)
|
114
114
|
|
115
|
-
>>> results = visualstats(
|
115
|
+
>>> results = visualstats(stats_images)
|
116
116
|
>>> print(results.brightness)
|
117
|
-
[0.
|
118
|
-
0.3015 0.3347 0.3682 0.4014 0.4348 0.468 0.5015 0.5347 0.568
|
119
|
-
0.6016 0.635 0.668 0.701 0.735 0.768 0.8013 0.835 0.868
|
120
|
-
0.9014 0.9346 0.9683 ]
|
117
|
+
[0.1353 0.2085 0.4143 0.6084 0.8135]
|
121
118
|
>>> print(results.contrast)
|
122
|
-
[2.
|
123
|
-
1.258 1.257 1.257 1.256 1.256 1.255 1.255 1.255 1.255 1.254 1.254 1.254
|
124
|
-
1.254 1.254 1.254 1.253 1.253 1.253]
|
119
|
+
[2.04 1.331 1.261 1.279 1.253]
|
125
120
|
"""
|
126
121
|
return run_stats(images, bboxes, per_channel, [VisualStatsProcessor])[0]
|
dataeval/output.py
CHANGED
@@ -65,7 +65,7 @@ R = TypeVar("R", bound=Output)
|
|
65
65
|
|
66
66
|
|
67
67
|
def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
|
68
|
-
"""Decorator to stamp
|
68
|
+
"""Decorator to stamp Output classes with runtime metadata"""
|
69
69
|
|
70
70
|
if fn is None:
|
71
71
|
return partial(set_metadata, state=state) # type: ignore
|
dataeval/utils/__init__.py
CHANGED
@@ -1,18 +1,9 @@
|
|
1
1
|
"""
|
2
2
|
The utility classes and functions are provided by DataEval to assist users
|
3
|
-
in setting up architectures that are guaranteed to work with applicable
|
4
|
-
|
3
|
+
in setting up data and architectures that are guaranteed to work with applicable
|
4
|
+
DataEval metrics.
|
5
5
|
"""
|
6
6
|
|
7
|
-
|
8
|
-
from dataeval.utils.metadata import merge_metadata
|
9
|
-
from dataeval.utils.split_dataset import split_dataset
|
7
|
+
__all__ = ["dataset", "metadata", "torch"]
|
10
8
|
|
11
|
-
|
12
|
-
|
13
|
-
if _IS_TORCH_AVAILABLE:
|
14
|
-
from dataeval.utils import torch
|
15
|
-
|
16
|
-
__all__ += ["torch"]
|
17
|
-
|
18
|
-
del _IS_TORCH_AVAILABLE
|
9
|
+
from dataeval.utils import dataset, metadata, torch
|
@@ -0,0 +1,7 @@
|
|
1
|
+
"""Provides utility functions for interacting with Computer Vision datasets."""
|
2
|
+
|
3
|
+
__all__ = ["datasets", "read_dataset", "SplitDatasetOutput", "split_dataset"]
|
4
|
+
|
5
|
+
from dataeval.utils.dataset import datasets
|
6
|
+
from dataeval.utils.dataset.read import read_dataset
|
7
|
+
from dataeval.utils.dataset.split import SplitDatasetOutput, split_dataset
|
@@ -0,0 +1,63 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
__all__ = []
|
4
|
+
|
5
|
+
from collections import defaultdict
|
6
|
+
from typing import Any
|
7
|
+
|
8
|
+
from torch.utils.data import Dataset
|
9
|
+
|
10
|
+
|
11
|
+
def read_dataset(dataset: Dataset[Any]) -> list[list[Any]]:
    """
    Regroup the items of a dataset into one list per return position

    Every item yielded by iterating ``dataset`` is treated as a tuple of
    fields; an item that is not a tuple is treated as a single field. The
    i-th field of each item is appended to the i-th output list, so a dataset
    yielding ``(image, label)`` pairs produces ``[images, labels]``.

    Parameters
    ----------
    dataset : torch.utils.data.Dataset
        Input dataset

    Returns
    -------
    List[List[Any]]
        All objects in individual lists based on return position from dataset.
        An empty dataset produces an empty list.

    Warning
    -------
    No type checking is done between lists or data inside lists

    See Also
    --------
    torch.utils.data.Dataset

    Examples
    --------
    >>> import numpy as np
    >>> data = np.ones((10, 1, 3, 3))
    >>> labels = np.ones((10,))
    >>> class ICDataset:
    ...     def __init__(self, data, labels):
    ...         self.data = data
    ...         self.labels = labels
    ...
    ...     def __getitem__(self, idx):
    ...         return self.data[idx], self.labels[idx]

    >>> ds = ICDataset(data, labels)

    >>> result = read_dataset(ds)
    >>> len(result)  # images and labels
    2
    >>> np.asarray(result[0]).shape  # images
    (10, 1, 3, 3)
    >>> np.asarray(result[1]).shape  # labels
    (10,)
    """

    # Use the plain ``list`` type as the factory: a subscripted alias such as
    # ``list[Any]`` is a runtime expression (not a deferred annotation) and is
    # only callable on interpreters that support PEP 585.
    columns: dict[int, list[Any]] = defaultdict(list)

    for item in dataset:
        # Only tuples are unpacked into positions; any other object (including
        # a list) is kept whole as the single field at position 0.
        fields = item if isinstance(item, tuple) else (item,)
        for position, value in enumerate(fields):
            columns[position].append(value)

    # Dict insertion order follows first appearance, i.e. positions 0, 1, 2, ...
    return list(columns.values())
|