dataeval 0.72.1__py3-none-any.whl → 0.72.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +4 -4
- dataeval/detectors/__init__.py +4 -3
- dataeval/detectors/drift/__init__.py +9 -10
- dataeval/{_internal/detectors → detectors}/drift/base.py +39 -91
- dataeval/{_internal/detectors → detectors}/drift/cvm.py +4 -3
- dataeval/{_internal/detectors → detectors}/drift/ks.py +4 -3
- dataeval/{_internal/detectors → detectors}/drift/mmd.py +23 -25
- dataeval/{_internal/detectors → detectors}/drift/torch.py +13 -11
- dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +7 -5
- dataeval/detectors/drift/updates.py +61 -0
- dataeval/detectors/linters/__init__.py +3 -3
- dataeval/{_internal/detectors → detectors/linters}/clusterer.py +41 -39
- dataeval/{_internal/detectors → detectors/linters}/duplicates.py +19 -9
- dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
- dataeval/{_internal/detectors → detectors/linters}/outliers.py +14 -21
- dataeval/detectors/ood/__init__.py +6 -6
- dataeval/{_internal/detectors → detectors}/ood/ae.py +7 -7
- dataeval/{_internal/detectors → detectors}/ood/aegmm.py +9 -29
- dataeval/{_internal/detectors → detectors}/ood/base.py +24 -18
- dataeval/{_internal/detectors → detectors}/ood/llr.py +24 -20
- dataeval/detectors/ood/metadata_ks_compare.py +99 -0
- dataeval/detectors/ood/metadata_least_likely.py +119 -0
- dataeval/detectors/ood/metadata_ood_mi.py +92 -0
- dataeval/{_internal/detectors → detectors}/ood/vae.py +10 -12
- dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
- dataeval/{_internal/interop.py → interop.py} +12 -7
- dataeval/metrics/__init__.py +1 -1
- dataeval/metrics/bias/__init__.py +4 -4
- dataeval/{_internal/metrics → metrics/bias}/balance.py +75 -9
- dataeval/{_internal/metrics → metrics/bias}/coverage.py +6 -4
- dataeval/{_internal/metrics → metrics/bias}/diversity.py +48 -14
- dataeval/metrics/bias/metadata.py +275 -0
- dataeval/{_internal/metrics → metrics/bias}/parity.py +12 -10
- dataeval/metrics/estimators/__init__.py +3 -3
- dataeval/{_internal/metrics → metrics/estimators}/ber.py +25 -22
- dataeval/{_internal/metrics → metrics/estimators}/divergence.py +11 -12
- dataeval/{_internal/metrics → metrics/estimators}/uap.py +5 -3
- dataeval/metrics/stats/__init__.py +7 -7
- dataeval/{_internal/metrics → metrics}/stats/base.py +59 -35
- dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +18 -14
- dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +18 -16
- dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +9 -7
- dataeval/metrics/stats/hashstats.py +156 -0
- dataeval/{_internal/metrics → metrics}/stats/labelstats.py +5 -3
- dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +9 -8
- dataeval/{_internal/metrics → metrics}/stats/visualstats.py +10 -9
- dataeval/{_internal/output.py → output.py} +26 -6
- dataeval/utils/__init__.py +7 -3
- dataeval/utils/image.py +71 -0
- dataeval/utils/shared.py +151 -0
- dataeval/{_internal → utils}/split_dataset.py +98 -33
- dataeval/utils/tensorflow/__init__.py +7 -6
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +60 -64
- dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +9 -8
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +16 -20
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +17 -17
- dataeval/utils/tensorflow/loss/__init__.py +6 -2
- dataeval/utils/torch/__init__.py +7 -3
- dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
- dataeval/{_internal → utils/torch}/datasets.py +48 -42
- dataeval/utils/torch/models.py +138 -0
- dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +7 -136
- dataeval/{_internal → utils/torch}/utils.py +3 -1
- dataeval/workflows/__init__.py +1 -1
- dataeval/{_internal/workflows → workflows}/sufficiency.py +39 -34
- {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/METADATA +2 -1
- dataeval-0.72.2.dist-info/RECORD +72 -0
- dataeval/_internal/detectors/__init__.py +0 -0
- dataeval/_internal/detectors/drift/__init__.py +0 -0
- dataeval/_internal/detectors/ood/__init__.py +0 -0
- dataeval/_internal/metrics/__init__.py +0 -0
- dataeval/_internal/metrics/stats/hashstats.py +0 -75
- dataeval/_internal/metrics/utils.py +0 -447
- dataeval/_internal/models/__init__.py +0 -0
- dataeval/_internal/models/pytorch/__init__.py +0 -0
- dataeval/_internal/models/pytorch/utils.py +0 -67
- dataeval/_internal/models/tensorflow/__init__.py +0 -0
- dataeval/_internal/workflows/__init__.py +0 -0
- dataeval/detectors/drift/kernels/__init__.py +0 -10
- dataeval/detectors/drift/updates/__init__.py +0 -8
- dataeval/utils/tensorflow/models/__init__.py +0 -9
- dataeval/utils/tensorflow/recon/__init__.py +0 -3
- dataeval/utils/torch/datasets/__init__.py +0 -12
- dataeval/utils/torch/models/__init__.py +0 -11
- dataeval/utils/torch/trainer/__init__.py +0 -7
- dataeval-0.72.1.dist-info/RECORD +0 -81
- /dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
- {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0
@@ -9,6 +9,8 @@ https://arxiv.org/abs/1811.06419
|
|
9
9
|
|
10
10
|
from __future__ import annotations
|
11
11
|
|
12
|
+
__all__ = ["BEROutput", "ber"]
|
13
|
+
|
12
14
|
from dataclasses import dataclass
|
13
15
|
from typing import Literal
|
14
16
|
|
@@ -17,9 +19,9 @@ from numpy.typing import ArrayLike, NDArray
|
|
17
19
|
from scipy.sparse import coo_matrix
|
18
20
|
from scipy.stats import mode
|
19
21
|
|
20
|
-
from dataeval.
|
21
|
-
from dataeval.
|
22
|
-
from dataeval.
|
22
|
+
from dataeval.interop import as_numpy
|
23
|
+
from dataeval.output import OutputMetadata, set_metadata
|
24
|
+
from dataeval.utils.shared import compute_neighbors, get_classes_counts, get_method, minimum_spanning_tree
|
23
25
|
|
24
26
|
|
25
27
|
@dataclass(frozen=True)
|
@@ -39,51 +41,55 @@ class BEROutput(OutputMetadata):
|
|
39
41
|
ber_lower: float
|
40
42
|
|
41
43
|
|
42
|
-
def ber_mst(
|
44
|
+
def ber_mst(images: NDArray[np.float64], labels: NDArray[np.int_], k: int = 1) -> tuple[float, float]:
|
43
45
|
"""Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree
|
44
46
|
|
45
47
|
Parameters
|
46
48
|
----------
|
47
|
-
|
49
|
+
images : NDArray, shape - (N, ... )
|
48
50
|
n_samples containing n_features
|
49
|
-
|
51
|
+
labels : NDArray, shape - (N, 1)
|
50
52
|
Labels corresponding to each sample
|
53
|
+
k : int
|
54
|
+
Unused
|
51
55
|
|
52
56
|
Returns
|
53
57
|
-------
|
54
58
|
Tuple[float, float]
|
55
59
|
The upper and lower bounds of the bayes error rate
|
56
60
|
"""
|
57
|
-
M, N = get_classes_counts(
|
61
|
+
M, N = get_classes_counts(labels)
|
58
62
|
|
59
|
-
tree = coo_matrix(minimum_spanning_tree(
|
60
|
-
matches = np.sum([
|
63
|
+
tree = coo_matrix(minimum_spanning_tree(images))
|
64
|
+
matches = np.sum([labels[tree.row[i]] != labels[tree.col[i]] for i in range(N - 1)])
|
61
65
|
deltas = matches / (2 * N)
|
62
66
|
upper = 2 * deltas
|
63
67
|
lower = ((M - 1) / (M)) * (1 - max(1 - 2 * ((M) / (M - 1)) * deltas, 0) ** 0.5)
|
64
68
|
return upper, lower
|
65
69
|
|
66
70
|
|
67
|
-
def ber_knn(
|
71
|
+
def ber_knn(images: NDArray[np.float64], labels: NDArray[np.int_], k: int) -> tuple[float, float]:
|
68
72
|
"""Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using K-nearest neighbors
|
69
73
|
|
70
74
|
Parameters
|
71
75
|
----------
|
72
|
-
|
76
|
+
images : NDArray, shape - (N, ... )
|
73
77
|
n_samples containing n_features
|
74
|
-
|
78
|
+
labels : NDArray, shape - (N, 1)
|
75
79
|
Labels corresponding to each sample
|
80
|
+
k : int
|
81
|
+
The number of neighbors to find
|
76
82
|
|
77
83
|
Returns
|
78
84
|
-------
|
79
85
|
Tuple[float, float]
|
80
86
|
The upper and lower bounds of the bayes error rate
|
81
87
|
"""
|
82
|
-
M, N = get_classes_counts(
|
83
|
-
nn_indices = compute_neighbors(
|
88
|
+
M, N = get_classes_counts(labels)
|
89
|
+
nn_indices = compute_neighbors(images, images, k=k)
|
84
90
|
nn_indices = np.expand_dims(nn_indices, axis=1) if nn_indices.ndim == 1 else nn_indices
|
85
|
-
modal_class = mode(
|
86
|
-
upper = float(np.count_nonzero(modal_class -
|
91
|
+
modal_class = mode(labels[nn_indices], axis=1, keepdims=True).mode.squeeze()
|
92
|
+
upper = float(np.count_nonzero(modal_class - labels) / N)
|
87
93
|
lower = knn_lowerbound(upper, M, k)
|
88
94
|
return upper, lower
|
89
95
|
|
@@ -108,10 +114,7 @@ def knn_lowerbound(value: float, classes: int, k: int) -> float:
|
|
108
114
|
return ((classes - 1) / classes) * (1 - np.sqrt(max(0, 1 - ((classes / (classes - 1)) * value))))
|
109
115
|
|
110
116
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
@set_metadata("dataeval.metrics")
|
117
|
+
@set_metadata()
|
115
118
|
def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
|
116
119
|
"""
|
117
120
|
An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using FR or KNN test statistic basis
|
@@ -146,8 +149,8 @@ def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN",
|
|
146
149
|
>>> ber(images, labels)
|
147
150
|
BEROutput(ber=0.04, ber_lower=0.020416847668728033)
|
148
151
|
"""
|
149
|
-
ber_fn = get_method(
|
152
|
+
ber_fn = get_method({"KNN": ber_knn, "MST": ber_mst}, method)
|
150
153
|
X = as_numpy(images)
|
151
154
|
y = as_numpy(labels)
|
152
|
-
upper, lower = ber_fn(X, y, k)
|
155
|
+
upper, lower = ber_fn(X, y, k)
|
153
156
|
return BEROutput(upper, lower)
|
@@ -5,15 +5,17 @@ using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
|
|
5
5
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
|
+
__all__ = ["DivergenceOutput", "divergence"]
|
9
|
+
|
8
10
|
from dataclasses import dataclass
|
9
11
|
from typing import Literal
|
10
12
|
|
11
13
|
import numpy as np
|
12
14
|
from numpy.typing import ArrayLike, NDArray
|
13
15
|
|
14
|
-
from dataeval.
|
15
|
-
from dataeval.
|
16
|
-
from dataeval.
|
16
|
+
from dataeval.interop import as_numpy
|
17
|
+
from dataeval.output import OutputMetadata, set_metadata
|
18
|
+
from dataeval.utils.shared import compute_neighbors, get_method, minimum_spanning_tree
|
17
19
|
|
18
20
|
|
19
21
|
@dataclass(frozen=True)
|
@@ -33,7 +35,7 @@ class DivergenceOutput(OutputMetadata):
|
|
33
35
|
errors: int
|
34
36
|
|
35
37
|
|
36
|
-
def divergence_mst(data: NDArray, labels: NDArray) -> int:
|
38
|
+
def divergence_mst(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
|
37
39
|
"""
|
38
40
|
Calculates the estimated label errors based on the minimum spanning tree
|
39
41
|
|
@@ -55,7 +57,7 @@ def divergence_mst(data: NDArray, labels: NDArray) -> int:
|
|
55
57
|
return errors
|
56
58
|
|
57
59
|
|
58
|
-
def divergence_fnn(data: NDArray, labels: NDArray) -> int:
|
60
|
+
def divergence_fnn(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
|
59
61
|
"""
|
60
62
|
Calculates the estimated label errors based on their nearest neighbors
|
61
63
|
|
@@ -76,10 +78,7 @@ def divergence_fnn(data: NDArray, labels: NDArray) -> int:
|
|
76
78
|
return errors
|
77
79
|
|
78
80
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
@set_metadata("dataeval.metrics")
|
81
|
+
@set_metadata()
|
83
82
|
def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
|
84
83
|
"""
|
85
84
|
Calculates the :term:`divergence` and any errors between the datasets
|
@@ -124,16 +123,16 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
|
|
124
123
|
Evaluate the datasets:
|
125
124
|
|
126
125
|
>>> divergence(datasetA, datasetB)
|
127
|
-
DivergenceOutput(divergence=0.28, errors=36
|
126
|
+
DivergenceOutput(divergence=0.28, errors=36)
|
128
127
|
"""
|
129
|
-
div_fn = get_method(
|
128
|
+
div_fn = get_method({"FNN": divergence_fnn, "MST": divergence_mst}, method)
|
130
129
|
a = as_numpy(data_a)
|
131
130
|
b = as_numpy(data_b)
|
132
131
|
N = a.shape[0]
|
133
132
|
M = b.shape[0]
|
134
133
|
|
135
134
|
stacked_data = np.vstack((a, b))
|
136
|
-
labels = np.vstack([np.zeros([N, 1]), np.ones([M, 1])])
|
135
|
+
labels = np.vstack([np.zeros([N, 1], dtype=np.int_), np.ones([M, 1], dtype=np.int_)])
|
137
136
|
|
138
137
|
errors = div_fn(stacked_data, labels)
|
139
138
|
dp = max(0.0, 1 - ((M + N) / (2 * M * N)) * errors)
|
@@ -6,13 +6,15 @@ average precision<Upper-Bound Average Precision (UAP)>` using empirical mean pre
|
|
6
6
|
|
7
7
|
from __future__ import annotations
|
8
8
|
|
9
|
+
__all__ = ["UAPOutput", "uap"]
|
10
|
+
|
9
11
|
from dataclasses import dataclass
|
10
12
|
|
11
13
|
from numpy.typing import ArrayLike
|
12
14
|
from sklearn.metrics import average_precision_score
|
13
15
|
|
14
|
-
from dataeval.
|
15
|
-
from dataeval.
|
16
|
+
from dataeval.interop import as_numpy
|
17
|
+
from dataeval.output import OutputMetadata, set_metadata
|
16
18
|
|
17
19
|
|
18
20
|
@dataclass(frozen=True)
|
@@ -29,7 +31,7 @@ class UAPOutput(OutputMetadata):
|
|
29
31
|
uap: float
|
30
32
|
|
31
33
|
|
32
|
-
@set_metadata(
|
34
|
+
@set_metadata()
|
33
35
|
def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
|
34
36
|
"""
|
35
37
|
FR Test Statistic based estimate of the empirical mean precision for
|
@@ -3,18 +3,18 @@ Statistics metrics calculate a variety of image properties and pixel statistics
|
|
3
3
|
and label statistics against the images and labels of a dataset.
|
4
4
|
"""
|
5
5
|
|
6
|
-
from dataeval.
|
7
|
-
from dataeval.
|
6
|
+
from dataeval.metrics.stats.boxratiostats import boxratiostats
|
7
|
+
from dataeval.metrics.stats.datasetstats import (
|
8
8
|
ChannelStatsOutput,
|
9
9
|
DatasetStatsOutput,
|
10
10
|
channelstats,
|
11
11
|
datasetstats,
|
12
12
|
)
|
13
|
-
from dataeval.
|
14
|
-
from dataeval.
|
15
|
-
from dataeval.
|
16
|
-
from dataeval.
|
17
|
-
from dataeval.
|
13
|
+
from dataeval.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
|
14
|
+
from dataeval.metrics.stats.hashstats import HashStatsOutput, hashstats
|
15
|
+
from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
|
16
|
+
from dataeval.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
|
17
|
+
from dataeval.metrics.stats.visualstats import VisualStatsOutput, visualstats
|
18
18
|
|
19
19
|
__all__ = [
|
20
20
|
"boxratiostats",
|
@@ -1,5 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
__all__ = []
|
4
|
+
|
3
5
|
import re
|
4
6
|
import warnings
|
5
7
|
from dataclasses import dataclass
|
@@ -12,14 +14,17 @@ import numpy as np
|
|
12
14
|
import tqdm
|
13
15
|
from numpy.typing import ArrayLike, NDArray
|
14
16
|
|
15
|
-
from dataeval.
|
16
|
-
from dataeval.
|
17
|
-
from dataeval.
|
17
|
+
from dataeval.interop import to_numpy_iter
|
18
|
+
from dataeval.output import OutputMetadata
|
19
|
+
from dataeval.utils.image import normalize_image_shape, rescale
|
18
20
|
|
19
21
|
DTYPE_REGEX = re.compile(r"NDArray\[np\.(.*?)\]")
|
20
22
|
SOURCE_INDEX = "source_index"
|
21
23
|
BOX_COUNT = "box_count"
|
22
24
|
|
25
|
+
# TODO: Replace with global config
|
26
|
+
DEFAULT_PROCESSES: int | None = None
|
27
|
+
|
23
28
|
OptionalRange = Optional[Union[int, Iterable[int]]]
|
24
29
|
|
25
30
|
|
@@ -29,6 +34,19 @@ def matches(index: int | None, opt_range: OptionalRange) -> bool:
|
|
29
34
|
return index in opt_range if isinstance(opt_range, Iterable) else index == opt_range
|
30
35
|
|
31
36
|
|
37
|
+
def normalize_box_shape(bounding_box: NDArray[Any]) -> NDArray[Any]:
|
38
|
+
"""
|
39
|
+
Normalizes the bounding box shape into (N,4).
|
40
|
+
"""
|
41
|
+
ndim = bounding_box.ndim
|
42
|
+
if ndim == 1:
|
43
|
+
return np.expand_dims(bounding_box, axis=0)
|
44
|
+
elif ndim > 2:
|
45
|
+
raise ValueError("Bounding boxes must have 2 dimensions: (# of boxes in an image, [X,Y,W,H]) -> (N,4)")
|
46
|
+
else:
|
47
|
+
return bounding_box
|
48
|
+
|
49
|
+
|
32
50
|
class SourceIndex(NamedTuple):
|
33
51
|
"""
|
34
52
|
Attributes
|
@@ -101,39 +119,39 @@ TStatsOutput = TypeVar("TStatsOutput", bound=BaseStatsOutput, covariant=True)
|
|
101
119
|
class StatsProcessor(Generic[TStatsOutput]):
|
102
120
|
output_class: type[TStatsOutput]
|
103
121
|
cache_keys: list[str] = []
|
104
|
-
image_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
|
105
|
-
channel_function_map: dict[str, Callable[[StatsProcessor], Any]] = {}
|
122
|
+
image_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
|
123
|
+
channel_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
|
106
124
|
|
107
|
-
def __init__(self, image: NDArray, box: NDArray | None, per_channel: bool):
|
125
|
+
def __init__(self, image: NDArray[Any], box: NDArray[Any] | None, per_channel: bool) -> None:
|
108
126
|
self.raw = image
|
109
|
-
self.width = image.shape[-1]
|
110
|
-
self.height = image.shape[-2]
|
111
|
-
self.box = np.array([0, 0, self.width, self.height]) if box is None else box
|
112
|
-
self.
|
127
|
+
self.width: int = image.shape[-1]
|
128
|
+
self.height: int = image.shape[-2]
|
129
|
+
self.box: NDArray[Any] = np.array([0, 0, self.width, self.height]) if box is None else box
|
130
|
+
self._per_channel = per_channel
|
113
131
|
self._image = None
|
114
132
|
self._shape = None
|
115
133
|
self._scaled = None
|
116
|
-
self.
|
117
|
-
self.
|
118
|
-
self.
|
134
|
+
self._cache = {}
|
135
|
+
self._fn_map = self.channel_function_map if per_channel else self.image_function_map
|
136
|
+
self._is_valid_slice = box is None or bool(
|
119
137
|
box[0] >= 0 and box[1] >= 0 and box[2] <= image.shape[-1] and box[3] <= image.shape[-2]
|
120
138
|
)
|
121
139
|
|
122
|
-
def get(self, fn_key: str) -> NDArray:
|
140
|
+
def get(self, fn_key: str) -> NDArray[Any]:
|
123
141
|
if fn_key in self.cache_keys:
|
124
|
-
if fn_key not in self.
|
125
|
-
self.
|
126
|
-
return self.
|
142
|
+
if fn_key not in self._cache:
|
143
|
+
self._cache[fn_key] = self._fn_map[fn_key](self)
|
144
|
+
return self._cache[fn_key]
|
127
145
|
else:
|
128
|
-
return self.
|
146
|
+
return self._fn_map[fn_key](self)
|
129
147
|
|
130
|
-
def process(self) -> dict:
|
131
|
-
return {k: self.
|
148
|
+
def process(self) -> dict[str, Any]:
|
149
|
+
return {k: self._fn_map[k](self) for k in self._fn_map}
|
132
150
|
|
133
151
|
@property
|
134
|
-
def image(self) -> NDArray:
|
152
|
+
def image(self) -> NDArray[Any]:
|
135
153
|
if self._image is None:
|
136
|
-
if self.
|
154
|
+
if self._is_valid_slice:
|
137
155
|
norm = normalize_image_shape(self.raw)
|
138
156
|
self._image = norm[:, self.box[1] : self.box[3], self.box[0] : self.box[2]]
|
139
157
|
else:
|
@@ -141,16 +159,16 @@ class StatsProcessor(Generic[TStatsOutput]):
|
|
141
159
|
return self._image
|
142
160
|
|
143
161
|
@property
|
144
|
-
def shape(self) -> tuple:
|
162
|
+
def shape(self) -> tuple[int, ...]:
|
145
163
|
if self._shape is None:
|
146
164
|
self._shape = self.image.shape
|
147
165
|
return self._shape
|
148
166
|
|
149
167
|
@property
|
150
|
-
def scaled(self) -> NDArray:
|
168
|
+
def scaled(self) -> NDArray[Any]:
|
151
169
|
if self._scaled is None:
|
152
170
|
self._scaled = rescale(self.image)
|
153
|
-
if self.
|
171
|
+
if self._per_channel:
|
154
172
|
self._scaled = self._scaled.reshape(self.image.shape[0], -1)
|
155
173
|
return self._scaled
|
156
174
|
|
@@ -175,25 +193,25 @@ class StatsProcessorOutput(NamedTuple):
|
|
175
193
|
results: list[dict[str, Any]]
|
176
194
|
source_indices: list[SourceIndex]
|
177
195
|
box_counts: list[int]
|
178
|
-
warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]]
|
196
|
+
warnings_list: list[tuple[int, int, NDArray[np.float64], tuple[int, ...]]]
|
179
197
|
|
180
198
|
|
181
199
|
def process_stats(
|
182
200
|
i: int,
|
183
|
-
image_boxes: tuple[NDArray, NDArray | None],
|
201
|
+
image_boxes: tuple[NDArray[Any], NDArray[Any] | None],
|
184
202
|
per_channel: bool,
|
185
|
-
stats_processor_cls: Iterable[type[StatsProcessor]],
|
203
|
+
stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
|
186
204
|
) -> StatsProcessorOutput:
|
187
205
|
image, boxes = image_boxes
|
188
206
|
results_list: list[dict[str, Any]] = []
|
189
207
|
source_indices: list[SourceIndex] = []
|
190
208
|
box_counts: list[int] = []
|
191
|
-
warnings_list: list[tuple[int, int, NDArray, tuple[int, ...]]] = []
|
209
|
+
warnings_list: list[tuple[int, int, NDArray[np.float64], tuple[int, ...]]] = []
|
192
210
|
nboxes = [None] if boxes is None else normalize_box_shape(boxes)
|
193
211
|
for i_b, box in enumerate(nboxes):
|
194
212
|
i_b = None if box is None else i_b
|
195
213
|
processor_list = [p(image, box, per_channel) for p in stats_processor_cls]
|
196
|
-
if any(not p.
|
214
|
+
if any(not p._is_valid_slice for p in processor_list) and i_b is not None and box is not None:
|
197
215
|
warnings_list.append((i, i_b, box, image.shape))
|
198
216
|
results_list.append({k: v for p in processor_list for k, v in p.process().items()})
|
199
217
|
if per_channel:
|
@@ -204,7 +222,11 @@ def process_stats(
|
|
204
222
|
return StatsProcessorOutput(results_list, source_indices, box_counts, warnings_list)
|
205
223
|
|
206
224
|
|
207
|
-
def process_stats_unpack(
|
225
|
+
def process_stats_unpack(
|
226
|
+
args: tuple[int, tuple[NDArray[Any], NDArray[Any] | None]],
|
227
|
+
per_channel: bool,
|
228
|
+
stats_processor_cls: Iterable[type[StatsProcessor[TStatsOutput]]],
|
229
|
+
) -> StatsProcessorOutput:
|
208
230
|
return process_stats(*args, per_channel=per_channel, stats_processor_cls=stats_processor_cls)
|
209
231
|
|
210
232
|
|
@@ -234,6 +256,8 @@ def run_stats(
|
|
234
256
|
A flag which determines if the stats should be evaluated on a per-channel basis or not.
|
235
257
|
stats_processor_cls : Iterable[type[StatsProcessor]]
|
236
258
|
An iterable of stats processor classes that calculate stats and return output classes.
|
259
|
+
processes : int | None, default None
|
260
|
+
Number of processes to use, defaults to None which uses all available CPU cores.
|
237
261
|
|
238
262
|
Returns
|
239
263
|
-------
|
@@ -251,9 +275,9 @@ def run_stats(
|
|
251
275
|
- For statistics like histograms and entropy, intermediate results may
|
252
276
|
be reused to avoid redundant computation.
|
253
277
|
"""
|
254
|
-
results_list: list[dict[str, NDArray]] = []
|
255
|
-
source_index = []
|
256
|
-
box_count = []
|
278
|
+
results_list: list[dict[str, NDArray[np.float64]]] = []
|
279
|
+
source_index: list[SourceIndex] = []
|
280
|
+
box_count: list[int] = []
|
257
281
|
bbox_iter = repeat(None) if bboxes is None else to_numpy_iter(bboxes)
|
258
282
|
|
259
283
|
warning_list = []
|
@@ -261,7 +285,7 @@ def run_stats(
|
|
261
285
|
stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
|
262
286
|
|
263
287
|
# TODO: Introduce global controls for CPU job parallelism and GPU configurations
|
264
|
-
with Pool(
|
288
|
+
with Pool(processes=DEFAULT_PROCESSES) as p:
|
265
289
|
for r in tqdm.tqdm(
|
266
290
|
p.imap(
|
267
291
|
partial(process_stats_unpack, per_channel=per_channel, stats_processor_cls=stats_processor_cls),
|
@@ -1,14 +1,16 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
__all__ = ["boxratiostats"]
|
4
|
+
|
3
5
|
import copy
|
4
|
-
from typing import Callable, Generic, TypeVar, cast
|
6
|
+
from typing import Any, Callable, Generic, TypeVar, cast
|
5
7
|
|
6
8
|
import numpy as np
|
7
9
|
from numpy.typing import NDArray
|
8
10
|
|
9
|
-
from dataeval.
|
10
|
-
from dataeval.
|
11
|
-
from dataeval.
|
11
|
+
from dataeval.metrics.stats.base import BOX_COUNT, SOURCE_INDEX, BaseStatsOutput
|
12
|
+
from dataeval.metrics.stats.dimensionstats import DimensionStatsOutput
|
13
|
+
from dataeval.output import set_metadata
|
12
14
|
|
13
15
|
TStatOutput = TypeVar("TStatOutput", bound=BaseStatsOutput, contravariant=True)
|
14
16
|
ArraySlice = tuple[int, int]
|
@@ -39,14 +41,16 @@ class BoxImageStatsOutputSlice(Generic[TStatOutput]):
|
|
39
41
|
self.img = self.StatSlicer(img_stats, img_slice)
|
40
42
|
|
41
43
|
|
42
|
-
RATIOSTATS_OVERRIDE_MAP: dict[type, dict[str, Callable[
|
43
|
-
DimensionStatsOutput:
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
44
|
+
RATIOSTATS_OVERRIDE_MAP: dict[type, dict[str, Callable[..., NDArray[Any]]]] = {
|
45
|
+
DimensionStatsOutput: dict[str, Callable[[BoxImageStatsOutputSlice[DimensionStatsOutput]], NDArray[Any]]](
|
46
|
+
{
|
47
|
+
"left": lambda x: x.box["left"] / x.img["width"],
|
48
|
+
"top": lambda x: x.box["top"] / x.img["height"],
|
49
|
+
"channels": lambda x: x.box["channels"],
|
50
|
+
"depth": lambda x: x.box["depth"],
|
51
|
+
"distance": lambda x: x.box["distance"],
|
52
|
+
}
|
53
|
+
)
|
50
54
|
}
|
51
55
|
|
52
56
|
|
@@ -60,7 +64,7 @@ def get_index_map(stats: BaseStatsOutput) -> list[int]:
|
|
60
64
|
return index_map
|
61
65
|
|
62
66
|
|
63
|
-
def calculate_ratios(key: str, box_stats: BaseStatsOutput, img_stats: BaseStatsOutput) -> NDArray:
|
67
|
+
def calculate_ratios(key: str, box_stats: BaseStatsOutput, img_stats: BaseStatsOutput) -> NDArray[np.float64]:
|
64
68
|
if not hasattr(box_stats, key) or not hasattr(img_stats, key):
|
65
69
|
raise KeyError("Invalid key for provided stats output object.")
|
66
70
|
|
@@ -92,7 +96,7 @@ def calculate_ratios(key: str, box_stats: BaseStatsOutput, img_stats: BaseStatsO
|
|
92
96
|
return out_stats
|
93
97
|
|
94
98
|
|
95
|
-
@set_metadata(
|
99
|
+
@set_metadata()
|
96
100
|
def boxratiostats(
|
97
101
|
boxstats: TStatOutput,
|
98
102
|
imgstats: TStatOutput,
|
@@ -1,19 +1,21 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
__all__ = ["DatasetStatsOutput", "ChannelStatsOutput", "datasetstats", "channelstats"]
|
4
|
+
|
3
5
|
from dataclasses import dataclass
|
4
6
|
from typing import Any, Iterable
|
5
7
|
|
6
8
|
from numpy.typing import ArrayLike
|
7
9
|
|
8
|
-
from dataeval.
|
9
|
-
from dataeval.
|
10
|
+
from dataeval.metrics.stats.base import BaseStatsOutput, run_stats
|
11
|
+
from dataeval.metrics.stats.dimensionstats import (
|
10
12
|
DimensionStatsOutput,
|
11
13
|
DimensionStatsProcessor,
|
12
14
|
)
|
13
|
-
from dataeval.
|
14
|
-
from dataeval.
|
15
|
-
from dataeval.
|
16
|
-
from dataeval.
|
15
|
+
from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
|
16
|
+
from dataeval.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
|
17
|
+
from dataeval.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
|
18
|
+
from dataeval.output import OutputMetadata, set_metadata
|
17
19
|
|
18
20
|
|
19
21
|
@dataclass(frozen=True)
|
@@ -39,14 +41,14 @@ class DatasetStatsOutput(OutputMetadata):
|
|
39
41
|
visualstats: VisualStatsOutput
|
40
42
|
labelstats: LabelStatsOutput | None = None
|
41
43
|
|
42
|
-
def
|
44
|
+
def _outputs(self) -> list[OutputMetadata]:
|
43
45
|
return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
|
44
46
|
|
45
47
|
def dict(self) -> dict[str, Any]:
|
46
|
-
return {k: v for o in self.
|
48
|
+
return {k: v for o in self._outputs() for k, v in o.dict().items()}
|
47
49
|
|
48
|
-
def __post_init__(self):
|
49
|
-
lengths = [len(s) for s in self.
|
50
|
+
def __post_init__(self) -> None:
|
51
|
+
lengths = [len(s) for s in self._outputs() if isinstance(s, BaseStatsOutput)]
|
50
52
|
if not all(length == lengths[0] for length in lengths):
|
51
53
|
raise ValueError("All StatsOutput classes must contain the same number of image sources.")
|
52
54
|
|
@@ -70,19 +72,19 @@ class ChannelStatsOutput(OutputMetadata):
|
|
70
72
|
pixelstats: PixelStatsOutput
|
71
73
|
visualstats: VisualStatsOutput
|
72
74
|
|
73
|
-
def
|
74
|
-
return
|
75
|
+
def _outputs(self) -> tuple[PixelStatsOutput, VisualStatsOutput]:
|
76
|
+
return (self.pixelstats, self.visualstats)
|
75
77
|
|
76
78
|
def dict(self) -> dict[str, Any]:
|
77
79
|
return {**self.pixelstats.dict(), **self.visualstats.dict()}
|
78
80
|
|
79
|
-
def __post_init__(self):
|
80
|
-
lengths = [len(s) for s in self.
|
81
|
+
def __post_init__(self) -> None:
|
82
|
+
lengths = [len(s) for s in self._outputs()]
|
81
83
|
if not all(length == lengths[0] for length in lengths):
|
82
84
|
raise ValueError("All StatsOutput classes must contain the same number of image sources.")
|
83
85
|
|
84
86
|
|
85
|
-
@set_metadata(
|
87
|
+
@set_metadata()
|
86
88
|
def datasetstats(
|
87
89
|
images: Iterable[ArrayLike],
|
88
90
|
bboxes: Iterable[ArrayLike] | None = None,
|
@@ -129,7 +131,7 @@ def datasetstats(
|
|
129
131
|
return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None) # type: ignore
|
130
132
|
|
131
133
|
|
132
|
-
@set_metadata(
|
134
|
+
@set_metadata()
|
133
135
|
def channelstats(
|
134
136
|
images: Iterable[ArrayLike],
|
135
137
|
bboxes: Iterable[ArrayLike] | None = None,
|
@@ -1,14 +1,16 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
__all__ = ["DimensionStatsOutput", "dimensionstats"]
|
4
|
+
|
3
5
|
from dataclasses import dataclass
|
4
|
-
from typing import Iterable
|
6
|
+
from typing import Any, Callable, Iterable
|
5
7
|
|
6
8
|
import numpy as np
|
7
9
|
from numpy.typing import ArrayLike, NDArray
|
8
10
|
|
9
|
-
from dataeval.
|
10
|
-
from dataeval.
|
11
|
-
from dataeval.
|
11
|
+
from dataeval.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
|
12
|
+
from dataeval.output import set_metadata
|
13
|
+
from dataeval.utils.image import get_bitdepth
|
12
14
|
|
13
15
|
|
14
16
|
@dataclass(frozen=True)
|
@@ -53,8 +55,8 @@ class DimensionStatsOutput(BaseStatsOutput):
|
|
53
55
|
|
54
56
|
|
55
57
|
class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
|
56
|
-
output_class = DimensionStatsOutput
|
57
|
-
image_function_map = {
|
58
|
+
output_class: type = DimensionStatsOutput
|
59
|
+
image_function_map: dict[str, Callable[[StatsProcessor[DimensionStatsOutput]], Any]] = {
|
58
60
|
"left": lambda x: x.box[0],
|
59
61
|
"top": lambda x: x.box[1],
|
60
62
|
"width": lambda x: x.box[2] - x.box[0],
|
@@ -71,7 +73,7 @@ class DimensionStatsProcessor(StatsProcessor[DimensionStatsOutput]):
|
|
71
73
|
}
|
72
74
|
|
73
75
|
|
74
|
-
@set_metadata(
|
76
|
+
@set_metadata()
|
75
77
|
def dimensionstats(
|
76
78
|
images: Iterable[ArrayLike],
|
77
79
|
bboxes: Iterable[ArrayLike] | None = None,
|