dataeval 0.64.0__py3-none-any.whl → 0.65.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +2 -2
- dataeval/_internal/detectors/clusterer.py +46 -34
- dataeval/_internal/detectors/drift/base.py +52 -35
- dataeval/_internal/detectors/drift/cvm.py +4 -4
- dataeval/_internal/detectors/drift/ks.py +6 -6
- dataeval/_internal/detectors/drift/mmd.py +35 -16
- dataeval/_internal/detectors/drift/torch.py +6 -5
- dataeval/_internal/detectors/drift/uncertainty.py +7 -7
- dataeval/_internal/detectors/duplicates.py +55 -29
- dataeval/_internal/detectors/linter.py +40 -24
- dataeval/_internal/detectors/ood/base.py +36 -15
- dataeval/_internal/detectors/ood/llr.py +7 -7
- dataeval/_internal/flags.py +42 -21
- dataeval/_internal/interop.py +2 -2
- dataeval/_internal/metrics/balance.py +10 -2
- dataeval/_internal/metrics/ber.py +6 -5
- dataeval/_internal/metrics/coverage.py +15 -8
- dataeval/_internal/metrics/divergence.py +41 -7
- dataeval/_internal/metrics/diversity.py +17 -12
- dataeval/_internal/metrics/parity.py +30 -43
- dataeval/_internal/metrics/stats.py +196 -317
- dataeval/_internal/metrics/uap.py +5 -2
- dataeval/_internal/metrics/utils.py +70 -33
- dataeval/_internal/models/tensorflow/losses.py +3 -3
- dataeval/_internal/models/tensorflow/trainer.py +3 -2
- dataeval/_internal/models/tensorflow/utils.py +4 -3
- dataeval/_internal/output.py +82 -0
- dataeval/_internal/workflows/sufficiency.py +96 -107
- dataeval/flags/__init__.py +2 -2
- dataeval/metrics/__init__.py +3 -3
- {dataeval-0.64.0.dist-info → dataeval-0.65.0.dist-info}/METADATA +1 -1
- dataeval-0.65.0.dist-info/RECORD +60 -0
- dataeval/_internal/metrics/base.py +0 -10
- dataeval-0.64.0.dist-info/RECORD +0 -60
- {dataeval-0.64.0.dist-info → dataeval-0.65.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.64.0.dist-info → dataeval-0.65.0.dist-info}/WHEEL +0 -0
@@ -1,20 +1,24 @@
|
|
1
1
|
import math
|
2
|
-
from
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from typing import Literal
|
3
4
|
|
4
5
|
import numpy as np
|
5
6
|
from numpy.typing import ArrayLike, NDArray
|
6
7
|
from scipy.spatial.distance import pdist, squareform
|
7
8
|
|
8
9
|
from dataeval._internal.interop import to_numpy
|
10
|
+
from dataeval._internal.metrics.utils import flatten
|
11
|
+
from dataeval._internal.output import OutputMetadata, set_metadata
|
9
12
|
|
10
13
|
|
11
|
-
|
14
|
+
@dataclass(frozen=True)
|
15
|
+
class CoverageOutput(OutputMetadata):
|
12
16
|
"""
|
13
17
|
Attributes
|
14
18
|
----------
|
15
|
-
indices :
|
19
|
+
indices : NDArray
|
16
20
|
Array of uncovered indices
|
17
|
-
radii :
|
21
|
+
radii : NDArray
|
18
22
|
Array of critical value radii
|
19
23
|
critical_value : float
|
20
24
|
Radius for coverage
|
@@ -25,6 +29,7 @@ class CoverageOutput(NamedTuple):
|
|
25
29
|
critical_value: float
|
26
30
|
|
27
31
|
|
32
|
+
@set_metadata("dataeval.metrics")
|
28
33
|
def coverage(
|
29
34
|
embeddings: ArrayLike,
|
30
35
|
radius_type: Literal["adaptive", "naive"] = "adaptive",
|
@@ -87,12 +92,14 @@ def coverage(
|
|
87
92
|
embeddings = to_numpy(embeddings)
|
88
93
|
n = len(embeddings)
|
89
94
|
if n <= k:
|
90
|
-
raise ValueError(
|
91
|
-
|
95
|
+
raise ValueError(
|
96
|
+
f"Number of observations n={n} is less than or equal to the specified number of neighbors k={k}."
|
97
|
+
)
|
98
|
+
mat = squareform(pdist(flatten(embeddings))).astype(np.float64)
|
92
99
|
sorted_dists = np.sort(mat, axis=1)
|
93
100
|
crit = sorted_dists[:, k + 1]
|
94
101
|
|
95
|
-
d =
|
102
|
+
d = embeddings.shape[1]
|
96
103
|
if radius_type == "naive":
|
97
104
|
rho = (1 / math.sqrt(math.pi)) * ((2 * k * math.gamma(d / 2 + 1)) / (n)) ** (1 / d)
|
98
105
|
pvals = np.where(crit > rho)[0]
|
@@ -101,5 +108,5 @@ def coverage(
|
|
101
108
|
rho = int(n * percent)
|
102
109
|
pvals = np.argsort(crit)[::-1][:rho]
|
103
110
|
else:
|
104
|
-
raise ValueError("
|
111
|
+
raise ValueError(f"{radius_type} is an invalid radius type. Expected 'adaptive' or 'naive'")
|
105
112
|
return CoverageOutput(pvals, crit, rho)
|
@@ -3,16 +3,19 @@ This module contains the implementation of HP Divergence
|
|
3
3
|
using the Fast Nearest Neighbor and Minimum Spanning Tree algorithms
|
4
4
|
"""
|
5
5
|
|
6
|
-
from
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from typing import Literal
|
7
8
|
|
8
9
|
import numpy as np
|
9
|
-
from numpy.typing import ArrayLike
|
10
|
+
from numpy.typing import ArrayLike, NDArray
|
10
11
|
|
11
12
|
from dataeval._internal.interop import to_numpy
|
12
13
|
from dataeval._internal.metrics.utils import compute_neighbors, get_method, minimum_spanning_tree
|
14
|
+
from dataeval._internal.output import OutputMetadata, set_metadata
|
13
15
|
|
14
16
|
|
15
|
-
|
17
|
+
@dataclass(frozen=True)
|
18
|
+
class DivergenceOutput(OutputMetadata):
|
16
19
|
"""
|
17
20
|
Attributes
|
18
21
|
----------
|
@@ -26,14 +29,44 @@ class DivergenceOutput(NamedTuple):
|
|
26
29
|
errors: int
|
27
30
|
|
28
31
|
|
29
|
-
def divergence_mst(data:
|
32
|
+
def divergence_mst(data: NDArray, labels: NDArray) -> int:
|
33
|
+
"""
|
34
|
+
Calculates the estimated label errors based on the minimum spanning tree
|
35
|
+
|
36
|
+
Parameters
|
37
|
+
----------
|
38
|
+
data : NDArray, shape - (N, ... )
|
39
|
+
Input images to be grouped
|
40
|
+
labels : NDArray
|
41
|
+
Corresponding labels for each data point
|
42
|
+
|
43
|
+
Returns
|
44
|
+
-------
|
45
|
+
int
|
46
|
+
Number of label errors when creating the minimum spanning tree
|
47
|
+
"""
|
30
48
|
mst = minimum_spanning_tree(data).toarray()
|
31
49
|
edgelist = np.transpose(np.nonzero(mst))
|
32
50
|
errors = np.sum(labels[edgelist[:, 0]] != labels[edgelist[:, 1]])
|
33
51
|
return errors
|
34
52
|
|
35
53
|
|
36
|
-
def divergence_fnn(data:
|
54
|
+
def divergence_fnn(data: NDArray, labels: NDArray) -> int:
|
55
|
+
"""
|
56
|
+
Calculates the estimated label errors based on their nearest neighbors
|
57
|
+
|
58
|
+
Parameters
|
59
|
+
----------
|
60
|
+
data : NDArray, shape - (N, ... )
|
61
|
+
Input images to be grouped
|
62
|
+
labels : NDArray
|
63
|
+
Corresponding labels for each data point
|
64
|
+
|
65
|
+
Returns
|
66
|
+
-------
|
67
|
+
int
|
68
|
+
Number of label errors when finding nearest neighbors
|
69
|
+
"""
|
37
70
|
nn_indices = compute_neighbors(data, data)
|
38
71
|
errors = np.sum(np.abs(labels[nn_indices] - labels))
|
39
72
|
return errors
|
@@ -42,6 +75,7 @@ def divergence_fnn(data: np.ndarray, labels: np.ndarray) -> int:
|
|
42
75
|
DIVERGENCE_FN_MAP = {"FNN": divergence_fnn, "MST": divergence_mst}
|
43
76
|
|
44
77
|
|
78
|
+
@set_metadata("dataeval.metrics")
|
45
79
|
def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
|
46
80
|
"""
|
47
81
|
Calculates the divergence and any errors between the datasets
|
@@ -50,10 +84,10 @@ def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST
|
|
50
84
|
----------
|
51
85
|
data_a : ArrayLike, shape - (N, P)
|
52
86
|
A dataset in an ArrayLike format to compare.
|
53
|
-
Function expects the data to have 2 dimensions, N number of observations in a P-
|
87
|
+
Function expects the data to have 2 dimensions, N number of observations in a P-dimensional space.
|
54
88
|
data_b : ArrayLike, shape - (N, P)
|
55
89
|
A dataset in an ArrayLike format to compare.
|
56
|
-
Function expects the data to have 2 dimensions, N number of observations in a P-
|
90
|
+
Function expects the data to have 2 dimensions, N number of observations in a P-dimensional space.
|
57
91
|
method : Literal["MST", "FNN"], default "FNN"
|
58
92
|
Method used to estimate dataset divergence
|
59
93
|
|
@@ -1,12 +1,15 @@
|
|
1
|
-
from
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import Dict, List, Literal, Optional, Sequence
|
2
3
|
|
3
4
|
import numpy as np
|
4
5
|
from numpy.typing import NDArray
|
5
6
|
|
6
7
|
from dataeval._internal.metrics.utils import entropy, get_counts, get_method, get_num_bins, preprocess_metadata
|
8
|
+
from dataeval._internal.output import OutputMetadata, set_metadata
|
7
9
|
|
8
10
|
|
9
|
-
|
11
|
+
@dataclass(frozen=True)
|
12
|
+
class DiversityOutput(OutputMetadata):
|
10
13
|
"""
|
11
14
|
Attributes
|
12
15
|
----------
|
@@ -18,11 +21,11 @@ class DiversityOutput(NamedTuple):
|
|
18
21
|
|
19
22
|
|
20
23
|
def diversity_shannon(
|
21
|
-
data:
|
24
|
+
data: NDArray,
|
22
25
|
names: List[str],
|
23
26
|
is_categorical: List[bool],
|
24
|
-
subset_mask: Optional[np.
|
25
|
-
) ->
|
27
|
+
subset_mask: Optional[NDArray[np.bool_]] = None,
|
28
|
+
) -> NDArray:
|
26
29
|
"""
|
27
30
|
Compute diversity for discrete/categorical variables and, through standard
|
28
31
|
histogram binning, for continuous variables.
|
@@ -34,7 +37,7 @@ def diversity_shannon(
|
|
34
37
|
|
35
38
|
Parameters
|
36
39
|
----------
|
37
|
-
subset_mask: Optional[np.
|
40
|
+
subset_mask: Optional[NDArray[np.bool_]]
|
38
41
|
Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
|
39
42
|
|
40
43
|
Notes
|
@@ -43,7 +46,7 @@ def diversity_shannon(
|
|
43
46
|
|
44
47
|
Returns
|
45
48
|
-------
|
46
|
-
diversity_index:
|
49
|
+
diversity_index: NDArray
|
47
50
|
Diversity index per column of X
|
48
51
|
|
49
52
|
See Also
|
@@ -59,11 +62,11 @@ def diversity_shannon(
|
|
59
62
|
|
60
63
|
|
61
64
|
def diversity_simpson(
|
62
|
-
data:
|
65
|
+
data: NDArray,
|
63
66
|
names: List[str],
|
64
67
|
is_categorical: List[bool],
|
65
|
-
subset_mask: Optional[np.
|
66
|
-
) ->
|
68
|
+
subset_mask: Optional[NDArray[np.bool_]] = None,
|
69
|
+
) -> NDArray:
|
67
70
|
"""
|
68
71
|
Compute diversity for discrete/categorical variables and, through standard
|
69
72
|
histogram binning, for continuous variables.
|
@@ -76,7 +79,7 @@ def diversity_simpson(
|
|
76
79
|
|
77
80
|
Parameters
|
78
81
|
----------
|
79
|
-
subset_mask: Optional[np.
|
82
|
+
subset_mask: Optional[NDArray[np.bool_]]
|
80
83
|
Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
|
81
84
|
|
82
85
|
Notes
|
@@ -90,7 +93,7 @@ def diversity_simpson(
|
|
90
93
|
|
91
94
|
Returns
|
92
95
|
-------
|
93
|
-
|
96
|
+
NDArray
|
94
97
|
Diversity index per column of X
|
95
98
|
|
96
99
|
See Also
|
@@ -116,6 +119,7 @@ def diversity_simpson(
|
|
116
119
|
DIVERSITY_FN_MAP = {"simpson": diversity_simpson, "shannon": diversity_shannon}
|
117
120
|
|
118
121
|
|
122
|
+
@set_metadata("dataeval.metrics")
|
119
123
|
def diversity(
|
120
124
|
class_labels: Sequence[int], metadata: List[Dict], method: Literal["shannon", "simpson"] = "simpson"
|
121
125
|
) -> DiversityOutput:
|
@@ -155,6 +159,7 @@ def diversity(
|
|
155
159
|
return DiversityOutput(diversity_index)
|
156
160
|
|
157
161
|
|
162
|
+
@set_metadata("dataeval.metrics")
|
158
163
|
def diversity_classwise(
|
159
164
|
class_labels: Sequence[int], metadata: List[Dict], method: Literal["shannon", "simpson"] = "simpson"
|
160
165
|
) -> DiversityOutput:
|
@@ -1,48 +1,39 @@
|
|
1
1
|
import warnings
|
2
|
-
from
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from typing import Dict, Generic, Mapping, Optional, Tuple, TypeVar
|
3
4
|
|
4
5
|
import numpy as np
|
5
6
|
from numpy.typing import ArrayLike, NDArray
|
6
7
|
from scipy.stats import chi2_contingency, chisquare
|
7
8
|
|
8
9
|
from dataeval._internal.interop import to_numpy
|
10
|
+
from dataeval._internal.output import OutputMetadata, set_metadata
|
9
11
|
|
10
|
-
|
11
|
-
class ParityOutput(NamedTuple):
|
12
|
-
"""
|
13
|
-
Attributes
|
14
|
-
----------
|
15
|
-
score : np.float64
|
16
|
-
chi-squared value of the test
|
17
|
-
p_value : np.float64
|
18
|
-
p-value of the test
|
19
|
-
"""
|
20
|
-
|
21
|
-
score: np.float64
|
22
|
-
p_value: np.float64
|
12
|
+
TData = TypeVar("TData", np.float64, NDArray[np.float64])
|
23
13
|
|
24
14
|
|
25
|
-
|
15
|
+
@dataclass(frozen=True)
|
16
|
+
class ParityOutput(Generic[TData], OutputMetadata):
|
26
17
|
"""
|
27
18
|
Attributes
|
28
19
|
----------
|
29
|
-
|
30
|
-
chi-squared
|
31
|
-
|
32
|
-
p-
|
20
|
+
score : np.float64 | NDArray[np.float64]
|
21
|
+
chi-squared score(s) of the test
|
22
|
+
p_value : np.float64 | NDArray[np.float64]
|
23
|
+
p-value(s) of the test
|
33
24
|
"""
|
34
25
|
|
35
|
-
score:
|
36
|
-
p_value:
|
26
|
+
score: TData
|
27
|
+
p_value: TData
|
37
28
|
|
38
29
|
|
39
|
-
def digitize_factor_bins(continuous_values:
|
30
|
+
def digitize_factor_bins(continuous_values: NDArray, bins: int, factor_name: str) -> NDArray:
|
40
31
|
"""
|
41
32
|
Digitizes a list of values into a given number of bins.
|
42
33
|
|
43
34
|
Parameters
|
44
35
|
----------
|
45
|
-
continuous_values:
|
36
|
+
continuous_values: NDArray
|
46
37
|
The values to be digitized.
|
47
38
|
bins: int
|
48
39
|
The number of bins for the discrete values that continuous_values will be digitized into.
|
@@ -51,7 +42,7 @@ def digitize_factor_bins(continuous_values: np.ndarray, bins: int, factor_name:
|
|
51
42
|
|
52
43
|
Returns
|
53
44
|
-------
|
54
|
-
|
45
|
+
NDArray
|
55
46
|
The digitized values
|
56
47
|
|
57
48
|
"""
|
@@ -69,14 +60,14 @@ def digitize_factor_bins(continuous_values: np.ndarray, bins: int, factor_name:
|
|
69
60
|
|
70
61
|
|
71
62
|
def format_discretize_factors(
|
72
|
-
data_factors:
|
73
|
-
) -> Tuple[
|
63
|
+
data_factors: Dict[str, NDArray], continuous_factor_bincounts: Dict[str, int]
|
64
|
+
) -> Tuple[Dict[str, NDArray], NDArray]:
|
74
65
|
"""
|
75
66
|
Sets up the internal list of metadata factors.
|
76
67
|
|
77
68
|
Parameters
|
78
69
|
----------
|
79
|
-
data_factors: Dict[str,
|
70
|
+
data_factors: Dict[str, NDArray]
|
80
71
|
The dataset factors, which are per-image attributes including class label and metadata.
|
81
72
|
Each key of dataset_factors is a factor, whose value is the per-image factor values.
|
82
73
|
continuous_factor_bincounts : Dict[str, int]
|
@@ -87,11 +78,10 @@ def format_discretize_factors(
|
|
87
78
|
|
88
79
|
Returns
|
89
80
|
-------
|
90
|
-
Dict[str,
|
91
|
-
Intrinsic per-image metadata information with the formatting that input data_factors uses.
|
92
|
-
|
93
|
-
|
94
|
-
Per-image labels, whose ith element is the label for the ith element of the dataset.
|
81
|
+
Tuple[Dict[str, NDArray], NDArray]
|
82
|
+
- Intrinsic per-image metadata information with the formatting that input data_factors uses.
|
83
|
+
Each key is a metadata factor, whose value is the discrete per-image factor values.
|
84
|
+
- Per-image labels, whose ith element is the label for the ith element of the dataset.
|
95
85
|
"""
|
96
86
|
invalid_keys = set(continuous_factor_bincounts.keys()) - set(data_factors.keys())
|
97
87
|
if invalid_keys:
|
@@ -123,7 +113,7 @@ def format_discretize_factors(
|
|
123
113
|
return metadata_factors, labels
|
124
114
|
|
125
115
|
|
126
|
-
def normalize_expected_dist(expected_dist:
|
116
|
+
def normalize_expected_dist(expected_dist: NDArray, observed_dist: NDArray) -> NDArray:
|
127
117
|
exp_sum = np.sum(expected_dist)
|
128
118
|
obs_sum = np.sum(observed_dist)
|
129
119
|
|
@@ -141,14 +131,14 @@ def normalize_expected_dist(expected_dist: np.ndarray, observed_dist: np.ndarray
|
|
141
131
|
return expected_dist
|
142
132
|
|
143
133
|
|
144
|
-
def validate_dist(label_dist:
|
134
|
+
def validate_dist(label_dist: NDArray, label_name: str):
|
145
135
|
"""
|
146
136
|
Verifies that the given label distribution has labels and checks if
|
147
137
|
any labels have frequencies less than 5.
|
148
138
|
|
149
139
|
Parameters
|
150
140
|
----------
|
151
|
-
label_dist :
|
141
|
+
label_dist : NDArray
|
152
142
|
Array representing label distributions
|
153
143
|
|
154
144
|
Raises
|
@@ -166,18 +156,14 @@ def validate_dist(label_dist: np.ndarray, label_name: str):
|
|
166
156
|
" dataset have frequencies less than 5. This may lead"
|
167
157
|
" to invalid chi-squared evaluation."
|
168
158
|
)
|
169
|
-
warnings.warn(
|
170
|
-
f"Labels {np.where(label_dist<5)[0]} in {label_name}"
|
171
|
-
" dataset have frequencies less than 5. This may lead"
|
172
|
-
" to invalid chi-squared evaluation."
|
173
|
-
)
|
174
159
|
|
175
160
|
|
161
|
+
@set_metadata("dataeval.metrics")
|
176
162
|
def parity(
|
177
163
|
expected_labels: ArrayLike,
|
178
164
|
observed_labels: ArrayLike,
|
179
165
|
num_classes: Optional[int] = None,
|
180
|
-
) -> ParityOutput:
|
166
|
+
) -> ParityOutput[np.float64]:
|
181
167
|
"""
|
182
168
|
Perform a one-way chi-squared test between observation frequencies and expected frequencies that
|
183
169
|
tests the null hypothesis that the observed data has the expected frequencies.
|
@@ -236,10 +222,11 @@ def parity(
|
|
236
222
|
return ParityOutput(cs, p)
|
237
223
|
|
238
224
|
|
225
|
+
@set_metadata("dataeval.metrics")
|
239
226
|
def parity_metadata(
|
240
227
|
data_factors: Mapping[str, ArrayLike],
|
241
228
|
continuous_factor_bincounts: Optional[Dict[str, int]] = None,
|
242
|
-
) ->
|
229
|
+
) -> ParityOutput[NDArray[np.float64]]:
|
243
230
|
"""
|
244
231
|
Evaluates the statistical independence of metadata factors from class labels.
|
245
232
|
This performs a chi-square test, which provides a score and a p-value for
|
@@ -306,4 +293,4 @@ def parity_metadata(
|
|
306
293
|
chi_scores[i] = chi2
|
307
294
|
p_values[i] = p
|
308
295
|
|
309
|
-
return
|
296
|
+
return ParityOutput(chi_scores, p_values)
|