dataeval 0.66.0__py3-none-any.whl → 0.68.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_internal/detectors/duplicates.py +50 -21
- dataeval/_internal/detectors/merged_stats.py +78 -0
- dataeval/_internal/detectors/outliers.py +45 -17
- dataeval/_internal/metrics/balance.py +42 -84
- dataeval/_internal/metrics/coverage.py +11 -15
- dataeval/_internal/metrics/diversity.py +45 -73
- dataeval/_internal/metrics/stats.py +10 -0
- dataeval/_internal/output.py +1 -1
- dataeval/metrics/bias/__init__.py +2 -4
- {dataeval-0.66.0.dist-info → dataeval-0.68.0.dist-info}/METADATA +1 -1
- {dataeval-0.66.0.dist-info → dataeval-0.68.0.dist-info}/RECORD +14 -13
- {dataeval-0.66.0.dist-info → dataeval-0.68.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.66.0.dist-info → dataeval-0.68.0.dist-info}/WHEEL +0 -0
dataeval/__init__.py
CHANGED
(hunk not included in this extract)
dataeval/_internal/detectors/duplicates.py
CHANGED
@@ -1,28 +1,37 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from dataclasses import dataclass
|
4
|
-
from typing import Iterable
|
4
|
+
from typing import Generic, Iterable, Sequence, TypeVar, cast
|
5
5
|
|
6
6
|
from numpy.typing import ArrayLike
|
7
7
|
|
8
|
+
from dataeval._internal.detectors.merged_stats import combine_stats, get_dataset_step_from_idx
|
8
9
|
from dataeval._internal.flags import ImageStat
|
9
10
|
from dataeval._internal.metrics.stats import StatsOutput, imagestats
|
10
11
|
from dataeval._internal.output import OutputMetadata, set_metadata
|
11
12
|
|
13
|
+
DuplicateGroup = list[int]
|
14
|
+
DatasetDuplicateGroupMap = dict[int, DuplicateGroup]
|
15
|
+
TIndexCollection = TypeVar("TIndexCollection", DuplicateGroup, DatasetDuplicateGroupMap)
|
16
|
+
|
12
17
|
|
13
18
|
@dataclass(frozen=True)
|
14
|
-
class DuplicatesOutput(OutputMetadata):
|
19
|
+
class DuplicatesOutput(Generic[TIndexCollection], OutputMetadata):
|
15
20
|
"""
|
16
21
|
Attributes
|
17
22
|
----------
|
18
|
-
exact :
|
23
|
+
exact : list[list[int] | dict[int, list[int]]]
|
19
24
|
Indices of images that are exact matches
|
20
|
-
near:
|
25
|
+
near: list[list[int] | dict[int, list[int]]]
|
21
26
|
Indices of images that are near matches
|
27
|
+
|
28
|
+
- For a single dataset, indices are returned as a list of index groups.
|
29
|
+
- For multiple datasets, indices are returned as dictionaries where the key is the
|
30
|
+
index of the dataset, and the value is the list index groups from that dataset.
|
22
31
|
"""
|
23
32
|
|
24
|
-
exact: list[
|
25
|
-
near: list[
|
33
|
+
exact: list[TIndexCollection]
|
34
|
+
near: list[TIndexCollection]
|
26
35
|
|
27
36
|
|
28
37
|
class Duplicates:
|
@@ -54,18 +63,18 @@ class Duplicates:
|
|
54
63
|
def _get_duplicates(self) -> dict[str, list[list[int]]]:
|
55
64
|
stats_dict = self.stats.dict()
|
56
65
|
if "xxhash" in stats_dict:
|
57
|
-
|
66
|
+
exact_dict: dict[int, list] = {}
|
58
67
|
for i, value in enumerate(stats_dict["xxhash"]):
|
59
|
-
|
60
|
-
exact = [v for v in
|
68
|
+
exact_dict.setdefault(value, []).append(i)
|
69
|
+
exact = [sorted(v) for v in exact_dict.values() if len(v) > 1]
|
61
70
|
else:
|
62
71
|
exact = []
|
63
72
|
|
64
73
|
if "pchash" in stats_dict and not self.only_exact:
|
65
|
-
|
74
|
+
near_dict: dict[int, list] = {}
|
66
75
|
for i, value in enumerate(stats_dict["pchash"]):
|
67
|
-
|
68
|
-
near = [v for v in
|
76
|
+
near_dict.setdefault(value, []).append(i)
|
77
|
+
near = [sorted(v) for v in near_dict.values() if len(v) > 1 and not any(set(v).issubset(x) for x in exact)]
|
69
78
|
else:
|
70
79
|
near = []
|
71
80
|
|
@@ -75,14 +84,14 @@ class Duplicates:
|
|
75
84
|
}
|
76
85
|
|
77
86
|
@set_metadata("dataeval.detectors", ["only_exact"])
|
78
|
-
def evaluate(self, data: Iterable[ArrayLike] | StatsOutput) -> DuplicatesOutput:
|
87
|
+
def evaluate(self, data: Iterable[ArrayLike] | StatsOutput | Sequence[StatsOutput]) -> DuplicatesOutput:
|
79
88
|
"""
|
80
89
|
Returns duplicate image indices for both exact matches and near matches
|
81
90
|
|
82
91
|
Parameters
|
83
92
|
----------
|
84
|
-
data : Iterable[ArrayLike], shape - (N, C, H, W) | StatsOutput
|
85
|
-
A dataset of images in an ArrayLike format or the output from an imagestats metric analysis
|
93
|
+
data : Iterable[ArrayLike], shape - (N, C, H, W) | StatsOutput | Sequence[StatsOutput]
|
94
|
+
A dataset of images in an ArrayLike format or the output(s) from an imagestats metric analysis
|
86
95
|
|
87
96
|
Returns
|
88
97
|
-------
|
@@ -98,12 +107,32 @@ class Duplicates:
|
|
98
107
|
>>> dups.evaluate(images)
|
99
108
|
DuplicatesOutput(exact=[[3, 20], [16, 37]], near=[[3, 20, 22], [12, 18], [13, 36], [14, 31], [17, 27], [19, 38, 47]])
|
100
109
|
""" # noqa: E501
|
101
|
-
|
102
|
-
|
110
|
+
|
111
|
+
stats, dataset_steps = combine_stats(data)
|
112
|
+
|
113
|
+
if isinstance(stats, StatsOutput):
|
114
|
+
if not stats.xxhash:
|
103
115
|
raise ValueError("StatsOutput must include xxhash information of the images.")
|
104
|
-
if not self.only_exact and not
|
116
|
+
if not self.only_exact and not stats.pchash:
|
105
117
|
raise ValueError("StatsOutput must include pchash information of the images for near matches.")
|
106
|
-
self.stats =
|
118
|
+
self.stats = stats
|
107
119
|
else:
|
108
|
-
|
109
|
-
|
120
|
+
flags = ImageStat.XXHASH | (ImageStat(0) if self.only_exact else ImageStat.PCHASH)
|
121
|
+
self.stats = imagestats(cast(Iterable[ArrayLike], data), flags)
|
122
|
+
|
123
|
+
duplicates = self._get_duplicates()
|
124
|
+
|
125
|
+
# split up results from combined dataset into individual dataset buckets
|
126
|
+
if dataset_steps:
|
127
|
+
dup_list: list[list[int]]
|
128
|
+
for dup_type, dup_list in duplicates.items():
|
129
|
+
dup_list_dict = []
|
130
|
+
for idxs in dup_list:
|
131
|
+
dup_dict = {}
|
132
|
+
for idx in idxs:
|
133
|
+
k, v = get_dataset_step_from_idx(idx, dataset_steps)
|
134
|
+
dup_dict.setdefault(k, []).append(v)
|
135
|
+
dup_list_dict.append(dup_dict)
|
136
|
+
duplicates[dup_type] = dup_list_dict
|
137
|
+
|
138
|
+
return DuplicatesOutput(**duplicates)
|
dataeval/_internal/detectors/merged_stats.py
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Sequence, cast
|
4
|
+
from warnings import warn
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
from dataeval._internal.metrics.stats import StatsOutput
|
9
|
+
from dataeval._internal.output import populate_defaults
|
10
|
+
|
11
|
+
|
12
|
+
def add_stats(a: StatsOutput, b: StatsOutput) -> StatsOutput:
|
13
|
+
if not isinstance(a, StatsOutput) or not isinstance(b, StatsOutput):
|
14
|
+
raise TypeError(f"Cannot add object of type {type(a)} and type {type(b)}.")
|
15
|
+
|
16
|
+
a_dict = a.dict()
|
17
|
+
b_dict = b.dict()
|
18
|
+
a_keys = set(a_dict)
|
19
|
+
b_keys = set(b_dict)
|
20
|
+
|
21
|
+
missing_keys = a_keys - b_keys
|
22
|
+
if missing_keys:
|
23
|
+
raise ValueError(f"Required keys are missing: {missing_keys}.")
|
24
|
+
|
25
|
+
extra_keys = b_keys - a_keys
|
26
|
+
if extra_keys:
|
27
|
+
warn(f"Extraneous keys will be dropped: {extra_keys}.")
|
28
|
+
|
29
|
+
# perform add of multi-channel stats
|
30
|
+
if "ch_idx_map" in a_dict:
|
31
|
+
for k, v in a_dict.items():
|
32
|
+
if k == "ch_idx_map":
|
33
|
+
offset = sum([len(idxs) for idxs in v.values()])
|
34
|
+
for ch_k, ch_v in b_dict[k].items():
|
35
|
+
if ch_k not in v:
|
36
|
+
v[ch_k] = []
|
37
|
+
a_dict[k][ch_k].extend([idx + offset for idx in ch_v])
|
38
|
+
else:
|
39
|
+
for ch_k in b_dict[k]:
|
40
|
+
if ch_k not in v:
|
41
|
+
v[ch_k] = b_dict[k][ch_k]
|
42
|
+
else:
|
43
|
+
v[ch_k] = np.concatenate((v[ch_k], b_dict[k][ch_k]), axis=1)
|
44
|
+
else:
|
45
|
+
for k in a_dict:
|
46
|
+
if isinstance(a_dict[k], list):
|
47
|
+
a_dict[k].extend(b_dict[k])
|
48
|
+
else:
|
49
|
+
a_dict[k] = np.concatenate((a_dict[k], b_dict[k]))
|
50
|
+
|
51
|
+
return StatsOutput(**populate_defaults(a_dict, StatsOutput))
|
52
|
+
|
53
|
+
|
54
|
+
def combine_stats(stats) -> tuple[StatsOutput | None, list[int]]:
|
55
|
+
dataset_steps = []
|
56
|
+
|
57
|
+
if isinstance(stats, StatsOutput):
|
58
|
+
return stats, dataset_steps
|
59
|
+
|
60
|
+
output = None
|
61
|
+
if isinstance(stats, Sequence) and isinstance(stats[0], StatsOutput):
|
62
|
+
stats = cast(Sequence[StatsOutput], stats)
|
63
|
+
cur_len = 0
|
64
|
+
for s in stats:
|
65
|
+
output = s if output is None else add_stats(output, s)
|
66
|
+
cur_len += len(s)
|
67
|
+
dataset_steps.append(cur_len)
|
68
|
+
|
69
|
+
return output, dataset_steps
|
70
|
+
|
71
|
+
|
72
|
+
def get_dataset_step_from_idx(idx: int, dataset_steps: list[int]) -> tuple[int, int]:
|
73
|
+
last_step = 0
|
74
|
+
for i, step in enumerate(dataset_steps):
|
75
|
+
if idx < step:
|
76
|
+
return i, idx - last_step
|
77
|
+
last_step = step
|
78
|
+
return -1, idx
|
dataeval/_internal/detectors/outliers.py
CHANGED
@@ -1,27 +1,39 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from dataclasses import dataclass
|
4
|
-
from typing import Iterable, Literal
|
4
|
+
from typing import Iterable, Literal, Sequence, cast
|
5
|
+
from warnings import warn
|
5
6
|
|
6
7
|
import numpy as np
|
7
8
|
from numpy.typing import ArrayLike, NDArray
|
8
9
|
|
10
|
+
from dataeval._internal.detectors.merged_stats import combine_stats, get_dataset_step_from_idx
|
9
11
|
from dataeval._internal.flags import ImageStat, to_distinct, verify_supported
|
10
12
|
from dataeval._internal.metrics.stats import StatsOutput, imagestats
|
11
13
|
from dataeval._internal.output import OutputMetadata, set_metadata
|
12
14
|
|
15
|
+
IndexIssueMap = dict[int, dict[str, float]]
|
16
|
+
DatasetIndexIssueMap = dict[int, IndexIssueMap]
|
17
|
+
"""
|
18
|
+
Mapping of image indices to a dictionary of issue types and calculated values
|
19
|
+
"""
|
20
|
+
|
13
21
|
|
14
22
|
@dataclass(frozen=True)
|
15
23
|
class OutliersOutput(OutputMetadata):
|
16
24
|
"""
|
17
25
|
Attributes
|
18
26
|
----------
|
19
|
-
issues :
|
20
|
-
|
21
|
-
|
27
|
+
issues : dict[int, dict[str, float]] | dict[int, dict[int, dict[str, float]]]
|
28
|
+
Indices of image outliers with their associated issue type and calculated values.
|
29
|
+
|
30
|
+
- For a single dataset, a dictionary containing the indices of outliers and
|
31
|
+
a dictionary showing the issues and calculated values for the given index.
|
32
|
+
- For multiple datasets, a map of dataset indices to the indices of outliers
|
33
|
+
and their associated issues and calculated values.
|
22
34
|
"""
|
23
35
|
|
24
|
-
issues:
|
36
|
+
issues: IndexIssueMap | DatasetIndexIssueMap
|
25
37
|
|
26
38
|
|
27
39
|
def _get_outlier_mask(
|
@@ -64,7 +76,7 @@ class Outliers:
|
|
64
76
|
|
65
77
|
Attributes
|
66
78
|
----------
|
67
|
-
stats :
|
79
|
+
stats : dict[str, Any]
|
68
80
|
Dictionary to hold the value of each metric for each image
|
69
81
|
|
70
82
|
See Also
|
@@ -135,14 +147,14 @@ class Outliers:
|
|
135
147
|
return dict(sorted(flagged_images.items()))
|
136
148
|
|
137
149
|
@set_metadata("dataeval.detectors", ["flags", "outlier_method", "outlier_threshold"])
|
138
|
-
def evaluate(self, data: Iterable[ArrayLike] | StatsOutput) -> OutliersOutput:
|
150
|
+
def evaluate(self, data: Iterable[ArrayLike] | StatsOutput | Sequence[StatsOutput]) -> OutliersOutput:
|
139
151
|
"""
|
140
152
|
Returns indices of outliers with the issues identified for each
|
141
153
|
|
142
154
|
Parameters
|
143
155
|
----------
|
144
|
-
data : Iterable[ArrayLike], shape - (C, H, W) | StatsOutput
|
145
|
-
A dataset of images in an ArrayLike format or the output from an imagestats metric analysis
|
156
|
+
data : Iterable[ArrayLike], shape - (C, H, W) | StatsOutput | Sequence[StatsOutput]
|
157
|
+
A dataset of images in an ArrayLike format or the output(s) from an imagestats metric analysis
|
146
158
|
|
147
159
|
Returns
|
148
160
|
-------
|
@@ -157,13 +169,29 @@ class Outliers:
|
|
157
169
|
>>> outliers.evaluate(images)
|
158
170
|
OutliersOutput(issues={18: {'brightness': 0.78}, 25: {'brightness': 0.98}})
|
159
171
|
"""
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
172
|
+
stats, dataset_steps = combine_stats(data)
|
173
|
+
|
174
|
+
if isinstance(stats, StatsOutput):
|
175
|
+
selected_flags = set(to_distinct(self.flags).values())
|
176
|
+
provided = set(stats.dict())
|
177
|
+
missing = selected_flags - provided
|
164
178
|
if missing:
|
165
|
-
|
166
|
-
|
179
|
+
warn(
|
180
|
+
f"StatsOutput provided {provided} and is missing {missing} \
|
181
|
+
from the selected stat flags: {selected_flags}."
|
182
|
+
)
|
183
|
+
self.stats = stats
|
167
184
|
else:
|
168
|
-
self.stats = imagestats(data, self.flags)
|
169
|
-
|
185
|
+
self.stats = imagestats(cast(Iterable[ArrayLike], data), self.flags)
|
186
|
+
|
187
|
+
outliers = self._get_outliers()
|
188
|
+
|
189
|
+
# split up results from combined dataset into individual dataset buckets
|
190
|
+
if dataset_steps:
|
191
|
+
out_dict = {}
|
192
|
+
for idx, issue in outliers.items():
|
193
|
+
k, v = get_dataset_step_from_idx(idx, dataset_steps)
|
194
|
+
out_dict.setdefault(k, {})[v] = issue
|
195
|
+
outliers = out_dict
|
196
|
+
|
197
|
+
return OutliersOutput(outliers)
|
dataeval/_internal/metrics/balance.py
CHANGED
@@ -17,11 +17,17 @@ class BalanceOutput(OutputMetadata):
|
|
17
17
|
"""
|
18
18
|
Attributes
|
19
19
|
----------
|
20
|
-
|
20
|
+
balance : NDArray[np.float64]
|
21
21
|
Estimate of mutual information between metadata factors and class label
|
22
|
+
factors : NDArray[np.float64]
|
23
|
+
Estimate of inter/intra-factor mutual information
|
24
|
+
classwise : NDArray[np.float64]
|
25
|
+
Estimate of mutual information between metadata factors and individual class labels
|
22
26
|
"""
|
23
27
|
|
24
|
-
|
28
|
+
balance: NDArray[np.float64]
|
29
|
+
factors: NDArray[np.float64]
|
30
|
+
classwise: NDArray[np.float64]
|
25
31
|
|
26
32
|
|
27
33
|
def validate_num_neighbors(num_neighbors: int) -> int:
|
@@ -77,17 +83,22 @@ def balance(class_labels: Sequence[int], metadata: list[dict], num_neighbors: in
|
|
77
83
|
-------
|
78
84
|
Return balance (mutual information) of factors with class_labels
|
79
85
|
|
80
|
-
>>> balance(class_labels, metadata)
|
81
|
-
|
86
|
+
>>> bal = balance(class_labels, metadata)
|
87
|
+
>>> bal.balance
|
88
|
+
array([0.99999822, 0.13363788, 0.04505382, 0.02994455])
|
82
89
|
|
83
|
-
Return balance (mutual information)
|
84
|
-
and each other
|
90
|
+
Return intra/interfactor balance (mutual information)
|
85
91
|
|
86
|
-
>>>
|
87
|
-
array([[0.
|
88
|
-
[0.
|
89
|
-
[0.
|
90
|
-
|
92
|
+
>>> bal.factors
|
93
|
+
array([[0.99999843, 0.03510422, 0.09725766],
|
94
|
+
[0.03510422, 0.08433558, 0.15621459],
|
95
|
+
[0.09725766, 0.15621459, 0.99999856]])
|
96
|
+
|
97
|
+
Return classwise balance (mutual information) of factors with individual class_labels
|
98
|
+
|
99
|
+
>>> bal.classwise
|
100
|
+
array([[0.99999822, 0.13363788, 0. , 0. ],
|
101
|
+
[0.99999822, 0.13363788, 0. , 0. ]])
|
91
102
|
|
92
103
|
See Also
|
93
104
|
--------
|
@@ -102,13 +113,9 @@ def balance(class_labels: Sequence[int], metadata: list[dict], num_neighbors: in
|
|
102
113
|
mi[:] = np.nan
|
103
114
|
|
104
115
|
for idx in range(num_factors):
|
105
|
-
tgt = data[:, idx]
|
116
|
+
tgt = data[:, idx].astype(int)
|
106
117
|
|
107
118
|
if is_categorical[idx]:
|
108
|
-
if tgt.dtype == float:
|
109
|
-
# map to unique integers if categorical
|
110
|
-
_, tgt = np.unique(tgt, return_inverse=True)
|
111
|
-
# categorical target
|
112
119
|
mi[idx, :] = mutual_info_classif(
|
113
120
|
data,
|
114
121
|
tgt,
|
@@ -129,89 +136,40 @@ def balance(class_labels: Sequence[int], metadata: list[dict], num_neighbors: in
|
|
129
136
|
norm_factor = 0.5 * np.add.outer(ent_all, ent_all) + 1e-6
|
130
137
|
# in principle MI should be symmetric, but it is not in practice.
|
131
138
|
nmi = 0.5 * (mi + mi.T) / norm_factor
|
139
|
+
balance = nmi[0]
|
140
|
+
factors = nmi[1:, 1:]
|
132
141
|
|
133
|
-
return BalanceOutput(nmi)
|
134
|
-
|
135
|
-
|
136
|
-
@set_metadata("dataeval.metrics")
|
137
|
-
def balance_classwise(class_labels: Sequence[int], metadata: list[dict], num_neighbors: int = 5) -> BalanceOutput:
|
138
|
-
"""
|
139
|
-
Compute mutual information (analogous to correlation) between metadata factors
|
140
|
-
(class label, metadata, label/image properties) with individual class labels.
|
141
|
-
|
142
|
-
Parameters
|
143
|
-
----------
|
144
|
-
class_labels: Sequence[int]
|
145
|
-
List of class labels for each image
|
146
|
-
metadata: List[Dict]
|
147
|
-
List of metadata factors for each image
|
148
|
-
num_neighbors: int, default 5
|
149
|
-
Number of nearest neighbors to use for computing MI between discrete
|
150
|
-
and continuous variables.
|
151
|
-
|
152
|
-
Notes
|
153
|
-
-----
|
154
|
-
We use `mutual_info_classif` from sklearn since class label is categorical.
|
155
|
-
`mutual_info_classif` outputs are consistent up to O(1e-4) and depend on a random
|
156
|
-
seed. MI is computed differently for categorical and continuous variables, so we
|
157
|
-
have to specify with is_categorical.
|
158
|
-
|
159
|
-
Returns
|
160
|
-
-------
|
161
|
-
BalanceOutput
|
162
|
-
(num_classes x num_factors) estimate of mutual information between
|
163
|
-
num_factors metadata factors and individual class labels.
|
164
|
-
|
165
|
-
Example
|
166
|
-
-------
|
167
|
-
Return classwise balance (mutual information) of factors with individual class_labels
|
168
|
-
|
169
|
-
>>> balance_classwise(class_labels, metadata).mutual_information
|
170
|
-
array([[0.13363788, 0.54085156, 0. ],
|
171
|
-
[0.13363788, 0.54085156, 0. ]])
|
172
|
-
|
173
|
-
|
174
|
-
See Also
|
175
|
-
--------
|
176
|
-
sklearn.feature_selection.mutual_info_classif
|
177
|
-
sklearn.feature_selection.mutual_info_regression
|
178
|
-
sklearn.metrics.mutual_info_score
|
179
|
-
compute_mutual_information
|
180
|
-
"""
|
181
|
-
num_neighbors = validate_num_neighbors(num_neighbors)
|
182
|
-
data, names, is_categorical = preprocess_metadata(class_labels, metadata)
|
183
|
-
num_factors = len(names)
|
184
142
|
# unique class labels
|
185
143
|
class_idx = names.index("class_label")
|
186
|
-
class_data = data[:, class_idx]
|
144
|
+
class_data = data[:, class_idx].astype(int)
|
187
145
|
u_cls = np.unique(class_data)
|
188
146
|
num_classes = len(u_cls)
|
189
147
|
|
190
|
-
data_no_class = np.concatenate((data[:, :class_idx], data[:, (class_idx + 1) :]), axis=1)
|
191
|
-
|
192
148
|
# assume class is a factor
|
193
|
-
|
194
|
-
|
149
|
+
classwise_mi = np.empty((num_classes, num_factors))
|
150
|
+
classwise_mi[:] = np.nan
|
195
151
|
|
196
152
|
# categorical variables, excluding class label
|
197
153
|
cat_mask = np.concatenate((is_categorical[:class_idx], is_categorical[(class_idx + 1) :]), axis=0).astype(int)
|
198
154
|
|
155
|
+
tgt_bin = np.stack([class_data == cls for cls in u_cls]).T.astype(int)
|
156
|
+
ent_tgt_bin = entropy(
|
157
|
+
tgt_bin, names=[str(idx) for idx in range(num_classes)], is_categorical=[True for idx in range(num_classes)]
|
158
|
+
)
|
159
|
+
|
199
160
|
# classification MI for discrete/categorical features
|
200
|
-
for idx
|
201
|
-
tgt = class_data == cls
|
161
|
+
for idx in range(num_classes):
|
162
|
+
# tgt = class_data == cls
|
202
163
|
# units: nat
|
203
|
-
|
204
|
-
|
205
|
-
|
164
|
+
classwise_mi[idx, :] = mutual_info_classif(
|
165
|
+
data,
|
166
|
+
tgt_bin[:, idx],
|
206
167
|
discrete_features=cat_mask, # type: ignore
|
207
168
|
n_neighbors=num_neighbors,
|
208
169
|
random_state=0,
|
209
170
|
)
|
210
171
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
norm_factor = 0.5 * np.add.outer(ent_tgt, ent_all) + 1e-6
|
216
|
-
nmi = mi / norm_factor
|
217
|
-
return BalanceOutput(nmi)
|
172
|
+
norm_factor = 0.5 * np.add.outer(ent_tgt_bin, ent_all) + 1e-6
|
173
|
+
classwise = classwise_mi / norm_factor
|
174
|
+
|
175
|
+
return BalanceOutput(balance, factors, classwise)
|
dataeval/_internal/metrics/coverage.py
CHANGED
@@ -66,27 +66,22 @@ def coverage(
|
|
66
66
|
|
67
67
|
Note
|
68
68
|
----
|
69
|
-
Embeddings should be on the unit interval.
|
69
|
+
Embeddings should be on the unit interval [0-1].
|
70
70
|
|
71
71
|
Example
|
72
72
|
-------
|
73
|
-
>>> coverage(embeddings)
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
0.83713908, 0.91784263, 1.12901193, 0.73907618, 0.63943983,
|
79
|
-
0.61188447, 0.47872713, 0.57207771, 0.92885883, 0.54750511,
|
80
|
-
0.83015726, 1.20721778, 0.50421928, 0.98312246, 0.59764166,
|
81
|
-
0.61009202, 0.73864073, 1.0381061 , 0.77598609, 0.72984036,
|
82
|
-
0.67573006, 0.48056064, 1.00050879, 0.89532971, 0.58395529,
|
83
|
-
0.95954793, 0.60134383, 1.10096454, 0.51955314, 0.73038702]), critical_value=0)
|
73
|
+
>>> results = coverage(embeddings)
|
74
|
+
>>> results.indices
|
75
|
+
array([447, 412, 8, 32, 63])
|
76
|
+
>>> results.critical_value
|
77
|
+
0.8459038956941765
|
84
78
|
|
85
79
|
Reference
|
86
80
|
---------
|
87
81
|
This implementation is based on https://dl.acm.org/doi/abs/10.1145/3448016.3457315.
|
82
|
+
|
88
83
|
[1] Seymour Sudman. 1976. Applied sampling. Academic Press New York (1976).
|
89
|
-
"""
|
84
|
+
"""
|
90
85
|
|
91
86
|
# Calculate distance matrix, look at the (k+1)th farthest neighbor for each image.
|
92
87
|
embeddings = to_numpy(embeddings)
|
@@ -105,8 +100,9 @@ def coverage(
|
|
105
100
|
pvals = np.where(crit > rho)[0]
|
106
101
|
elif radius_type == "adaptive":
|
107
102
|
# Use data adaptive cutoff as rho
|
108
|
-
|
109
|
-
pvals = np.argsort(crit)[::-1][:
|
103
|
+
selection = int(max(n * percent, 1))
|
104
|
+
pvals = np.argsort(crit)[::-1][:selection]
|
105
|
+
rho = float(np.mean(np.sort(crit)[::-1][selection - 1 : selection + 1]))
|
110
106
|
else:
|
111
107
|
raise ValueError(f"{radius_type} is an invalid radius type. Expected 'adaptive' or 'naive'")
|
112
108
|
return CoverageOutput(pvals, crit, rho)
|
dataeval/_internal/metrics/diversity.py
CHANGED
@@ -17,9 +17,12 @@ class DiversityOutput(OutputMetadata):
|
|
17
17
|
----------
|
18
18
|
diversity_index : NDArray[np.float64]
|
19
19
|
Diversity index for classes and factors
|
20
|
+
classwise : NDArray[np.float64]
|
21
|
+
Classwise diversity index [n_class x n_factor]
|
20
22
|
"""
|
21
23
|
|
22
24
|
diversity_index: NDArray[np.float64]
|
25
|
+
classwise: NDArray[np.float64]
|
23
26
|
|
24
27
|
|
25
28
|
def diversity_shannon(
|
@@ -39,6 +42,13 @@ def diversity_shannon(
|
|
39
42
|
|
40
43
|
Parameters
|
41
44
|
----------
|
45
|
+
data: NDArray
|
46
|
+
Array containing numerical values for metadata factors
|
47
|
+
names: list[str]
|
48
|
+
Names of metadata factors -- keys of the metadata dictionary
|
49
|
+
is_categorical: list[bool]
|
50
|
+
List of flags to identify whether variables are categorical (True) or
|
51
|
+
continuous (False)
|
42
52
|
subset_mask: NDArray[np.bool_] | None
|
43
53
|
Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
|
44
54
|
|
@@ -76,14 +86,20 @@ def diversity_simpson(
|
|
76
86
|
Compute diversity for discrete/categorical variables and, through standard
|
77
87
|
histogram binning, for continuous variables.
|
78
88
|
|
79
|
-
We define diversity as
|
80
|
-
index.
|
89
|
+
We define diversity as the inverse Simpson diversity index linearly rescaled to the unit interval.
|
81
90
|
|
82
91
|
diversity = 1 implies that samples are evenly distributed across a particular factor
|
83
|
-
diversity =
|
92
|
+
diversity = 0 implies that all samples belong to one category/bin
|
84
93
|
|
85
94
|
Parameters
|
86
95
|
----------
|
96
|
+
data: NDArray
|
97
|
+
Array containing numerical values for metadata factors
|
98
|
+
names: list[str]
|
99
|
+
Names of metadata factors -- keys of the metadata dictionary
|
100
|
+
is_categorical: list[bool]
|
101
|
+
List of flags to identify whether variables are categorical (True) or
|
102
|
+
continuous (False)
|
87
103
|
subset_mask: NDArray[np.bool_] | None
|
88
104
|
Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
|
89
105
|
|
@@ -91,10 +107,7 @@ def diversity_simpson(
|
|
91
107
|
-----
|
92
108
|
For continuous variables, histogram bins are chosen automatically. See
|
93
109
|
numpy.histogram for details.
|
94
|
-
|
95
|
-
in the limit.
|
96
|
-
If there is only one category, the diversity index takes a value of 1 =
|
97
|
-
1/N = 1/1. Entropy will take a value of 0.
|
110
|
+
If there is only one category, the diversity index takes a value of 0.
|
98
111
|
|
99
112
|
Returns
|
100
113
|
-------
|
@@ -116,8 +129,8 @@ def diversity_simpson(
|
|
116
129
|
# relative frequencies
|
117
130
|
p_i = cnts / cnts.sum()
|
118
131
|
# inverse Simpson index normalized by (number of bins)
|
119
|
-
|
120
|
-
|
132
|
+
s_0 = 1 / np.sum(p_i**2) / num_bins[col]
|
133
|
+
ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
|
121
134
|
return ev_index
|
122
135
|
|
123
136
|
|
@@ -129,9 +142,11 @@ def diversity(
|
|
129
142
|
class_labels: Sequence[int], metadata: list[dict], method: Literal["shannon", "simpson"] = "simpson"
|
130
143
|
) -> DiversityOutput:
|
131
144
|
"""
|
132
|
-
Compute diversity for discrete/categorical variables and, through standard
|
145
|
+
Compute diversity and classwise diversity for discrete/categorical variables and, through standard
|
133
146
|
histogram binning, for continuous variables.
|
134
147
|
|
148
|
+
We define diversity as a normalized form of the inverse Simpson diversity index.
|
149
|
+
|
135
150
|
diversity = 1 implies that samples are evenly distributed across a particular factor
|
136
151
|
diversity = 0 implies that all samples belong to one category/bin
|
137
152
|
|
@@ -141,95 +156,51 @@ def diversity(
|
|
141
156
|
List of class labels for each image
|
142
157
|
metadata: List[Dict]
|
143
158
|
List of metadata factors for each image
|
144
|
-
|
145
|
-
|
146
|
-
Permissible values include "simpson" and "shannon"
|
159
|
+
method: Literal["shannon", "simpson"], default "simpson"
|
160
|
+
Indicates which diversity index should be computed
|
147
161
|
|
148
162
|
Notes
|
149
163
|
-----
|
150
164
|
- For continuous variables, histogram bins are chosen automatically. See numpy.histogram for details.
|
165
|
+
- The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
|
166
|
+
- If there is only one category, the diversity index takes a value of 1 = 1/N = 1/1. Entropy will take a value of 0.
|
151
167
|
|
152
168
|
Returns
|
153
169
|
-------
|
154
170
|
DiversityOutput
|
155
|
-
Diversity index per column of self.data or each factor in self.names
|
171
|
+
Diversity index per column of self.data or each factor in self.names and
|
172
|
+
classwise diversity [n_class x n_factor]
|
156
173
|
|
157
174
|
Example
|
158
175
|
-------
|
159
176
|
Compute Simpson diversity index of metadata and class labels
|
160
177
|
|
161
|
-
>>> diversity(class_labels, metadata, method="simpson")
|
162
|
-
|
178
|
+
>>> div_simp = diversity(class_labels, metadata, method="simpson")
|
179
|
+
>>> div_simp.diversity_index
|
180
|
+
array([0.18103448, 0.18103448, 0.88636364])
|
181
|
+
|
182
|
+
>>> div_simp.classwise
|
183
|
+
array([[0.17241379, 0.39473684],
|
184
|
+
[0.2 , 0.2 ]])
|
163
185
|
|
164
186
|
Compute Shannon diversity index of metadata and class labels
|
165
187
|
|
166
|
-
>>> diversity(class_labels, metadata, method="shannon")
|
188
|
+
>>> div_shan = diversity(class_labels, metadata, method="shannon")
|
189
|
+
>>> div_shan.diversity_index
|
167
190
|
array([0.37955133, 0.37955133, 0.96748876])
|
168
191
|
|
169
|
-
|
170
|
-
See Also
|
171
|
-
--------
|
172
|
-
numpy.histogram
|
173
|
-
"""
|
174
|
-
diversity_fn = get_method(DIVERSITY_FN_MAP, method)
|
175
|
-
data, names, is_categorical = preprocess_metadata(class_labels, metadata)
|
176
|
-
diversity_index = diversity_fn(data, names, is_categorical, None).astype(np.float64)
|
177
|
-
return DiversityOutput(diversity_index)
|
178
|
-
|
179
|
-
|
180
|
-
@set_metadata("dataeval.metrics")
|
181
|
-
def diversity_classwise(
|
182
|
-
class_labels: Sequence[int], metadata: list[dict], method: Literal["shannon", "simpson"] = "simpson"
|
183
|
-
) -> DiversityOutput:
|
184
|
-
"""
|
185
|
-
Compute diversity for discrete/categorical variables and, through standard
|
186
|
-
histogram binning, for continuous variables.
|
187
|
-
|
188
|
-
We define diversity as a normalized form of the inverse Simpson diversity
|
189
|
-
index.
|
190
|
-
|
191
|
-
diversity = 1 implies that samples are evenly distributed across a particular factor
|
192
|
-
diversity = 1/num_categories implies that all samples belong to one category/bin
|
193
|
-
|
194
|
-
Parameters
|
195
|
-
----------
|
196
|
-
class_labels: Sequence[int]
|
197
|
-
List of class labels for each image
|
198
|
-
metadata: List[Dict]
|
199
|
-
List of metadata factors for each image
|
200
|
-
|
201
|
-
Notes
|
202
|
-
-----
|
203
|
-
- For continuous variables, histogram bins are chosen automatically. See numpy.histogram for details.
|
204
|
-
- The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
|
205
|
-
- If there is only one category, the diversity index takes a value of 1 = 1/N = 1/1. Entropy will take a value of 0.
|
206
|
-
|
207
|
-
Returns
|
208
|
-
-------
|
209
|
-
DiversityOutput
|
210
|
-
Diversity index [n_class x n_factor]
|
211
|
-
|
212
|
-
Example
|
213
|
-
-------
|
214
|
-
Compute classwise Simpson diversity index of metadata and class labels
|
215
|
-
|
216
|
-
>>> diversity_classwise(class_labels, metadata, method="simpson").diversity_index
|
217
|
-
array([[0.33793103, 0.51578947],
|
218
|
-
[0.36 , 0.36 ]])
|
219
|
-
|
220
|
-
Compute classwise Shannon diversity index of metadata and class labels
|
221
|
-
|
222
|
-
>>> diversity_classwise(class_labels, metadata, method="shannon").diversity_index
|
192
|
+
>>> div_shan.classwise
|
223
193
|
array([[0.43156028, 0.83224889],
|
224
194
|
[0.57938016, 0.57938016]])
|
225
195
|
|
226
|
-
|
227
196
|
See Also
|
228
197
|
--------
|
229
198
|
numpy.histogram
|
230
199
|
"""
|
231
200
|
diversity_fn = get_method(DIVERSITY_FN_MAP, method)
|
232
201
|
data, names, is_categorical = preprocess_metadata(class_labels, metadata)
|
202
|
+
diversity_index = diversity_fn(data, names, is_categorical, None).astype(np.float64)
|
203
|
+
|
233
204
|
class_idx = names.index("class_label")
|
234
205
|
class_lbl = data[:, class_idx]
|
235
206
|
|
@@ -241,4 +212,5 @@ def diversity_classwise(
|
|
241
212
|
subset_mask = class_lbl == cls
|
242
213
|
diversity[idx, :] = diversity_fn(data, names, is_categorical, subset_mask)
|
243
214
|
div_no_class = np.concatenate((diversity[:, :class_idx], diversity[:, (class_idx + 1) :]), axis=1)
|
244
|
-
|
215
|
+
|
216
|
+
return DiversityOutput(diversity_index, div_no_class)
|
dataeval/_internal/metrics/stats.py
CHANGED
@@ -89,6 +89,16 @@ class StatsOutput(OutputMetadata):
|
|
89
89
|
def dict(self):
|
90
90
|
return {k: v for k, v in self.__dict__.items() if not k.startswith("_") and len(v) > 0}
|
91
91
|
|
92
|
+
def __len__(self) -> int:
|
93
|
+
if self.ch_idx_map:
|
94
|
+
return sum([len(idxs) for idxs in self.ch_idx_map.values()])
|
95
|
+
else:
|
96
|
+
for a in self.__annotations__:
|
97
|
+
attr = getattr(self, a, None)
|
98
|
+
if attr is not None and hasattr(a, "__len__") and len(attr) > 0:
|
99
|
+
return len(attr)
|
100
|
+
return 0
|
101
|
+
|
92
102
|
|
93
103
|
QUARTILES = (0, 25, 50, 75, 100)
|
94
104
|
|
dataeval/_internal/output.py
CHANGED
(hunk not included in this extract)
dataeval/metrics/bias/__init__.py
CHANGED
@@ -1,14 +1,12 @@
|
|
1
|
-
from dataeval._internal.metrics.balance import balance
|
1
|
+
from dataeval._internal.metrics.balance import balance
|
2
2
|
from dataeval._internal.metrics.coverage import coverage
|
3
|
-
from dataeval._internal.metrics.diversity import diversity
|
3
|
+
from dataeval._internal.metrics.diversity import diversity
|
4
4
|
from dataeval._internal.metrics.parity import label_parity, parity
|
5
5
|
|
6
6
|
__all__ = [
|
7
7
|
"balance",
|
8
|
-
"balance_classwise",
|
9
8
|
"coverage",
|
10
9
|
"diversity",
|
11
|
-
"diversity_classwise",
|
12
10
|
"label_parity",
|
13
11
|
"parity",
|
14
12
|
]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.66.0
|
3
|
+
Version: 0.68.0
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
5
|
Home-page: https://dataeval.ai/
|
6
6
|
License: MIT
|
@@ -1,4 +1,4 @@
|
|
1
|
-
dataeval/__init__.py,sha256=
|
1
|
+
dataeval/__init__.py,sha256=fV-lc8AokA2hnkUSOdX-Bxy0xmEfPTXVFB3VcYAoiA8,590
|
2
2
|
dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
dataeval/_internal/detectors/clusterer.py,sha256=hJwELUeAdZZ3OVLIfwalw2P7Zz13q2ZqrV6gx90s44E,20695
|
4
4
|
dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -8,7 +8,8 @@ dataeval/_internal/detectors/drift/ks.py,sha256=aoDx7ps-5vrSI8Q9ii6cwmKnAyaD8tjG
|
|
8
8
|
dataeval/_internal/detectors/drift/mmd.py,sha256=xUMQDaLOcqc3Uq2xDvNR7hbt3WnmCR2etZlGCwYlu2c,7489
|
9
9
|
dataeval/_internal/detectors/drift/torch.py,sha256=YhIN85MbUV3C4IJcRvqYdXSWLj5lUeEOb05T5DgB3xo,11552
|
10
10
|
dataeval/_internal/detectors/drift/uncertainty.py,sha256=Ot8L42AnFbkij4J3Tis7VzXLv3hfBxoOWBP4UoCEnVs,5125
|
11
|
-
dataeval/_internal/detectors/duplicates.py,sha256=
|
11
|
+
dataeval/_internal/detectors/duplicates.py,sha256=qkzbdWuJuUozFLqpnD6CYAGXQb7-aWw2mHr_cxXAfPo,4922
|
12
|
+
dataeval/_internal/detectors/merged_stats.py,sha256=WVPxz7n5fUkFKW3kobD_TkKkof51YjfIz4M_4CHh-1s,2517
|
12
13
|
dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
14
|
dataeval/_internal/detectors/ood/ae.py,sha256=k8pZP7oPwVyQlv6YcoacNMzpmQZy7W222yYrdXGTYZI,2031
|
14
15
|
dataeval/_internal/detectors/ood/aegmm.py,sha256=pffThqXRoLx3GuZXEQBd-xEy5DjAZHV7WSeP2HgM_TI,2403
|
@@ -16,17 +17,17 @@ dataeval/_internal/detectors/ood/base.py,sha256=Pw34uFEWOJZiG4ciM0ArUkqhiM8WCGl2
|
|
16
17
|
dataeval/_internal/detectors/ood/llr.py,sha256=tCo8G7V8VaVuIZ09rg0ZXZmdE0N_zGm7vCfFUnGbGvo,10102
|
17
18
|
dataeval/_internal/detectors/ood/vae.py,sha256=WbQugS-bBUTTqQ9PRLHBmSUtk7O2_PN4PBLJE9ieMjw,2921
|
18
19
|
dataeval/_internal/detectors/ood/vaegmm.py,sha256=pVUSlVF2jo8uokyks2QzfBJnNtcFWmcF8EQl-azs2Bg,2832
|
19
|
-
dataeval/_internal/detectors/outliers.py,sha256=
|
20
|
+
dataeval/_internal/detectors/outliers.py,sha256=tzIraHkooPA4gSb8lG0O3koVK-9fOQg8EPo3xvgL1Y4,7533
|
20
21
|
dataeval/_internal/flags.py,sha256=FHRgm8NKB9AjQgPcAESYeSbqIszgxbSGfF0Xd_tSkyk,2169
|
21
22
|
dataeval/_internal/interop.py,sha256=x4qj4EiBt5NthSxe8prSLrPDAEcipAdyyLwbNyCBaFk,1059
|
22
23
|
dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
-
dataeval/_internal/metrics/balance.py,sha256=
|
24
|
+
dataeval/_internal/metrics/balance.py,sha256=eAHvgjiGCH893XSQLqh9j9wgvAECoNPVT8k0u_9Ijzg,6097
|
24
25
|
dataeval/_internal/metrics/ber.py,sha256=Onsi47AbT9rMvng-Pbu8LIrYRfLpI13En1FxkFoMKQs,4668
|
25
|
-
dataeval/_internal/metrics/coverage.py,sha256=
|
26
|
+
dataeval/_internal/metrics/coverage.py,sha256=EZVES1rbZW2j_CtQv1VFfSO-UmWcrt5nmqxDErtrG14,3473
|
26
27
|
dataeval/_internal/metrics/divergence.py,sha256=nmMUfr9FGnH798eb6xzEiMj4C42rQVthh5HeexiY6EE,4119
|
27
|
-
dataeval/_internal/metrics/diversity.py,sha256=
|
28
|
+
dataeval/_internal/metrics/diversity.py,sha256=nGjYQ-NLjb8mPt1PAYnvkWH4D58kjM39IPs2FULfis4,7503
|
28
29
|
dataeval/_internal/metrics/parity.py,sha256=suv1Pf7gPj0_NxsS0_M6ewfUndsFJyEhbt5NPp6ktMI,15457
|
29
|
-
dataeval/_internal/metrics/stats.py,sha256
|
30
|
+
dataeval/_internal/metrics/stats.py,sha256=-gLGn8Yy-Xx0kkaF-Z_3RitqPLZJhhbflksSjBRN3iY,16702
|
30
31
|
dataeval/_internal/metrics/uap.py,sha256=w-wvXXnX16kUq-weaZD2SrJi22LJ8EjOFbOhPxeGejI,2043
|
31
32
|
dataeval/_internal/metrics/utils.py,sha256=mSYa-3cHGcsQwPr7zbdpzrnK_8jIXCiAcu2HCcvrtaY,13007
|
32
33
|
dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -41,7 +42,7 @@ dataeval/_internal/models/tensorflow/losses.py,sha256=pZH5RnlM9R0RrBde9Lgq32muwA
|
|
41
42
|
dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=lRpRNebMgkCJUnEk1xouVaTfS_YGMQgQhI01wNKAjeM,48420
|
42
43
|
dataeval/_internal/models/tensorflow/trainer.py,sha256=xNY0Iw7Qa1TnCuy9N1b77_VduFoW_BhbZjfQCxOVby4,4082
|
43
44
|
dataeval/_internal/models/tensorflow/utils.py,sha256=l6jXKMWyQAEI4LpAONq95Xwr7CPgrs408ypf9TuNxkY,8732
|
44
|
-
dataeval/_internal/output.py,sha256=
|
45
|
+
dataeval/_internal/output.py,sha256=bFC2qJxXUc_daQwJHHa9KfFNLuxZANGb7Dpget_TXYs,3049
|
45
46
|
dataeval/_internal/utils.py,sha256=gK0z4buuQoUYblkrCiRV9pIESzyikcY-3a08XsQkD7E,1585
|
46
47
|
dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
48
|
dataeval/_internal/workflows/sufficiency.py,sha256=0k7Dbk3QmEGkZp2IW4OcZBcrxb4zAp9hC9nXGN1v1cY,18199
|
@@ -53,7 +54,7 @@ dataeval/detectors/linters/__init__.py,sha256=1yxsJw8CFpHsZwn_YUlWpb-4YBet5U6uB-
|
|
53
54
|
dataeval/detectors/ood/__init__.py,sha256=ybWhwbMmWygIwE1A-nYihDfugrj3j0GiuABmVvD7264,583
|
54
55
|
dataeval/flags/__init__.py,sha256=qo06_Tk0ul4lOhKSEs0HE2G6WBFvMwNJq77vRX1ynww,72
|
55
56
|
dataeval/metrics/__init__.py,sha256=42szGyZrLekNU-T-rwJu-pUoDBdOoStuScB-mnGzjw4,81
|
56
|
-
dataeval/metrics/bias/__init__.py,sha256=
|
57
|
+
dataeval/metrics/bias/__init__.py,sha256=xqpxCttgzz-hMZQI7_IlaNn4OGZaGVz3KKRd26GbSKE,335
|
57
58
|
dataeval/metrics/estimators/__init__.py,sha256=fWQZUIxu88u5POYXN1yoFc-Hxx5B1fveEiiSXmK5kPk,210
|
58
59
|
dataeval/metrics/stats/__init__.py,sha256=N5UvO7reDkYX1xFdAQjwALyJwcC2FAbruzd7ZYYW_4I,123
|
59
60
|
dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -66,7 +67,7 @@ dataeval/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uT
|
|
66
67
|
dataeval/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
|
67
68
|
dataeval/utils/__init__.py,sha256=ExQ1xj62MjcM9uIu1-g1P2fW0EPJpcIofnvxjQ908c4,172
|
68
69
|
dataeval/workflows/__init__.py,sha256=gkU2B6yUiefexcYrBwqfZKNl8BvX8abUjfeNvVBXF4E,186
|
69
|
-
dataeval-0.
|
70
|
-
dataeval-0.
|
71
|
-
dataeval-0.
|
72
|
-
dataeval-0.
|
70
|
+
dataeval-0.68.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
|
71
|
+
dataeval-0.68.0.dist-info/METADATA,sha256=XWLDiMY9JE2dxIDnRnJMQMLS8GPWFH2mbMDXkeP7Y5Q,4217
|
72
|
+
dataeval-0.68.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
73
|
+
dataeval-0.68.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|