dataeval 0.69.2__py3-none-any.whl → 0.69.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_internal/metrics/diversity.py +4 -1
- dataeval/_internal/metrics/parity.py +26 -7
- {dataeval-0.69.2.dist-info → dataeval-0.69.3.dist-info}/METADATA +1 -1
- {dataeval-0.69.2.dist-info → dataeval-0.69.3.dist-info}/RECORD +7 -7
- {dataeval-0.69.2.dist-info → dataeval-0.69.3.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.69.2.dist-info → dataeval-0.69.3.dist-info}/WHEEL +0 -0
dataeval/__init__.py
CHANGED
@@ -130,7 +130,10 @@ def diversity_simpson(
|
|
130
130
|
p_i = cnts / cnts.sum()
|
131
131
|
# inverse Simpson index normalized by (number of bins)
|
132
132
|
s_0 = 1 / np.sum(p_i**2) / num_bins[col]
|
133
|
-
|
133
|
+
if num_bins[col] == 1:
|
134
|
+
ev_index[col] = 0
|
135
|
+
else:
|
136
|
+
ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
|
134
137
|
return ev_index
|
135
138
|
|
136
139
|
|
@@ -348,6 +348,7 @@ def parity(
|
|
348
348
|
chi_scores = np.zeros(len(factors))
|
349
349
|
p_values = np.zeros(len(factors))
|
350
350
|
n_cls = len(np.unique(labels))
|
351
|
+
not_enough_data = {}
|
351
352
|
for i, (current_factor_name, factor_values) in enumerate(factors.items()):
|
352
353
|
unique_factor_values = np.unique(factor_values)
|
353
354
|
contingency_matrix = np.zeros((len(unique_factor_values), n_cls))
|
@@ -361,13 +362,12 @@ def parity(
|
|
361
362
|
with_both = np.bitwise_and((labels == label), factor_values == factor_value)
|
362
363
|
contingency_matrix[fi, label] = np.sum(with_both)
|
363
364
|
if 0 < contingency_matrix[fi, label] < 5:
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
"into fewer bins."
|
365
|
+
if current_factor_name not in not_enough_data:
|
366
|
+
not_enough_data[current_factor_name] = {}
|
367
|
+
if factor_value not in not_enough_data[current_factor_name]:
|
368
|
+
not_enough_data[current_factor_name][factor_value] = []
|
369
|
+
not_enough_data[current_factor_name][factor_value].append(
|
370
|
+
(label, int(contingency_matrix[fi, label]))
|
371
371
|
)
|
372
372
|
|
373
373
|
# This deletes rows containing only zeros,
|
@@ -381,4 +381,23 @@ def parity(
|
|
381
381
|
chi_scores[i] = chi2
|
382
382
|
p_values[i] = p
|
383
383
|
|
384
|
+
if not_enough_data:
|
385
|
+
factor_msg = []
|
386
|
+
for factor, fact_dict in not_enough_data.items():
|
387
|
+
stacked_msg = []
|
388
|
+
for key, value in fact_dict.items():
|
389
|
+
msg = []
|
390
|
+
for item in value:
|
391
|
+
msg.append(f"label {item[0]}: {item[1]} occurrences")
|
392
|
+
flat_msg = "\n\t\t".join(msg)
|
393
|
+
stacked_msg.append(f"value {key} - {flat_msg}\n\t")
|
394
|
+
factor_msg.append(factor + " - " + "".join(stacked_msg))
|
395
|
+
|
396
|
+
message = "\n".join(factor_msg)
|
397
|
+
|
398
|
+
warnings.warn(
|
399
|
+
f"The following factors did not meet the recommended 5 occurrences for each value-label combination. \nRecommend rerunning parity after adjusting the following factor-value-label combinations: \n{message}", # noqa: E501
|
400
|
+
UserWarning,
|
401
|
+
)
|
402
|
+
|
384
403
|
return ParityOutput(chi_scores, p_values)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.69.2
|
3
|
+
Version: 0.69.3
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
5
|
Home-page: https://dataeval.ai/
|
6
6
|
License: MIT
|
@@ -1,4 +1,4 @@
|
|
1
|
-
dataeval/__init__.py,sha256=
|
1
|
+
dataeval/__init__.py,sha256=4JtJRUfhO_kYbjWDhzY5niIvmLb8K_3sCL-wbcZ_mUU,590
|
2
2
|
dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
dataeval/_internal/detectors/clusterer.py,sha256=hJwELUeAdZZ3OVLIfwalw2P7Zz13q2ZqrV6gx90s44E,20695
|
4
4
|
dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -25,8 +25,8 @@ dataeval/_internal/metrics/balance.py,sha256=eAHvgjiGCH893XSQLqh9j9wgvAECoNPVT8k
|
|
25
25
|
dataeval/_internal/metrics/ber.py,sha256=Onsi47AbT9rMvng-Pbu8LIrYRfLpI13En1FxkFoMKQs,4668
|
26
26
|
dataeval/_internal/metrics/coverage.py,sha256=EZVES1rbZW2j_CtQv1VFfSO-UmWcrt5nmqxDErtrG14,3473
|
27
27
|
dataeval/_internal/metrics/divergence.py,sha256=nmMUfr9FGnH798eb6xzEiMj4C42rQVthh5HeexiY6EE,4119
|
28
|
-
dataeval/_internal/metrics/diversity.py,sha256=
|
29
|
-
dataeval/_internal/metrics/parity.py,sha256=
|
28
|
+
dataeval/_internal/metrics/diversity.py,sha256=_oT0FHsgfLOoe_TLD2Aax4r4jmH6WnOPVIkcl_YjaoY,7582
|
29
|
+
dataeval/_internal/metrics/parity.py,sha256=VszQNbHWjct2bCqrIXUZC_qFi4ZIq2Lm-vs-DiarBFo,16244
|
30
30
|
dataeval/_internal/metrics/stats.py,sha256=ILKteVMGjrp1s2CECPL_hbLsijIKR2d6II2-8w9oxW8,18105
|
31
31
|
dataeval/_internal/metrics/uap.py,sha256=w-wvXXnX16kUq-weaZD2SrJi22LJ8EjOFbOhPxeGejI,2043
|
32
32
|
dataeval/_internal/metrics/utils.py,sha256=mSYa-3cHGcsQwPr7zbdpzrnK_8jIXCiAcu2HCcvrtaY,13007
|
@@ -67,7 +67,7 @@ dataeval/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uT
|
|
67
67
|
dataeval/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
|
68
68
|
dataeval/utils/__init__.py,sha256=ExQ1xj62MjcM9uIu1-g1P2fW0EPJpcIofnvxjQ908c4,172
|
69
69
|
dataeval/workflows/__init__.py,sha256=gkU2B6yUiefexcYrBwqfZKNl8BvX8abUjfeNvVBXF4E,186
|
70
|
-
dataeval-0.69.2.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
|
71
|
-
dataeval-0.69.
|
72
|
-
dataeval-0.69.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
73
|
-
dataeval-0.69.2.dist-info/RECORD,,
|
70
|
+
dataeval-0.69.3.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
|
71
|
+
dataeval-0.69.3.dist-info/METADATA,sha256=dyyl60cjz6n7gRgYMZs9gCOdqpc9UbSV4LFCD8rJNCM,4217
|
72
|
+
dataeval-0.69.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
73
|
+
dataeval-0.69.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|