dataeval 0.69.2__py3-none-any.whl → 0.69.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
dataeval/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.69.2"
1
+ __version__ = "0.69.3"
2
2
 
3
3
  from importlib.util import find_spec
4
4
 
@@ -130,7 +130,10 @@ def diversity_simpson(
130
130
  p_i = cnts / cnts.sum()
131
131
  # inverse Simpson index normalized by (number of bins)
132
132
  s_0 = 1 / np.sum(p_i**2) / num_bins[col]
133
- ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
133
+ if num_bins[col] == 1:
134
+ ev_index[col] = 0
135
+ else:
136
+ ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
134
137
  return ev_index
135
138
 
136
139
 
@@ -348,6 +348,7 @@ def parity(
348
348
  chi_scores = np.zeros(len(factors))
349
349
  p_values = np.zeros(len(factors))
350
350
  n_cls = len(np.unique(labels))
351
+ not_enough_data = {}
351
352
  for i, (current_factor_name, factor_values) in enumerate(factors.items()):
352
353
  unique_factor_values = np.unique(factor_values)
353
354
  contingency_matrix = np.zeros((len(unique_factor_values), n_cls))
@@ -361,13 +362,12 @@ def parity(
361
362
  with_both = np.bitwise_and((labels == label), factor_values == factor_value)
362
363
  contingency_matrix[fi, label] = np.sum(with_both)
363
364
  if 0 < contingency_matrix[fi, label] < 5:
364
- warnings.warn(
365
- f"Factor {current_factor_name} value {factor_value} co-occurs "
366
- f"only {contingency_matrix[fi, label]} times with label {label}. "
367
- "This can cause inaccurate chi_square calculation. Recommend"
368
- "ensuring each label occurs either 0 times or at least 5 times. "
369
- "Alternatively, digitize any continuous-valued factors "
370
- "into fewer bins."
365
+ if current_factor_name not in not_enough_data:
366
+ not_enough_data[current_factor_name] = {}
367
+ if factor_value not in not_enough_data[current_factor_name]:
368
+ not_enough_data[current_factor_name][factor_value] = []
369
+ not_enough_data[current_factor_name][factor_value].append(
370
+ (label, int(contingency_matrix[fi, label]))
371
371
  )
372
372
 
373
373
  # This deletes rows containing only zeros,
@@ -381,4 +381,23 @@ def parity(
381
381
  chi_scores[i] = chi2
382
382
  p_values[i] = p
383
383
 
384
+ if not_enough_data:
385
+ factor_msg = []
386
+ for factor, fact_dict in not_enough_data.items():
387
+ stacked_msg = []
388
+ for key, value in fact_dict.items():
389
+ msg = []
390
+ for item in value:
391
+ msg.append(f"label {item[0]}: {item[1]} occurrences")
392
+ flat_msg = "\n\t\t".join(msg)
393
+ stacked_msg.append(f"value {key} - {flat_msg}\n\t")
394
+ factor_msg.append(factor + " - " + "".join(stacked_msg))
395
+
396
+ message = "\n".join(factor_msg)
397
+
398
+ warnings.warn(
399
+ f"The following factors did not meet the recommended 5 occurrences for each value-label combination. \nRecommend rerunning parity after adjusting the following factor-value-label combinations: \n{message}", # noqa: E501
400
+ UserWarning,
401
+ )
402
+
384
403
  return ParityOutput(chi_scores, p_values)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.69.2
3
+ Version: 0.69.3
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -1,4 +1,4 @@
1
- dataeval/__init__.py,sha256=NUQixSNyEc0GiI7YgbfY9IL0OEkIN9kdbrOGAB041Ig,590
1
+ dataeval/__init__.py,sha256=4JtJRUfhO_kYbjWDhzY5niIvmLb8K_3sCL-wbcZ_mUU,590
2
2
  dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  dataeval/_internal/detectors/clusterer.py,sha256=hJwELUeAdZZ3OVLIfwalw2P7Zz13q2ZqrV6gx90s44E,20695
4
4
  dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,8 +25,8 @@ dataeval/_internal/metrics/balance.py,sha256=eAHvgjiGCH893XSQLqh9j9wgvAECoNPVT8k
25
25
  dataeval/_internal/metrics/ber.py,sha256=Onsi47AbT9rMvng-Pbu8LIrYRfLpI13En1FxkFoMKQs,4668
26
26
  dataeval/_internal/metrics/coverage.py,sha256=EZVES1rbZW2j_CtQv1VFfSO-UmWcrt5nmqxDErtrG14,3473
27
27
  dataeval/_internal/metrics/divergence.py,sha256=nmMUfr9FGnH798eb6xzEiMj4C42rQVthh5HeexiY6EE,4119
28
- dataeval/_internal/metrics/diversity.py,sha256=nGjYQ-NLjb8mPt1PAYnvkWH4D58kjM39IPs2FULfis4,7503
29
- dataeval/_internal/metrics/parity.py,sha256=suv1Pf7gPj0_NxsS0_M6ewfUndsFJyEhbt5NPp6ktMI,15457
28
+ dataeval/_internal/metrics/diversity.py,sha256=_oT0FHsgfLOoe_TLD2Aax4r4jmH6WnOPVIkcl_YjaoY,7582
29
+ dataeval/_internal/metrics/parity.py,sha256=VszQNbHWjct2bCqrIXUZC_qFi4ZIq2Lm-vs-DiarBFo,16244
30
30
  dataeval/_internal/metrics/stats.py,sha256=ILKteVMGjrp1s2CECPL_hbLsijIKR2d6II2-8w9oxW8,18105
31
31
  dataeval/_internal/metrics/uap.py,sha256=w-wvXXnX16kUq-weaZD2SrJi22LJ8EjOFbOhPxeGejI,2043
32
32
  dataeval/_internal/metrics/utils.py,sha256=mSYa-3cHGcsQwPr7zbdpzrnK_8jIXCiAcu2HCcvrtaY,13007
@@ -67,7 +67,7 @@ dataeval/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uT
67
67
  dataeval/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
68
68
  dataeval/utils/__init__.py,sha256=ExQ1xj62MjcM9uIu1-g1P2fW0EPJpcIofnvxjQ908c4,172
69
69
  dataeval/workflows/__init__.py,sha256=gkU2B6yUiefexcYrBwqfZKNl8BvX8abUjfeNvVBXF4E,186
70
- dataeval-0.69.2.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
71
- dataeval-0.69.2.dist-info/METADATA,sha256=_9rVrbIh4EPYStZtOUYnB-Xo3cZ5JMUAf06TqDKvrZs,4217
72
- dataeval-0.69.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
73
- dataeval-0.69.2.dist-info/RECORD,,
70
+ dataeval-0.69.3.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
71
+ dataeval-0.69.3.dist-info/METADATA,sha256=dyyl60cjz6n7gRgYMZs9gCOdqpc9UbSV4LFCD8rJNCM,4217
72
+ dataeval-0.69.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
73
+ dataeval-0.69.3.dist-info/RECORD,,