PyPI - dataeval - Versions diffs - 0.69.2__py3-none-any.whl → 0.69.3__py3-none-any.whl - Mend

dataeval 0.69.2__py3-none-any.whl → 0.69.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

dataeval/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.69.2"
+__version__ = "0.69.3"
 from importlib.util import find_spec

dataeval/_internal/metrics/diversity.py CHANGED Viewed

@@ -130,7 +130,10 @@ def diversity_simpson(
         p_i = cnts / cnts.sum()
         # inverse Simpson index normalized by (number of bins)
         s_0 = 1 / np.sum(p_i**2) / num_bins[col]
-        ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
+        if num_bins[col] == 1:
+            ev_index[col] = 0
+        else:
+            ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
     return ev_index

dataeval/_internal/metrics/parity.py CHANGED Viewed

@@ -348,6 +348,7 @@ def parity(
     chi_scores = np.zeros(len(factors))
     p_values = np.zeros(len(factors))
     n_cls = len(np.unique(labels))
+    not_enough_data = {}
     for i, (current_factor_name, factor_values) in enumerate(factors.items()):
         unique_factor_values = np.unique(factor_values)
         contingency_matrix = np.zeros((len(unique_factor_values), n_cls))
@@ -361,13 +362,12 @@ def parity(
                 with_both = np.bitwise_and((labels == label), factor_values == factor_value)
                 contingency_matrix[fi, label] = np.sum(with_both)
                 if 0 < contingency_matrix[fi, label] < 5:
-                    warnings.warn(
-                        f"Factor {current_factor_name} value {factor_value} co-occurs "
-                        f"only {contingency_matrix[fi, label]} times with label {label}. "
-                        "This can cause inaccurate chi_square calculation. Recommend"
-                        "ensuring each label occurs either 0 times or at least 5 times. "
-                        "Alternatively, digitize any continuous-valued factors "
-                        "into fewer bins."
+                    if current_factor_name not in not_enough_data:
+                        not_enough_data[current_factor_name] = {}
+                    if factor_value not in not_enough_data[current_factor_name]:
+                        not_enough_data[current_factor_name][factor_value] = []
+                    not_enough_data[current_factor_name][factor_value].append(
+                        (label, int(contingency_matrix[fi, label]))
                     )
         # This deletes rows containing only zeros,
@@ -381,4 +381,23 @@ def parity(
         chi_scores[i] = chi2
         p_values[i] = p
+    if not_enough_data:
+        factor_msg = []
+        for factor, fact_dict in not_enough_data.items():
+            stacked_msg = []
+            for key, value in fact_dict.items():
+                msg = []
+                for item in value:
+                    msg.append(f"label {item[0]}: {item[1]} occurrences")
+                flat_msg = "\n\t\t".join(msg)
+                stacked_msg.append(f"value {key} - {flat_msg}\n\t")
+            factor_msg.append(factor + " - " + "".join(stacked_msg))
+        message = "\n".join(factor_msg)
+        warnings.warn(
+            f"The following factors did not meet the recommended 5 occurrences for each value-label combination. \nRecommend rerunning parity after adjusting the following factor-value-label combinations: \n{message}",  # noqa: E501
+            UserWarning,
+        )
     return ParityOutput(chi_scores, p_values)

{dataeval-0.69.2.dist-info → dataeval-0.69.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.69.2
+Version: 0.69.3
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT

{dataeval-0.69.2.dist-info → dataeval-0.69.3.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-dataeval/__init__.py,sha256=NUQixSNyEc0GiI7YgbfY9IL0OEkIN9kdbrOGAB041Ig,590
+dataeval/__init__.py,sha256=4JtJRUfhO_kYbjWDhzY5niIvmLb8K_3sCL-wbcZ_mUU,590
 dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/_internal/detectors/clusterer.py,sha256=hJwELUeAdZZ3OVLIfwalw2P7Zz13q2ZqrV6gx90s44E,20695
 dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,8 +25,8 @@ dataeval/_internal/metrics/balance.py,sha256=eAHvgjiGCH893XSQLqh9j9wgvAECoNPVT8k
 dataeval/_internal/metrics/ber.py,sha256=Onsi47AbT9rMvng-Pbu8LIrYRfLpI13En1FxkFoMKQs,4668
 dataeval/_internal/metrics/coverage.py,sha256=EZVES1rbZW2j_CtQv1VFfSO-UmWcrt5nmqxDErtrG14,3473
 dataeval/_internal/metrics/divergence.py,sha256=nmMUfr9FGnH798eb6xzEiMj4C42rQVthh5HeexiY6EE,4119
-dataeval/_internal/metrics/diversity.py,sha256=nGjYQ-NLjb8mPt1PAYnvkWH4D58kjM39IPs2FULfis4,7503
-dataeval/_internal/metrics/parity.py,sha256=suv1Pf7gPj0_NxsS0_M6ewfUndsFJyEhbt5NPp6ktMI,15457
+dataeval/_internal/metrics/diversity.py,sha256=_oT0FHsgfLOoe_TLD2Aax4r4jmH6WnOPVIkcl_YjaoY,7582
+dataeval/_internal/metrics/parity.py,sha256=VszQNbHWjct2bCqrIXUZC_qFi4ZIq2Lm-vs-DiarBFo,16244
 dataeval/_internal/metrics/stats.py,sha256=ILKteVMGjrp1s2CECPL_hbLsijIKR2d6II2-8w9oxW8,18105
 dataeval/_internal/metrics/uap.py,sha256=w-wvXXnX16kUq-weaZD2SrJi22LJ8EjOFbOhPxeGejI,2043
 dataeval/_internal/metrics/utils.py,sha256=mSYa-3cHGcsQwPr7zbdpzrnK_8jIXCiAcu2HCcvrtaY,13007
@@ -67,7 +67,7 @@ dataeval/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uT
 dataeval/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
 dataeval/utils/__init__.py,sha256=ExQ1xj62MjcM9uIu1-g1P2fW0EPJpcIofnvxjQ908c4,172
 dataeval/workflows/__init__.py,sha256=gkU2B6yUiefexcYrBwqfZKNl8BvX8abUjfeNvVBXF4E,186
-dataeval-0.69.2.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
-dataeval-0.69.2.dist-info/METADATA,sha256=_9rVrbIh4EPYStZtOUYnB-Xo3cZ5JMUAf06TqDKvrZs,4217
-dataeval-0.69.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-dataeval-0.69.2.dist-info/RECORD,,
+dataeval-0.69.3.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+dataeval-0.69.3.dist-info/METADATA,sha256=dyyl60cjz6n7gRgYMZs9gCOdqpc9UbSV4LFCD8rJNCM,4217
+dataeval-0.69.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+dataeval-0.69.3.dist-info/RECORD,,

{dataeval-0.69.2.dist-info → dataeval-0.69.3.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{dataeval-0.69.2.dist-info → dataeval-0.69.3.dist-info}/WHEEL RENAMED Viewed

File without changes

dataeval 0.69.2__py3-none-any.whl → 0.69.3__py3-none-any.whl

dataeval 0.69.2__py3-none-any.whl → 0.69.3__py3-none-any.whl