dataeval 1.0.1__tar.gz → 1.0.2__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (110)
  1. {dataeval-1.0.1 → dataeval-1.0.2}/PKG-INFO +1 -1
  2. {dataeval-1.0.1 → dataeval-1.0.2}/pyproject.toml +1 -1
  3. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/_metadata.py +7 -1
  4. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/_version.py +2 -2
  5. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_compute_stats.py +5 -0
  6. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_parity.py +3 -1
  7. {dataeval-1.0.1 → dataeval-1.0.2}/.gitignore +0 -0
  8. {dataeval-1.0.1 → dataeval-1.0.2}/LICENSE +0 -0
  9. {dataeval-1.0.1 → dataeval-1.0.2}/README.md +0 -0
  10. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/__init__.py +0 -0
  11. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/_embeddings.py +0 -0
  12. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/_experimental.py +0 -0
  13. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/_helpers.py +0 -0
  14. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/_log.py +0 -0
  15. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/_warm_cache.py +0 -0
  16. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/bias/__init__.py +0 -0
  17. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/bias/_balance.py +0 -0
  18. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/bias/_diversity.py +0 -0
  19. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/bias/_parity.py +0 -0
  20. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/config.py +0 -0
  21. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/__init__.py +0 -0
  22. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_ber.py +0 -0
  23. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_bin.py +0 -0
  24. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/__init__.py +0 -0
  25. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/_base.py +0 -0
  26. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/_cache.py +0 -0
  27. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/_dimensionstats.py +0 -0
  28. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/_hashstats.py +0 -0
  29. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/_pixelstats.py +0 -0
  30. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/_register.py +0 -0
  31. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/_registry.py +0 -0
  32. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_calculators/_visualstats.py +0 -0
  33. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_clusterer.py +0 -0
  34. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_completeness.py +0 -0
  35. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_compute_ratios.py +0 -0
  36. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_coverage.py +0 -0
  37. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_divergence.py +0 -0
  38. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_diversity.py +0 -0
  39. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_fast_hdbscan/_cluster_trees.py +0 -0
  40. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_fast_hdbscan/_disjoint_set.py +0 -0
  41. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_fast_hdbscan/_mst.py +0 -0
  42. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_feature_distance.py +0 -0
  43. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_hash.py +0 -0
  44. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_label_errors.py +0 -0
  45. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_label_parity.py +0 -0
  46. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_label_stats.py +0 -0
  47. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_metadata_insights.py +0 -0
  48. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_mst.py +0 -0
  49. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_mutual_info.py +0 -0
  50. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_nullmodel.py +0 -0
  51. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_rank.py +0 -0
  52. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/core/_uap.py +0 -0
  53. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/exceptions.py +0 -0
  54. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/extractors/__init__.py +0 -0
  55. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/extractors/_bovw.py +0 -0
  56. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/extractors/_flatten.py +0 -0
  57. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/extractors/_onnx.py +0 -0
  58. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/extractors/_torch.py +0 -0
  59. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/extractors/_uncertainty.py +0 -0
  60. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/flags.py +0 -0
  61. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/performance/__init__.py +0 -0
  62. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/performance/_aggregator.py +0 -0
  63. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/performance/_output.py +0 -0
  64. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/performance/_sufficiency.py +0 -0
  65. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/performance/schedules.py +0 -0
  66. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/protocols.py +0 -0
  67. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/py.typed +0 -0
  68. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/quality/__init__.py +0 -0
  69. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/quality/_duplicates.py +0 -0
  70. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/quality/_outliers.py +0 -0
  71. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/quality/_shared.py +0 -0
  72. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/scope/__init__.py +0 -0
  73. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/scope/_prioritize.py +0 -0
  74. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/selection/__init__.py +0 -0
  75. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/selection/_classbalance.py +0 -0
  76. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/selection/_classfilter.py +0 -0
  77. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/selection/_indices.py +0 -0
  78. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/selection/_limit.py +0 -0
  79. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/selection/_reverse.py +0 -0
  80. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/selection/_select.py +0 -0
  81. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/selection/_shuffle.py +0 -0
  82. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/__init__.py +0 -0
  83. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_drift/__init__.py +0 -0
  84. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_drift/_base.py +0 -0
  85. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_drift/_chunk.py +0 -0
  86. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_drift/_domain_classifier.py +0 -0
  87. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_drift/_kneighbors.py +0 -0
  88. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_drift/_mmd.py +0 -0
  89. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_drift/_reconstruction.py +0 -0
  90. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_drift/_univariate.py +0 -0
  91. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_ood/__init__.py +0 -0
  92. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_ood/_base.py +0 -0
  93. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_ood/_domain_classifier.py +0 -0
  94. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_ood/_kneighbors.py +0 -0
  95. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_ood/_reconstruction.py +0 -0
  96. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_shared/__init__.py +0 -0
  97. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_shared/_domain_classifier.py +0 -0
  98. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_shared/_kneighbors.py +0 -0
  99. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/_shared/_reconstruction.py +0 -0
  100. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/shift/update_strategies.py +0 -0
  101. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/types.py +0 -0
  102. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/__init__.py +0 -0
  103. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/_internal.py +0 -0
  104. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/data.py +0 -0
  105. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/losses.py +0 -0
  106. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/models.py +0 -0
  107. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/onnx.py +0 -0
  108. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/preprocessing.py +0 -0
  109. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/thresholds.py +0 -0
  110. {dataeval-1.0.1 → dataeval-1.0.2}/src/dataeval/utils/training.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataeval
3
- Version: 1.0.1
3
+ Version: 1.0.2
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Project-URL: Homepage, https://dataeval.ai/
6
6
  Project-URL: Repository, https://github.com/aria-ml/dataeval/
@@ -103,7 +103,7 @@ docs = [
103
103
  "jinja2>=3.1.6",
104
104
  "jupyter-client>=8.6.0",
105
105
  "jupyter-cache>=1.0",
106
- "maite-datasets>=0.0.10",
106
+ "maite-datasets>=0.0.12",
107
107
  "myst-nb>=1.0",
108
108
  "opencv-python-headless>=4.8.0",
109
109
  "plotly>=6.2.0",
@@ -1311,7 +1311,13 @@ class Metadata(Array, FeatureExtractor):
1311
1311
  n_classes,
1312
1312
  )
1313
1313
 
1314
- index2label = self._dataset.metadata.get("index2label", {i: str(i) for i in np.unique(labels)})
1314
+ unique_labels = np.unique(labels) if len(labels) else np.array([], dtype=np.intp)
1315
+ provided_i2l = self._dataset.metadata.get("index2label", None)
1316
+ if provided_i2l is not None:
1317
+ # Ensure every observed label has a name; use fallback for unmapped labels
1318
+ index2label = {int(lbl): provided_i2l.get(int(lbl), f"UNDEFINED_CLASS_{int(lbl)}") for lbl in unique_labels}
1319
+ else:
1320
+ index2label = {int(lbl): str(int(lbl)) for lbl in unique_labels}
1315
1321
  target_idx = self._compute_target_indices(srcidx, datum_count, bool(self._has_targets))
1316
1322
  reserved = ["item_index", "target_index", "class_label", "score", "box"]
1317
1323
  target_factor_dict = {}
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '1.0.1'
32
- __version_tuple__ = version_tuple = (1, 0, 1)
31
+ __version__ = version = '1.0.2'
32
+ __version_tuple__ = version_tuple = (1, 0, 2)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -351,6 +351,11 @@ def compute_stats(
351
351
  """
352
352
  Compute specified statistics on a set of images, optionally within bounding boxes.
353
353
 
354
+ Mixed-bit-depth datasets can produce misleading statistics when raw pixel values are
355
+ compared directly. To avoid this, pixel values are normalized to [0, 1] based on each
356
+ image's bit depth before any statistic is computed, keeping results meaningful and
357
+ comparable across 8-bit, 16-bit, 32-bit, and other precision images.
358
+
354
359
  Parameters
355
360
  ----------
356
361
  data : Iterable[ArrayLike] | Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]]
@@ -116,6 +116,7 @@ def parity(
116
116
  chi_scores = np.zeros(factor_data_np.shape[1])
117
117
  p_values = np.zeros_like(chi_scores)
118
118
  insufficient_ddict: defaultdict[int, defaultdict[int, dict[int, int]]] = defaultdict(lambda: defaultdict(dict))
119
+ unique_class_labels = np.unique(class_labels_np)
119
120
 
120
121
  for i, col_data in enumerate(factor_data_np.T):
121
122
  # Builds a contingency matrix where entry at index (r,c) represents
@@ -132,7 +133,8 @@ def parity(
132
133
  if contingency_matrix[int_factor, int_class] > 0:
133
134
  factor_category = unique_factor_values[int_factor].item()
134
135
  class_count = contingency_matrix[int_factor, int_class].item()
135
- insufficient_ddict[i][factor_category][int_class] = class_count
136
+ class_label = int(unique_class_labels[int_class])
137
+ insufficient_ddict[i][factor_category][class_label] = class_count
136
138
 
137
139
  # This deletes rows containing only zeros,
138
140
  # because scipy.stats.chi2_contingency fails when there are rows containing only zeros.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes