dataeval 0.69.2__tar.gz → 0.69.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {dataeval-0.69.2 → dataeval-0.69.3}/PKG-INFO +1 -1
  2. {dataeval-0.69.2 → dataeval-0.69.3}/pyproject.toml +2 -2
  3. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/__init__.py +1 -1
  4. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/diversity.py +4 -1
  5. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/parity.py +26 -7
  6. {dataeval-0.69.2 → dataeval-0.69.3}/LICENSE.txt +0 -0
  7. {dataeval-0.69.2 → dataeval-0.69.3}/README.md +0 -0
  8. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/__init__.py +0 -0
  9. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/clusterer.py +0 -0
  10. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/drift/__init__.py +0 -0
  11. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/drift/base.py +0 -0
  12. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/drift/cvm.py +0 -0
  13. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/drift/ks.py +0 -0
  14. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/drift/mmd.py +0 -0
  15. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/drift/torch.py +0 -0
  16. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/drift/uncertainty.py +0 -0
  17. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/duplicates.py +0 -0
  18. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/merged_stats.py +0 -0
  19. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/ood/__init__.py +0 -0
  20. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/ood/ae.py +0 -0
  21. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/ood/aegmm.py +0 -0
  22. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/ood/base.py +0 -0
  23. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/ood/llr.py +0 -0
  24. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/ood/vae.py +0 -0
  25. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/ood/vaegmm.py +0 -0
  26. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/detectors/outliers.py +0 -0
  27. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/flags.py +0 -0
  28. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/interop.py +0 -0
  29. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/__init__.py +0 -0
  30. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/balance.py +0 -0
  31. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/ber.py +0 -0
  32. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/coverage.py +0 -0
  33. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/divergence.py +0 -0
  34. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/stats.py +0 -0
  35. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/uap.py +0 -0
  36. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/metrics/utils.py +0 -0
  37. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/__init__.py +0 -0
  38. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/pytorch/__init__.py +0 -0
  39. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/pytorch/autoencoder.py +0 -0
  40. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/pytorch/blocks.py +0 -0
  41. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/pytorch/utils.py +0 -0
  42. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/tensorflow/__init__.py +0 -0
  43. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/tensorflow/autoencoder.py +0 -0
  44. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/tensorflow/gmm.py +0 -0
  45. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/tensorflow/losses.py +0 -0
  46. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/tensorflow/pixelcnn.py +0 -0
  47. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/tensorflow/trainer.py +0 -0
  48. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/models/tensorflow/utils.py +0 -0
  49. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/output.py +0 -0
  50. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/utils.py +0 -0
  51. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/workflows/__init__.py +0 -0
  52. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/_internal/workflows/sufficiency.py +0 -0
  53. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/detectors/__init__.py +0 -0
  54. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/detectors/drift/__init__.py +0 -0
  55. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/detectors/drift/kernels/__init__.py +0 -0
  56. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/detectors/drift/updates/__init__.py +0 -0
  57. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/detectors/linters/__init__.py +0 -0
  58. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/detectors/ood/__init__.py +0 -0
  59. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/flags/__init__.py +0 -0
  60. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/metrics/__init__.py +0 -0
  61. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/metrics/bias/__init__.py +0 -0
  62. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/metrics/estimators/__init__.py +0 -0
  63. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/metrics/stats/__init__.py +0 -0
  64. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/py.typed +0 -0
  65. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/tensorflow/__init__.py +0 -0
  66. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/tensorflow/loss/__init__.py +0 -0
  67. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/tensorflow/models/__init__.py +0 -0
  68. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/tensorflow/recon/__init__.py +0 -0
  69. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/torch/__init__.py +0 -0
  70. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/torch/models/__init__.py +0 -0
  71. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/torch/trainer/__init__.py +0 -0
  72. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/utils/__init__.py +0 -0
  73. {dataeval-0.69.2 → dataeval-0.69.3}/src/dataeval/workflows/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.69.2
3
+ Version: 0.69.3
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dataeval"
3
- version = "0.69.2" # dynamic
3
+ version = "0.69.3" # dynamic
4
4
  description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
5
5
  license = "MIT"
6
6
  readme = "README.md"
@@ -80,7 +80,7 @@ pytest-xdist = {version = "*"}
80
80
  coverage = {version = "*", extras = ["toml"]}
81
81
  torchmetrics = {version = ">=1.0.0", source = "pytorch"}
82
82
  # type
83
- pyright = {version = "*, !=1.1.340"}
83
+ pyright = {version = "*", extras = ["nodejs"]}
84
84
  # prototype
85
85
  maite = {version = "*"}
86
86
  pandas = {version = "*"}
@@ -1,4 +1,4 @@
1
- __version__ = "0.69.2"
1
+ __version__ = "0.69.3"
2
2
 
3
3
  from importlib.util import find_spec
4
4
 
@@ -130,7 +130,10 @@ def diversity_simpson(
130
130
  p_i = cnts / cnts.sum()
131
131
  # inverse Simpson index normalized by (number of bins)
132
132
  s_0 = 1 / np.sum(p_i**2) / num_bins[col]
133
- ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
133
+ if num_bins[col] == 1:
134
+ ev_index[col] = 0
135
+ else:
136
+ ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
134
137
  return ev_index
135
138
 
136
139
 
@@ -348,6 +348,7 @@ def parity(
348
348
  chi_scores = np.zeros(len(factors))
349
349
  p_values = np.zeros(len(factors))
350
350
  n_cls = len(np.unique(labels))
351
+ not_enough_data = {}
351
352
  for i, (current_factor_name, factor_values) in enumerate(factors.items()):
352
353
  unique_factor_values = np.unique(factor_values)
353
354
  contingency_matrix = np.zeros((len(unique_factor_values), n_cls))
@@ -361,13 +362,12 @@ def parity(
361
362
  with_both = np.bitwise_and((labels == label), factor_values == factor_value)
362
363
  contingency_matrix[fi, label] = np.sum(with_both)
363
364
  if 0 < contingency_matrix[fi, label] < 5:
364
- warnings.warn(
365
- f"Factor {current_factor_name} value {factor_value} co-occurs "
366
- f"only {contingency_matrix[fi, label]} times with label {label}. "
367
- "This can cause inaccurate chi_square calculation. Recommend"
368
- "ensuring each label occurs either 0 times or at least 5 times. "
369
- "Alternatively, digitize any continuous-valued factors "
370
- "into fewer bins."
365
+ if current_factor_name not in not_enough_data:
366
+ not_enough_data[current_factor_name] = {}
367
+ if factor_value not in not_enough_data[current_factor_name]:
368
+ not_enough_data[current_factor_name][factor_value] = []
369
+ not_enough_data[current_factor_name][factor_value].append(
370
+ (label, int(contingency_matrix[fi, label]))
371
371
  )
372
372
 
373
373
  # This deletes rows containing only zeros,
@@ -381,4 +381,23 @@ def parity(
381
381
  chi_scores[i] = chi2
382
382
  p_values[i] = p
383
383
 
384
+ if not_enough_data:
385
+ factor_msg = []
386
+ for factor, fact_dict in not_enough_data.items():
387
+ stacked_msg = []
388
+ for key, value in fact_dict.items():
389
+ msg = []
390
+ for item in value:
391
+ msg.append(f"label {item[0]}: {item[1]} occurrences")
392
+ flat_msg = "\n\t\t".join(msg)
393
+ stacked_msg.append(f"value {key} - {flat_msg}\n\t")
394
+ factor_msg.append(factor + " - " + "".join(stacked_msg))
395
+
396
+ message = "\n".join(factor_msg)
397
+
398
+ warnings.warn(
399
+ f"The following factors did not meet the recommended 5 occurrences for each value-label combination. \nRecommend rerunning parity after adjusting the following factor-value-label combinations: \n{message}", # noqa: E501
400
+ UserWarning,
401
+ )
402
+
384
403
  return ParityOutput(chi_scores, p_values)
File without changes
File without changes