dataeval 0.86.5__tar.gz → 0.86.6__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (113)
  1. {dataeval-0.86.5 → dataeval-0.86.6}/PKG-INFO +1 -1
  2. {dataeval-0.86.5 → dataeval-0.86.6}/pyproject.toml +1 -1
  3. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/__init__.py +1 -1
  4. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/linters/outliers.py +41 -39
  5. {dataeval-0.86.5 → dataeval-0.86.6}/LICENSE.txt +0 -0
  6. {dataeval-0.86.5 → dataeval-0.86.6}/README.md +0 -0
  7. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/_log.py +0 -0
  8. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/config.py +0 -0
  9. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/__init__.py +0 -0
  10. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/_embeddings.py +0 -0
  11. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/_images.py +0 -0
  12. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/_metadata.py +0 -0
  13. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/_selection.py +0 -0
  14. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/_split.py +0 -0
  15. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/selections/__init__.py +0 -0
  16. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/selections/_classbalance.py +0 -0
  17. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/selections/_classfilter.py +0 -0
  18. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/selections/_indices.py +0 -0
  19. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/selections/_limit.py +0 -0
  20. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/selections/_prioritize.py +0 -0
  21. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/selections/_reverse.py +0 -0
  22. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/data/selections/_shuffle.py +0 -0
  23. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/__init__.py +0 -0
  24. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/__init__.py +0 -0
  25. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_base.py +0 -0
  26. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_cvm.py +0 -0
  27. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_ks.py +0 -0
  28. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_mmd.py +0 -0
  29. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_mvdc.py +0 -0
  30. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_nml/__init__.py +0 -0
  31. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_nml/_base.py +0 -0
  32. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_nml/_chunk.py +0 -0
  33. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_nml/_domainclassifier.py +0 -0
  34. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_nml/_result.py +0 -0
  35. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_nml/_thresholds.py +0 -0
  36. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/_uncertainty.py +0 -0
  37. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/drift/updates.py +0 -0
  38. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/linters/__init__.py +0 -0
  39. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/linters/duplicates.py +0 -0
  40. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/ood/__init__.py +0 -0
  41. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/ood/ae.py +0 -0
  42. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/ood/base.py +0 -0
  43. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/detectors/ood/mixin.py +0 -0
  44. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metadata/__init__.py +0 -0
  45. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metadata/_distance.py +0 -0
  46. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metadata/_ood.py +0 -0
  47. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metadata/_utils.py +0 -0
  48. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/__init__.py +0 -0
  49. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/bias/__init__.py +0 -0
  50. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/bias/_balance.py +0 -0
  51. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/bias/_completeness.py +0 -0
  52. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/bias/_coverage.py +0 -0
  53. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/bias/_diversity.py +0 -0
  54. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/bias/_parity.py +0 -0
  55. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/estimators/__init__.py +0 -0
  56. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/estimators/_ber.py +0 -0
  57. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/estimators/_clusterer.py +0 -0
  58. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/estimators/_divergence.py +0 -0
  59. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/estimators/_uap.py +0 -0
  60. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/__init__.py +0 -0
  61. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/_base.py +0 -0
  62. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/_boxratiostats.py +0 -0
  63. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/_dimensionstats.py +0 -0
  64. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/_hashstats.py +0 -0
  65. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/_imagestats.py +0 -0
  66. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/_labelstats.py +0 -0
  67. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/_pixelstats.py +0 -0
  68. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/metrics/stats/_visualstats.py +0 -0
  69. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/__init__.py +0 -0
  70. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_base.py +0 -0
  71. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_bias.py +0 -0
  72. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_drift.py +0 -0
  73. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_estimators.py +0 -0
  74. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_linters.py +0 -0
  75. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_metadata.py +0 -0
  76. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_ood.py +0 -0
  77. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_stats.py +0 -0
  78. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_utils.py +0 -0
  79. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/outputs/_workflows.py +0 -0
  80. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/py.typed +0 -0
  81. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/typing.py +0 -0
  82. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/__init__.py +0 -0
  83. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/_array.py +0 -0
  84. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/_bin.py +0 -0
  85. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/_clusterer.py +0 -0
  86. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/_fast_mst.py +0 -0
  87. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/_image.py +0 -0
  88. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/_method.py +0 -0
  89. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/_mst.py +0 -0
  90. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/_plot.py +0 -0
  91. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/data/__init__.py +0 -0
  92. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/data/_dataset.py +0 -0
  93. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/data/collate.py +0 -0
  94. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/data/metadata.py +0 -0
  95. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/__init__.py +0 -0
  96. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_antiuav.py +0 -0
  97. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_base.py +0 -0
  98. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_cifar10.py +0 -0
  99. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_fileio.py +0 -0
  100. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_milco.py +0 -0
  101. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_mixin.py +0 -0
  102. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_mnist.py +0 -0
  103. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_ships.py +0 -0
  104. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_types.py +0 -0
  105. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/datasets/_voc.py +0 -0
  106. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/torch/__init__.py +0 -0
  107. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/torch/_blocks.py +0 -0
  108. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/torch/_gmm.py +0 -0
  109. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/torch/_internal.py +0 -0
  110. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/torch/models.py +0 -0
  111. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/utils/torch/trainer.py +0 -0
  112. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/workflows/__init__.py +0 -0
  113. {dataeval-0.86.5 → dataeval-0.86.6}/src/dataeval/workflows/sufficiency.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.86.5
3
+ Version: 0.86.6
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dataeval"
3
- version = "0.86.5" # dynamic
3
+ version = "0.86.6" # dynamic
4
4
  description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
5
5
  license = "MIT"
6
6
  readme = "README.md"
@@ -8,7 +8,7 @@ shifts that impact performance of deployed models.
8
8
  from __future__ import annotations
9
9
 
10
10
  __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
11
- __version__ = "0.86.5"
11
+ __version__ = "0.86.6"
12
12
 
13
13
  import logging
14
14
 
@@ -18,57 +18,59 @@ from dataeval.outputs._stats import BASE_ATTRS
18
18
  from dataeval.typing import ArrayLike, Dataset
19
19
 
20
20
 
21
+ def _get_zscore_mask(values: NDArray[np.float64], threshold: float | None) -> NDArray[np.bool_] | None:
22
+ threshold = threshold if threshold is not None else 3.0
23
+ std_val = np.nanstd(values)
24
+ if std_val > EPSILON:
25
+ mean_val = np.nanmean(values)
26
+ abs_diff = np.abs(values - mean_val)
27
+ return (abs_diff / std_val) > threshold
28
+ return None
29
+
30
+
31
+ def _get_modzscore_mask(values: NDArray[np.float64], threshold: float | None) -> NDArray[np.bool_] | None:
32
+ threshold = threshold if threshold is not None else 3.5
33
+ median_val = np.nanmedian(values)
34
+ abs_diff = np.abs(values - median_val)
35
+ m_abs_diff = np.nanmedian(abs_diff)
36
+ m_abs_diff = np.nanmean(abs_diff) if m_abs_diff <= EPSILON else m_abs_diff
37
+ if m_abs_diff > EPSILON:
38
+ mod_z_score = 0.6745 * abs_diff / m_abs_diff
39
+ return mod_z_score > threshold
40
+ return None
41
+
42
+
43
+ def _get_iqr_mask(values: NDArray[np.float64], threshold: float | None) -> NDArray[np.bool_] | None:
44
+ threshold = threshold if threshold is not None else 1.5
45
+ qrt = np.nanpercentile(values, q=(25, 75), method="midpoint")
46
+ iqr_val = qrt[1] - qrt[0]
47
+ if iqr_val > EPSILON:
48
+ iqr_threshold = iqr_val * threshold
49
+ return (values < (qrt[0] - iqr_threshold)) | (values > (qrt[1] + iqr_threshold))
50
+ return None
51
+
52
+
21
53
  def _get_outlier_mask(
22
54
  values: NDArray[Any], method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
23
55
  ) -> NDArray[np.bool_]:
24
56
  if len(values) == 0:
25
57
  return np.array([], dtype=bool)
26
58
 
27
- values = values.astype(np.float64)
28
-
29
- valid_mask = ~np.isnan(values)
30
- outliers = np.full(values.shape, False, dtype=bool)
31
-
32
- if not np.any(valid_mask):
33
- return outliers
34
-
35
- if method == "zscore":
36
- threshold = threshold if threshold is not None else 3.0
37
-
38
- std_val = np.nanstd(values)
39
-
40
- if std_val > EPSILON:
41
- mean_val = np.nanmean(values)
42
- abs_diff = np.abs(values - mean_val)
43
- outliers = (abs_diff / std_val) > threshold
59
+ nan_mask = np.isnan(values)
44
60
 
61
+ if np.all(nan_mask):
62
+ outliers = None
63
+ elif method == "zscore":
64
+ outliers = _get_zscore_mask(values.astype(np.float64), threshold)
45
65
  elif method == "modzscore":
46
- threshold = threshold if threshold is not None else 3.5
47
-
48
- median_val = np.nanmedian(values)
49
- abs_diff = np.abs(values - median_val)
50
- m_abs_diff = np.nanmedian(abs_diff)
51
- m_abs_diff = np.nanmean(abs_diff) if m_abs_diff <= EPSILON else m_abs_diff
52
-
53
- if m_abs_diff > EPSILON:
54
- mod_z_score = 0.6745 * abs_diff / m_abs_diff
55
- outliers = mod_z_score > threshold
56
-
66
+ outliers = _get_modzscore_mask(values.astype(np.float64), threshold)
57
67
  elif method == "iqr":
58
- threshold = threshold if threshold is not None else 1.5
59
-
60
- qrt = np.nanpercentile(values, q=(25, 75), method="midpoint")
61
- iqr_val = qrt[1] - qrt[0]
62
-
63
- if iqr_val > EPSILON:
64
- iqr_threshold = iqr_val * threshold
65
- outliers = (values < (qrt[0] - iqr_threshold)) | (values > (qrt[1] + iqr_threshold))
66
-
68
+ outliers = _get_iqr_mask(values.astype(np.float64), threshold)
67
69
  else:
68
70
  raise ValueError("Outlier method must be 'zscore' 'modzscore' or 'iqr'.")
69
71
 
70
- outliers[~valid_mask] = False
71
- return outliers
72
+ # If outliers were found, return the mask with NaN values set to False, otherwise return all False
73
+ return outliers & ~nan_mask if outliers is not None else np.full(values.shape, False, dtype=bool)
72
74
 
73
75
 
74
76
  class Outliers:
File without changes
File without changes