dataeval 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. dataeval/__init__.py +4 -4
  2. dataeval/detectors/__init__.py +4 -3
  3. dataeval/detectors/drift/__init__.py +10 -11
  4. dataeval/{_internal/detectors → detectors}/drift/base.py +51 -102
  5. dataeval/{_internal/detectors → detectors}/drift/cvm.py +9 -8
  6. dataeval/{_internal/detectors → detectors}/drift/ks.py +11 -10
  7. dataeval/{_internal/detectors → detectors}/drift/mmd.py +33 -34
  8. dataeval/{_internal/detectors → detectors}/drift/torch.py +15 -13
  9. dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +12 -9
  10. dataeval/detectors/drift/updates.py +61 -0
  11. dataeval/detectors/linters/__init__.py +3 -3
  12. dataeval/{_internal/detectors → detectors/linters}/clusterer.py +47 -45
  13. dataeval/{_internal/detectors → detectors/linters}/duplicates.py +20 -10
  14. dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
  15. dataeval/{_internal/detectors → detectors/linters}/outliers.py +19 -26
  16. dataeval/detectors/ood/__init__.py +8 -16
  17. dataeval/{_internal/detectors → detectors}/ood/ae.py +9 -9
  18. dataeval/{_internal/detectors → detectors}/ood/aegmm.py +10 -30
  19. dataeval/{_internal/detectors → detectors}/ood/base.py +27 -21
  20. dataeval/{_internal/detectors → detectors}/ood/llr.py +27 -23
  21. dataeval/detectors/ood/metadata_ks_compare.py +99 -0
  22. dataeval/detectors/ood/metadata_least_likely.py +119 -0
  23. dataeval/detectors/ood/metadata_ood_mi.py +92 -0
  24. dataeval/{_internal/detectors → detectors}/ood/vae.py +11 -13
  25. dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
  26. dataeval/{_internal/interop.py → interop.py} +12 -7
  27. dataeval/metrics/__init__.py +1 -1
  28. dataeval/metrics/bias/__init__.py +4 -4
  29. dataeval/{_internal/metrics → metrics/bias}/balance.py +70 -4
  30. dataeval/{_internal/metrics → metrics/bias}/coverage.py +10 -8
  31. dataeval/{_internal/metrics → metrics/bias}/diversity.py +54 -20
  32. dataeval/metrics/bias/metadata.py +275 -0
  33. dataeval/{_internal/metrics → metrics/bias}/parity.py +21 -17
  34. dataeval/metrics/estimators/__init__.py +3 -3
  35. dataeval/{_internal/metrics → metrics/estimators}/ber.py +31 -28
  36. dataeval/{_internal/metrics → metrics/estimators}/divergence.py +15 -16
  37. dataeval/{_internal/metrics → metrics/estimators}/uap.py +8 -6
  38. dataeval/metrics/stats/__init__.py +7 -7
  39. dataeval/{_internal/metrics → metrics}/stats/base.py +66 -40
  40. dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +19 -15
  41. dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +19 -17
  42. dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +12 -10
  43. dataeval/metrics/stats/hashstats.py +156 -0
  44. dataeval/{_internal/metrics → metrics}/stats/labelstats.py +8 -6
  45. dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +12 -11
  46. dataeval/{_internal/metrics → metrics}/stats/visualstats.py +14 -13
  47. dataeval/{_internal/output.py → output.py} +26 -6
  48. dataeval/utils/__init__.py +8 -4
  49. dataeval/utils/image.py +71 -0
  50. dataeval/utils/shared.py +151 -0
  51. dataeval/utils/split_dataset.py +486 -0
  52. dataeval/utils/tensorflow/__init__.py +9 -7
  53. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +64 -68
  54. dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +10 -9
  55. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +18 -22
  56. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
  57. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +18 -18
  58. dataeval/utils/tensorflow/loss/__init__.py +6 -2
  59. dataeval/utils/torch/__init__.py +7 -3
  60. dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
  61. dataeval/{_internal → utils/torch}/datasets.py +49 -43
  62. dataeval/utils/torch/models.py +138 -0
  63. dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +12 -141
  64. dataeval/{_internal → utils/torch}/utils.py +3 -1
  65. dataeval/workflows/__init__.py +1 -1
  66. dataeval/{_internal/workflows → workflows}/sufficiency.py +42 -37
  67. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/METADATA +7 -5
  68. dataeval-0.72.2.dist-info/RECORD +72 -0
  69. dataeval/_internal/detectors/__init__.py +0 -0
  70. dataeval/_internal/detectors/drift/__init__.py +0 -0
  71. dataeval/_internal/detectors/ood/__init__.py +0 -0
  72. dataeval/_internal/metrics/__init__.py +0 -0
  73. dataeval/_internal/metrics/stats/hashstats.py +0 -75
  74. dataeval/_internal/metrics/utils.py +0 -447
  75. dataeval/_internal/models/__init__.py +0 -0
  76. dataeval/_internal/models/pytorch/__init__.py +0 -0
  77. dataeval/_internal/models/pytorch/utils.py +0 -67
  78. dataeval/_internal/models/tensorflow/__init__.py +0 -0
  79. dataeval/_internal/workflows/__init__.py +0 -0
  80. dataeval/detectors/drift/kernels/__init__.py +0 -10
  81. dataeval/detectors/drift/updates/__init__.py +0 -7
  82. dataeval/utils/tensorflow/models/__init__.py +0 -9
  83. dataeval/utils/tensorflow/recon/__init__.py +0 -3
  84. dataeval/utils/torch/datasets/__init__.py +0 -12
  85. dataeval/utils/torch/models/__init__.py +0 -11
  86. dataeval/utils/torch/trainer/__init__.py +0 -7
  87. dataeval-0.72.0.dist-info/RECORD +0 -80
  88. /dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
  89. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
  90. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0
dataeval/detectors/ood/metadata_least_likely.py
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+import numbers
+import warnings
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+
+def get_least_likely_features(
+    metadata: dict[str, list[Any] | NDArray[Any]],
+    new_metadata: dict[str, list[Any] | NDArray[Any]],
+    is_ood: NDArray[np.bool_],
+) -> list[tuple[str, float]]:
+    """Computes which metadata feature is most out-of-distribution (OOD) relative to a reference metadata set.
+
+    Given a reference metadata dictionary `metadata` (where each key maps to one scalar metadata feature), a second
+    metadata dictionary, and a corresponding boolean flag `is_ood` indicating whether each new example falls
+    out-of-distribution (OOD) relative to the reference, this function finds which metadata feature is the most OOD,
+    for each OOD example.
+
+    Parameters
+    ----------
+    metadata : dict[str, list[Any] | NDArray[Any]]
+        A reference set of arrays of values, indexed by metadata feature names, with one value per data example per
+        feature.
+    new_metadata : dict[str, list[Any] | NDArray[Any]]
+        A second metadata set, to be tested against the reference metadata. The two metadata sets may hold
+        different numbers of examples.
+    is_ood : NDArray[np.bool_]
+        A boolean array, with one value per new_metadata example, that indicates which examples are OOD.
+
+    Returns
+    -------
+    list[tuple[str, float]]
+        A list of (feature name, deviation) pairs, one per OOD new_metadata example, naming the feature that was
+        most OOD for that example and how far out it was.
+
+    Examples
+    --------
+    Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and
+    altitude, as shown below.
+
+    >>> from dataeval.detectors.ood.metadata_least_likely import get_least_likely_features
+    >>> import numpy
+    >>> metadata = {"time": [1.2, 3.4, 5.6], "altitude": [235, 6789, 101112]}
+    >>> new_metadata = {"time": [7.8, 11.12], "altitude": [532, -211101]}
+    >>> is_ood = numpy.array([True, True])
+    >>> get_least_likely_features(metadata, new_metadata, is_ood)
+    [('time', 2.0), ('altitude', 33.245346)]
+    """
+    # Raise errors for bad inputs...
+
+    if metadata.keys() != new_metadata.keys():
+        raise ValueError(f"Reference and test metadata keys must be identical: {list(metadata)}, {list(new_metadata)}")
+
+    md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
+    new_md_lengths = {len(np.atleast_1d(v)) for v in new_metadata.values()}
+    if len(md_lengths) > 1 or len(new_md_lengths) > 1:
+        raise ValueError(f"All features must have same length, got lengths {md_lengths}, {new_md_lengths}")
+
+    n_reference, n_new = md_lengths.pop(), new_md_lengths.pop()  # possibly different numbers of metadata examples
+
+    if n_new != len(is_ood):
+        raise ValueError(f"is_ood flag must have same length as new metadata {n_new} but has length {len(is_ood)}.")
+
+    if n_reference < 3:  # too hard to define "in-distribution" with this few reference samples.
+        warnings.warn(
+            "We need at least 3 reference metadata examples to determine which "
+            f"features are least likely, but only got {n_reference}",
+            UserWarning,
+        )
+        return []
+
+    if not any(is_ood):
+        return []
+
+    # ...inputs are good, look for most deviant standardized features.
+
+    # largest standardized absolute deviation from the median observed so far for each example
+    deviation = np.zeros_like(is_ood, dtype=np.float32)
+
+    # name of feature that corresponds to `deviation` for each example
+    kmax = np.empty(len(is_ood), dtype=object)
+
+    for k, v in metadata.items():
+        # exclude cases where random happens to be out on tails, not interesting.
+        if k == "random":
+            continue
+
+        # Skip non-numerical features
+        if not all(isinstance(vi, numbers.Number) for vi in v):  # NB: np.nan *is* a number in this context.
+            continue
+
+        # Get standardization parameters from metadata
+        loc = np.median(v)  # ok, because we checked all were numeric
+        dev = np.asarray(v) - loc  # need to make array from v since it could be a list here.
+        posdev, negdev = dev[dev > 0], dev[dev < 0]
+        pos_scale = np.median(posdev) if posdev.any() else 1.0
+        neg_scale = np.abs(np.median(negdev)) if negdev.any() else 1.0
+
+        x, x0, dxp, dxn = np.atleast_1d(new_metadata[k]), loc, pos_scale, neg_scale  # just abbreviations
+        dxp = dxp if dxp > 0 else 1.0  # avoids dividing by zero below
+        dxn = dxn if dxn > 0 else 1.0
+
+        # xdev must be floating-point to avoid getting zero in an integer division.
+        xdev = (x - x0).astype(np.float64)
+        pos = xdev >= 0
+
+        X = np.zeros_like(xdev)
+        X[pos], X[~pos] = xdev[pos] / dxp, xdev[~pos] / dxn  # keeping track of possible asymmetry of x, but...
+        # ...below here, only need to think about absolute deviation.
+
+        abig = np.abs(X) > deviation
+        kmax[abig] = k
+        deviation[abig] = np.abs(X[abig])
+
+    unlikely_features = list(zip(kmax[is_ood], deviation[is_ood]))  # feature names, along with how far out they are.
+    return unlikely_features
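
The scoring logic above centers each reference feature on its median and uses separate scales for positive and negative deviations, so skewed features are not penalized for their long tail. A minimal sketch of that standardization in isolation (variable names here are illustrative, not part of the package):

```python
import numpy as np

ref = np.array([1.2, 3.4, 5.6])           # reference "time" values from the doctest above
loc = np.median(ref)                       # center on the median: 3.4
dev = ref - loc
pos_scale = np.median(dev[dev > 0])        # scale for values above the median: 2.2
neg_scale = abs(np.median(dev[dev < 0]))   # scale for values below the median: 2.2

x = 7.8                                    # a new, possibly OOD value
scale = pos_scale if x >= loc else neg_scale
print(round((x - loc) / scale, 6))         # 2.0, matching the doctest result for "time"
```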
dataeval/detectors/ood/metadata_ood_mi.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+import numbers
+import warnings
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+from sklearn.feature_selection import mutual_info_classif
+
+# NATS2BITS is the reciprocal of the natural log of 2. If you have an information/entropy-type quantity measured in
+# nats, which is what many library functions return, multiply it by NATS2BITS to get it in bits.
+NATS2BITS = 1.442695
+
+
+def get_metadata_ood_mi(
+    metadata: dict[str, list[Any] | NDArray[Any]],
+    is_ood: NDArray[np.bool_],
+    discrete_features: str | bool | NDArray[np.bool_] = False,
+    random_state: int | None = None,
+) -> dict[str, float]:
+    """Computes mutual information between a set of metadata features and an out-of-distribution flag.
+
+    Given a metadata dictionary `metadata` (where each key maps to one scalar metadata feature per example), and a
+    corresponding boolean flag `is_ood` indicating whether each example falls out-of-distribution (OOD) relative to a
+    reference dataset, this function finds the strength of association between each metadata feature and `is_ood` by
+    computing their mutual information. Metadata features may be either discrete or continuous; set the
+    `discrete_features` keyword to a bool array with True for each discrete feature, or pass a single bool to apply
+    to all features. Returns a dict indicating the strength of association between each individual feature and the
+    OOD flag, measured in bits.
+
+    Parameters
+    ----------
+    metadata : dict[str, list[Any] | NDArray[Any]]
+        A set of arrays of values, indexed by metadata feature names, with one value per data example per feature.
+    is_ood : NDArray[np.bool_]
+        A boolean array, with one value per example, that indicates which examples are OOD.
+    discrete_features : str | bool | NDArray[np.bool_]
+        Either a boolean array or a single boolean value, indicating which features take on discrete values.
+    random_state : int, optional - default None
+        Determines random number generation for small noise added to continuous variables. Set to a value for
+        reproducible results.
+
+    Returns
+    -------
+    dict[str, float]
+        A dictionary with keys corresponding to metadata feature names, and values indicating the strength of
+        association between each named feature and the OOD flag, as mutual information measured in bits.
+
+    Examples
+    --------
+    Imagine we have 100 data examples, and that the corresponding metadata contains 2 features called time and
+    altitude.
+
+    >>> import numpy
+    >>> metadata = {"time": numpy.linspace(0, 10, 100), "altitude": numpy.linspace(0, 16, 100) ** 2}
+    >>> is_ood = metadata["altitude"] > 100
+    >>> print(get_metadata_ood_mi(metadata, is_ood, discrete_features=False))
+    {'time': 0.933074285817367, 'altitude': 0.9407686591507002}
+    """
+    numerical_keys = [k for k, v in metadata.items() if all(isinstance(vi, numbers.Number) for vi in v)]
+    if len(numerical_keys) < len(metadata):
+        warnings.warn(
+            f"Processing {numerical_keys}, others are non-numerical and will be skipped.",
+            UserWarning,
+        )
+
+    md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
+    if len(md_lengths) > 1:
+        raise ValueError(f"Metadata features have differing sizes: {md_lengths}")
+
+    if len(is_ood) != (mdl := md_lengths.pop()):
+        raise ValueError(
+            f"OOD flag and metadata features need to be same size, but are different sizes: {len(is_ood)} and {mdl}."
+        )
+
+    X = np.array([metadata[k] for k in numerical_keys]).T
+
+    X0, dX = np.mean(X, axis=0), np.std(X, axis=0, ddof=1)
+    Xscl = (X - X0) / dX
+
+    mutual_info_values = (
+        mutual_info_classif(
+            Xscl,
+            is_ood,
+            discrete_features=discrete_features,  # type: ignore
+            random_state=random_state,
+        )
+        * NATS2BITS
+    )
+
+    mi_dict = {k: mutual_info_values[i] for i, k in enumerate(numerical_keys)}
+    return mi_dict
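
As a sanity check on the nats-to-bits conversion: `mutual_info_classif` reports values in nats, and the entropy of a fair coin is ln 2 ≈ 0.6931 nats, which should come out to exactly one bit:

```python
import numpy as np

NATS2BITS = 1.442695               # 1 / ln(2), as defined above

fair_coin_nats = np.log(2)         # entropy of a fair coin, in nats
print(fair_coin_nats * NATS2BITS)  # ~1.0 bit
```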
dataeval/detectors/ood/vae.py
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
 
 from __future__ import annotations
 
+__all__ = ["OOD_VAE"]
+
 from typing import Callable
 
 import numpy as np
@@ -15,12 +17,11 @@ import tensorflow as tf
 import tf_keras as keras
 from numpy.typing import ArrayLike
 
-from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
-from dataeval._internal.interop import to_numpy
-from dataeval._internal.models.tensorflow.autoencoder import VAE
-from dataeval._internal.models.tensorflow.losses import Elbo
-from dataeval._internal.models.tensorflow.utils import predict_batch
-from dataeval._internal.output import set_metadata
+from dataeval.detectors.ood.base import OODBase, OODScoreOutput
+from dataeval.interop import to_numpy
+from dataeval.utils.tensorflow._internal.autoencoder import VAE
+from dataeval.utils.tensorflow._internal.loss import Elbo
+from dataeval.utils.tensorflow._internal.utils import predict_batch
 
 
 class OOD_VAE(OODBase):
@@ -38,14 +39,14 @@ class OOD_VAE(OODBase):
     --------
     Instantiate an OOD detector metric with a generic dataset - batch of images with shape (3,25,25)
 
-    >>> metric = OOD_VAE(create_model(VAE, dataset[0].shape))
+    >>> metric = OOD_VAE(create_model("VAE", dataset[0].shape))
 
     Adjusting fit parameters,
     including setting the fit threshold at 85% for a training set with about 15% out-of-distribution
 
     >>> metric.fit(dataset, threshold_perc=85, batch_size=128, verbose=False)
 
-    Detect out of distribution samples at the 'feature' level
+    Detect :term:`out of distribution<Out-of-Distribution (OOD)>` samples at the 'feature' level
 
     >>> result = metric.predict(dataset, ood_type="feature")
     """
@@ -58,18 +59,15 @@
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable[..., tf.Tensor] | None = None,
+        loss_fn: Callable[..., tf.Tensor] = Elbo(0.05),
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
-        if loss_fn is None:
-            loss_fn = Elbo(0.05)
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
 
-    @set_metadata("dataeval.detectors")
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
+    def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
 
         # sample reconstructed instances
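
The rename of `score` to `_score` (and the dropped `set_metadata` decorator) suggests the public `score` entry point now lives once on the base class instead of on each detector. The base.py hunk is not shown here, but the usual shape of that refactor is a template method; the sketch below is illustrative only, not dataeval's actual `OODBase` code:

```python
class OODBaseSketch:
    """Illustrative template-method pattern, not dataeval's actual base class."""

    def score(self, X, batch_size: int = int(1e10)):
        # Shared concerns (output metadata, input validation) are handled once here...
        return self._score(X, batch_size)

    def _score(self, X, batch_size: int = int(1e10)):
        raise NotImplementedError  # ...while each detector overrides only this hook.
```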
dataeval/detectors/ood/vaegmm.py
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
 
 from __future__ import annotations
 
+__all__ = ["OOD_VAEGMM"]
+
 from typing import Callable
 
 import numpy as np
@@ -15,13 +17,12 @@ import tensorflow as tf
 import tf_keras as keras
 from numpy.typing import ArrayLike
 
-from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
-from dataeval._internal.interop import to_numpy
-from dataeval._internal.models.tensorflow.autoencoder import VAEGMM
-from dataeval._internal.models.tensorflow.gmm import gmm_energy
-from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
-from dataeval._internal.models.tensorflow.utils import predict_batch
-from dataeval._internal.output import set_metadata
+from dataeval.detectors.ood.base import OODGMMBase, OODScoreOutput
+from dataeval.interop import to_numpy
+from dataeval.utils.tensorflow._internal.autoencoder import VAEGMM
+from dataeval.utils.tensorflow._internal.gmm import gmm_energy
+from dataeval.utils.tensorflow._internal.loss import Elbo, LossGMM
+from dataeval.utils.tensorflow._internal.utils import predict_batch
 
 
 class OOD_VAEGMM(OODGMMBase):
@@ -44,38 +45,15 @@ class OOD_VAEGMM(OODGMMBase):
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable[..., tf.Tensor] | None = None,
+        loss_fn: Callable[..., tf.Tensor] = LossGMM(elbo=Elbo(0.05)),
         optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
-        if loss_fn is None:
-            loss_fn = LossGMM(elbo=Elbo(0.05))
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
 
-    @set_metadata("dataeval.detectors")
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
-        """
-        Compute the out-of-distribution (OOD) score for a given dataset.
-
-        Parameters
-        ----------
-        X : ArrayLike
-            Input data to score.
-        batch_size : int, default 1e10
-            Number of instances to process in each batch.
-            Use a smaller batch size if your dataset is large or if you encounter memory issues.
-
-        Returns
-        -------
-        OODScoreOutput
-            An object containing the instance-level OOD score.
-
-        Note
-        ----
-        This model does not produce a feature level score like the OOD_AE or OOD_VAE models.
-        """
+    def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
 
         # draw samples from latent space
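
Note that moving the default from a `None` sentinel into the signature (`loss_fn: ... = LossGMM(elbo=Elbo(0.05))`) means the default loss object is constructed once, at import time, and shared by every call that omits `loss_fn`. That is harmless for a stateless callable, but it is the behavior change this hunk introduces; a small illustration of the underlying Python rule (generic code, not dataeval's):

```python
class Counter:
    def __init__(self) -> None:
        self.calls = 0

def fit(loss=Counter()):  # default is evaluated once, at function definition
    loss.calls += 1
    return loss.calls

print(fit(), fit())  # 1 2 -- both calls share the same default Counter instance
```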
dataeval/interop.py
@@ -1,43 +1,47 @@
 from __future__ import annotations
 
+__all__ = ["as_numpy", "to_numpy", "to_numpy_iter"]
+
 from importlib import import_module
 from typing import Any, Iterable, Iterator
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 
-module_cache = {}
+_MODULE_CACHE = {}
 
 
-def try_import(module_name):
-    if module_name in module_cache:
-        return module_cache[module_name]
+def _try_import(module_name):
+    if module_name in _MODULE_CACHE:
+        return _MODULE_CACHE[module_name]
 
     try:
         module = import_module(module_name)
     except ImportError:  # pragma: no cover - covered by test_mindeps.py
         module = None
 
-    module_cache[module_name] = module
+    _MODULE_CACHE[module_name] = module
     return module
 
 
 def as_numpy(array: ArrayLike | None) -> NDArray[Any]:
+    """Converts an ArrayLike to Numpy array without copying (if possible)"""
     return to_numpy(array, copy=False)
 
 
 def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
+    """Converts an ArrayLike to new Numpy array"""
     if array is None:
         return np.ndarray([])
 
     if isinstance(array, np.ndarray):
         return array.copy() if copy else array
 
-    tf = try_import("tensorflow")
+    tf = _try_import("tensorflow")
     if tf and tf.is_tensor(array):
         return array.numpy().copy() if copy else array.numpy()  # type: ignore
 
-    torch = try_import("torch")
+    torch = _try_import("torch")
     if torch and isinstance(array, torch.Tensor):
         return array.detach().cpu().numpy().copy() if copy else array.detach().cpu().numpy()  # type: ignore
 
@@ -45,5 +49,6 @@ def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
 
 
 def to_numpy_iter(iterable: Iterable[ArrayLike]) -> Iterator[NDArray[Any]]:
+    """Yields an iterator of numpy arrays from an ArrayLike"""
     for array in iterable:
         yield to_numpy(array)
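
With the module now public at `dataeval.interop`, typical usage looks like the sketch below (assuming the usual array-conversion fallback for plain sequences, which this hunk does not show):

```python
import numpy as np
from dataeval.interop import as_numpy, to_numpy

arr = np.arange(3)
copied = to_numpy(arr)   # copy=True by default: a new array
shared = as_numpy(arr)   # copy=False path: the same ndarray passes through
assert copied is not arr and shared is arr
```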
dataeval/metrics/__init__.py
@@ -3,6 +3,6 @@ Metrics are a way to measure the performance of your models or datasets that
 can then be analyzed in the context of a given problem.
 """
 
-from . import bias, estimators, stats
+from dataeval.metrics import bias, estimators, stats
 
 __all__ = ["bias", "estimators", "stats"]
dataeval/metrics/bias/__init__.py
@@ -3,10 +3,10 @@ Bias metrics check for skewed or imbalanced datasets and incomplete feature
 representation which may impact model performance.
 """
 
-from dataeval._internal.metrics.balance import BalanceOutput, balance
-from dataeval._internal.metrics.coverage import CoverageOutput, coverage
-from dataeval._internal.metrics.diversity import DiversityOutput, diversity
-from dataeval._internal.metrics.parity import ParityOutput, label_parity, parity
+from dataeval.metrics.bias.balance import BalanceOutput, balance
+from dataeval.metrics.bias.coverage import CoverageOutput, coverage
+from dataeval.metrics.bias.diversity import DiversityOutput, diversity
+from dataeval.metrics.bias.parity import ParityOutput, label_parity, parity
 
 __all__ = [
     "balance",
dataeval/metrics/bias/balance.py
@@ -1,15 +1,17 @@
 from __future__ import annotations
 
+__all__ = ["BalanceOutput", "balance"]
+
 import warnings
 from dataclasses import dataclass
-from typing import Mapping
+from typing import Any, Mapping
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
 
-from dataeval._internal.metrics.utils import entropy, preprocess_metadata
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.metrics.bias.metadata import entropy, heatmap, preprocess_metadata
+from dataeval.output import OutputMetadata, set_metadata
 
 
 @dataclass(frozen=True)
@@ -25,12 +27,73 @@ class BalanceOutput(OutputMetadata):
         Estimate of inter/intra-factor mutual information
     classwise : NDArray[np.float64]
         Estimate of mutual information between metadata factors and individual class labels
+    class_list : NDArray[np.int64]
+        Class labels for each value in the dataset
+    metadata_names : list[str]
+        Names of each metadata factor
     """
 
     balance: NDArray[np.float64]
     factors: NDArray[np.float64]
    classwise: NDArray[np.float64]
 
+    class_list: NDArray[np.int64]
+    metadata_names: list[str]
+
+    def plot(
+        self,
+        row_labels: NDArray[Any] | None = None,
+        col_labels: NDArray[Any] | None = None,
+        plot_classwise: bool = False,
+    ) -> None:
+        """
+        Plot a heatmap of balance information
+
+        Parameters
+        ----------
+        row_labels : NDArray | None, default None
+            Array containing the labels for rows in the heatmap
+        col_labels : NDArray | None, default None
+            Array containing the labels for columns in the heatmap
+        plot_classwise : bool, default False
+            Whether to plot per-class balance instead of global balance
+        """
+        if plot_classwise:
+            if row_labels is None:
+                row_labels = np.unique(self.class_list)
+            if col_labels is None:
+                col_labels = np.concatenate((["class"], self.metadata_names))
+
+            heatmap(
+                self.classwise,
+                row_labels,
+                col_labels,
+                xlabel="Factors",
+                ylabel="Class",
+                cbarlabel="Normalized Mutual Information",
+            )
+        else:
+            data = np.concatenate([self.balance[np.newaxis, 1:], self.factors], axis=0)
+            # Create a mask for the upper triangle of the symmetrical array, ignoring the diagonal
+            mask = np.triu(data + 1, k=0) < 1
+            # Finalize the data for the plot, last row is last factor x last factor so it gets dropped
+            heat_data = np.where(mask, np.nan, data)[:-1]
+            # Creating label array for heat map axes
+            heat_labels = np.concatenate((["class"], self.metadata_names))
+
+            if row_labels is None:
+                row_labels = heat_labels[:-1]
+            if col_labels is None:
+                col_labels = heat_labels[1:]
+
+            heatmap(
+                heat_data,
+                row_labels,
+                col_labels,
+                cbarlabel="Normalized Mutual Information",
+            )
+
 
 def validate_num_neighbors(num_neighbors: int) -> int:
     if not isinstance(num_neighbors, (int, float)):
@@ -114,6 +177,9 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
     mi = np.empty((num_factors, num_factors))
     mi[:] = np.nan
 
+    class_idx = names.index("class_label")
+    class_lbl = np.array(data[:, class_idx], dtype=int)
+
     for idx in range(num_factors):
         tgt = data[:, idx].astype(int)
 
@@ -174,4 +240,4 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
     norm_factor = 0.5 * np.add.outer(ent_tgt_bin, ent_all) + 1e-6
     classwise = classwise_mi / norm_factor
 
-    return BalanceOutput(balance, factors, classwise)
+    return BalanceOutput(balance, factors, classwise, class_lbl, list(metadata.keys()))
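
Putting the new fields and `plot` method together, usage would look roughly like the sketch below; the data is made up, `balance`'s signature is taken from the hunk headers above, and plotting presumably requires matplotlib:

```python
import numpy as np
from dataeval.metrics.bias import balance

rng = np.random.default_rng(0)
class_labels = rng.integers(0, 3, size=100)                               # 3 classes, 100 samples
metadata = {"time": rng.random(100), "altitude": rng.random(100) * 1000}  # illustrative factors

out = balance(class_labels, metadata)
out.plot()                     # factor-vs-factor mutual information heatmap
out.plot(plot_classwise=True)  # per-class association with each factor
```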
dataeval/metrics/bias/coverage.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+__all__ = ["CoverageOutput", "coverage"]
+
 import math
 from dataclasses import dataclass
 from typing import Literal
@@ -8,15 +10,15 @@ import numpy as np
 from numpy.typing import ArrayLike, NDArray
 from scipy.spatial.distance import pdist, squareform
 
-from dataeval._internal.interop import to_numpy
-from dataeval._internal.metrics.utils import flatten
-from dataeval._internal.output import OutputMetadata, set_metadata
+from dataeval.interop import to_numpy
+from dataeval.output import OutputMetadata, set_metadata
+from dataeval.utils.shared import flatten
 
 
 @dataclass(frozen=True)
 class CoverageOutput(OutputMetadata):
     """
-    Output class for :func:`coverage` bias metric
+    Output class for :func:`coverage` :term:`bias<Bias>` metric
 
     Attributes
     ----------
@@ -25,7 +27,7 @@ class CoverageOutput(OutputMetadata):
     radii : NDArray
         Array of critical value radii
     critical_value : float
-        Radius for coverage
+        Radius for :term:`coverage<Coverage>`
     """
 
     indices: NDArray[np.intp]
@@ -33,7 +35,7 @@ class CoverageOutput(OutputMetadata):
     critical_value: float
 
 
-@set_metadata("dataeval.metrics")
+@set_metadata()
 def coverage(
     embeddings: ArrayLike,
     radius_type: Literal["adaptive", "naive"] = "adaptive",
@@ -41,7 +43,7 @@ def coverage(
     percent: np.float64 = np.float64(0.01),
 ) -> CoverageOutput:
     """
-    Class for evaluating coverage and identifying images/samples that are in undercovered regions.
+    Class for evaluating :term:`coverage<Coverage>` and identifying images/samples that are in undercovered regions.
 
     Parameters
     ----------
@@ -64,7 +66,7 @@ def coverage(
     Raises
     ------
     ValueError
-        If length of embeddings is less than or equal to k
+        If length of :term:`embeddings<Embeddings>` is less than or equal to k
     ValueError
         If radius_type is unknown
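
For reference, calling the updated `coverage` looks like the sketch below; the embeddings are random stand-ins, and the `k` default is not shown in these hunks:

```python
import numpy as np
from dataeval.metrics.bias import coverage

rng = np.random.default_rng(0)
embeddings = rng.random((50, 16))   # 50 samples of 16-d embeddings

result = coverage(embeddings, radius_type="adaptive")
print(result.indices)          # samples flagged as under-covered
print(result.critical_value)   # the coverage radius
```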