dataeval 0.72.1__py3-none-any.whl → 0.72.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. dataeval/__init__.py +4 -4
  2. dataeval/detectors/__init__.py +4 -3
  3. dataeval/detectors/drift/__init__.py +9 -10
  4. dataeval/{_internal/detectors → detectors}/drift/base.py +39 -91
  5. dataeval/{_internal/detectors → detectors}/drift/cvm.py +4 -3
  6. dataeval/{_internal/detectors → detectors}/drift/ks.py +4 -3
  7. dataeval/{_internal/detectors → detectors}/drift/mmd.py +23 -25
  8. dataeval/{_internal/detectors → detectors}/drift/torch.py +13 -11
  9. dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +7 -5
  10. dataeval/detectors/drift/updates.py +61 -0
  11. dataeval/detectors/linters/__init__.py +3 -3
  12. dataeval/{_internal/detectors → detectors/linters}/clusterer.py +41 -39
  13. dataeval/{_internal/detectors → detectors/linters}/duplicates.py +19 -9
  14. dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
  15. dataeval/{_internal/detectors → detectors/linters}/outliers.py +14 -21
  16. dataeval/detectors/ood/__init__.py +6 -6
  17. dataeval/{_internal/detectors → detectors}/ood/ae.py +7 -7
  18. dataeval/{_internal/detectors → detectors}/ood/aegmm.py +9 -29
  19. dataeval/{_internal/detectors → detectors}/ood/base.py +24 -18
  20. dataeval/{_internal/detectors → detectors}/ood/llr.py +24 -20
  21. dataeval/detectors/ood/metadata_ks_compare.py +99 -0
  22. dataeval/detectors/ood/metadata_least_likely.py +119 -0
  23. dataeval/detectors/ood/metadata_ood_mi.py +92 -0
  24. dataeval/{_internal/detectors → detectors}/ood/vae.py +10 -12
  25. dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
  26. dataeval/{_internal/interop.py → interop.py} +12 -7
  27. dataeval/metrics/__init__.py +1 -1
  28. dataeval/metrics/bias/__init__.py +4 -4
  29. dataeval/{_internal/metrics → metrics/bias}/balance.py +75 -9
  30. dataeval/{_internal/metrics → metrics/bias}/coverage.py +6 -4
  31. dataeval/{_internal/metrics → metrics/bias}/diversity.py +48 -14
  32. dataeval/metrics/bias/metadata.py +275 -0
  33. dataeval/{_internal/metrics → metrics/bias}/parity.py +12 -10
  34. dataeval/metrics/estimators/__init__.py +3 -3
  35. dataeval/{_internal/metrics → metrics/estimators}/ber.py +25 -22
  36. dataeval/{_internal/metrics → metrics/estimators}/divergence.py +11 -12
  37. dataeval/{_internal/metrics → metrics/estimators}/uap.py +5 -3
  38. dataeval/metrics/stats/__init__.py +7 -7
  39. dataeval/{_internal/metrics → metrics}/stats/base.py +59 -35
  40. dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +18 -14
  41. dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +18 -16
  42. dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +9 -7
  43. dataeval/metrics/stats/hashstats.py +156 -0
  44. dataeval/{_internal/metrics → metrics}/stats/labelstats.py +5 -3
  45. dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +9 -8
  46. dataeval/{_internal/metrics → metrics}/stats/visualstats.py +10 -9
  47. dataeval/{_internal/output.py → output.py} +26 -6
  48. dataeval/utils/__init__.py +7 -3
  49. dataeval/utils/image.py +71 -0
  50. dataeval/utils/shared.py +151 -0
  51. dataeval/{_internal → utils}/split_dataset.py +98 -33
  52. dataeval/utils/tensorflow/__init__.py +7 -6
  53. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +60 -64
  54. dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +9 -8
  55. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +16 -20
  56. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
  57. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +17 -17
  58. dataeval/utils/tensorflow/loss/__init__.py +6 -2
  59. dataeval/utils/torch/__init__.py +7 -3
  60. dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
  61. dataeval/{_internal → utils/torch}/datasets.py +48 -42
  62. dataeval/utils/torch/models.py +138 -0
  63. dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +7 -136
  64. dataeval/{_internal → utils/torch}/utils.py +3 -1
  65. dataeval/workflows/__init__.py +1 -1
  66. dataeval/{_internal/workflows → workflows}/sufficiency.py +39 -34
  67. {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/METADATA +2 -1
  68. dataeval-0.72.2.dist-info/RECORD +72 -0
  69. dataeval/_internal/detectors/__init__.py +0 -0
  70. dataeval/_internal/detectors/drift/__init__.py +0 -0
  71. dataeval/_internal/detectors/ood/__init__.py +0 -0
  72. dataeval/_internal/metrics/__init__.py +0 -0
  73. dataeval/_internal/metrics/stats/hashstats.py +0 -75
  74. dataeval/_internal/metrics/utils.py +0 -447
  75. dataeval/_internal/models/__init__.py +0 -0
  76. dataeval/_internal/models/pytorch/__init__.py +0 -0
  77. dataeval/_internal/models/pytorch/utils.py +0 -67
  78. dataeval/_internal/models/tensorflow/__init__.py +0 -0
  79. dataeval/_internal/workflows/__init__.py +0 -0
  80. dataeval/detectors/drift/kernels/__init__.py +0 -10
  81. dataeval/detectors/drift/updates/__init__.py +0 -8
  82. dataeval/utils/tensorflow/models/__init__.py +0 -9
  83. dataeval/utils/tensorflow/recon/__init__.py +0 -3
  84. dataeval/utils/torch/datasets/__init__.py +0 -12
  85. dataeval/utils/torch/models/__init__.py +0 -11
  86. dataeval/utils/torch/trainer/__init__.py +0 -7
  87. dataeval-0.72.1.dist-info/RECORD +0 -81
  88. /dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
  89. {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
  90. {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ __all__ = ["OOD_LLR"]
12
+
11
13
  from functools import partial
12
14
  from typing import Callable
13
15
 
@@ -18,15 +20,14 @@ from numpy.typing import ArrayLike, NDArray
18
20
  from tf_keras.layers import Input
19
21
  from tf_keras.models import Model
20
22
 
21
- from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
22
- from dataeval._internal.interop import to_numpy
23
- from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
24
- from dataeval._internal.models.tensorflow.trainer import trainer
25
- from dataeval._internal.models.tensorflow.utils import predict_batch
26
- from dataeval._internal.output import set_metadata
23
+ from dataeval.detectors.ood.base import OODBase, OODScoreOutput
24
+ from dataeval.interop import to_numpy
25
+ from dataeval.utils.tensorflow._internal.pixelcnn import PixelCNN
26
+ from dataeval.utils.tensorflow._internal.trainer import trainer
27
+ from dataeval.utils.tensorflow._internal.utils import predict_batch
27
28
 
28
29
 
29
- def build_model(
30
+ def _build_model(
30
31
  dist: PixelCNN, input_shape: tuple | None = None, filepath: str | None = None
31
32
  ) -> tuple[keras.Model, PixelCNN]:
32
33
  """
@@ -54,11 +55,11 @@ def build_model(
54
55
  return model, dist
55
56
 
56
57
 
57
- def mutate_categorical(
58
+ def _mutate_categorical(
58
59
  X: NDArray,
59
60
  rate: float,
60
61
  seed: int = 0,
61
- feature_range: tuple = (0, 255),
62
+ feature_range: tuple[int, int] = (0, 255),
62
63
  ) -> tf.Tensor:
63
64
  """
64
65
  Randomly change integer feature values to values within a set range
@@ -113,17 +114,17 @@ class OOD_LLR(OODBase):
113
114
  log_prob: Callable | None = None,
114
115
  sequential: bool = False,
115
116
  ) -> None:
116
- self.dist_s = model
117
- self.dist_b = (
117
+ self.dist_s: PixelCNN = model
118
+ self.dist_b: PixelCNN = (
118
119
  model.copy()
119
120
  if hasattr(model, "copy")
120
121
  else keras.models.clone_model(model)
121
122
  if model_background is None
122
123
  else model_background
123
124
  )
124
- self.has_log_prob = hasattr(model, "log_prob")
125
- self.sequential = sequential
126
- self.log_prob = log_prob
125
+ self.has_log_prob: bool = hasattr(model, "log_prob")
126
+ self.sequential: bool = sequential
127
+ self.log_prob: Callable | None = log_prob
127
128
 
128
129
  self._ref_score: OODScoreOutput
129
130
  self._threshold_perc: float
@@ -138,8 +139,12 @@ class OOD_LLR(OODBase):
138
139
  epochs: int = 20,
139
140
  batch_size: int = 64,
140
141
  verbose: bool = True,
141
- mutate_fn: Callable = mutate_categorical,
142
- mutate_fn_kwargs: dict = {"rate": 0.2, "seed": 0, "feature_range": (0, 255)},
142
+ mutate_fn: Callable = _mutate_categorical,
143
+ mutate_fn_kwargs: dict[str, float | int | tuple[int, int]] = {
144
+ "rate": 0.2,
145
+ "seed": 0,
146
+ "feature_range": (0, 255),
147
+ },
143
148
  mutate_batch_size: int = int(1e10),
144
149
  ) -> None:
145
150
  """
@@ -200,11 +205,11 @@ class OOD_LLR(OODBase):
200
205
 
201
206
  if use_build:
202
207
  # build and train semantic model
203
- self.model_s = build_model(self.dist_s, input_shape)[0]
208
+ self.model_s: keras.Model = _build_model(self.dist_s, input_shape)[0]
204
209
  self.model_s.compile(optimizer=optimizer_s)
205
210
  self.model_s.fit(X, **kwargs)
206
211
  # build and train background model
207
- self.model_b = build_model(self.dist_b, input_shape)[0]
212
+ self.model_b: keras.Model = _build_model(self.dist_b, input_shape)[0]
208
213
  self.model_b.compile(optimizer=optimizer_b)
209
214
  self.model_b.fit(X_back, **kwargs)
210
215
  else:
@@ -280,8 +285,7 @@ class OOD_LLR(OODBase):
280
285
  logp_b = logp_fn(self.dist_b, X, return_per_feature=return_per_feature, batch_size=batch_size)
281
286
  return logp_s - logp_b
282
287
 
283
- @set_metadata("dataeval.detectors")
284
- def score(
288
+ def _score(
285
289
  self,
286
290
  X: ArrayLike,
287
291
  batch_size: int = int(1e10),
@@ -0,0 +1,99 @@
1
+ from __future__ import annotations
2
+
3
+ import numbers
4
+ import warnings
5
+ from typing import Any, Mapping
6
+
7
+ import numpy as np
8
+ from numpy.typing import NDArray
9
+ from scipy.stats import iqr, ks_2samp
10
+ from scipy.stats import wasserstein_distance as emd
11
+
12
+
13
def meta_distribution_compare(
    md0: Mapping[str, list[Any] | NDArray[Any]], md1: Mapping[str, list[Any] | NDArray[Any]]
) -> dict[str, dict[str, float]]:
    """Measure the featurewise distance between two metadata distributions, with a p-value for its significance.

    Uses the Earth Mover's Distance and the Kolmogorov-Smirnov two-sample test, featurewise.

    Parameters
    ----------
    md0 : Mapping[str, list[Any] | NDArray[Any]]
        A set of arrays of values, indexed by metadata feature names, with one value per data example per feature.
        This is treated as the reference set when scaling the shift magnitude.
    md1 : Mapping[str, list[Any] | NDArray[Any]]
        Another set of arrays of values, indexed by metadata feature names, with one value per data example per
        feature.

    Returns
    -------
    dict[str, dict[str, float]]
        A dictionary keyed by metadata feature name. Each value is a dictionary with three entries:

        - ``statistic_location``: the location of the KS statistic reported by ``scipy.stats.ks_2samp``, measured
          from the smallest pooled value and divided by the pooled range (max - min) of both samples.
        - ``shift_magnitude``: the Earth Mover's Distance divided by the interquartile range (IQR) of the ``md0``
          values. If that IQR is zero, half the range of the ``md0`` values is used instead, and 1.0 if that is
          zero as well.
        - ``pvalue``: the p-value of the two-sample KS test (asymptotic method).

        Features that contain any non-numeric value, or that have no values at all in both sets, map to an empty
        dictionary.

    Raises
    ------
    ValueError
        If the two mappings do not have identical keys.

    Warns
    -----
    UserWarning
        If the sample sizes of a feature are too small for a reliable KS p-value, or if a feature is empty in both
        sets.

    Examples
    --------
    Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and
    altitude.

    >>> md0 = {"time": [1.2, 3.4, 5.6], "altitude": [235, 6789, 101112]}
    >>> md1 = {"time": [7.8, 9.10, 11.12], "altitude": [532, 9876, 211101]}
    >>> md_out = meta_distribution_compare(md0, md1)
    >>> for k, v in md_out.items():
    ...     print(k)
    ...     for kv in v:
    ...         print("\t", f"{kv}: {v[kv]:.3f}")
    time
         statistic_location: 0.444
         shift_magnitude: 2.700
         pvalue: 0.000
    altitude
         statistic_location: 0.478
         shift_magnitude: 0.749
         pvalue: 0.944
    """
    if (metadata_keys := md0.keys()) != md1.keys():
        raise ValueError(f"Both sets of metadata keys must be identical: {list(md0)}, {list(md1)}")

    mdc_dict: dict[str, dict[str, float]] = {}  # output dict
    for k in metadata_keys:
        # Every feature gets an entry; it stays empty when the feature cannot be compared.
        mdc_dict[k] = {}

        x0, x1 = list(md0[k]), list(md1[k])
        allx = x0 + x1  # pooled values of both samples

        # NB: np.nan *is* a number in this context.
        if not all(isinstance(allxi, numbers.Number) for allxi in allx):
            continue  # non-numeric features keep an empty dict for feature k

        n0, n1 = len(x0), len(x1)
        if n0 + n1 == 0:
            # Nothing to compare; the size heuristic below would otherwise divide by zero.
            warnings.warn(
                f"Feature {k} has no values in either metadata set and will be skipped.",
                UserWarning,
            )
            continue

        # from Numerical Recipes in C, 3rd ed. p. 737. If too few points, warn and keep going.
        if np.sqrt((n0 * n1) / (n0 + n1)) < 4:
            warnings.warn(
                f"Sample sizes of {n0}, {n1} for feature {k} will yield unreliable p-values from the KS test.",
                UserWarning,
            )

        xmin, xmax = min(allx), max(allx)
        if xmin == xmax:  # only one value in this feature, so fill in the obvious results for feature k
            mdc_dict[k].update({"statistic_location": 0.0, "shift_magnitude": 0.0, "pvalue": 1.0})
            continue

        ks_result = ks_2samp(x0, x1, method="asymp")
        dev = ks_result.statistic_location - xmin  # pyright: ignore (KSresult type)
        # xmin == xmax was handled above, so this fallback presumably only matters when the comparison itself
        # fails (e.g. NaN values); it keeps the raw offset in that case.
        loc = dev / (xmax - xmin) if xmax > xmin else dev

        dX = iqr(x0)  # preferred value of dX, which is the scale of the md0 values for feature k
        dX = (max(x0) - min(x0)) / 2.0 if dX == 0 else dX  # reasonable alternative value of dX, when iqr is zero.
        dX = 1.0 if dX == 0 else dX  # if dX is *still* zero, just avoid division by zero this way

        drift = emd(x0, x1) / dX

        mdc_dict[k].update({"statistic_location": loc, "shift_magnitude": drift, "pvalue": ks_result.pvalue})  # pyright: ignore

    return mdc_dict
@@ -0,0 +1,119 @@
1
+ from __future__ import annotations
2
+
3
+ import numbers
4
+ import warnings
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+ from numpy.typing import NDArray
9
+
10
+
11
def get_least_likely_features(
    metadata: dict[str, list[Any] | NDArray[Any]],
    new_metadata: dict[str, list[Any] | NDArray[Any]],
    is_ood: NDArray[np.bool_],
) -> list[tuple[str, float]]:
    """Computes which metadata feature is most out-of-distribution (OOD) relative to a reference metadata set.

    Given a reference metadata dictionary `metadata` (where each key maps to one scalar metadata feature), a second
    metadata dictionary, and a corresponding boolean flag `is_ood` indicating whether each new example falls
    out-of-distribution (OOD) relative to the reference, this function finds which metadata feature is the most OOD,
    for each OOD example.

    Each new value is standardized as its deviation from the reference median, divided by the median of the
    reference deviations on the same side of the median (positive and negative sides are scaled separately). The
    feature with the largest absolute standardized deviation is reported.

    Parameters
    ----------
    metadata: dict[str, list[Any] | NDArray[Any]]
        A reference set of arrays of values, indexed by metadata feature names, with one value per data example per
        feature.
    new_metadata: dict[str, list[Any] | NDArray[Any]]
        A second metadata set, to be tested against the reference metadata. It is ok if the two metadata objects
        hold different numbers of examples.
    is_ood: NDArray[np.bool_]
        A boolean array, with one value per new_metadata example, that indicates which examples are OOD. Values
        are converted to booleans, so 0/1 flags are treated as a mask as well.

    Returns
    -------
    list[tuple[str, float]]
        One ``(feature name, standardized absolute deviation)`` pair per OOD example, in example order. The name is
        ``None`` (with deviation 0.0) if no feature of that example had a positive deviation. An empty list is
        returned when there are fewer than 3 reference examples or no example is flagged OOD.

    Raises
    ------
    ValueError
        If the key sets differ, if features within a set differ in length, or if `is_ood` does not have one value
        per new_metadata example.

    Notes
    -----
    A feature named ``"random"`` and features with non-numerical values are ignored.

    Examples
    --------
    Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and
    altitude, as shown below.

    >>> from dataeval.detectors.ood.metadata_least_likely import get_least_likely_features
    >>> import numpy
    >>> metadata = {"time": [1.2, 3.4, 5.6], "altitude": [235, 6789, 101112]}
    >>> new_metadata = {"time": [7.8, 11.12], "altitude": [532, -211101]}
    >>> is_ood = numpy.array([True, True])
    >>> get_least_likely_features(metadata, new_metadata, is_ood)
    [('time', 2.0), ('altitude', 33.245346)]
    """
    # Raise errors for bad inputs...

    if metadata.keys() != new_metadata.keys():
        raise ValueError(f"Reference and test metadata keys must be identical: {list(metadata)}, {list(new_metadata)}")

    md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
    new_md_lengths = {len(np.atleast_1d(v)) for v in new_metadata.values()}
    if len(md_lengths) > 1 or len(new_md_lengths) > 1:
        raise ValueError(f"All features must have same length, got lengths {md_lengths}, {new_md_lengths}")

    n_reference, n_new = md_lengths.pop(), new_md_lengths.pop()  # possibly different numbers of metadata examples

    if n_new != len(is_ood):
        raise ValueError(f"is_ood flag must have same length as new metadata {n_new} but has length {len(is_ood)}.")

    if n_reference < 3:  # too hard to define "in-distribution" with this few reference samples.
        warnings.warn(
            "We need at least 3 reference metadata examples to determine which "
            f"features are least likely, but only got {n_reference}",
            UserWarning,
        )
        return []

    # Force a boolean mask: an integer 0/1 array would otherwise be used for fancy (positional) indexing below
    # and silently select the wrong examples.
    ood_mask = np.asarray(is_ood, dtype=bool)
    if not ood_mask.any():
        return []

    # ...inputs are good, look for most deviant standardized features.

    # largest standardized absolute deviation from the median observed so far for each example
    deviation = np.zeros(ood_mask.shape, dtype=np.float32)

    # name of feature that corresponds to `deviation` for each example
    kmax = np.empty(len(ood_mask), dtype=object)

    for k, v in metadata.items():
        # exclude cases where random happens to be out on tails, not interesting.
        if k == "random":
            continue

        # Skip non-numerical features
        if not all(isinstance(vi, numbers.Number) for vi in v):  # NB: np.nan *is* a number in this context.
            continue

        # Get standardization parameters from metadata
        loc = np.median(v)  # ok, because we checked all were numeric
        dev = np.asarray(v) - loc  # need to make array from v since it could be a list here.
        posdev, negdev = dev[dev > 0], dev[dev < 0]
        # Both scales are medians of strictly one-sided deviations; 1.0 is the fallback when a side is empty.
        pos_scale = np.median(posdev) if posdev.size else 1.0
        neg_scale = np.abs(np.median(negdev)) if negdev.size else 1.0

        try:
            # xdev must be floating-point to avoid getting zero in an integer division.
            xdev = (np.atleast_1d(new_metadata[k]) - loc).astype(np.float64)
        except TypeError:
            # New values are not numeric, so treat the feature like a non-numerical reference feature.
            continue

        # Scale each side of the median separately to keep track of possible asymmetry, but from here on only the
        # absolute deviation matters.
        magnitude = np.abs(np.where(xdev >= 0, xdev / pos_scale, xdev / neg_scale))

        larger = magnitude > deviation
        kmax[larger] = k
        deviation[larger] = magnitude[larger]

    # feature names, along with how far out they are.
    return list(zip(kmax[ood_mask], deviation[ood_mask]))
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ import numbers
4
+ import warnings
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+ from numpy.typing import NDArray
9
+ from sklearn.feature_selection import mutual_info_classif
10
+
11
+ # NATS2BITS is the reciprocal of natural log of 2. If you have an information/entropy-type quantity measured in nats,
12
+ # which is what many library functions return, multiply it by NATS2BITS to get it in bits.
13
+ NATS2BITS = 1.442695
14
+
15
+
16
def get_metadata_ood_mi(
    metadata: dict[str, list[Any] | NDArray[Any]],
    is_ood: NDArray[np.bool_],
    discrete_features: str | bool | NDArray[np.bool_] = False,
    random_state: int | None = None,
) -> dict[str, float]:
    """Computes mutual information between a set of metadata features and an out-of-distribution flag.

    Given a metadata dictionary `metadata` (where each key maps to one scalar metadata feature per example), and a
    corresponding boolean flag `is_ood` indicating whether each example falls out-of-distribution (OOD) relative to a
    reference dataset, this function finds the strength of association between each metadata feature and `is_ood` by
    computing their mutual information. Metadata features may be either discrete or continuous; set the
    `discrete_features` keyword to a bool array set to True for each feature that is discrete, or pass one bool to apply
    to all features. Returns a dict indicating the strength of association between each individual feature and the OOD
    flag, measured in bits.

    Parameters
    ----------
    metadata : dict[str, list[Any] | NDArray[Any]]
        A set of arrays of values, indexed by metadata feature names, with one value per data example per feature.
    is_ood : NDArray[np.bool_]
        A boolean array, with one value per example, that indicates which examples are OOD.
    discrete_features : str | bool | NDArray[np.bool_]
        Either a boolean array or a single boolean value, indicating which features take on discrete values. A
        boolean array with one entry per metadata feature is reduced to the entries of the numerical features
        before it is passed on; any other value is passed to ``mutual_info_classif`` unchanged.
    random_state : int, optional - default None
        Determines random number generation for small noise added to continuous variables. Set to a value for
        reproducible results.

    Returns
    -------
    dict[str, float]
        A dictionary with keys corresponding to the numerical metadata feature names, and values indicating the
        strength of association between each named feature and the OOD flag, as mutual information measured in
        bits. The dictionary is empty when no feature is numerical.

    Raises
    ------
    ValueError
        If the metadata features differ in length, or if `is_ood` does not have one value per example.

    Warns
    -----
    UserWarning
        If some features are non-numerical and therefore skipped.

    Examples
    --------
    Imagine we have 100 data examples, and that the corresponding metadata contains 2 features called time and
    altitude.

    >>> import numpy
    >>> metadata = {"time": numpy.linspace(0, 10, 100), "altitude": numpy.linspace(0, 16, 100) ** 2}
    >>> is_ood = metadata["altitude"] > 100
    >>> print(get_metadata_ood_mi(metadata, is_ood, discrete_features=False))
    {'time': 0.933074285817367, 'altitude': 0.9407686591507002}
    """
    numerical_keys = [k for k, v in metadata.items() if all(isinstance(vi, numbers.Number) for vi in v)]
    if len(numerical_keys) < len(metadata):
        warnings.warn(
            f"Processing {numerical_keys}, others are non-numerical and will be skipped.",
            UserWarning,
        )

    md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
    if len(md_lengths) > 1:
        raise ValueError(f"Metadata features have differing sizes: {md_lengths}")

    if len(is_ood) != (mdl := md_lengths.pop()):
        raise ValueError(
            f"OOD flag and metadata features need to be same size, but are different sizes: {len(is_ood)} and {mdl}."
        )

    if not numerical_keys:
        # Nothing left to score; an empty feature matrix cannot be passed to the estimator.
        return {}

    # A per-feature boolean mask refers to *all* metadata features; drop the entries of skipped features so the
    # mask still lines up with the columns of X.
    if not isinstance(discrete_features, (str, bool)):
        mask = np.asarray(discrete_features)
        if mask.dtype == bool and mask.shape == (len(metadata),):
            kept = set(numerical_keys)
            discrete_features = mask[[k in kept for k in metadata]]

    X = np.array([metadata[k] for k in numerical_keys]).T  # one row per example, one column per feature

    # Standardize each feature. A constant feature has zero standard deviation; dividing by 1.0 instead keeps
    # its column at zero rather than turning it into NaN.
    X0, dX = np.mean(X, axis=0), np.std(X, axis=0, ddof=1)
    dX = np.where(dX == 0, 1.0, dX)
    Xscl = (X - X0) / dX

    # The estimator's result is scaled by NATS2BITS to express it in bits.
    mutual_info_values = (
        mutual_info_classif(
            Xscl,
            is_ood,
            discrete_features=discrete_features,  # type: ignore
            random_state=random_state,
        )
        * NATS2BITS
    )

    return dict(zip(numerical_keys, mutual_info_values))
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ __all__ = ["OOD_VAE"]
12
+
11
13
  from typing import Callable
12
14
 
13
15
  import numpy as np
@@ -15,12 +17,11 @@ import tensorflow as tf
15
17
  import tf_keras as keras
16
18
  from numpy.typing import ArrayLike
17
19
 
18
- from dataeval._internal.detectors.ood.base import OODBase, OODScoreOutput
19
- from dataeval._internal.interop import to_numpy
20
- from dataeval._internal.models.tensorflow.autoencoder import VAE
21
- from dataeval._internal.models.tensorflow.losses import Elbo
22
- from dataeval._internal.models.tensorflow.utils import predict_batch
23
- from dataeval._internal.output import set_metadata
20
+ from dataeval.detectors.ood.base import OODBase, OODScoreOutput
21
+ from dataeval.interop import to_numpy
22
+ from dataeval.utils.tensorflow._internal.autoencoder import VAE
23
+ from dataeval.utils.tensorflow._internal.loss import Elbo
24
+ from dataeval.utils.tensorflow._internal.utils import predict_batch
24
25
 
25
26
 
26
27
  class OOD_VAE(OODBase):
@@ -38,7 +39,7 @@ class OOD_VAE(OODBase):
38
39
  --------
39
40
  Instantiate an OOD detector metric with a generic dataset - batch of images with shape (3,25,25)
40
41
 
41
- >>> metric = OOD_VAE(create_model(VAE, dataset[0].shape))
42
+ >>> metric = OOD_VAE(create_model("VAE", dataset[0].shape))
42
43
 
43
44
  Adjusting fit parameters,
44
45
  including setting the fit threshold at 85% for a training set with about 15% out-of-distribution
@@ -58,18 +59,15 @@ class OOD_VAE(OODBase):
58
59
  self,
59
60
  x_ref: ArrayLike,
60
61
  threshold_perc: float = 100.0,
61
- loss_fn: Callable[..., tf.Tensor] | None = None,
62
+ loss_fn: Callable[..., tf.Tensor] = Elbo(0.05),
62
63
  optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
63
64
  epochs: int = 20,
64
65
  batch_size: int = 64,
65
66
  verbose: bool = True,
66
67
  ) -> None:
67
- if loss_fn is None:
68
- loss_fn = Elbo(0.05)
69
68
  super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
70
69
 
71
- @set_metadata("dataeval.detectors")
72
- def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
70
+ def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
73
71
  self._validate(X := to_numpy(X))
74
72
 
75
73
  # sample reconstructed instances
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ __all__ = ["OOD_VAEGMM"]
12
+
11
13
  from typing import Callable
12
14
 
13
15
  import numpy as np
@@ -15,13 +17,12 @@ import tensorflow as tf
15
17
  import tf_keras as keras
16
18
  from numpy.typing import ArrayLike
17
19
 
18
- from dataeval._internal.detectors.ood.base import OODGMMBase, OODScoreOutput
19
- from dataeval._internal.interop import to_numpy
20
- from dataeval._internal.models.tensorflow.autoencoder import VAEGMM
21
- from dataeval._internal.models.tensorflow.gmm import gmm_energy
22
- from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
23
- from dataeval._internal.models.tensorflow.utils import predict_batch
24
- from dataeval._internal.output import set_metadata
20
+ from dataeval.detectors.ood.base import OODGMMBase, OODScoreOutput
21
+ from dataeval.interop import to_numpy
22
+ from dataeval.utils.tensorflow._internal.autoencoder import VAEGMM
23
+ from dataeval.utils.tensorflow._internal.gmm import gmm_energy
24
+ from dataeval.utils.tensorflow._internal.loss import Elbo, LossGMM
25
+ from dataeval.utils.tensorflow._internal.utils import predict_batch
25
26
 
26
27
 
27
28
  class OOD_VAEGMM(OODGMMBase):
@@ -44,38 +45,15 @@ class OOD_VAEGMM(OODGMMBase):
44
45
  self,
45
46
  x_ref: ArrayLike,
46
47
  threshold_perc: float = 100.0,
47
- loss_fn: Callable[..., tf.Tensor] | None = None,
48
+ loss_fn: Callable[..., tf.Tensor] = LossGMM(elbo=Elbo(0.05)),
48
49
  optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
49
50
  epochs: int = 20,
50
51
  batch_size: int = 64,
51
52
  verbose: bool = True,
52
53
  ) -> None:
53
- if loss_fn is None:
54
- loss_fn = LossGMM(elbo=Elbo(0.05))
55
54
  super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
56
55
 
57
- @set_metadata("dataeval.detectors")
58
- def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
59
- """
60
- Compute the out of distributuion<Out-of-distribution (OOD)>` score for a given dataset.
61
-
62
- Parameters
63
- ----------
64
- X : ArrayLike
65
- Input data to score.
66
- batch_size : int, default 1e10
67
- Number of instances to process in each batch.
68
- Use a smaller batch size if your dataset is large or if you encounter memory issues.
69
-
70
- Returns
71
- -------
72
- OODScoreOutput
73
- An object containing the instance-level OOD score.
74
-
75
- Note
76
- ----
77
- This model does not produce a feature level score like the OOD_AE or OOD_VAE models.
78
- """
56
+ def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
79
57
  self._validate(X := to_numpy(X))
80
58
 
81
59
  # draw samples from latent space
@@ -1,43 +1,47 @@
1
1
  from __future__ import annotations
2
2
 
3
+ __all__ = ["as_numpy", "to_numpy", "to_numpy_iter"]
4
+
3
5
  from importlib import import_module
4
6
  from typing import Any, Iterable, Iterator
5
7
 
6
8
  import numpy as np
7
9
  from numpy.typing import ArrayLike, NDArray
8
10
 
9
- module_cache = {}
11
+ _MODULE_CACHE = {}
10
12
 
11
13
 
12
- def try_import(module_name):
13
- if module_name in module_cache:
14
- return module_cache[module_name]
14
+ def _try_import(module_name):
15
+ if module_name in _MODULE_CACHE:
16
+ return _MODULE_CACHE[module_name]
15
17
 
16
18
  try:
17
19
  module = import_module(module_name)
18
20
  except ImportError: # pragma: no cover - covered by test_mindeps.py
19
21
  module = None
20
22
 
21
- module_cache[module_name] = module
23
+ _MODULE_CACHE[module_name] = module
22
24
  return module
23
25
 
24
26
 
25
27
  def as_numpy(array: ArrayLike | None) -> NDArray[Any]:
28
+ """Converts an ArrayLike to Numpy array without copying (if possible)"""
26
29
  return to_numpy(array, copy=False)
27
30
 
28
31
 
29
32
  def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
33
+ """Converts an ArrayLike to new Numpy array"""
30
34
  if array is None:
31
35
  return np.ndarray([])
32
36
 
33
37
  if isinstance(array, np.ndarray):
34
38
  return array.copy() if copy else array
35
39
 
36
- tf = try_import("tensorflow")
40
+ tf = _try_import("tensorflow")
37
41
  if tf and tf.is_tensor(array):
38
42
  return array.numpy().copy() if copy else array.numpy() # type: ignore
39
43
 
40
- torch = try_import("torch")
44
+ torch = _try_import("torch")
41
45
  if torch and isinstance(array, torch.Tensor):
42
46
  return array.detach().cpu().numpy().copy() if copy else array.detach().cpu().numpy() # type: ignore
43
47
 
@@ -45,5 +49,6 @@ def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
45
49
 
46
50
 
47
51
  def to_numpy_iter(iterable: Iterable[ArrayLike]) -> Iterator[NDArray[Any]]:
52
+ """Yields an iterator of numpy arrays from an ArrayLike"""
48
53
  for array in iterable:
49
54
  yield to_numpy(array)
@@ -3,6 +3,6 @@ Metrics are a way to measure the performance of your models or datasets that
3
3
  can then be analyzed in the context of a given problem.
4
4
  """
5
5
 
6
- from . import bias, estimators, stats
6
+ from dataeval.metrics import bias, estimators, stats
7
7
 
8
8
  __all__ = ["bias", "estimators", "stats"]
@@ -3,10 +3,10 @@ Bias metrics check for skewed or imbalanced datasets and incomplete feature
3
3
  representation which may impact model performance.
4
4
  """
5
5
 
6
- from dataeval._internal.metrics.balance import BalanceOutput, balance
7
- from dataeval._internal.metrics.coverage import CoverageOutput, coverage
8
- from dataeval._internal.metrics.diversity import DiversityOutput, diversity
9
- from dataeval._internal.metrics.parity import ParityOutput, label_parity, parity
6
+ from dataeval.metrics.bias.balance import BalanceOutput, balance
7
+ from dataeval.metrics.bias.coverage import CoverageOutput, coverage
8
+ from dataeval.metrics.bias.diversity import DiversityOutput, diversity
9
+ from dataeval.metrics.bias.parity import ParityOutput, label_parity, parity
10
10
 
11
11
  __all__ = [
12
12
  "balance",