dataeval 0.72.1__py3-none-any.whl → 0.72.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +4 -4
- dataeval/detectors/__init__.py +4 -3
- dataeval/detectors/drift/__init__.py +9 -10
- dataeval/{_internal/detectors → detectors}/drift/base.py +39 -91
- dataeval/{_internal/detectors → detectors}/drift/cvm.py +4 -3
- dataeval/{_internal/detectors → detectors}/drift/ks.py +4 -3
- dataeval/{_internal/detectors → detectors}/drift/mmd.py +23 -25
- dataeval/{_internal/detectors → detectors}/drift/torch.py +13 -11
- dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +7 -5
- dataeval/detectors/drift/updates.py +61 -0
- dataeval/detectors/linters/__init__.py +3 -3
- dataeval/{_internal/detectors → detectors/linters}/clusterer.py +41 -39
- dataeval/{_internal/detectors → detectors/linters}/duplicates.py +19 -9
- dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
- dataeval/{_internal/detectors → detectors/linters}/outliers.py +14 -21
- dataeval/detectors/ood/__init__.py +6 -6
- dataeval/{_internal/detectors → detectors}/ood/ae.py +7 -7
- dataeval/{_internal/detectors → detectors}/ood/aegmm.py +9 -29
- dataeval/{_internal/detectors → detectors}/ood/base.py +24 -18
- dataeval/{_internal/detectors → detectors}/ood/llr.py +24 -20
- dataeval/detectors/ood/metadata_ks_compare.py +99 -0
- dataeval/detectors/ood/metadata_least_likely.py +119 -0
- dataeval/detectors/ood/metadata_ood_mi.py +92 -0
- dataeval/{_internal/detectors → detectors}/ood/vae.py +10 -12
- dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
- dataeval/{_internal/interop.py → interop.py} +12 -7
- dataeval/metrics/__init__.py +1 -1
- dataeval/metrics/bias/__init__.py +4 -4
- dataeval/{_internal/metrics → metrics/bias}/balance.py +75 -9
- dataeval/{_internal/metrics → metrics/bias}/coverage.py +6 -4
- dataeval/{_internal/metrics → metrics/bias}/diversity.py +48 -14
- dataeval/metrics/bias/metadata.py +275 -0
- dataeval/{_internal/metrics → metrics/bias}/parity.py +12 -10
- dataeval/metrics/estimators/__init__.py +3 -3
- dataeval/{_internal/metrics → metrics/estimators}/ber.py +25 -22
- dataeval/{_internal/metrics → metrics/estimators}/divergence.py +11 -12
- dataeval/{_internal/metrics → metrics/estimators}/uap.py +5 -3
- dataeval/metrics/stats/__init__.py +7 -7
- dataeval/{_internal/metrics → metrics}/stats/base.py +59 -35
- dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +18 -14
- dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +18 -16
- dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +9 -7
- dataeval/metrics/stats/hashstats.py +156 -0
- dataeval/{_internal/metrics → metrics}/stats/labelstats.py +5 -3
- dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +9 -8
- dataeval/{_internal/metrics → metrics}/stats/visualstats.py +10 -9
- dataeval/{_internal/output.py → output.py} +26 -6
- dataeval/utils/__init__.py +7 -3
- dataeval/utils/image.py +71 -0
- dataeval/utils/shared.py +151 -0
- dataeval/{_internal → utils}/split_dataset.py +98 -33
- dataeval/utils/tensorflow/__init__.py +7 -6
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +60 -64
- dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +9 -8
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +16 -20
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +17 -17
- dataeval/utils/tensorflow/loss/__init__.py +6 -2
- dataeval/utils/torch/__init__.py +7 -3
- dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
- dataeval/{_internal → utils/torch}/datasets.py +48 -42
- dataeval/utils/torch/models.py +138 -0
- dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +7 -136
- dataeval/{_internal → utils/torch}/utils.py +3 -1
- dataeval/workflows/__init__.py +1 -1
- dataeval/{_internal/workflows → workflows}/sufficiency.py +39 -34
- {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/METADATA +2 -1
- dataeval-0.72.2.dist-info/RECORD +72 -0
- dataeval/_internal/detectors/__init__.py +0 -0
- dataeval/_internal/detectors/drift/__init__.py +0 -0
- dataeval/_internal/detectors/ood/__init__.py +0 -0
- dataeval/_internal/metrics/__init__.py +0 -0
- dataeval/_internal/metrics/stats/hashstats.py +0 -75
- dataeval/_internal/metrics/utils.py +0 -447
- dataeval/_internal/models/__init__.py +0 -0
- dataeval/_internal/models/pytorch/__init__.py +0 -0
- dataeval/_internal/models/pytorch/utils.py +0 -67
- dataeval/_internal/models/tensorflow/__init__.py +0 -0
- dataeval/_internal/workflows/__init__.py +0 -0
- dataeval/detectors/drift/kernels/__init__.py +0 -10
- dataeval/detectors/drift/updates/__init__.py +0 -8
- dataeval/utils/tensorflow/models/__init__.py +0 -9
- dataeval/utils/tensorflow/recon/__init__.py +0 -3
- dataeval/utils/torch/datasets/__init__.py +0 -12
- dataeval/utils/torch/models/__init__.py +0 -11
- dataeval/utils/torch/trainer/__init__.py +0 -7
- dataeval-0.72.1.dist-info/RECORD +0 -81
- /dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
- {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.72.1.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
+
__all__ = ["OOD_LLR"]
|
12
|
+
|
11
13
|
from functools import partial
|
12
14
|
from typing import Callable
|
13
15
|
|
@@ -18,15 +20,14 @@ from numpy.typing import ArrayLike, NDArray
|
|
18
20
|
from tf_keras.layers import Input
|
19
21
|
from tf_keras.models import Model
|
20
22
|
|
21
|
-
from dataeval.
|
22
|
-
from dataeval.
|
23
|
-
from dataeval.
|
24
|
-
from dataeval.
|
25
|
-
from dataeval.
|
26
|
-
from dataeval._internal.output import set_metadata
|
23
|
+
from dataeval.detectors.ood.base import OODBase, OODScoreOutput
|
24
|
+
from dataeval.interop import to_numpy
|
25
|
+
from dataeval.utils.tensorflow._internal.pixelcnn import PixelCNN
|
26
|
+
from dataeval.utils.tensorflow._internal.trainer import trainer
|
27
|
+
from dataeval.utils.tensorflow._internal.utils import predict_batch
|
27
28
|
|
28
29
|
|
29
|
-
def
|
30
|
+
def _build_model(
|
30
31
|
dist: PixelCNN, input_shape: tuple | None = None, filepath: str | None = None
|
31
32
|
) -> tuple[keras.Model, PixelCNN]:
|
32
33
|
"""
|
@@ -54,11 +55,11 @@ def build_model(
|
|
54
55
|
return model, dist
|
55
56
|
|
56
57
|
|
57
|
-
def
|
58
|
+
def _mutate_categorical(
|
58
59
|
X: NDArray,
|
59
60
|
rate: float,
|
60
61
|
seed: int = 0,
|
61
|
-
feature_range: tuple = (0, 255),
|
62
|
+
feature_range: tuple[int, int] = (0, 255),
|
62
63
|
) -> tf.Tensor:
|
63
64
|
"""
|
64
65
|
Randomly change integer feature values to values within a set range
|
@@ -113,17 +114,17 @@ class OOD_LLR(OODBase):
|
|
113
114
|
log_prob: Callable | None = None,
|
114
115
|
sequential: bool = False,
|
115
116
|
) -> None:
|
116
|
-
self.dist_s = model
|
117
|
-
self.dist_b = (
|
117
|
+
self.dist_s: PixelCNN = model
|
118
|
+
self.dist_b: PixelCNN = (
|
118
119
|
model.copy()
|
119
120
|
if hasattr(model, "copy")
|
120
121
|
else keras.models.clone_model(model)
|
121
122
|
if model_background is None
|
122
123
|
else model_background
|
123
124
|
)
|
124
|
-
self.has_log_prob = hasattr(model, "log_prob")
|
125
|
-
self.sequential = sequential
|
126
|
-
self.log_prob = log_prob
|
125
|
+
self.has_log_prob: bool = hasattr(model, "log_prob")
|
126
|
+
self.sequential: bool = sequential
|
127
|
+
self.log_prob: Callable | None = log_prob
|
127
128
|
|
128
129
|
self._ref_score: OODScoreOutput
|
129
130
|
self._threshold_perc: float
|
@@ -138,8 +139,12 @@ class OOD_LLR(OODBase):
|
|
138
139
|
epochs: int = 20,
|
139
140
|
batch_size: int = 64,
|
140
141
|
verbose: bool = True,
|
141
|
-
mutate_fn: Callable =
|
142
|
-
mutate_fn_kwargs: dict
|
142
|
+
mutate_fn: Callable = _mutate_categorical,
|
143
|
+
mutate_fn_kwargs: dict[str, float | int | tuple[int, int]] = {
|
144
|
+
"rate": 0.2,
|
145
|
+
"seed": 0,
|
146
|
+
"feature_range": (0, 255),
|
147
|
+
},
|
143
148
|
mutate_batch_size: int = int(1e10),
|
144
149
|
) -> None:
|
145
150
|
"""
|
@@ -200,11 +205,11 @@ class OOD_LLR(OODBase):
|
|
200
205
|
|
201
206
|
if use_build:
|
202
207
|
# build and train semantic model
|
203
|
-
self.model_s =
|
208
|
+
self.model_s: keras.Model = _build_model(self.dist_s, input_shape)[0]
|
204
209
|
self.model_s.compile(optimizer=optimizer_s)
|
205
210
|
self.model_s.fit(X, **kwargs)
|
206
211
|
# build and train background model
|
207
|
-
self.model_b =
|
212
|
+
self.model_b: keras.Model = _build_model(self.dist_b, input_shape)[0]
|
208
213
|
self.model_b.compile(optimizer=optimizer_b)
|
209
214
|
self.model_b.fit(X_back, **kwargs)
|
210
215
|
else:
|
@@ -280,8 +285,7 @@ class OOD_LLR(OODBase):
|
|
280
285
|
logp_b = logp_fn(self.dist_b, X, return_per_feature=return_per_feature, batch_size=batch_size)
|
281
286
|
return logp_s - logp_b
|
282
287
|
|
283
|
-
|
284
|
-
def score(
|
288
|
+
def _score(
|
285
289
|
self,
|
286
290
|
X: ArrayLike,
|
287
291
|
batch_size: int = int(1e10),
|
@@ -0,0 +1,99 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import numbers
|
4
|
+
import warnings
|
5
|
+
from typing import Any, Mapping
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
from numpy.typing import NDArray
|
9
|
+
from scipy.stats import iqr, ks_2samp
|
10
|
+
from scipy.stats import wasserstein_distance as emd
|
11
|
+
|
12
|
+
|
13
|
+
def meta_distribution_compare(
|
14
|
+
md0: Mapping[str, list[Any] | NDArray[Any]], md1: Mapping[str, list[Any] | NDArray[Any]]
|
15
|
+
) -> dict[str, dict[str, float]]:
|
16
|
+
"""Measures the featurewise distance between two metadata distributions, and computes a p-value to evaluate its
|
17
|
+
significance.
|
18
|
+
|
19
|
+
Uses the Earth Mover's Distance and the Kolmogorov-Smirnov two-sample test, featurewise.
|
20
|
+
|
21
|
+
Parameters
|
22
|
+
----------
|
23
|
+
md0 : Mapping[str, list[Any] | NDArray[Any]]
|
24
|
+
A set of arrays of values, indexed by metadata feature names, with one value per data example per feature.
|
25
|
+
md1 : Mapping[str, list[Any] | NDArray[Any]]
|
26
|
+
Another set of arrays of values, indexed by metadata feature names, with one value per data example per
|
27
|
+
feature.
|
28
|
+
|
29
|
+
Returns
|
30
|
+
-------
|
31
|
+
dict[str, dict[str, float]]
|
32
|
+
A dictionary with keys corresponding to metadata feature names, and values that are dictionaries holding
|
33
|
+
three floats: statistic_location, shift_magnitude and pvalue. The pvalue comes from scipy.stats.ks_2samp.
|
34
|
+
shift_magnitude is the Earth Mover's Distance normalized by the interquartile range (IQR) of the reference,
|
35
|
+
while statistic_location is the value at which the KS statistic has its maximum, rescaled to [0, 1] over the
|
36
|
+
combined range of both samples. Non-numeric features map to an empty dictionary.
|
37
|
+
|
38
|
+
Examples
|
39
|
+
--------
|
40
|
+
Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and
|
41
|
+
altitude.
|
42
|
+
|
43
|
+
>>> import numpy
|
44
|
+
>>> md0 = {"time": [1.2, 3.4, 5.6], "altitude": [235, 6789, 101112]}
|
45
|
+
>>> md1 = {"time": [7.8, 9.10, 11.12], "altitude": [532, 9876, 211101]}
|
46
|
+
>>> md_out = meta_distribution_compare(md0, md1)
|
47
|
+
>>> for k, v in md_out.items():
|
48
|
+
>>> print(k)
|
49
|
+
>>> for kv in v:
|
50
|
+
>>> print("\t", f"{kv}: {v[kv]:.3f}")
|
51
|
+
time
|
52
|
+
statistic_location: 0.444
|
53
|
+
shift_magnitude: 2.700
|
54
|
+
pvalue: 0.000
|
55
|
+
altitude
|
56
|
+
statistic_location: 0.478
|
57
|
+
shift_magnitude: 0.749
|
58
|
+
pvalue: 0.944
|
59
|
+
"""
|
60
|
+
|
61
|
+
if (metadata_keys := md0.keys()) != md1.keys():
|
62
|
+
raise ValueError(f"Both sets of metadata keys must be identical: {list(md0)}, {list(md1)}")
|
63
|
+
|
64
|
+
mdc_dict = {} # output dict
|
65
|
+
for k in metadata_keys:
|
66
|
+
mdc_dict.update({k: {}})
|
67
|
+
|
68
|
+
x0, x1 = list(md0[k]), list(md1[k])
|
69
|
+
|
70
|
+
allx = x0 + x1 # "+" sign concatenates lists.
|
71
|
+
|
72
|
+
if not all(isinstance(allxi, numbers.Number) for allxi in allx): # NB: np.nan *is* a number in this context.
|
73
|
+
continue # non-numeric features will return an empty dict for feature k
|
74
|
+
|
75
|
+
# from Numerical Recipes in C, 3rd ed. p. 737. If too few points, warn and keep going.
|
76
|
+
if np.sqrt(((N := len(x0)) * (M := len(x1))) / (N + M)) < 4:
|
77
|
+
warnings.warn(
|
78
|
+
f"Sample sizes of {N}, {M} for feature {k} will yield unreliable p-values from the KS test.",
|
79
|
+
UserWarning,
|
80
|
+
)
|
81
|
+
|
82
|
+
xmin, xmax = min(allx), max(allx)
|
83
|
+
if xmin == xmax: # only one value in this feature, so fill in the obvious results for feature k
|
84
|
+
mdc_dict[k].update({"statistic_location": 0.0, "shift_magnitude": 0.0, "pvalue": 1.0})
|
85
|
+
continue
|
86
|
+
|
87
|
+
ks_result = ks_2samp(x0, x1, method="asymp")
|
88
|
+
dev = ks_result.statistic_location - xmin # pyright: ignore (KSresult type)
|
89
|
+
loc = dev / (xmax - xmin) if xmax > xmin else dev
|
90
|
+
|
91
|
+
dX = iqr(x0) # preferred value of dX, which is the scale of the md0 values for feature k
|
92
|
+
dX = (max(x0) - min(x0)) / 2.0 if dX == 0 else dX # reasonable alternative value of dX, when iqr is zero.
|
93
|
+
dX = 1.0 if dX == 0 else dX # if dX is *still* zero, just avoid division by zero this way
|
94
|
+
|
95
|
+
drift = emd(x0, x1) / dX
|
96
|
+
|
97
|
+
mdc_dict[k].update({"statistic_location": loc, "shift_magnitude": drift, "pvalue": ks_result.pvalue}) # pyright: ignore
|
98
|
+
|
99
|
+
return mdc_dict
|
@@ -0,0 +1,119 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import numbers
|
4
|
+
import warnings
|
5
|
+
from typing import Any
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
from numpy.typing import NDArray
|
9
|
+
|
10
|
+
|
11
|
+
def get_least_likely_features(
|
12
|
+
metadata: dict[str, list[Any] | NDArray[Any]],
|
13
|
+
new_metadata: dict[str, list[Any] | NDArray[Any]],
|
14
|
+
is_ood: NDArray[np.bool_],
|
15
|
+
) -> list[tuple[str, float]]:
|
16
|
+
"""Computes which metadata feature is most out-of-distribution (OOD) relative to a reference metadata set.
|
17
|
+
|
18
|
+
Given a reference metadata dictionary `metadata` (where each key maps to one scalar metadata feature), a second
|
19
|
+
metadata dictionary, and a corresponding boolean flag `is_ood` indicating whether each new example falls
|
20
|
+
out-of-distribution (OOD) relative to the reference, this function finds which metadata feature is the most OOD,
|
21
|
+
for each OOD example.
|
22
|
+
|
23
|
+
Parameters
|
24
|
+
----------
|
25
|
+
metadata: dict[str, list[Any] | NDArray[Any]]
|
26
|
+
A reference set of arrays of values, indexed by metadata feature names, with one value per data example per
|
27
|
+
feature.
|
28
|
+
new_metadata: dict[str, list[Any] | NDArray[Any]]
|
29
|
+
A second metadata set, to be tested against the reference metadata. It is ok if the two metadata objects
|
30
|
+
hold different numbers of examples.
|
31
|
+
is_ood: NDArray[np.bool_]
|
32
|
+
A boolean array, with one value per new_metadata example, that indicates which examples are OOD.
|
33
|
+
|
34
|
+
Returns
|
35
|
+
-------
|
36
|
+
list[tuple[str, float]]
|
37
|
+
A list of (feature name, deviation) tuples, one per OOD new_metadata example, giving the most OOD feature.
|
38
|
+
|
39
|
+
Examples
|
40
|
+
--------
|
41
|
+
Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and
|
42
|
+
altitude, as shown below.
|
43
|
+
|
44
|
+
>>> from dataeval.detectors.ood.metadata_least_likely import get_least_likely_features
|
45
|
+
>>> import numpy
|
46
|
+
>>> metadata = {"time": [1.2, 3.4, 5.6], "altitude": [235, 6789, 101112]}
|
47
|
+
>>> new_metadata = {"time": [7.8, 11.12], "altitude": [532, -211101]}
|
48
|
+
>>> is_ood = numpy.array([True, True])
|
49
|
+
>>> get_least_likely_features(metadata, new_metadata, is_ood)
|
50
|
+
[('time', 2.0), ('altitude', 33.245346)]
|
51
|
+
"""
|
52
|
+
# Raise errors for bad inputs...
|
53
|
+
|
54
|
+
if metadata.keys() != new_metadata.keys():
|
55
|
+
raise ValueError(f"Reference and test metadata keys must be identical: {list(metadata)}, {list(new_metadata)}")
|
56
|
+
|
57
|
+
md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
|
58
|
+
new_md_lengths = {len(np.atleast_1d(v)) for v in new_metadata.values()}
|
59
|
+
if len(md_lengths) > 1 or len(new_md_lengths) > 1:
|
60
|
+
raise ValueError(f"All features must have same length, got lengths {md_lengths}, {new_md_lengths}")
|
61
|
+
|
62
|
+
n_reference, n_new = md_lengths.pop(), new_md_lengths.pop() # possibly different numbers of metadata examples
|
63
|
+
|
64
|
+
if n_new != len(is_ood):
|
65
|
+
raise ValueError(f"is_ood flag must have same length as new metadata {n_new} but has length {len(is_ood)}.")
|
66
|
+
|
67
|
+
if n_reference < 3: # too hard to define "in-distribution" with this few reference samples.
|
68
|
+
warnings.warn(
|
69
|
+
"We need at least 3 reference metadata examples to determine which "
|
70
|
+
f"features are least likely, but only got {n_reference}",
|
71
|
+
UserWarning,
|
72
|
+
)
|
73
|
+
return []
|
74
|
+
|
75
|
+
if not any(is_ood):
|
76
|
+
return []
|
77
|
+
|
78
|
+
# ...inputs are good, look for most deviant standardized features.
|
79
|
+
|
80
|
+
# largest standardized absolute deviation from the median observed so far for each example
|
81
|
+
deviation = np.zeros_like(is_ood, dtype=np.float32)
|
82
|
+
|
83
|
+
# name of feature that corresponds to `deviation` for each example
|
84
|
+
kmax = np.empty(len(is_ood), dtype=object)
|
85
|
+
|
86
|
+
for k, v in metadata.items():
|
87
|
+
# exclude cases where random happens to be out on tails, not interesting.
|
88
|
+
if k == "random":
|
89
|
+
continue
|
90
|
+
|
91
|
+
# Skip non-numerical features
|
92
|
+
if not all(isinstance(vi, numbers.Number) for vi in v): # NB: np.nan *is* a number in this context.
|
93
|
+
continue
|
94
|
+
|
95
|
+
# Get standardization parameters from metadata
|
96
|
+
loc = np.median(v) # ok, because we checked all were numeric
|
97
|
+
dev = np.asarray(v) - loc # need to make array from v since it could be a list here.
|
98
|
+
posdev, negdev = dev[dev > 0], dev[dev < 0]
|
99
|
+
pos_scale = np.median(posdev) if posdev.any() else 1.0
|
100
|
+
neg_scale = np.abs(np.median(negdev)) if negdev.any() else 1.0
|
101
|
+
|
102
|
+
x, x0, dxp, dxn = np.atleast_1d(new_metadata[k]), loc, pos_scale, neg_scale # just abbreviations
|
103
|
+
dxp = dxp if dxp > 0 else 1.0 # avoids dividing by zero below
|
104
|
+
dxn = dxn if dxn > 0 else 1.0
|
105
|
+
|
106
|
+
# xdev must be floating-point to avoid getting zero in an integer division.
|
107
|
+
xdev = (x - x0).astype(np.float64)
|
108
|
+
pos = xdev >= 0
|
109
|
+
|
110
|
+
X = np.zeros_like(xdev)
|
111
|
+
X[pos], X[~pos] = xdev[pos] / dxp, xdev[~pos] / dxn # keeping track of possible asymmetry of x, but...
|
112
|
+
# ...below here, only need to think about absolute deviation.
|
113
|
+
|
114
|
+
abig = np.abs(X) > deviation
|
115
|
+
kmax[abig] = k
|
116
|
+
deviation[abig] = np.abs(X[abig])
|
117
|
+
|
118
|
+
unlikely_features = list(zip(kmax[is_ood], deviation[is_ood])) # feature names, along with how far out they are.
|
119
|
+
return unlikely_features
|
@@ -0,0 +1,92 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import numbers
|
4
|
+
import warnings
|
5
|
+
from typing import Any
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
from numpy.typing import NDArray
|
9
|
+
from sklearn.feature_selection import mutual_info_classif
|
10
|
+
|
11
|
+
# NATS2BITS is the reciprocal of natural log of 2. If you have an information/entropy-type quantity measured in nats,
|
12
|
+
# which is what many library functions return, multiply it by NATS2BITS to get it in bits.
|
13
|
+
NATS2BITS = 1.442695
|
14
|
+
|
15
|
+
|
16
|
+
def get_metadata_ood_mi(
|
17
|
+
metadata: dict[str, list[Any] | NDArray[Any]],
|
18
|
+
is_ood: NDArray[np.bool_],
|
19
|
+
discrete_features: str | bool | NDArray[np.bool_] = False,
|
20
|
+
random_state: int | None = None,
|
21
|
+
) -> dict[str, float]:
|
22
|
+
"""Computes mutual information between a set of metadata features and an out-of-distribution flag.
|
23
|
+
|
24
|
+
Given a metadata dictionary `metadata` (where each key maps to one scalar metadata feature per example), and a
|
25
|
+
corresponding boolean flag `is_ood` indicating whether each example falls out-of-distribution (OOD) relative to a
|
26
|
+
reference dataset, this function finds the strength of association between each metadata feature and `is_ood` by
|
27
|
+
computing their mutual information. Metadata features may be either discrete or continuous; set the
|
28
|
+
`discrete_features` keyword to a bool array set to True for each feature that is discrete, or pass one bool to apply
|
29
|
+
to all features. Returns a dict indicating the strength of association between each individual feature and the OOD
|
30
|
+
flag, measured in bits.
|
31
|
+
|
32
|
+
Parameters
|
33
|
+
----------
|
34
|
+
metadata : dict[str, list[Any] | NDArray[Any]]
|
35
|
+
A set of arrays of values, indexed by metadata feature names, with one value per data example per feature.
|
36
|
+
is_ood : NDArray[np.bool_]
|
37
|
+
A boolean array, with one value per example, that indicates which examples are OOD.
|
38
|
+
discrete_features : str | bool | NDArray[np.bool_]
|
39
|
+
Either a boolean array or a single boolean value, indicating which features take on discrete values.
|
40
|
+
random_state : int, optional - default None
|
41
|
+
Determines random number generation for small noise added to continuous variables. Set to a value for
|
42
|
+
reproducible results.
|
43
|
+
|
44
|
+
Returns
|
45
|
+
-------
|
46
|
+
dict[str, float]
|
47
|
+
A dictionary with keys corresponding to metadata feature names, and values indicating the strength of
|
48
|
+
association between each named feature and the OOD flag, as mutual information measured in bits.
|
49
|
+
|
50
|
+
Examples
|
51
|
+
--------
|
52
|
+
Imagine we have 100 data examples, and that the corresponding metadata contains 2 features called time and altitude.
|
53
|
+
|
54
|
+
>>> import numpy
|
55
|
+
>>> metadata = {"time": numpy.linspace(0, 10, 100), "altitude": numpy.linspace(0, 16, 100) ** 2}
|
56
|
+
>>> is_ood = metadata["altitude"] > 100
|
57
|
+
>>> print(get_metadata_ood_mi(metadata, is_ood, discrete_features=False))
|
58
|
+
{'time': 0.933074285817367, 'altitude': 0.9407686591507002}
|
59
|
+
"""
|
60
|
+
numerical_keys = [k for k, v in metadata.items() if all(isinstance(vi, numbers.Number) for vi in v)]
|
61
|
+
if len(numerical_keys) < len(metadata):
|
62
|
+
warnings.warn(
|
63
|
+
f"Processing {numerical_keys}, others are non-numerical and will be skipped.",
|
64
|
+
UserWarning,
|
65
|
+
)
|
66
|
+
|
67
|
+
md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
|
68
|
+
if len(md_lengths) > 1:
|
69
|
+
raise ValueError(f"Metadata features have differing sizes: {md_lengths}")
|
70
|
+
|
71
|
+
if len(is_ood) != (mdl := md_lengths.pop()):
|
72
|
+
raise ValueError(
|
73
|
+
f"OOD flag and metadata features need to be same size, but are different sizes: {len(is_ood)} and {mdl}."
|
74
|
+
)
|
75
|
+
|
76
|
+
X = np.array([metadata[k] for k in numerical_keys]).T
|
77
|
+
|
78
|
+
X0, dX = np.mean(X, axis=0), np.std(X, axis=0, ddof=1)
|
79
|
+
Xscl = (X - X0) / dX
|
80
|
+
|
81
|
+
mutual_info_values = (
|
82
|
+
mutual_info_classif(
|
83
|
+
Xscl,
|
84
|
+
is_ood,
|
85
|
+
discrete_features=discrete_features, # type: ignore
|
86
|
+
random_state=random_state,
|
87
|
+
)
|
88
|
+
* NATS2BITS
|
89
|
+
)
|
90
|
+
|
91
|
+
mi_dict = {k: mutual_info_values[i] for i, k in enumerate(numerical_keys)}
|
92
|
+
return mi_dict
|
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
+
__all__ = ["OOD_VAE"]
|
12
|
+
|
11
13
|
from typing import Callable
|
12
14
|
|
13
15
|
import numpy as np
|
@@ -15,12 +17,11 @@ import tensorflow as tf
|
|
15
17
|
import tf_keras as keras
|
16
18
|
from numpy.typing import ArrayLike
|
17
19
|
|
18
|
-
from dataeval.
|
19
|
-
from dataeval.
|
20
|
-
from dataeval.
|
21
|
-
from dataeval.
|
22
|
-
from dataeval.
|
23
|
-
from dataeval._internal.output import set_metadata
|
20
|
+
from dataeval.detectors.ood.base import OODBase, OODScoreOutput
|
21
|
+
from dataeval.interop import to_numpy
|
22
|
+
from dataeval.utils.tensorflow._internal.autoencoder import VAE
|
23
|
+
from dataeval.utils.tensorflow._internal.loss import Elbo
|
24
|
+
from dataeval.utils.tensorflow._internal.utils import predict_batch
|
24
25
|
|
25
26
|
|
26
27
|
class OOD_VAE(OODBase):
|
@@ -38,7 +39,7 @@ class OOD_VAE(OODBase):
|
|
38
39
|
--------
|
39
40
|
Instantiate an OOD detector metric with a generic dataset - batch of images with shape (3,25,25)
|
40
41
|
|
41
|
-
>>> metric = OOD_VAE(create_model(VAE, dataset[0].shape))
|
42
|
+
>>> metric = OOD_VAE(create_model("VAE", dataset[0].shape))
|
42
43
|
|
43
44
|
Adjusting fit parameters,
|
44
45
|
including setting the fit threshold at 85% for a training set with about 15% out-of-distribution
|
@@ -58,18 +59,15 @@ class OOD_VAE(OODBase):
|
|
58
59
|
self,
|
59
60
|
x_ref: ArrayLike,
|
60
61
|
threshold_perc: float = 100.0,
|
61
|
-
loss_fn: Callable[..., tf.Tensor]
|
62
|
+
loss_fn: Callable[..., tf.Tensor] = Elbo(0.05),
|
62
63
|
optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
|
63
64
|
epochs: int = 20,
|
64
65
|
batch_size: int = 64,
|
65
66
|
verbose: bool = True,
|
66
67
|
) -> None:
|
67
|
-
if loss_fn is None:
|
68
|
-
loss_fn = Elbo(0.05)
|
69
68
|
super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
|
70
69
|
|
71
|
-
|
72
|
-
def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
|
70
|
+
def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
|
73
71
|
self._validate(X := to_numpy(X))
|
74
72
|
|
75
73
|
# sample reconstructed instances
|
@@ -8,6 +8,8 @@ Licensed under Apache Software License (Apache 2.0)
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
+
__all__ = ["OOD_VAEGMM"]
|
12
|
+
|
11
13
|
from typing import Callable
|
12
14
|
|
13
15
|
import numpy as np
|
@@ -15,13 +17,12 @@ import tensorflow as tf
|
|
15
17
|
import tf_keras as keras
|
16
18
|
from numpy.typing import ArrayLike
|
17
19
|
|
18
|
-
from dataeval.
|
19
|
-
from dataeval.
|
20
|
-
from dataeval.
|
21
|
-
from dataeval.
|
22
|
-
from dataeval.
|
23
|
-
from dataeval.
|
24
|
-
from dataeval._internal.output import set_metadata
|
20
|
+
from dataeval.detectors.ood.base import OODGMMBase, OODScoreOutput
|
21
|
+
from dataeval.interop import to_numpy
|
22
|
+
from dataeval.utils.tensorflow._internal.autoencoder import VAEGMM
|
23
|
+
from dataeval.utils.tensorflow._internal.gmm import gmm_energy
|
24
|
+
from dataeval.utils.tensorflow._internal.loss import Elbo, LossGMM
|
25
|
+
from dataeval.utils.tensorflow._internal.utils import predict_batch
|
25
26
|
|
26
27
|
|
27
28
|
class OOD_VAEGMM(OODGMMBase):
|
@@ -44,38 +45,15 @@ class OOD_VAEGMM(OODGMMBase):
|
|
44
45
|
self,
|
45
46
|
x_ref: ArrayLike,
|
46
47
|
threshold_perc: float = 100.0,
|
47
|
-
loss_fn: Callable[..., tf.Tensor]
|
48
|
+
loss_fn: Callable[..., tf.Tensor] = LossGMM(elbo=Elbo(0.05)),
|
48
49
|
optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
|
49
50
|
epochs: int = 20,
|
50
51
|
batch_size: int = 64,
|
51
52
|
verbose: bool = True,
|
52
53
|
) -> None:
|
53
|
-
if loss_fn is None:
|
54
|
-
loss_fn = LossGMM(elbo=Elbo(0.05))
|
55
54
|
super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
|
56
55
|
|
57
|
-
|
58
|
-
def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
|
59
|
-
"""
|
60
|
-
Compute the :term:`out of distribution<Out-of-distribution (OOD)>` score for a given dataset.
|
61
|
-
|
62
|
-
Parameters
|
63
|
-
----------
|
64
|
-
X : ArrayLike
|
65
|
-
Input data to score.
|
66
|
-
batch_size : int, default 1e10
|
67
|
-
Number of instances to process in each batch.
|
68
|
-
Use a smaller batch size if your dataset is large or if you encounter memory issues.
|
69
|
-
|
70
|
-
Returns
|
71
|
-
-------
|
72
|
-
OODScoreOutput
|
73
|
-
An object containing the instance-level OOD score.
|
74
|
-
|
75
|
-
Note
|
76
|
-
----
|
77
|
-
This model does not produce a feature level score like the OOD_AE or OOD_VAE models.
|
78
|
-
"""
|
56
|
+
def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
|
79
57
|
self._validate(X := to_numpy(X))
|
80
58
|
|
81
59
|
# draw samples from latent space
|
@@ -1,43 +1,47 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
__all__ = ["as_numpy", "to_numpy", "to_numpy_iter"]
|
4
|
+
|
3
5
|
from importlib import import_module
|
4
6
|
from typing import Any, Iterable, Iterator
|
5
7
|
|
6
8
|
import numpy as np
|
7
9
|
from numpy.typing import ArrayLike, NDArray
|
8
10
|
|
9
|
-
|
11
|
+
_MODULE_CACHE = {}
|
10
12
|
|
11
13
|
|
12
|
-
def
|
13
|
-
if module_name in
|
14
|
-
return
|
14
|
+
def _try_import(module_name):
|
15
|
+
if module_name in _MODULE_CACHE:
|
16
|
+
return _MODULE_CACHE[module_name]
|
15
17
|
|
16
18
|
try:
|
17
19
|
module = import_module(module_name)
|
18
20
|
except ImportError: # pragma: no cover - covered by test_mindeps.py
|
19
21
|
module = None
|
20
22
|
|
21
|
-
|
23
|
+
_MODULE_CACHE[module_name] = module
|
22
24
|
return module
|
23
25
|
|
24
26
|
|
25
27
|
def as_numpy(array: ArrayLike | None) -> NDArray[Any]:
|
28
|
+
"""Converts an ArrayLike to Numpy array without copying (if possible)"""
|
26
29
|
return to_numpy(array, copy=False)
|
27
30
|
|
28
31
|
|
29
32
|
def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
|
33
|
+
"""Converts an ArrayLike to new Numpy array"""
|
30
34
|
if array is None:
|
31
35
|
return np.ndarray([])
|
32
36
|
|
33
37
|
if isinstance(array, np.ndarray):
|
34
38
|
return array.copy() if copy else array
|
35
39
|
|
36
|
-
tf =
|
40
|
+
tf = _try_import("tensorflow")
|
37
41
|
if tf and tf.is_tensor(array):
|
38
42
|
return array.numpy().copy() if copy else array.numpy() # type: ignore
|
39
43
|
|
40
|
-
torch =
|
44
|
+
torch = _try_import("torch")
|
41
45
|
if torch and isinstance(array, torch.Tensor):
|
42
46
|
return array.detach().cpu().numpy().copy() if copy else array.detach().cpu().numpy() # type: ignore
|
43
47
|
|
@@ -45,5 +49,6 @@ def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
|
|
45
49
|
|
46
50
|
|
47
51
|
def to_numpy_iter(iterable: Iterable[ArrayLike]) -> Iterator[NDArray[Any]]:
|
52
|
+
"""Yields a numpy array for each ArrayLike in the iterable"""
|
48
53
|
for array in iterable:
|
49
54
|
yield to_numpy(array)
|
dataeval/metrics/__init__.py
CHANGED
@@ -3,6 +3,6 @@ Metrics are a way to measure the performance of your models or datasets that
|
|
3
3
|
can then be analyzed in the context of a given problem.
|
4
4
|
"""
|
5
5
|
|
6
|
-
from . import bias, estimators, stats
|
6
|
+
from dataeval.metrics import bias, estimators, stats
|
7
7
|
|
8
8
|
__all__ = ["bias", "estimators", "stats"]
|
@@ -3,10 +3,10 @@ Bias metrics check for skewed or imbalanced datasets and incomplete feature
|
|
3
3
|
representation which may impact model performance.
|
4
4
|
"""
|
5
5
|
|
6
|
-
from dataeval.
|
7
|
-
from dataeval.
|
8
|
-
from dataeval.
|
9
|
-
from dataeval.
|
6
|
+
from dataeval.metrics.bias.balance import BalanceOutput, balance
|
7
|
+
from dataeval.metrics.bias.coverage import CoverageOutput, coverage
|
8
|
+
from dataeval.metrics.bias.diversity import DiversityOutput, diversity
|
9
|
+
from dataeval.metrics.bias.parity import ParityOutput, label_parity, parity
|
10
10
|
|
11
11
|
__all__ = [
|
12
12
|
"balance",
|