dataeval 0.72.1__py3-none-any.whl → 0.73.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +4 -4
- dataeval/detectors/__init__.py +4 -3
- dataeval/detectors/drift/__init__.py +9 -10
- dataeval/{_internal/detectors → detectors}/drift/base.py +39 -91
- dataeval/{_internal/detectors → detectors}/drift/cvm.py +4 -3
- dataeval/{_internal/detectors → detectors}/drift/ks.py +4 -3
- dataeval/{_internal/detectors → detectors}/drift/mmd.py +23 -25
- dataeval/{_internal/detectors → detectors}/drift/torch.py +13 -11
- dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +7 -5
- dataeval/detectors/drift/updates.py +61 -0
- dataeval/detectors/linters/__init__.py +3 -3
- dataeval/{_internal/detectors → detectors/linters}/clusterer.py +41 -39
- dataeval/{_internal/detectors → detectors/linters}/duplicates.py +19 -9
- dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
- dataeval/{_internal/detectors → detectors/linters}/outliers.py +14 -21
- dataeval/detectors/ood/__init__.py +6 -6
- dataeval/{_internal/detectors → detectors}/ood/ae.py +20 -12
- dataeval/detectors/ood/aegmm.py +66 -0
- dataeval/{_internal/detectors → detectors}/ood/base.py +33 -21
- dataeval/{_internal/detectors → detectors}/ood/llr.py +43 -33
- dataeval/detectors/ood/metadata_ks_compare.py +99 -0
- dataeval/detectors/ood/metadata_least_likely.py +119 -0
- dataeval/detectors/ood/metadata_ood_mi.py +92 -0
- dataeval/{_internal/detectors → detectors}/ood/vae.py +23 -17
- dataeval/detectors/ood/vaegmm.py +75 -0
- dataeval/interop.py +56 -0
- dataeval/metrics/__init__.py +1 -1
- dataeval/metrics/bias/__init__.py +4 -4
- dataeval/{_internal/metrics → metrics/bias}/balance.py +75 -13
- dataeval/{_internal/metrics → metrics/bias}/coverage.py +41 -7
- dataeval/{_internal/metrics → metrics/bias}/diversity.py +75 -18
- dataeval/metrics/bias/metadata.py +358 -0
- dataeval/{_internal/metrics → metrics/bias}/parity.py +54 -44
- dataeval/metrics/estimators/__init__.py +3 -3
- dataeval/{_internal/metrics → metrics/estimators}/ber.py +25 -22
- dataeval/{_internal/metrics → metrics/estimators}/divergence.py +11 -12
- dataeval/{_internal/metrics → metrics/estimators}/uap.py +5 -3
- dataeval/metrics/stats/__init__.py +7 -7
- dataeval/{_internal/metrics → metrics}/stats/base.py +59 -35
- dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +18 -14
- dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +18 -16
- dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +9 -7
- dataeval/metrics/stats/hashstats.py +156 -0
- dataeval/{_internal/metrics → metrics}/stats/labelstats.py +5 -3
- dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +9 -8
- dataeval/{_internal/metrics → metrics}/stats/visualstats.py +10 -9
- dataeval/{_internal/output.py → output.py} +26 -6
- dataeval/utils/__init__.py +8 -3
- dataeval/utils/image.py +71 -0
- dataeval/utils/lazy.py +26 -0
- dataeval/utils/metadata.py +258 -0
- dataeval/utils/shared.py +151 -0
- dataeval/{_internal → utils}/split_dataset.py +98 -33
- dataeval/utils/tensorflow/__init__.py +7 -6
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +8 -2
- dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +28 -18
- dataeval/{_internal/models/tensorflow/pixelcnn.py → utils/tensorflow/_internal/models.py} +387 -97
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +15 -6
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +84 -85
- dataeval/utils/tensorflow/loss/__init__.py +6 -2
- dataeval/utils/torch/__init__.py +7 -3
- dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
- dataeval/{_internal → utils/torch}/datasets.py +48 -42
- dataeval/utils/torch/models.py +138 -0
- dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +7 -136
- dataeval/{_internal → utils/torch}/utils.py +3 -1
- dataeval/workflows/__init__.py +1 -1
- dataeval/{_internal/workflows → workflows}/sufficiency.py +39 -34
- {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/METADATA +4 -3
- dataeval-0.73.0.dist-info/RECORD +73 -0
- dataeval/_internal/detectors/__init__.py +0 -0
- dataeval/_internal/detectors/drift/__init__.py +0 -0
- dataeval/_internal/detectors/ood/__init__.py +0 -0
- dataeval/_internal/detectors/ood/aegmm.py +0 -78
- dataeval/_internal/detectors/ood/vaegmm.py +0 -89
- dataeval/_internal/interop.py +0 -49
- dataeval/_internal/metrics/__init__.py +0 -0
- dataeval/_internal/metrics/stats/hashstats.py +0 -75
- dataeval/_internal/metrics/utils.py +0 -447
- dataeval/_internal/models/__init__.py +0 -0
- dataeval/_internal/models/pytorch/__init__.py +0 -0
- dataeval/_internal/models/pytorch/utils.py +0 -67
- dataeval/_internal/models/tensorflow/__init__.py +0 -0
- dataeval/_internal/models/tensorflow/autoencoder.py +0 -320
- dataeval/_internal/workflows/__init__.py +0 -0
- dataeval/detectors/drift/kernels/__init__.py +0 -10
- dataeval/detectors/drift/updates/__init__.py +0 -8
- dataeval/utils/tensorflow/models/__init__.py +0 -9
- dataeval/utils/tensorflow/recon/__init__.py +0 -3
- dataeval/utils/torch/datasets/__init__.py +0 -12
- dataeval/utils/torch/models/__init__.py +0 -11
- dataeval/utils/torch/trainer/__init__.py +0 -7
- dataeval-0.72.1.dist-info/RECORD +0 -81
- {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/WHEEL +0 -0
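The bulk of this release moves code out of the private `dataeval._internal` package into public modules (`dataeval.detectors`, `dataeval.metrics`, `dataeval.utils`, `dataeval.interop`, `dataeval.output`). A rough sketch of what that shift looks like for import statements is below; the new paths are taken verbatim from the added import lines in the hunks that follow, while the old paths are assumptions inferred from the removed `_internal` file locations, since the removed import lines are truncated in this diff.

```python
# Illustrative only: old-path imports are assumed from the removed
# dataeval/_internal/... files listed above; new-path imports appear
# verbatim in the added lines of the hunks below.

# dataeval 0.72.1 (assumed private layout):
# from dataeval._internal.interop import to_numpy
# from dataeval._internal.detectors.ood.base import OODBase

# dataeval 0.73.0 (paths shown in this diff):
from dataeval.detectors.ood.base import OODBase, OODScoreOutput
from dataeval.interop import to_numpy
from dataeval.utils.lazy import lazyload
```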
dataeval/{_internal/detectors → detectors}/ood/llr.py

@@ -8,27 +8,34 @@ Licensed under Apache Software License (Apache 2.0)
 
 from __future__ import annotations
 
+__all__ = ["OOD_LLR"]
+
 from functools import partial
-from typing import Callable
+from typing import TYPE_CHECKING, Callable
 
 import numpy as np
-import tensorflow as tf
-import tf_keras as keras
 from numpy.typing import ArrayLike, NDArray
-from tf_keras.layers import Input
-from tf_keras.models import Model
 
-from dataeval.
-from dataeval.
-from dataeval.
-from dataeval.
-from dataeval.
-
+from dataeval.detectors.ood.base import OODBase, OODScoreOutput
+from dataeval.interop import to_numpy
+from dataeval.utils.lazy import lazyload
+from dataeval.utils.tensorflow._internal.trainer import trainer
+from dataeval.utils.tensorflow._internal.utils import predict_batch
+
+if TYPE_CHECKING:
+    import tensorflow as tf
+    import tf_keras as keras
+
+    import dataeval.utils.tensorflow._internal.models as tf_models
+else:
+    tf = lazyload("tensorflow")
+    keras = lazyload("tf_keras")
+    tf_models = lazyload("dataeval.utils.tensorflow._internal.models")
 
 
-def build_model(
-    dist: PixelCNN, input_shape: tuple | None = None, filepath: str | None = None
-) -> tuple[keras.Model, PixelCNN]:
+def _build_model(
+    dist: tf_models.PixelCNN, input_shape: tuple | None = None, filepath: str | None = None
+) -> tuple[keras.Model, tf_models.PixelCNN]:
     """
     Create keras.Model from TF distribution.
 
@@ -45,20 +52,20 @@ def build_model(
     -------
     TensorFlow model.
     """
-    x_in = Input(shape=input_shape)
+    x_in = keras.layers.Input(shape=input_shape)
     log_prob = dist.log_prob(x_in)
-    model = Model(inputs=x_in, outputs=log_prob)
+    model = keras.models.Model(inputs=x_in, outputs=log_prob)
     model.add_loss(-tf.reduce_mean(log_prob))
     if isinstance(filepath, str):
         model.load_weights(filepath)
     return model, dist
 
 
-def
+def _mutate_categorical(
     X: NDArray,
     rate: float,
     seed: int = 0,
-    feature_range: tuple = (0, 255),
+    feature_range: tuple[int, int] = (0, 255),
 ) -> tf.Tensor:
     """
     Randomly change integer feature values to values within a set range
@@ -108,22 +115,22 @@ class OOD_LLR(OODBase):
 
     def __init__(
         self,
-        model: PixelCNN,
-        model_background: PixelCNN | None = None,
+        model: tf_models.PixelCNN,
+        model_background: tf_models.PixelCNN | None = None,
         log_prob: Callable | None = None,
         sequential: bool = False,
     ) -> None:
-        self.dist_s = model
-        self.dist_b = (
+        self.dist_s: tf_models.PixelCNN = model
+        self.dist_b: tf_models.PixelCNN = (
             model.copy()
             if hasattr(model, "copy")
             else keras.models.clone_model(model)
             if model_background is None
             else model_background
         )
-        self.has_log_prob = hasattr(model, "log_prob")
-        self.sequential = sequential
-        self.log_prob = log_prob
+        self.has_log_prob: bool = hasattr(model, "log_prob")
+        self.sequential: bool = sequential
+        self.log_prob: Callable | None = log_prob
 
         self._ref_score: OODScoreOutput
         self._threshold_perc: float
@@ -134,12 +141,16 @@ class OOD_LLR(OODBase):
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
         loss_fn: Callable | None = None,
-        optimizer: keras.optimizers.Optimizer =
+        optimizer: keras.optimizers.Optimizer | None = None,
        epochs: int = 20,
        batch_size: int = 64,
        verbose: bool = True,
-        mutate_fn: Callable =
-        mutate_fn_kwargs: dict
+        mutate_fn: Callable = _mutate_categorical,
+        mutate_fn_kwargs: dict[str, float | int | tuple[int, int]] = {
+            "rate": 0.2,
+            "seed": 0,
+            "feature_range": (0, 255),
+        },
         mutate_batch_size: int = int(1e10),
     ) -> None:
         """
@@ -171,7 +182,7 @@ class OOD_LLR(OODBase):
         """
         x_ref = to_numpy(x_ref)
         input_shape = x_ref.shape[1:]
-        optimizer =
+        optimizer = keras.optimizers.Adam() if optimizer is None else optimizer
         # Separate into two separate optimizers, one for semantic model and one for background model
         optimizer_s = optimizer
         optimizer_b = optimizer.__class__.from_config(optimizer.get_config())
@@ -200,11 +211,11 @@ class OOD_LLR(OODBase):
 
         if use_build:
             # build and train semantic model
-            self.model_s =
+            self.model_s: keras.Model = _build_model(self.dist_s, input_shape)[0]
             self.model_s.compile(optimizer=optimizer_s)
             self.model_s.fit(X, **kwargs)
             # build and train background model
-            self.model_b =
+            self.model_b: keras.Model = _build_model(self.dist_b, input_shape)[0]
             self.model_b.compile(optimizer=optimizer_b)
             self.model_b.fit(X_back, **kwargs)
         else:
@@ -280,8 +291,7 @@ class OOD_LLR(OODBase):
         logp_b = logp_fn(self.dist_b, X, return_per_feature=return_per_feature, batch_size=batch_size)
         return logp_s - logp_b
 
-
-    def score(
+    def _score(
         self,
         X: ArrayLike,
         batch_size: int = int(1e10),
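The recurring pattern in this and the other OOD modules is the new import block: `tensorflow`, `tf_keras`, and the model definitions are only imported for type checking, and at runtime they are resolved through `lazyload`. The body of `dataeval.utils.lazy` is not shown in this diff, so the snippet below is only a minimal sketch of how such a deferred-import proxy can work, not the actual implementation.

```python
# Minimal sketch of a deferred-import proxy, assuming the goal is simply to
# postpone a heavy import until first attribute access. This is NOT the
# dataeval.utils.lazy.lazyload implementation; that body is not part of this
# diff section.
from __future__ import annotations

import importlib
from types import ModuleType


class _LazyModule:
    """Import the wrapped module the first time an attribute is requested."""

    def __init__(self, name: str) -> None:
        self._name = name
        self._module: ModuleType | None = None

    def __getattr__(self, attr: str):
        if self._module is None:
            self._module = importlib.import_module(self._name)
        return getattr(self._module, attr)


# Mirrors the runtime branch of the hunk above (names illustrative):
tf = _LazyModule("tensorflow")   # nothing imported yet
# tf.reduce_mean(...)            # first use triggers the real import
```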
dataeval/detectors/ood/metadata_ks_compare.py
ADDED
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+import numbers
+import warnings
+from typing import Any, Mapping
+
+import numpy as np
+from numpy.typing import NDArray
+from scipy.stats import iqr, ks_2samp
+from scipy.stats import wasserstein_distance as emd
+
+
+def meta_distribution_compare(
+    md0: Mapping[str, list[Any] | NDArray[Any]], md1: Mapping[str, list[Any] | NDArray[Any]]
+) -> dict[str, dict[str, float]]:
+    """Measures the featurewise distance between two metadata distributions, and computes a p-value to evaluate its
+    significance.
+
+    Uses the Earth Mover's Distance and the Kolmogorov-Smirnov two-sample test, featurewise.
+
+    Parameters
+    ----------
+    md0 : Mapping[str, list[Any] | NDArray[Any]]
+        A set of arrays of values, indexed by metadata feature names, with one value per data example per feature.
+    md1 : Mapping[str, list[Any] | NDArray[Any]]
+        Another set of arrays of values, indexed by metadata feature names, with one value per data example per
+        feature.
+
+    Returns
+    -------
+    dict[str, KstestResult]
+        A dictionary with keys corresponding to metadata feature names, and values that are KstestResult objects, as
+        defined by scipy.stats.ks_2samp. These values also have two additional attributes: shift_magnitude and
+        statistic_location. The first is the Earth Mover's Distance normalized by the interquartile range (IQR) of
+        the reference, while the second is the value at which the KS statistic has its maximum, measured in
+        IQR-normalized units relative to the median of the reference distribution.
+
+    Examples
+    --------
+    Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and
+    altitude.
+
+    >>> import numpy
+    >>> md0 = {"time": [1.2, 3.4, 5.6], "altitude": [235, 6789, 101112]}
+    >>> md1 = {"time": [7.8, 9.10, 11.12], "altitude": [532, 9876, 211101]}
+    >>> md_out = meta_distribution_compare(md0, md1)
+    >>> for k, v in md_out.items():
+    >>>     print(k)
+    >>>     for kv in v:
+    >>>         print("\t", f"{kv}: {v[kv]:.3f}")
+    time
+        statistic_location: 0.444
+        shift_magnitude: 2.700
+        pvalue: 0.000
+    altitude
+        statistic_location: 0.478
+        shift_magnitude: 0.749
+        pvalue: 0.944
+    """
+
+    if (metadata_keys := md0.keys()) != md1.keys():
+        raise ValueError(f"Both sets of metadata keys must be identical: {list(md0)}, {list(md1)}")
+
+    mdc_dict = {}  # output dict
+    for k in metadata_keys:
+        mdc_dict.update({k: {}})
+
+        x0, x1 = list(md0[k]), list(md1[k])
+
+        allx = x0 + x1  # "+" sign concatenates lists.
+
+        if not all(isinstance(allxi, numbers.Number) for allxi in allx):  # NB: np.nan *is* a number in this context.
+            continue  # non-numeric features will return an empty dict for feature k
+
+        # from Numerical Recipes in C, 3rd ed. p. 737. If too few points, warn and keep going.
+        if np.sqrt(((N := len(x0)) * (M := len(x1))) / (N + M)) < 4:
+            warnings.warn(
+                f"Sample sizes of {N}, {M} for feature {k} will yield unreliable p-values from the KS test.",
+                UserWarning,
+            )
+
+        xmin, xmax = min(allx), max(allx)
+        if xmin == xmax:  # only one value in this feature, so fill in the obvious results for feature k
+            mdc_dict[k].update({"statistic_location": 0.0, "shift_magnitude": 0.0, "pvalue": 1.0})
+            continue
+
+        ks_result = ks_2samp(x0, x1, method="asymp")
+        dev = ks_result.statistic_location - xmin  # pyright: ignore  (KSresult type)
+        loc = dev / (xmax - xmin) if xmax > xmin else dev
+
+        dX = iqr(x0)  # preferred value of dX, which is the scale of the the md0 values for feature k
+        dX = (max(x0) - min(x0)) / 2.0 if dX == 0 else dX  # reasonable alternative value of dX, when iqr is zero.
+        dX = 1.0 if dX == 0 else dX  # if dX is *still* zero, just avoid division by zero this way
+
+        drift = emd(x0, x1) / dX
+
+        mdc_dict[k].update({"statistic_location": loc, "shift_magnitude": drift, "pvalue": ks_result.pvalue})  # pyright: ignore
+
+    return mdc_dict
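For one numeric feature, the per-feature logic of `meta_distribution_compare` reduces to a KS two-sample test plus an IQR-normalized Earth Mover's Distance. A small hand computation of that step, mirroring the code above (and assuming a scipy version whose `KstestResult` exposes `statistic_location`, as the code itself does):

```python
# Hand computation of one feature, following the body of
# meta_distribution_compare() above; values are the "time" feature from its
# docstring example.
from scipy.stats import iqr, ks_2samp
from scipy.stats import wasserstein_distance as emd

x0 = [1.2, 3.4, 5.6]       # reference metadata values
x1 = [7.8, 9.10, 11.12]    # test metadata values

ks_result = ks_2samp(x0, x1, method="asymp")

# statistic_location: where the KS statistic peaks, rescaled over the
# combined range of both samples.
xmin, xmax = min(x0 + x1), max(x0 + x1)
loc = (ks_result.statistic_location - xmin) / (xmax - xmin)

# shift_magnitude: Earth Mover's Distance normalized by the reference IQR
# (the zero-IQR fallbacks in the function are not needed for these values).
drift = emd(x0, x1) / iqr(x0)

print({"statistic_location": loc, "shift_magnitude": drift, "pvalue": ks_result.pvalue})
# Matches the docstring example: statistic_location ~0.444, shift_magnitude 2.700
```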
dataeval/detectors/ood/metadata_least_likely.py
ADDED
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+import numbers
+import warnings
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+
+def get_least_likely_features(
+    metadata: dict[str, list[Any] | NDArray[Any]],
+    new_metadata: dict[str, list[Any] | NDArray[Any]],
+    is_ood: NDArray[np.bool_],
+) -> list[tuple[str, float]]:
+    """Computes which metadata feature is most out-of-distribution (OOD) relative to a reference metadata set.
+
+    Given a reference metadata dictionary `metadata` (where each key maps to one scalar metadata feature), a second
+    metadata dictionary, and a corresponding boolean flag `is_ood` indicating whether each new example falls
+    out-of-distribution (OOD) relative to the reference, this function finds which metadata feature is the most OOD,
+    for each OOD example.
+
+    Parameters
+    ----------
+    metadata: dict[str, list[Any] | NDArray[Any]]
+        A reference set of arrays of values, indexed by metadata feature names, with one value per data example per
+        feature.
+    new_metadata: dict[str, list[Any] | NDArray[Any]]
+        A second metadata set, to be tested against the reference metadata. It is ok if the two meta data objects
+        hold different numbers of examples.
+    is_ood: NDArray[np.bool_]
+        A boolean array, with one value per new_metadata example, that indicates which examples are OOD.
+
+    Returns
+    -------
+    list[tuple[str, float]]
+        An array of names of the features of each OOD new_metadata example that were the most OOD.
+
+    Examples
+    --------
+    Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and
+    altitude, as shown below.
+
+    >>> from dataeval._internal.metrics.metadata_least_likely import get_least_likely_features
+    >>> import numpy
+    >>> metadata = {"time": [1.2, 3.4, 5.6], "altitude": [235, 6789, 101112]}
+    >>> new_metadata = {"time": [7.8, 11.12], "altitude": [532, -211101]}
+    >>> is_ood = numpy.array([True, True])
+    >>> get_least_likely_features(metadata, new_metadata, is_ood)
+    [('time', 2.0), ('altitude', 33.245346)]
+    """
+    # Raise errors for bad inputs...
+
+    if metadata.keys() != new_metadata.keys():
+        raise ValueError(f"Reference and test metadata keys must be identical: {list(metadata)}, {list(new_metadata)}")
+
+    md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
+    new_md_lengths = {len(np.atleast_1d(v)) for v in new_metadata.values()}
+    if len(md_lengths) > 1 or len(new_md_lengths) > 1:
+        raise ValueError(f"All features must have same length, got lengths {md_lengths}, {new_md_lengths}")
+
+    n_reference, n_new = md_lengths.pop(), new_md_lengths.pop()  # possibly different numbers of metadata examples
+
+    if n_new != len(is_ood):
+        raise ValueError(f"is_ood flag must have same length as new metadata {n_new} but has length {len(is_ood)}.")
+
+    if n_reference < 3:  # too hard to define "in-distribution" with this few reference samples.
+        warnings.warn(
+            "We need at least 3 reference metadata examples to determine which "
+            f"features are least likely, but only got {n_reference}",
+            UserWarning,
+        )
+        return []
+
+    if not any(is_ood):
+        return []
+
+    # ...inputs are good, look for most deviant standardized features.
+
+    # largest standardized absolute deviation from the median observed so far for each example
+    deviation = np.zeros_like(is_ood, dtype=np.float32)
+
+    # name of feature that corresponds to `deviation` for each example
+    kmax = np.empty(len(is_ood), dtype=object)
+
+    for k, v in metadata.items():
+        # exclude cases where random happens to be out on tails, not interesting.
+        if k == "random":
+            continue
+
+        # Skip non-numerical features
+        if not all(isinstance(vi, numbers.Number) for vi in v):  # NB: np.nan *is* a number in this context.
+            continue
+
+        # Get standardization parameters from metadata
+        loc = np.median(v)  # ok, because we checked all were numeric
+        dev = np.asarray(v) - loc  # need to make array from v since it could be a list here.
+        posdev, negdev = dev[dev > 0], dev[dev < 0]
+        pos_scale = np.median(posdev) if posdev.any() else 1.0
+        neg_scale = np.abs(np.median(negdev)) if negdev.any() else 1.0
+
+        x, x0, dxp, dxn = np.atleast_1d(new_metadata[k]), loc, pos_scale, neg_scale  # just abbreviations
+        dxp = dxp if dxp > 0 else 1.0  # avoids dividing by zero below
+        dxn = dxn if dxn > 0 else 1.0
+
+        # xdev must be floating-point to avoid getting zero in an integer division.
+        xdev = (x - x0).astype(np.float64)
+        pos = xdev >= 0
+
+        X = np.zeros_like(xdev)
+        X[pos], X[~pos] = xdev[pos] / dxp, xdev[~pos] / dxn  # keeping track of possible asymmetry of x, but...
+        # ...below here, only need to think about absolute deviation.
+
+        abig = np.abs(X) > deviation
+        kmax[abig] = k
+        deviation[abig] = np.abs(X[abig])
+
+    unlikely_features = list(zip(kmax[is_ood], deviation[is_ood]))  # feature names, along with how far out they are.
+    return unlikely_features
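The scoring inside `get_least_likely_features` standardizes each new value against the reference median, using separate scales for positive and negative deviations (the medians of the positive and negative residuals). A short numeric walk-through of that step for the `time` feature of the docstring example:

```python
# Walk-through of the asymmetric standardization in
# get_least_likely_features() above, for the docstring's "time" feature.
import numpy as np

v = np.array([1.2, 3.4, 5.6])              # reference values
loc = np.median(v)                          # 3.4
dev = v - loc                               # [-2.2, 0.0, 2.2]
pos_scale = np.median(dev[dev > 0])         # 2.2
neg_scale = abs(np.median(dev[dev < 0]))    # 2.2

x_new = 7.8                                 # new example's "time" value
xdev = x_new - loc                          # 4.4, a positive deviation
print(xdev / pos_scale)                     # 2.0 -> the ('time', 2.0) entry in the docstring
```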
dataeval/detectors/ood/metadata_ood_mi.py
ADDED
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+import numbers
+import warnings
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+from sklearn.feature_selection import mutual_info_classif
+
+# NATS2BITS is the reciprocal of natural log of 2. If you have an information/entropy-type quantity measured in nats,
+# which is what many library functions return, multiply it by NATS2BITS to get it in bits.
+NATS2BITS = 1.442695
+
+
+def get_metadata_ood_mi(
+    metadata: dict[str, list[Any] | NDArray[Any]],
+    is_ood: NDArray[np.bool_],
+    discrete_features: str | bool | NDArray[np.bool_] = False,
+    random_state: int | None = None,
+) -> dict[str, float]:
+    """Computes mutual information between a set of metadata features and an out-of-distribution flag.
+
+    Given a metadata dictionary `metadata` (where each key maps to one scalar metadata feature per example), and a
+    corresponding boolean flag `is_ood` indicating whether each example falls out-of-distribution (OOD) relative to a
+    reference dataset, this function finds the strength of association between each metadata feature and `is_ood` by
+    computing their mutual information. Metadata features may be either discrete or continuous; set the
+    `discrete_features` keyword to a bool array set to True for each feature that is discrete, or pass one bool to apply
+    to all features. Returns a dict indicating the strength of association between each individual feature and the OOD
+    flag, measured in bits.
+
+    Parameters
+    ----------
+    metadata : dict[str, list[Any] | NDArray[Any]]
+        A set of arrays of values, indexed by metadata feature names, with one value per data example per feature.
+    is_ood : NDArray[np.bool_]
+        A boolean array, with one value per example, that indicates which examples are OOD.
+    discrete_features : str | bool | NDArray[np.bool_]
+        Either a boolean array or a single boolean value, indicate which features take on discrete values.
+    random_state : int, optional - default None
+        Determines random number generation for small noise added to continuous variables. Set to a value for
+        reproducible results.
+
+    Returns
+    -------
+    dict[str, float]
+        A dictionary with keys corresponding to metadata feature names, and values indicating the strength of
+        association between each named feature and the OOD flag, as mutual information measured in bits.
+
+    Examples
+    --------
+    Imagine we have 3 data examples, and that the corresponding metadata contains 2 features called time and altitude.
+
+    >>> import numpy
+    >>> metadata = {"time": numpy.linspace(0, 10, 100), "altitude": numpy.linspace(0, 16, 100) ** 2}
+    >>> is_ood = metadata["altitude"] > 100
+    >>> print(get_metadata_ood_mi(metadata, is_ood, discrete_features=False))
+    {'time': 0.933074285817367, 'altitude': 0.9407686591507002}
+    """
+    numerical_keys = [k for k, v in metadata.items() if all(isinstance(vi, numbers.Number) for vi in v)]
+    if len(numerical_keys) < len(metadata):
+        warnings.warn(
+            f"Processing {numerical_keys}, others are non-numerical and will be skipped.",
+            UserWarning,
+        )
+
+    md_lengths = {len(np.atleast_1d(v)) for v in metadata.values()}
+    if len(md_lengths) > 1:
+        raise ValueError(f"Metadata features have differing sizes: {md_lengths}")
+
+    if len(is_ood) != (mdl := md_lengths.pop()):
+        raise ValueError(
+            f"OOD flag and metadata features need to be same size, but are different sizes: {len(is_ood)} and {mdl}."
+        )
+
+    X = np.array([metadata[k] for k in numerical_keys]).T
+
+    X0, dX = np.mean(X, axis=0), np.std(X, axis=0, ddof=1)
+    Xscl = (X - X0) / dX
+
+    mutual_info_values = (
+        mutual_info_classif(
+            Xscl,
+            is_ood,
+            discrete_features=discrete_features,  # type: ignore
+            random_state=random_state,
+        )
+        * NATS2BITS
+    )
+
+    mi_dict = {k: mutual_info_values[i] for i, k in enumerate(numerical_keys)}
+    return mi_dict
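`mutual_info_classif` reports mutual information in nats, and the module converts to bits via the `NATS2BITS` constant. A quick sanity check of that constant: the entropy of a fair coin is ln 2 nats, which should come out to exactly one bit.

```python
# Sanity check of the NATS2BITS conversion used in get_metadata_ood_mi().
import math

NATS2BITS = 1.442695                        # 1 / ln(2), as defined above
fair_coin_entropy_nats = math.log(2)        # ~0.693147 nats
print(fair_coin_entropy_nats * NATS2BITS)   # ~1.0 bit
```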
dataeval/{_internal/detectors → detectors}/ood/vae.py

@@ -8,19 +8,28 @@ Licensed under Apache Software License (Apache 2.0)
 
 from __future__ import annotations
 
-
+__all__ = ["OOD_VAE"]
+
+from typing import TYPE_CHECKING, Callable
 
 import numpy as np
-import tensorflow as tf
-import tf_keras as keras
 from numpy.typing import ArrayLike
 
-from dataeval.
-from dataeval.
-from dataeval.
-from dataeval.
-from dataeval.
-
+from dataeval.detectors.ood.base import OODBase, OODScoreOutput
+from dataeval.interop import to_numpy
+from dataeval.utils.lazy import lazyload
+from dataeval.utils.tensorflow._internal.loss import Elbo
+from dataeval.utils.tensorflow._internal.utils import predict_batch
+
+if TYPE_CHECKING:
+    import tensorflow as tf
+    import tf_keras as keras
+
+    import dataeval.utils.tensorflow._internal.models as tf_models
+else:
+    tf = lazyload("tensorflow")
+    keras = lazyload("tf_keras")
+    tf_models = lazyload("dataeval.utils.tensorflow._internal.models")
 
 
 class OOD_VAE(OODBase):
@@ -38,7 +47,7 @@ class OOD_VAE(OODBase):
     --------
     Instantiate an OOD detector metric with a generic dataset - batch of images with shape (3,25,25)
 
-    >>> metric = OOD_VAE(create_model(VAE, dataset[0].shape))
+    >>> metric = OOD_VAE(create_model("VAE", dataset[0].shape))
 
     Adjusting fit parameters,
     including setting the fit threshold at 85% for a training set with about 15% out-of-distribution
@@ -50,7 +59,7 @@ class OOD_VAE(OODBase):
     >>> result = metric.predict(dataset, ood_type="feature")
     """
 
-    def __init__(self, model: VAE, samples: int = 10) -> None:
+    def __init__(self, model: tf_models.VAE, samples: int = 10) -> None:
         super().__init__(model)
         self.samples = samples
 
@@ -58,18 +67,15 @@ class OOD_VAE(OODBase):
         self,
         x_ref: ArrayLike,
         threshold_perc: float = 100.0,
-        loss_fn: Callable[..., tf.Tensor]
-        optimizer: keras.optimizers.Optimizer =
+        loss_fn: Callable[..., tf.Tensor] = Elbo(0.05),
+        optimizer: keras.optimizers.Optimizer | None = None,
         epochs: int = 20,
         batch_size: int = 64,
         verbose: bool = True,
     ) -> None:
-        if loss_fn is None:
-            loss_fn = Elbo(0.05)
         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
 
-
-    def score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
+    def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
         self._validate(X := to_numpy(X))
 
         # sample reconstructed instances
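Pulled together, the updated docstring fragments above describe the intended usage: build a model by name with `create_model("VAE", ...)`, fit with a threshold percentile, then call `predict`. The sketch below assembles those pieces; the `create_model` import location and the `dataeval.detectors.ood` export of `OOD_VAE` are assumptions, since neither appears in this section of the diff.

```python
# Usage sketch assembled from the docstring fragments in the hunk above.
# Assumptions: OOD_VAE is exported from dataeval.detectors.ood and
# create_model from dataeval.utils.tensorflow; neither import is shown here.
import numpy as np

from dataeval.detectors.ood import OOD_VAE            # assumed export
from dataeval.utils.tensorflow import create_model    # assumed location

# A generic batch of images with shape (3, 25, 25), as in the docstring.
dataset = np.random.default_rng(0).random((128, 3, 25, 25)).astype(np.float32)

metric = OOD_VAE(create_model("VAE", dataset[0].shape))
metric.fit(dataset, threshold_perc=85.0, epochs=20, batch_size=64)
result = metric.predict(dataset, ood_type="feature")
```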
dataeval/detectors/ood/vaegmm.py
ADDED
@@ -0,0 +1,75 @@
+"""
+Source code derived from Alibi-Detect 0.11.4
+https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
+
+Original code Copyright (c) 2023 Seldon Technologies Ltd
+Licensed under Apache Software License (Apache 2.0)
+"""
+
+from __future__ import annotations
+
+__all__ = ["OOD_VAEGMM"]
+
+from typing import TYPE_CHECKING, Callable
+
+import numpy as np
+from numpy.typing import ArrayLike
+
+from dataeval.detectors.ood.base import OODGMMBase, OODScoreOutput
+from dataeval.interop import to_numpy
+from dataeval.utils.lazy import lazyload
+from dataeval.utils.tensorflow._internal.gmm import gmm_energy
+from dataeval.utils.tensorflow._internal.loss import Elbo, LossGMM
+from dataeval.utils.tensorflow._internal.utils import predict_batch
+
+if TYPE_CHECKING:
+    import tensorflow as tf
+    import tf_keras as keras
+
+    import dataeval.utils.tensorflow._internal.models as tf_models
+else:
+    tf = lazyload("tensorflow")
+    keras = lazyload("tf_keras")
+    tf_models = lazyload("dataeval.utils.tensorflow._internal.models")
+
+
+class OOD_VAEGMM(OODGMMBase):
+    """
+    VAE with Gaussian Mixture Model based outlier detector.
+
+    Parameters
+    ----------
+    model : VAEGMM
+        A VAEGMM model.
+    samples
+        Number of samples sampled to evaluate each instance.
+    """
+
+    def __init__(self, model: tf_models.VAEGMM, samples: int = 10) -> None:
+        super().__init__(model)
+        self.samples = samples
+
+    def fit(
+        self,
+        x_ref: ArrayLike,
+        threshold_perc: float = 100.0,
+        loss_fn: Callable[..., tf.Tensor] = LossGMM(elbo=Elbo(0.05)),
+        optimizer: keras.optimizers.Optimizer | None = None,
+        epochs: int = 20,
+        batch_size: int = 64,
+        verbose: bool = True,
+    ) -> None:
+        super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
+
+    def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
+        self._validate(X := to_numpy(X))
+
+        # draw samples from latent space
+        X_samples = np.repeat(X, self.samples, axis=0)
+        _, z, _ = predict_batch(X_samples, self.model, batch_size=batch_size)
+
+        # compute average energy for samples
+        energy, _ = gmm_energy(z, self.gmm_params, return_mean=False)
+        energy_samples = energy.numpy().reshape((-1, self.samples))  # type: ignore
+        iscore = np.mean(energy_samples, axis=-1)
+        return OODScoreOutput(iscore)
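The `_score` method above repeats each instance `samples` times, scores every repeat, and then folds the energies back into one mean score per instance. The reshape trick it relies on is shown in isolation below, with random numbers standing in for `gmm_energy`.

```python
# The repeat/reshape/mean pattern from OOD_VAEGMM._score() above, in
# isolation. The energies here are random stand-ins for gmm_energy output.
import numpy as np

samples = 10
X = np.arange(3 * 4, dtype=np.float32).reshape(3, 4)    # 3 instances, 4 features

X_samples = np.repeat(X, samples, axis=0)                # shape (30, 4)
energy = np.random.default_rng(0).normal(size=len(X_samples))

energy_samples = energy.reshape((-1, samples))           # shape (3, 10): one row per instance
iscore = energy_samples.mean(axis=-1)                    # one averaged score per instance
print(iscore.shape)                                      # (3,)
```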
dataeval/interop.py
ADDED
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+__all__ = ["as_numpy", "to_numpy", "to_numpy_iter"]
+
+from importlib import import_module
+from typing import Any, Iterable, Iterator
+
+import numpy as np
+from numpy.typing import ArrayLike, NDArray
+
+_MODULE_CACHE = {}
+
+
+def _try_import(module_name):
+    if module_name in _MODULE_CACHE:
+        return _MODULE_CACHE[module_name]
+
+    try:
+        module = import_module(module_name)
+    except ImportError:  # pragma: no cover - covered by test_mindeps.py
+        module = None
+
+    _MODULE_CACHE[module_name] = module
+    return module
+
+
+def as_numpy(array: ArrayLike | None) -> NDArray[Any]:
+    """Converts an ArrayLike to Numpy array without copying (if possible)"""
+    return to_numpy(array, copy=False)
+
+
+def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
+    """Converts an ArrayLike to new Numpy array"""
+    if array is None:
+        return np.ndarray([])
+
+    if isinstance(array, np.ndarray):
+        return array.copy() if copy else array
+
+    if array.__class__.__module__.startswith("tensorflow"):
+        tf = _try_import("tensorflow")
+        if tf and tf.is_tensor(array):
+            return array.numpy().copy() if copy else array.numpy()  # type: ignore
+
+    if array.__class__.__module__.startswith("torch"):
+        torch = _try_import("torch")
+        if torch and isinstance(array, torch.Tensor):
+            return array.detach().cpu().numpy().copy() if copy else array.detach().cpu().numpy()  # type: ignore
+
+    return np.array(array, copy=copy)
+
+
+def to_numpy_iter(iterable: Iterable[ArrayLike]) -> Iterator[NDArray[Any]]:
+    """Yields an iterator of numpy arrays from an ArrayLike"""
+    for array in iterable:
+        yield to_numpy(array)
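The new public `dataeval.interop` module takes over from the deleted `dataeval._internal/interop.py`. A short usage sketch of the three helpers defined above:

```python
# Usage sketch for the helpers defined in dataeval/interop.py above.
import numpy as np

from dataeval.interop import as_numpy, to_numpy, to_numpy_iter

arr = np.arange(6).reshape(2, 3)

copied = to_numpy(arr)   # always a new array for ndarray input
shared = as_numpy(arr)   # copy=False path: returns the same ndarray
assert copied is not arr and shared is arr

# Iterables of ArrayLikes are converted lazily, one batch at a time.
print([batch.sum() for batch in to_numpy_iter([[1, 2, 3], [4, 5, 6]])])  # [6, 15]
```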