dataeval 0.72.1__py3-none-any.whl → 0.73.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +4 -4
- dataeval/detectors/__init__.py +4 -3
- dataeval/detectors/drift/__init__.py +9 -10
- dataeval/{_internal/detectors → detectors}/drift/base.py +39 -91
- dataeval/{_internal/detectors → detectors}/drift/cvm.py +4 -3
- dataeval/{_internal/detectors → detectors}/drift/ks.py +4 -3
- dataeval/{_internal/detectors → detectors}/drift/mmd.py +23 -25
- dataeval/{_internal/detectors → detectors}/drift/torch.py +13 -11
- dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +7 -5
- dataeval/detectors/drift/updates.py +61 -0
- dataeval/detectors/linters/__init__.py +3 -3
- dataeval/{_internal/detectors → detectors/linters}/clusterer.py +41 -39
- dataeval/{_internal/detectors → detectors/linters}/duplicates.py +19 -9
- dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
- dataeval/{_internal/detectors → detectors/linters}/outliers.py +14 -21
- dataeval/detectors/ood/__init__.py +6 -6
- dataeval/{_internal/detectors → detectors}/ood/ae.py +20 -12
- dataeval/detectors/ood/aegmm.py +66 -0
- dataeval/{_internal/detectors → detectors}/ood/base.py +33 -21
- dataeval/{_internal/detectors → detectors}/ood/llr.py +43 -33
- dataeval/detectors/ood/metadata_ks_compare.py +99 -0
- dataeval/detectors/ood/metadata_least_likely.py +119 -0
- dataeval/detectors/ood/metadata_ood_mi.py +92 -0
- dataeval/{_internal/detectors → detectors}/ood/vae.py +23 -17
- dataeval/detectors/ood/vaegmm.py +75 -0
- dataeval/interop.py +56 -0
- dataeval/metrics/__init__.py +1 -1
- dataeval/metrics/bias/__init__.py +4 -4
- dataeval/{_internal/metrics → metrics/bias}/balance.py +75 -13
- dataeval/{_internal/metrics → metrics/bias}/coverage.py +41 -7
- dataeval/{_internal/metrics → metrics/bias}/diversity.py +75 -18
- dataeval/metrics/bias/metadata.py +358 -0
- dataeval/{_internal/metrics → metrics/bias}/parity.py +54 -44
- dataeval/metrics/estimators/__init__.py +3 -3
- dataeval/{_internal/metrics → metrics/estimators}/ber.py +25 -22
- dataeval/{_internal/metrics → metrics/estimators}/divergence.py +11 -12
- dataeval/{_internal/metrics → metrics/estimators}/uap.py +5 -3
- dataeval/metrics/stats/__init__.py +7 -7
- dataeval/{_internal/metrics → metrics}/stats/base.py +59 -35
- dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +18 -14
- dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +18 -16
- dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +9 -7
- dataeval/metrics/stats/hashstats.py +156 -0
- dataeval/{_internal/metrics → metrics}/stats/labelstats.py +5 -3
- dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +9 -8
- dataeval/{_internal/metrics → metrics}/stats/visualstats.py +10 -9
- dataeval/{_internal/output.py → output.py} +26 -6
- dataeval/utils/__init__.py +8 -3
- dataeval/utils/image.py +71 -0
- dataeval/utils/lazy.py +26 -0
- dataeval/utils/metadata.py +258 -0
- dataeval/utils/shared.py +151 -0
- dataeval/{_internal → utils}/split_dataset.py +98 -33
- dataeval/utils/tensorflow/__init__.py +7 -6
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +8 -2
- dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +28 -18
- dataeval/{_internal/models/tensorflow/pixelcnn.py → utils/tensorflow/_internal/models.py} +387 -97
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +15 -6
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +84 -85
- dataeval/utils/tensorflow/loss/__init__.py +6 -2
- dataeval/utils/torch/__init__.py +7 -3
- dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
- dataeval/{_internal → utils/torch}/datasets.py +48 -42
- dataeval/utils/torch/models.py +138 -0
- dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +7 -136
- dataeval/{_internal → utils/torch}/utils.py +3 -1
- dataeval/workflows/__init__.py +1 -1
- dataeval/{_internal/workflows → workflows}/sufficiency.py +39 -34
- {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/METADATA +4 -3
- dataeval-0.73.0.dist-info/RECORD +73 -0
- dataeval/_internal/detectors/__init__.py +0 -0
- dataeval/_internal/detectors/drift/__init__.py +0 -0
- dataeval/_internal/detectors/ood/__init__.py +0 -0
- dataeval/_internal/detectors/ood/aegmm.py +0 -78
- dataeval/_internal/detectors/ood/vaegmm.py +0 -89
- dataeval/_internal/interop.py +0 -49
- dataeval/_internal/metrics/__init__.py +0 -0
- dataeval/_internal/metrics/stats/hashstats.py +0 -75
- dataeval/_internal/metrics/utils.py +0 -447
- dataeval/_internal/models/__init__.py +0 -0
- dataeval/_internal/models/pytorch/__init__.py +0 -0
- dataeval/_internal/models/pytorch/utils.py +0 -67
- dataeval/_internal/models/tensorflow/__init__.py +0 -0
- dataeval/_internal/models/tensorflow/autoencoder.py +0 -320
- dataeval/_internal/workflows/__init__.py +0 -0
- dataeval/detectors/drift/kernels/__init__.py +0 -10
- dataeval/detectors/drift/updates/__init__.py +0 -8
- dataeval/utils/tensorflow/models/__init__.py +0 -9
- dataeval/utils/tensorflow/recon/__init__.py +0 -3
- dataeval/utils/torch/datasets/__init__.py +0 -12
- dataeval/utils/torch/models/__init__.py +0 -11
- dataeval/utils/torch/trainer/__init__.py +0 -7
- dataeval-0.72.1.dist-info/RECORD +0 -81
- {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/WHEEL +0 -0
@@ -8,10 +8,16 @@ Licensed under Apache Software License (Apache 2.0)
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
-
from typing import NamedTuple
|
11
|
+
from typing import TYPE_CHECKING, NamedTuple
|
12
12
|
|
13
13
|
import numpy as np
|
14
|
-
|
14
|
+
|
15
|
+
from dataeval.utils.lazy import lazyload
|
16
|
+
|
17
|
+
if TYPE_CHECKING:
|
18
|
+
import tensorflow as tf
|
19
|
+
else:
|
20
|
+
tf = lazyload("tensorflow")
|
15
21
|
|
16
22
|
|
17
23
|
class GaussianMixtureModelParams(NamedTuple):
|
@@ -8,16 +8,26 @@ Licensed under Apache Software License (Apache 2.0)
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
-
from typing import Literal, cast
|
11
|
+
from typing import TYPE_CHECKING, Literal, cast
|
12
12
|
|
13
|
-
import tensorflow as tf
|
13
|
+
import numpy as np
|
14
14
|
from numpy.typing import NDArray
|
15
|
-
from tensorflow_probability.python.distributions.mvn_diag import MultivariateNormalDiag
|
16
|
-
from tensorflow_probability.python.distributions.mvn_tril import MultivariateNormalTriL
|
17
|
-
from tensorflow_probability.python.stats import covariance
|
18
|
-
from tf_keras.layers import Flatten
|
19
15
|
|
20
|
-
from dataeval._internal.models.tensorflow.gmm import gmm_energy, gmm_params
|
16
|
+
from dataeval.utils.lazy import lazyload
|
17
|
+
from dataeval.utils.tensorflow._internal.gmm import gmm_energy, gmm_params
|
18
|
+
|
19
|
+
if TYPE_CHECKING:
|
20
|
+
import tensorflow as tf
|
21
|
+
import tensorflow_probability.python.distributions.mvn_diag as mvn_diag
|
22
|
+
import tensorflow_probability.python.distributions.mvn_tril as mvn_tril
|
23
|
+
import tensorflow_probability.python.stats as tfp_stats
|
24
|
+
import tf_keras as keras
|
25
|
+
else:
|
26
|
+
tf = lazyload("tensorflow")
|
27
|
+
keras = lazyload("tf_keras")
|
28
|
+
mvn_diag = lazyload("tensorflow_probability.python.distributions.mvn_diag")
|
29
|
+
mvn_tril = lazyload("tensorflow_probability.python.distributions.mvn_tril")
|
30
|
+
tfp_stats = lazyload("tensorflow_probability.python.stats")
|
21
31
|
|
22
32
|
|
23
33
|
class Elbo:
|
@@ -39,29 +49,29 @@ class Elbo:
|
|
39
49
|
def __init__(
|
40
50
|
self,
|
41
51
|
cov_type: Literal["cov_full", "cov_diag"] | float = 1.0,
|
42
|
-
x: tf.Tensor | NDArray | None = None,
|
52
|
+
x: tf.Tensor | NDArray[np.float32] | None = None,
|
43
53
|
):
|
44
54
|
if isinstance(cov_type, float):
|
45
|
-
self.
|
55
|
+
self._cov = ("sim", cov_type)
|
46
56
|
elif cov_type in ["cov_full", "cov_diag"]:
|
47
|
-
x_np: NDArray = x.numpy() if tf.is_tensor(x) else x # type: ignore
|
48
|
-
cov = covariance(x_np.reshape(x_np.shape[0], -1)) # type: ignore py38
|
57
|
+
x_np: NDArray[np.float32] = x.numpy().astype(np.float32) if tf.is_tensor(x) else x # type: ignore
|
58
|
+
cov = tfp_stats.covariance(x_np.reshape(x_np.shape[0], -1)) # type: ignore py38
|
49
59
|
if cov_type == "cov_diag": # infer standard deviation from covariance matrix
|
50
60
|
cov = tf.math.sqrt(tf.linalg.diag_part(cov))
|
51
|
-
self.
|
61
|
+
self._cov = (cov_type, cov)
|
52
62
|
else:
|
53
63
|
raise ValueError("Only cov_full, cov_diag or sim value should be specified.")
|
54
64
|
|
55
65
|
def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
|
56
|
-
y_pred_flat = cast(tf.Tensor, Flatten()(y_pred))
|
66
|
+
y_pred_flat = cast(tf.Tensor, keras.layers.Flatten()(y_pred))
|
57
67
|
|
58
|
-
if self.
|
59
|
-
y_mn = MultivariateNormalTriL(y_pred_flat, scale_tril=tf.linalg.cholesky(self.
|
68
|
+
if self._cov[0] == "cov_full":
|
69
|
+
y_mn = mvn_tril.MultivariateNormalTriL(y_pred_flat, scale_tril=tf.linalg.cholesky(self._cov[1]))
|
60
70
|
else: # cov_diag and sim
|
61
|
-
cov_diag = self.
|
62
|
-
y_mn = MultivariateNormalDiag(y_pred_flat, scale_diag=cov_diag)
|
71
|
+
cov_diag = self._cov[1] if self._cov[0] == "cov_diag" else self._cov[1] * tf.ones(y_pred_flat.shape[-1])
|
72
|
+
y_mn = mvn_diag.MultivariateNormalDiag(y_pred_flat, scale_diag=cov_diag)
|
63
73
|
|
64
|
-
loss = -tf.reduce_mean(y_mn.log_prob(Flatten()(y_true)))
|
74
|
+
loss = -tf.reduce_mean(y_mn.log_prob(keras.layers.Flatten()(y_true)))
|
65
75
|
return loss
|
66
76
|
|
67
77
|
|