dataeval 0.73.1__py3-none-any.whl → 0.74.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +3 -9
- dataeval/detectors/__init__.py +2 -10
- dataeval/detectors/drift/base.py +3 -3
- dataeval/detectors/drift/mmd.py +1 -1
- dataeval/detectors/drift/torch.py +1 -101
- dataeval/detectors/linters/clusterer.py +3 -3
- dataeval/detectors/linters/duplicates.py +4 -4
- dataeval/detectors/linters/outliers.py +4 -4
- dataeval/detectors/ood/__init__.py +9 -9
- dataeval/detectors/ood/{ae.py → ae_torch.py} +22 -27
- dataeval/detectors/ood/base.py +63 -113
- dataeval/detectors/ood/base_torch.py +109 -0
- dataeval/detectors/ood/metadata_ks_compare.py +52 -14
- dataeval/interop.py +1 -1
- dataeval/metrics/bias/__init__.py +3 -0
- dataeval/metrics/bias/balance.py +73 -70
- dataeval/metrics/bias/coverage.py +4 -4
- dataeval/metrics/bias/diversity.py +67 -136
- dataeval/metrics/bias/metadata_preprocessing.py +285 -0
- dataeval/metrics/bias/metadata_utils.py +229 -0
- dataeval/metrics/bias/parity.py +51 -161
- dataeval/metrics/estimators/ber.py +3 -3
- dataeval/metrics/estimators/divergence.py +3 -3
- dataeval/metrics/estimators/uap.py +3 -3
- dataeval/metrics/stats/base.py +2 -2
- dataeval/metrics/stats/boxratiostats.py +1 -1
- dataeval/metrics/stats/datasetstats.py +6 -6
- dataeval/metrics/stats/dimensionstats.py +1 -1
- dataeval/metrics/stats/hashstats.py +1 -1
- dataeval/metrics/stats/labelstats.py +3 -3
- dataeval/metrics/stats/pixelstats.py +1 -1
- dataeval/metrics/stats/visualstats.py +1 -1
- dataeval/output.py +77 -53
- dataeval/utils/__init__.py +1 -7
- dataeval/utils/gmm.py +26 -0
- dataeval/utils/metadata.py +29 -9
- dataeval/utils/torch/gmm.py +98 -0
- dataeval/utils/torch/models.py +192 -0
- dataeval/utils/torch/trainer.py +84 -5
- dataeval/utils/torch/utils.py +107 -1
- dataeval/workflows/sufficiency.py +4 -4
- {dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/METADATA +3 -9
- dataeval-0.74.1.dist-info/RECORD +65 -0
- dataeval/detectors/ood/aegmm.py +0 -66
- dataeval/detectors/ood/llr.py +0 -302
- dataeval/detectors/ood/vae.py +0 -97
- dataeval/detectors/ood/vaegmm.py +0 -75
- dataeval/metrics/bias/metadata.py +0 -440
- dataeval/utils/lazy.py +0 -26
- dataeval/utils/tensorflow/__init__.py +0 -19
- dataeval/utils/tensorflow/_internal/gmm.py +0 -123
- dataeval/utils/tensorflow/_internal/loss.py +0 -121
- dataeval/utils/tensorflow/_internal/models.py +0 -1394
- dataeval/utils/tensorflow/_internal/trainer.py +0 -114
- dataeval/utils/tensorflow/_internal/utils.py +0 -256
- dataeval/utils/tensorflow/loss/__init__.py +0 -11
- dataeval-0.73.1.dist-info/RECORD +0 -73
- {dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/WHEEL +0 -0
@@ -1,121 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Source code derived from Alibi-Detect 0.11.4
|
3
|
-
https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
|
4
|
-
|
5
|
-
Original code Copyright (c) 2023 Seldon Technologies Ltd
|
6
|
-
Licensed under Apache Software License (Apache 2.0)
|
7
|
-
"""
|
8
|
-
|
9
|
-
from __future__ import annotations
|
10
|
-
|
11
|
-
from typing import TYPE_CHECKING, Literal, cast
|
12
|
-
|
13
|
-
import numpy as np
|
14
|
-
from numpy.typing import NDArray
|
15
|
-
|
16
|
-
from dataeval.utils.lazy import lazyload
|
17
|
-
from dataeval.utils.tensorflow._internal.gmm import gmm_energy, gmm_params
|
18
|
-
|
19
|
-
if TYPE_CHECKING:
|
20
|
-
import tensorflow as tf
|
21
|
-
import tensorflow_probability.python.distributions.mvn_diag as mvn_diag
|
22
|
-
import tensorflow_probability.python.distributions.mvn_tril as mvn_tril
|
23
|
-
import tensorflow_probability.python.stats as tfp_stats
|
24
|
-
import tf_keras as keras
|
25
|
-
else:
|
26
|
-
tf = lazyload("tensorflow")
|
27
|
-
keras = lazyload("tf_keras")
|
28
|
-
mvn_diag = lazyload("tensorflow_probability.python.distributions.mvn_diag")
|
29
|
-
mvn_tril = lazyload("tensorflow_probability.python.distributions.mvn_tril")
|
30
|
-
tfp_stats = lazyload("tensorflow_probability.python.stats")
|
31
|
-
|
32
|
-
|
33
|
-
class Elbo:
|
34
|
-
"""
|
35
|
-
Compute ELBO loss.
|
36
|
-
|
37
|
-
The covariance matrix can be specified by passing the full covariance matrix, the matrix
|
38
|
-
diagonal, or a scale identity multiplier. Only one of these should be specified. If none are specified, the
|
39
|
-
identity matrix is used.
|
40
|
-
|
41
|
-
Parameters
|
42
|
-
----------
|
43
|
-
cov_type : Union[Literal["cov_full", "cov_diag"], float], default 1.0
|
44
|
-
Full covariance matrix, diagonal :term:`variance<Variance>` matrix, or scale identity multiplier.
|
45
|
-
x : ArrayLike, optional - default None
|
46
|
-
Dataset used to calculate the covariance matrix. Required for full and diagonal covariance matrix types.
|
47
|
-
"""
|
48
|
-
|
49
|
-
def __init__(
|
50
|
-
self,
|
51
|
-
cov_type: Literal["cov_full", "cov_diag"] | float = 1.0,
|
52
|
-
x: tf.Tensor | NDArray[np.float32] | None = None,
|
53
|
-
):
|
54
|
-
if isinstance(cov_type, float):
|
55
|
-
self._cov = ("sim", cov_type)
|
56
|
-
elif cov_type in ["cov_full", "cov_diag"]:
|
57
|
-
x_np: NDArray[np.float32] = x.numpy().astype(np.float32) if tf.is_tensor(x) else x # type: ignore
|
58
|
-
cov = tfp_stats.covariance(x_np.reshape(x_np.shape[0], -1)) # type: ignore py38
|
59
|
-
if cov_type == "cov_diag": # infer standard deviation from covariance matrix
|
60
|
-
cov = tf.math.sqrt(tf.linalg.diag_part(cov))
|
61
|
-
self._cov = (cov_type, cov)
|
62
|
-
else:
|
63
|
-
raise ValueError("Only cov_full, cov_diag or sim value should be specified.")
|
64
|
-
|
65
|
-
def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
|
66
|
-
y_pred_flat = cast(tf.Tensor, keras.layers.Flatten()(y_pred))
|
67
|
-
|
68
|
-
if self._cov[0] == "cov_full":
|
69
|
-
y_mn = mvn_tril.MultivariateNormalTriL(y_pred_flat, scale_tril=tf.linalg.cholesky(self._cov[1]))
|
70
|
-
else: # cov_diag and sim
|
71
|
-
cov_diag = self._cov[1] if self._cov[0] == "cov_diag" else self._cov[1] * tf.ones(y_pred_flat.shape[-1])
|
72
|
-
y_mn = mvn_diag.MultivariateNormalDiag(y_pred_flat, scale_diag=cov_diag)
|
73
|
-
|
74
|
-
loss = -tf.reduce_mean(y_mn.log_prob(keras.layers.Flatten()(y_true)))
|
75
|
-
return loss
|
76
|
-
|
77
|
-
|
78
|
-
class LossGMM:
|
79
|
-
"""
|
80
|
-
Loss function used for AE and VAE with GMM.
|
81
|
-
|
82
|
-
Parameters
|
83
|
-
----------
|
84
|
-
w_recon : float, default 1e-7
|
85
|
-
Weight on elbo loss term.
|
86
|
-
w_energy : float, default 0.1
|
87
|
-
Weight on sample energy loss term.
|
88
|
-
w_cov_diag : float, default 0.005
|
89
|
-
Weight on covariance regularizing loss term.
|
90
|
-
elbo : Elbo, optional - default None
|
91
|
-
ELBO loss function used to calculate w_recon.
|
92
|
-
"""
|
93
|
-
|
94
|
-
def __init__(
|
95
|
-
self,
|
96
|
-
w_recon: float = 1e-7,
|
97
|
-
w_energy: float = 0.1,
|
98
|
-
w_cov_diag: float = 0.005,
|
99
|
-
elbo: Elbo | None = None,
|
100
|
-
):
|
101
|
-
self.w_recon = w_recon
|
102
|
-
self.w_energy = w_energy
|
103
|
-
self.w_cov_diag = w_cov_diag
|
104
|
-
self.elbo = elbo
|
105
|
-
|
106
|
-
def __call__(
|
107
|
-
self,
|
108
|
-
x_true: tf.Tensor,
|
109
|
-
x_pred: tf.Tensor,
|
110
|
-
z: tf.Tensor,
|
111
|
-
gamma: tf.Tensor,
|
112
|
-
) -> tf.Tensor:
|
113
|
-
w_recon = (
|
114
|
-
tf.reduce_mean(tf.subtract(x_true, x_pred) ** 2)
|
115
|
-
if self.elbo is None
|
116
|
-
else tf.multiply(self.w_recon, self.elbo(x_true, x_pred))
|
117
|
-
)
|
118
|
-
sample_energy, cov_diag = gmm_energy(z, gmm_params(z, gamma))
|
119
|
-
w_energy = tf.multiply(self.w_energy, sample_energy)
|
120
|
-
w_cov_diag = tf.multiply(self.w_cov_diag, cov_diag)
|
121
|
-
return w_recon + w_energy + w_cov_diag
|