dataeval 0.72.2__py3-none-any.whl → 0.73.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- dataeval/__init__.py +3 -3
- dataeval/detectors/__init__.py +1 -1
- dataeval/detectors/drift/__init__.py +1 -1
- dataeval/detectors/drift/base.py +2 -2
- dataeval/detectors/linters/clusterer.py +1 -1
- dataeval/detectors/ood/__init__.py +1 -1
- dataeval/detectors/ood/ae.py +14 -6
- dataeval/detectors/ood/aegmm.py +14 -6
- dataeval/detectors/ood/base.py +9 -3
- dataeval/detectors/ood/llr.py +22 -16
- dataeval/detectors/ood/vae.py +14 -6
- dataeval/detectors/ood/vaegmm.py +14 -6
- dataeval/interop.py +9 -7
- dataeval/metrics/bias/balance.py +50 -44
- dataeval/metrics/bias/coverage.py +38 -6
- dataeval/metrics/bias/diversity.py +117 -65
- dataeval/metrics/bias/metadata.py +225 -60
- dataeval/metrics/bias/parity.py +68 -54
- dataeval/utils/__init__.py +4 -3
- dataeval/utils/lazy.py +26 -0
- dataeval/utils/metadata.py +258 -0
- dataeval/utils/shared.py +1 -1
- dataeval/utils/split_dataset.py +12 -6
- dataeval/utils/tensorflow/_internal/gmm.py +8 -2
- dataeval/utils/tensorflow/_internal/loss.py +20 -11
- dataeval/utils/tensorflow/_internal/{pixelcnn.py → models.py} +371 -77
- dataeval/utils/tensorflow/_internal/trainer.py +12 -5
- dataeval/utils/tensorflow/_internal/utils.py +70 -71
- dataeval/utils/torch/datasets.py +2 -2
- dataeval/workflows/__init__.py +1 -1
- {dataeval-0.72.2.dist-info → dataeval-0.73.1.dist-info}/METADATA +3 -3
- {dataeval-0.72.2.dist-info → dataeval-0.73.1.dist-info}/RECORD +34 -33
- dataeval/utils/tensorflow/_internal/autoencoder.py +0 -316
- {dataeval-0.72.2.dist-info → dataeval-0.73.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.72.2.dist-info → dataeval-0.73.1.dist-info}/WHEEL +0 -0
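Two structural changes stand out in the list above: `dataeval/utils/tensorflow/_internal/pixelcnn.py` is renamed to `models.py` and grows by roughly 300 lines, while `autoencoder.py` is deleted outright (its full contents appear below). This is consistent with the internal model classes being consolidated into the renamed module, although the diff summary alone does not confirm the destination. A hypothetical before/after import sketch, assuming a class such as `AE` moved from `autoencoder.py` into `models.py`:

```python
# dataeval 0.72.2 -- AE was defined in the internal autoencoder module (deleted below)
from dataeval.utils.tensorflow._internal.autoencoder import AE

# dataeval 0.73.1 -- assumption: the class now lives in the renamed models.py
from dataeval.utils.tensorflow._internal.models import AE
```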
dataeval/utils/tensorflow/_internal/autoencoder.py (removed in 0.73.1)

```diff
@@ -1,316 +0,0 @@
-"""
-Source code derived from Alibi-Detect 0.11.4
-https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
-
-Original code Copyright (c) 2023 Seldon Technologies Ltd
-Licensed under Apache Software License (Apache 2.0)
-"""
-
-from __future__ import annotations
-
-from typing import cast
-
-import tensorflow as tf
-import tf_keras as keras
-from tensorflow.python.module.module import Module  # noqa
-from tf_keras import Sequential
-from tf_keras.layers import (
-    Dense,
-    Flatten,
-    Layer,
-)
-
-
-def relative_euclidean_distance(x: tf.Tensor, y: tf.Tensor, eps: float = 1e-12, axis: int = -1) -> tf.Tensor:
-    """
-    Relative Euclidean distance.
-
-    Parameters
-    ----------
-    x
-        Tensor used in distance computation.
-    y
-        Tensor used in distance computation.
-    eps
-        Epsilon added to denominator for numerical stability.
-    axis
-        Axis used to compute distance.
-
-    Returns
-    -------
-    Tensor with relative Euclidean distance across specified axis.
-    """
-    denom = tf.concat(
-        [
-            tf.reshape(tf.norm(x, ord=2, axis=axis), (-1, 1)),  # type: ignore
-            tf.reshape(tf.norm(y, ord=2, axis=axis), (-1, 1)),  # type: ignore
-        ],
-        axis=1,
-    )
-    dist = tf.norm(tf.math.subtract(x, y), ord=2, axis=axis) / (tf.reduce_min(denom, axis=axis) + eps)  # type: ignore
-    return dist
-
-
-def eucl_cosim_features(x: tf.Tensor, y: tf.Tensor, max_eucl: float = 1e2) -> tf.Tensor:
-    """
-    Compute features extracted from the reconstructed instance using the
-    relative Euclidean distance and cosine similarity between 2 tensors.
-
-    Parameters
-    ----------
-    x : tf.Tensor
-        Tensor used in feature computation.
-    y : tf.Tensor
-        Tensor used in feature computation.
-    max_eucl : float, default 1e2
-        Maximum value to clip relative Euclidean distance by.
-
-    Returns
-    -------
-    tf.Tensor
-        Tensor concatenating the relative Euclidean distance and cosine similarity features.
-    """
-    if len(x.shape) > 2 or len(y.shape) > 2:
-        x = cast(tf.Tensor, Flatten()(x))
-        y = cast(tf.Tensor, Flatten()(y))
-    rec_cos = tf.reshape(keras.losses.cosine_similarity(y, x, -1), (-1, 1))
-    rec_euc = tf.reshape(relative_euclidean_distance(y, x, -1), (-1, 1))
-    # rec_euc could become very large so should be clipped
-    rec_euc = tf.clip_by_value(rec_euc, 0, max_eucl)
-    return cast(tf.Tensor, tf.concat([rec_cos, rec_euc], -1))
-
-
-class Sampling(Layer):
-    """Reparametrization trick - Uses (z_mean, z_log_var) to sample the latent vector z."""
-
-    def call(self, inputs: tuple[tf.Tensor, tf.Tensor]) -> tf.Tensor:
-        """
-        Sample z.
-
-        Parameters
-        ----------
-        inputs
-            Tuple with mean and log :term:`variance<Variance>`.
-
-        Returns
-        -------
-        Sampled vector z.
-        """
-        z_mean, z_log_var = inputs
-        batch, dim = tuple(tf.shape(z_mean).numpy().ravel()[:2])  # type: ignore
-        epsilon = cast(tf.Tensor, keras.backend.random_normal(shape=(batch, dim)))
-        return z_mean + tf.exp(tf.math.multiply(0.5, z_log_var)) * epsilon
-
-
-class EncoderAE(Layer):
-    def __init__(self, encoder_net: Sequential) -> None:
-        """
-        Encoder of AE.
-
-        Parameters
-        ----------
-        encoder_net
-            Layers for the encoder wrapped in a keras.Sequential class.
-        name
-            Name of encoder.
-        """
-        super().__init__(name="encoder_ae")
-        self.encoder_net: Sequential = encoder_net
-
-    def call(self, x: tf.Tensor) -> tf.Tensor:
-        return cast(tf.Tensor, self.encoder_net(x))
-
-
-class EncoderVAE(Layer):
-    def __init__(self, encoder_net: Sequential, latent_dim: int) -> None:
-        """
-        Encoder of VAE.
-
-        Parameters
-        ----------
-        encoder_net
-            Layers for the encoder wrapped in a keras.Sequential class.
-        latent_dim
-            Dimensionality of the :term:`latent space<Latent Space>`.
-        name
-            Name of encoder.
-        """
-        super().__init__(name="encoder_vae")
-        self.encoder_net: Sequential = encoder_net
-        self._fc_mean = Dense(latent_dim, activation=None)
-        self._fc_log_var = Dense(latent_dim, activation=None)
-        self._sampling = Sampling()
-
-    def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        x = cast(tf.Tensor, self.encoder_net(x))
-        if len(x.shape) > 2:
-            x = cast(tf.Tensor, Flatten()(x))
-        z_mean = cast(tf.Tensor, self._fc_mean(x))
-        z_log_var = cast(tf.Tensor, self._fc_log_var(x))
-        z = cast(tf.Tensor, self._sampling((z_mean, z_log_var)))
-        return z_mean, z_log_var, z
-
-
-class Decoder(Layer):
-    def __init__(self, decoder_net: Sequential) -> None:
-        """
-        Decoder of AE and VAE.
-
-        Parameters
-        ----------
-        decoder_net
-            Layers for the decoder wrapped in a keras.Sequential class.
-        name
-            Name of decoder.
-        """
-        super().__init__(name="decoder")
-        self.decoder_net: Sequential = decoder_net
-
-    def call(self, inputs: tf.Tensor) -> tf.Tensor:
-        return cast(tf.Tensor, self.decoder_net(inputs))
-
-
-class AE(keras.Model):
-    """
-    Combine encoder and decoder in AE.
-
-    Parameters
-    ----------
-    encoder_net : Sequential
-        Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net : Sequential
-        Layers for the decoder wrapped in a keras.Sequential class.
-    """
-
-    def __init__(self, encoder_net: Sequential, decoder_net: Sequential) -> None:
-        super().__init__(name="ae")
-        self.encoder: Layer = EncoderAE(encoder_net)
-        self.decoder: Layer = Decoder(decoder_net)
-
-    def call(self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None) -> tf.Tensor:
-        z = cast(tf.Tensor, self.encoder(inputs))
-        x_recon = cast(tf.Tensor, self.decoder(z))
-        return x_recon
-
-
-class VAE(keras.Model):
-    """
-    Combine encoder and decoder in VAE.
-
-    Parameters
-    ----------
-    encoder_net : Sequential
-        Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net : Sequential
-        Layers for the decoder wrapped in a keras.Sequential class.
-    latent_dim : int
-        Dimensionality of the :term:`latent space<Latent Space>`.
-    beta : float, default 1.0
-        Beta parameter for KL-divergence loss term.
-    """
-
-    def __init__(self, encoder_net: Sequential, decoder_net: Sequential, latent_dim: int, beta: float = 1.0) -> None:
-        super().__init__(name="vae_model")
-        self.encoder: Layer = EncoderVAE(encoder_net, latent_dim)
-        self.decoder: Layer = Decoder(decoder_net)
-        self.beta: float = beta
-        self.latent_dim: int = latent_dim
-
-    def call(self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None) -> tf.Tensor:
-        z_mean, z_log_var, z = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(inputs))
-        x_recon = self.decoder(z)
-        # add KL divergence loss term
-        kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
-        self.add_loss(self.beta * kl_loss)
-        return cast(tf.Tensor, x_recon)
-
-
-class AEGMM(keras.Model):
-    """
-    Deep Autoencoding Gaussian Mixture Model.
-
-    Parameters
-    ----------
-    encoder_net : Sequential
-        Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net : Sequential
-        Layers for the decoder wrapped in a keras.Sequential class.
-    gmm_density_net : Sequential
-        Layers for the GMM network wrapped in a keras.Sequential class.
-    n_gmm : int
-        Number of components in GMM.
-    """
-
-    def __init__(
-        self,
-        encoder_net: Sequential,
-        decoder_net: Sequential,
-        gmm_density_net: Sequential,
-        n_gmm: int,
-    ) -> None:
-        super().__init__("aegmm")
-        self.encoder = encoder_net
-        self.decoder = decoder_net
-        self.gmm_density = gmm_density_net
-        self.n_gmm = n_gmm
-
-    def call(
-        self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None
-    ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        enc = self.encoder(inputs)
-        x_recon = cast(tf.Tensor, self.decoder(enc))
-        recon_features = eucl_cosim_features(inputs, x_recon)
-        z = cast(tf.Tensor, tf.concat([enc, recon_features], -1))
-        gamma = cast(tf.Tensor, self.gmm_density(z))
-        return x_recon, z, gamma
-
-
-class VAEGMM(keras.Model):
-    """
-    Variational Autoencoding Gaussian Mixture Model.
-
-    Parameters
-    ----------
-    encoder_net : Sequential
-        Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net : Sequential
-        Layers for the decoder wrapped in a keras.Sequential class.
-    gmm_density_net : Sequential
-        Layers for the GMM network wrapped in a keras.Sequential class.
-    n_gmm : int
-        Number of components in GMM.
-    latent_dim : int
-        Dimensionality of the :term:`latent space<Latent Space>`.
-    beta : float, default 1.0
-        Beta parameter for KL-divergence loss term.
-    """
-
-    def __init__(
-        self,
-        encoder_net: Sequential,
-        decoder_net: Sequential,
-        gmm_density_net: Sequential,
-        n_gmm: int,
-        latent_dim: int,
-        beta: float = 1.0,
-    ) -> None:
-        super().__init__(name="vaegmm")
-        self.encoder: Sequential = EncoderVAE(encoder_net, latent_dim)
-        self.decoder: Sequential = decoder_net
-        self.gmm_density: Sequential = gmm_density_net
-        self.n_gmm: int = n_gmm
-        self.latent_dim: int = latent_dim
-        self.beta = beta
-
-    def call(
-        self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None
-    ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        enc_mean, enc_log_var, enc = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(inputs))
-        x_recon = cast(tf.Tensor, self.decoder(enc))
-        recon_features = eucl_cosim_features(inputs, x_recon)
-        z = cast(tf.Tensor, tf.concat([enc, recon_features], -1))
-        gamma = cast(tf.Tensor, self.gmm_density(z))
-        # add KL divergence loss term
-        kl_loss = -0.5 * tf.reduce_mean(enc_log_var - tf.square(enc_mean) - tf.exp(enc_log_var) + 1)
-        self.add_loss(self.beta * kl_loss)
-        return x_recon, z, gamma
```
File without changes
|
File without changes
|