dataeval 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +4 -4
- dataeval/detectors/__init__.py +4 -3
- dataeval/detectors/drift/__init__.py +10 -11
- dataeval/{_internal/detectors → detectors}/drift/base.py +51 -102
- dataeval/{_internal/detectors → detectors}/drift/cvm.py +9 -8
- dataeval/{_internal/detectors → detectors}/drift/ks.py +11 -10
- dataeval/{_internal/detectors → detectors}/drift/mmd.py +33 -34
- dataeval/{_internal/detectors → detectors}/drift/torch.py +15 -13
- dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +12 -9
- dataeval/detectors/drift/updates.py +61 -0
- dataeval/detectors/linters/__init__.py +3 -3
- dataeval/{_internal/detectors → detectors/linters}/clusterer.py +47 -45
- dataeval/{_internal/detectors → detectors/linters}/duplicates.py +20 -10
- dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
- dataeval/{_internal/detectors → detectors/linters}/outliers.py +19 -26
- dataeval/detectors/ood/__init__.py +8 -16
- dataeval/{_internal/detectors → detectors}/ood/ae.py +9 -9
- dataeval/{_internal/detectors → detectors}/ood/aegmm.py +10 -30
- dataeval/{_internal/detectors → detectors}/ood/base.py +27 -21
- dataeval/{_internal/detectors → detectors}/ood/llr.py +27 -23
- dataeval/detectors/ood/metadata_ks_compare.py +99 -0
- dataeval/detectors/ood/metadata_least_likely.py +119 -0
- dataeval/detectors/ood/metadata_ood_mi.py +92 -0
- dataeval/{_internal/detectors → detectors}/ood/vae.py +11 -13
- dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
- dataeval/{_internal/interop.py → interop.py} +12 -7
- dataeval/metrics/__init__.py +1 -1
- dataeval/metrics/bias/__init__.py +4 -4
- dataeval/{_internal/metrics → metrics/bias}/balance.py +70 -4
- dataeval/{_internal/metrics → metrics/bias}/coverage.py +10 -8
- dataeval/{_internal/metrics → metrics/bias}/diversity.py +54 -20
- dataeval/metrics/bias/metadata.py +275 -0
- dataeval/{_internal/metrics → metrics/bias}/parity.py +21 -17
- dataeval/metrics/estimators/__init__.py +3 -3
- dataeval/{_internal/metrics → metrics/estimators}/ber.py +31 -28
- dataeval/{_internal/metrics → metrics/estimators}/divergence.py +15 -16
- dataeval/{_internal/metrics → metrics/estimators}/uap.py +8 -6
- dataeval/metrics/stats/__init__.py +7 -7
- dataeval/{_internal/metrics → metrics}/stats/base.py +66 -40
- dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +19 -15
- dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +19 -17
- dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +12 -10
- dataeval/metrics/stats/hashstats.py +156 -0
- dataeval/{_internal/metrics → metrics}/stats/labelstats.py +8 -6
- dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +12 -11
- dataeval/{_internal/metrics → metrics}/stats/visualstats.py +14 -13
- dataeval/{_internal/output.py → output.py} +26 -6
- dataeval/utils/__init__.py +8 -4
- dataeval/utils/image.py +71 -0
- dataeval/utils/shared.py +151 -0
- dataeval/utils/split_dataset.py +486 -0
- dataeval/utils/tensorflow/__init__.py +9 -7
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +64 -68
- dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +10 -9
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +18 -22
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
- dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +18 -18
- dataeval/utils/tensorflow/loss/__init__.py +6 -2
- dataeval/utils/torch/__init__.py +7 -3
- dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
- dataeval/{_internal → utils/torch}/datasets.py +49 -43
- dataeval/utils/torch/models.py +138 -0
- dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +12 -141
- dataeval/{_internal → utils/torch}/utils.py +3 -1
- dataeval/workflows/__init__.py +1 -1
- dataeval/{_internal/workflows → workflows}/sufficiency.py +42 -37
- {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/METADATA +7 -5
- dataeval-0.72.2.dist-info/RECORD +72 -0
- dataeval/_internal/detectors/__init__.py +0 -0
- dataeval/_internal/detectors/drift/__init__.py +0 -0
- dataeval/_internal/detectors/ood/__init__.py +0 -0
- dataeval/_internal/metrics/__init__.py +0 -0
- dataeval/_internal/metrics/stats/hashstats.py +0 -75
- dataeval/_internal/metrics/utils.py +0 -447
- dataeval/_internal/models/__init__.py +0 -0
- dataeval/_internal/models/pytorch/__init__.py +0 -0
- dataeval/_internal/models/pytorch/utils.py +0 -67
- dataeval/_internal/models/tensorflow/__init__.py +0 -0
- dataeval/_internal/workflows/__init__.py +0 -0
- dataeval/detectors/drift/kernels/__init__.py +0 -10
- dataeval/detectors/drift/updates/__init__.py +0 -7
- dataeval/utils/tensorflow/models/__init__.py +0 -9
- dataeval/utils/tensorflow/recon/__init__.py +0 -3
- dataeval/utils/torch/datasets/__init__.py +0 -12
- dataeval/utils/torch/models/__init__.py +0 -11
- dataeval/utils/torch/trainer/__init__.py +0 -7
- dataeval-0.72.0.dist-info/RECORD +0 -80
- /dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
- {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0
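Most of the changes below are mechanical moves of implementation modules out of the private `dataeval._internal` namespace into their public package locations. As a minimal sketch of what the relocation means for deep imports, using the `balance.py` entry above (illustrative only; package-level re-exports remain the stable entry points):

    # 0.72.0: implementation lived in the private namespace
    # import dataeval._internal.metrics.balance   # gone in 0.72.2

    # 0.72.2: the same module now sits at its public path
    import dataeval.metrics.bias.balance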
dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py
CHANGED
@@ -6,14 +6,14 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
 
-# pyright: reportIncompatibleMethodOverride=false
-
 from __future__ import annotations
 
-from typing import Callable, cast
+from typing import cast
 
 import tensorflow as tf
 import tf_keras as keras
+from tensorflow.python.module.module import Module  # noqa
+from tf_keras import Sequential
 from tf_keras.layers import (
     Dense,
     Flatten,
@@ -90,7 +90,7 @@ class Sampling(Layer):
         Parameters
         ----------
         inputs
-            Tuple with mean and log variance
+            Tuple with mean and log :term:`variance<Variance>`.
 
         Returns
         -------
@@ -103,7 +103,7 @@ class Sampling(Layer):
 
 
 class EncoderAE(Layer):
-    def __init__(self, encoder_net:
+    def __init__(self, encoder_net: Sequential) -> None:
         """
         Encoder of AE.
 
@@ -115,14 +115,14 @@ class EncoderAE(Layer):
             Name of encoder.
         """
         super().__init__(name="encoder_ae")
-        self.encoder_net = encoder_net
+        self.encoder_net: Sequential = encoder_net
 
     def call(self, x: tf.Tensor) -> tf.Tensor:
         return cast(tf.Tensor, self.encoder_net(x))
 
 
 class EncoderVAE(Layer):
-    def __init__(self, encoder_net:
+    def __init__(self, encoder_net: Sequential, latent_dim: int) -> None:
         """
         Encoder of VAE.
 
@@ -131,28 +131,28 @@ class EncoderVAE(Layer):
         encoder_net
             Layers for the encoder wrapped in a keras.Sequential class.
         latent_dim
-            Dimensionality of the latent space
+            Dimensionality of the :term:`latent space<Latent Space>`.
         name
             Name of encoder.
         """
         super().__init__(name="encoder_vae")
-        self.encoder_net = encoder_net
-        self.fc_mean = Dense(latent_dim, activation=None)
-        self.fc_log_var = Dense(latent_dim, activation=None)
-        self.sampling = Sampling()
+        self.encoder_net: Sequential = encoder_net
+        self._fc_mean = Dense(latent_dim, activation=None)
+        self._fc_log_var = Dense(latent_dim, activation=None)
+        self._sampling = Sampling()
 
     def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
         x = cast(tf.Tensor, self.encoder_net(x))
         if len(x.shape) > 2:
             x = cast(tf.Tensor, Flatten()(x))
-        z_mean = cast(tf.Tensor, self.fc_mean(x))
-        z_log_var = cast(tf.Tensor, self.fc_log_var(x))
-        z = cast(tf.Tensor, self.sampling((z_mean, z_log_var)))
+        z_mean = cast(tf.Tensor, self._fc_mean(x))
+        z_log_var = cast(tf.Tensor, self._fc_log_var(x))
+        z = cast(tf.Tensor, self._sampling((z_mean, z_log_var)))
         return z_mean, z_log_var, z
 
 
 class Decoder(Layer):
-    def __init__(self, decoder_net:
+    def __init__(self, decoder_net: Sequential) -> None:
         """
         Decoder of AE and VAE.
 
@@ -164,10 +164,10 @@ class Decoder(Layer):
             Name of decoder.
         """
         super().__init__(name="decoder")
-        self.decoder_net = decoder_net
+        self.decoder_net: Sequential = decoder_net
 
-    def call(self, x: tf.Tensor) -> tf.Tensor:
-        return cast(tf.Tensor, self.decoder_net(x))
+    def call(self, inputs: tf.Tensor) -> tf.Tensor:
+        return cast(tf.Tensor, self.decoder_net(inputs))
 
 
 class AE(keras.Model):
@@ -176,19 +176,19 @@ class AE(keras.Model):
 
     Parameters
     ----------
-    encoder_net :
+    encoder_net : Sequential
         Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net :
+    decoder_net : Sequential
         Layers for the decoder wrapped in a keras.Sequential class.
     """
 
-    def __init__(self, encoder_net:
+    def __init__(self, encoder_net: Sequential, decoder_net: Sequential) -> None:
         super().__init__(name="ae")
-        self.encoder = EncoderAE(encoder_net)
-        self.decoder = Decoder(decoder_net)
+        self.encoder: Layer = EncoderAE(encoder_net)
+        self.decoder: Layer = Decoder(decoder_net)
 
-    def call(self, x: tf.Tensor) -> tf.Tensor:
-        z = cast(tf.Tensor, self.encoder(x))
+    def call(self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None) -> tf.Tensor:
+        z = cast(tf.Tensor, self.encoder(inputs))
         x_recon = cast(tf.Tensor, self.decoder(z))
         return x_recon
 
@@ -199,25 +199,25 @@ class VAE(keras.Model):
 
     Parameters
     ----------
-    encoder_net :
+    encoder_net : Sequential
         Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net :
+    decoder_net : Sequential
         Layers for the decoder wrapped in a keras.Sequential class.
     latent_dim : int
-        Dimensionality of the latent space
+        Dimensionality of the :term:`latent space<Latent Space>`.
     beta : float, default 1.0
         Beta parameter for KL-divergence loss term.
     """
 
-    def __init__(self, encoder_net:
+    def __init__(self, encoder_net: Sequential, decoder_net: Sequential, latent_dim: int, beta: float = 1.0) -> None:
         super().__init__(name="vae_model")
-        self.encoder = EncoderVAE(encoder_net, latent_dim)
-        self.decoder = Decoder(decoder_net)
-        self.beta = beta
-        self.latent_dim = latent_dim
+        self.encoder: Layer = EncoderVAE(encoder_net, latent_dim)
+        self.decoder: Layer = Decoder(decoder_net)
+        self.beta: float = beta
+        self.latent_dim: int = latent_dim
 
-    def call(self, x: tf.Tensor) -> tf.Tensor:
-        z_mean, z_log_var, z = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
+    def call(self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None) -> tf.Tensor:
+        z_mean, z_log_var, z = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(inputs))
         x_recon = self.decoder(z)
         # add KL divergence loss term
         kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
@@ -231,37 +231,35 @@ class AEGMM(keras.Model):
 
     Parameters
     ----------
-    encoder_net :
+    encoder_net : Sequential
         Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net :
+    decoder_net : Sequential
         Layers for the decoder wrapped in a keras.Sequential class.
-    gmm_density_net :
+    gmm_density_net : Sequential
         Layers for the GMM network wrapped in a keras.Sequential class.
     n_gmm : int
         Number of components in GMM.
-    recon_features : Callable, default eucl_cosim_features
-        Function to extract features from the reconstructed instance by the decoder.
     """
 
     def __init__(
         self,
-        encoder_net:
-        decoder_net:
-        gmm_density_net:
+        encoder_net: Sequential,
+        decoder_net: Sequential,
+        gmm_density_net: Sequential,
         n_gmm: int,
-        recon_features: Callable = eucl_cosim_features,
     ) -> None:
         super().__init__("aegmm")
         self.encoder = encoder_net
         self.decoder = decoder_net
         self.gmm_density = gmm_density_net
        self.n_gmm = n_gmm
-        self.recon_features = recon_features
 
-    def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        enc = self.encoder(x)
+    def call(
+        self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None
+    ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+        enc = self.encoder(inputs)
         x_recon = cast(tf.Tensor, self.decoder(enc))
-        recon_features = self.recon_features(x, x_recon)
+        recon_features = eucl_cosim_features(inputs, x_recon)
         z = cast(tf.Tensor, tf.concat([enc, recon_features], -1))
         gamma = cast(tf.Tensor, self.gmm_density(z))
         return x_recon, z, gamma
@@ -273,45 +271,43 @@ class VAEGMM(keras.Model):
 
     Parameters
     ----------
-    encoder_net :
+    encoder_net : Sequential
         Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net :
+    decoder_net : Sequential
         Layers for the decoder wrapped in a keras.Sequential class.
-    gmm_density_net :
+    gmm_density_net : Sequential
         Layers for the GMM network wrapped in a keras.Sequential class.
     n_gmm : int
         Number of components in GMM.
     latent_dim : int
-        Dimensionality of the latent space
-    recon_features : Callable, default eucl_cosim_features
-        Function to extract features from the reconstructed instance by the decoder.
+        Dimensionality of the :term:`latent space<Latent Space>`.
     beta : float, default 1.0
         Beta parameter for KL-divergence loss term.
     """
 
     def __init__(
         self,
-        encoder_net:
-        decoder_net:
-        gmm_density_net:
+        encoder_net: Sequential,
+        decoder_net: Sequential,
+        gmm_density_net: Sequential,
         n_gmm: int,
         latent_dim: int,
-        recon_features: Callable = eucl_cosim_features,
         beta: float = 1.0,
     ) -> None:
         super().__init__(name="vaegmm")
-        self.encoder = EncoderVAE(encoder_net, latent_dim)
-        self.decoder = decoder_net
-        self.gmm_density = gmm_density_net
-        self.n_gmm = n_gmm
-        self.latent_dim = latent_dim
-        self.recon_features = recon_features
+        self.encoder: Sequential = EncoderVAE(encoder_net, latent_dim)
+        self.decoder: Sequential = decoder_net
+        self.gmm_density: Sequential = gmm_density_net
+        self.n_gmm: int = n_gmm
+        self.latent_dim: int = latent_dim
         self.beta = beta
 
-    def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        enc_mean, enc_log_var, enc = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
+    def call(
+        self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None
+    ) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+        enc_mean, enc_log_var, enc = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(inputs))
         x_recon = cast(tf.Tensor, self.decoder(enc))
-        recon_features = self.recon_features(x, x_recon)
+        recon_features = eucl_cosim_features(inputs, x_recon)
         z = cast(tf.Tensor, tf.concat([enc, recon_features], -1))
         gamma = cast(tf.Tensor, self.gmm_density(z))
         # add KL divergence loss term
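The constructors above now take `tf_keras.Sequential` networks, AEGMM/VAEGMM drop the `recon_features` hook in favor of a hardwired `eucl_cosim_features`, and each `call()` gains keras-style `training`/`mask` parameters. A minimal sketch of constructing the refactored `AE` (layer sizes and shapes are illustrative assumptions, not values from the diff):

    import tensorflow as tf
    from tf_keras import Sequential
    from tf_keras.layers import Dense, Flatten, InputLayer

    from dataeval.utils.tensorflow._internal.autoencoder import AE

    # Both nets must now be Sequential models rather than arbitrary keras Models.
    encoder_net = Sequential([InputLayer(input_shape=(32, 32, 3)), Flatten(), Dense(64)])
    decoder_net = Sequential([InputLayer(input_shape=(64,)), Dense(32 * 32 * 3)])

    ae = AE(encoder_net, decoder_net)
    x_recon = ae(tf.zeros((1, 32, 32, 3)))  # returns the decoder output; here a flat (1, 3072) tensor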
dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py}
CHANGED
@@ -10,6 +10,7 @@ from __future__ import annotations
 
 from typing import Literal, cast
 
+import numpy as np
 import tensorflow as tf
 from numpy.typing import NDArray
 from tensorflow_probability.python.distributions.mvn_diag import MultivariateNormalDiag
@@ -17,7 +18,7 @@ from tensorflow_probability.python.distributions.mvn_tril import MultivariateNormalTriL
 from tensorflow_probability.python.stats import covariance
 from tf_keras.layers import Flatten
 
-from dataeval._internal.models.tensorflow.gmm import gmm_energy, gmm_params
+from dataeval.utils.tensorflow._internal.gmm import gmm_energy, gmm_params
 
 
 class Elbo:
@@ -31,7 +32,7 @@ class Elbo:
     Parameters
     ----------
     cov_type : Union[Literal["cov_full", "cov_diag"], float], default 1.0
-        Full covariance matrix, diagonal variance matrix, or scale identity multiplier.
+        Full covariance matrix, diagonal :term:`variance<Variance>` matrix, or scale identity multiplier.
     x : ArrayLike, optional - default None
         Dataset used to calculate the covariance matrix. Required for full and diagonal covariance matrix types.
     """
@@ -39,26 +40,26 @@ class Elbo:
     def __init__(
         self,
         cov_type: Literal["cov_full", "cov_diag"] | float = 1.0,
-        x: tf.Tensor | NDArray | None = None,
+        x: tf.Tensor | NDArray[np.float32] | None = None,
     ):
         if isinstance(cov_type, float):
-            self.cov = ("sim", cov_type)
+            self._cov = ("sim", cov_type)
         elif cov_type in ["cov_full", "cov_diag"]:
-            x_np: NDArray = x.numpy() if tf.is_tensor(x) else x  # type: ignore
+            x_np: NDArray[np.float32] = x.numpy().astype(np.float32) if tf.is_tensor(x) else x  # type: ignore
             cov = covariance(x_np.reshape(x_np.shape[0], -1))  # type: ignore py38
             if cov_type == "cov_diag":  # infer standard deviation from covariance matrix
                 cov = tf.math.sqrt(tf.linalg.diag_part(cov))
-            self.cov = (cov_type, cov)
+            self._cov = (cov_type, cov)
         else:
             raise ValueError("Only cov_full, cov_diag or sim value should be specified.")
 
     def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
         y_pred_flat = cast(tf.Tensor, Flatten()(y_pred))
 
-        if self.cov[0] == "cov_full":
-            y_mn = MultivariateNormalTriL(y_pred_flat, scale_tril=tf.linalg.cholesky(self.cov[1]))
+        if self._cov[0] == "cov_full":
+            y_mn = MultivariateNormalTriL(y_pred_flat, scale_tril=tf.linalg.cholesky(self._cov[1]))
         else:  # cov_diag and sim
-            cov_diag = self.cov[1] if self.cov[0] == "cov_diag" else self.cov[1] * tf.ones(y_pred_flat.shape[-1])
+            cov_diag = self._cov[1] if self._cov[0] == "cov_diag" else self._cov[1] * tf.ones(y_pred_flat.shape[-1])
             y_mn = MultivariateNormalDiag(y_pred_flat, scale_diag=cov_diag)
 
         loss = -tf.reduce_mean(y_mn.log_prob(Flatten()(y_true)))
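`Elbo` keeps its three covariance modes but now stores them in the private `_cov` attribute and coerces inputs to `float32`. A hedged usage sketch (array shapes and values are assumptions):

    import numpy as np
    import tensorflow as tf

    from dataeval.utils.tensorflow.loss import Elbo  # re-exported per the loss/__init__.py change below

    x_ref = np.random.default_rng(0).normal(size=(128, 8)).astype(np.float32)

    elbo_sim = Elbo(cov_type=0.05)                  # scale-identity multiplier; no data needed
    elbo_diag = Elbo(cov_type="cov_diag", x=x_ref)  # estimates per-feature scale from x

    loss = elbo_sim(tf.constant(x_ref), tf.constant(x_ref))  # scalar negative mean log-prob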
dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py
CHANGED
@@ -34,13 +34,9 @@ from tensorflow_probability.python.internal import (
     tensorshape_util,
 )
 
-__all__ = [
-    "Shift",
-]
-
 
 class WeightNorm(keras.layers.Wrapper):
-    def __init__(self, layer, data_init: bool = True, **kwargs):
+    def __init__(self, layer, data_init: bool = True, **kwargs) -> None:
         """Layer wrapper to decouple magnitude and direction of the layer's weights.
 
         This wrapper reparameterizes a layer by decoupling the weight's
@@ -187,7 +183,7 @@ class WeightNorm(keras.layers.Wrapper):
 
 
 class Shift(bijector.Bijector):
-    def __init__(self, shift, validate_args=False, name="shift"):
+    def __init__(self, shift, validate_args=False, name="shift") -> None:
         """Instantiates the `Shift` bijector which computes `Y = g(X; shift) = X + shift`
         where `shift` is a numeric `Tensor`.
 
@@ -276,13 +272,13 @@ class PixelCNN(distribution.Distribution):
 
     def __init__(
         self,
-        image_shape: tuple,
-        conditional_shape: tuple | None = None,
+        image_shape: tuple[int, int, int],
+        conditional_shape: tuple[int, ...] | None = None,
         num_resnet: int = 5,
         num_hierarchies: int = 3,
         num_filters: int = 160,
         num_logistic_mix: int = 10,
-        receptive_field_dims: tuple = (3, 3),
+        receptive_field_dims: tuple[int, int] = (3, 3),
         dropout_p: float = 0.5,
         resnet_activation: str = "concat_elu",
         l2_weight: float = 0.0,
@@ -290,7 +286,7 @@ class PixelCNN(distribution.Distribution):
         use_data_init: bool = True,
         high: int = 255,
         low: int = 0,
-        dtype=tf.float32,
+        dtype: tf.DType = tf.float32,
     ) -> None:
         parameters = dict(locals())
         with tf.name_scope("PixelCNN") as name:
@@ -315,7 +311,7 @@ class PixelCNN(distribution.Distribution):
         self._high = tf.cast(high, self.dtype)
         self._low = tf.cast(low, self.dtype)
         self._num_logistic_mix = num_logistic_mix
-        self.network = _PixelCNNNetwork(
+        self._network = PixelCNNNetwork(
             dropout_p=dropout_p,
             num_resnet=num_resnet,
             num_hierarchies=num_hierarchies,
@@ -338,7 +334,7 @@ class PixelCNN(distribution.Distribution):
 
         self.image_shape = image_shape
         self.conditional_shape = conditional_shape
-        self.network.build(input_shape)
+        self._network.build(input_shape)
 
     def _make_mixture_dist(self, component_logits, locs, scales, return_per_feature: bool = False):
         """Builds a mixture of quantized logistic distributions.
@@ -400,7 +396,7 @@ class PixelCNN(distribution.Distribution):
         Parameters
         ----------
         value
-            `Tensor` or Numpy array of image data. May have leading batch
+            `Tensor` or :term:`NumPy` array of image data. May have leading batch
             dimension(s), which must broadcast to the leading batch dimensions of
             `conditional_input`.
         conditional_input
@@ -455,7 +451,7 @@ class PixelCNN(distribution.Distribution):
         transformed_value = (2.0 * (value - self._low) / (self._high - self._low)) - 1.0
         inputs = transformed_value if conditional_input is None else [transformed_value, conditional_input]
 
-        params = self.network(inputs, training=training)
+        params = self._network(inputs, training=training)
 
         num_channels = self.event_shape[-1]
         if num_channels == 1:
@@ -554,7 +550,7 @@ class PixelCNN(distribution.Distribution):
             seed=seed,
         )
         inputs = samples_0 if conditional_input is None else [samples_0, h]
-        params_0 = self.network(inputs, training=training)
+        params_0 = self._network(inputs, training=training)
         samples_0 = self._sample_channels(*params_0, seed=seed)
 
         image_height, image_width, _ = tensorshape_util.as_list(self.event_shape)
@@ -579,7 +575,7 @@ class PixelCNN(distribution.Distribution):
         width, num_channels]`.
         """
         inputs = samples if conditional_input is None else [samples, h]
-        params = self.network(inputs, training=training)
+        params = self._network(inputs, training=training)
         samples_new = self._sample_channels(*params, seed=seed)
 
         # Update the current pixel
@@ -673,7 +669,7 @@ class PixelCNN(distribution.Distribution):
         return tf.TensorShape(self.image_shape)
 
 
-class _PixelCNNNetwork(keras.layers.Layer):
+class PixelCNNNetwork(keras.layers.Layer):
     """Keras `Layer` to parameterize a Pixel CNN++ distribution.
     This is a Keras implementation of the Pixel CNN++ network, as described in
     Salimans et al. (2017)[1] and van den Oord et al. (2016)[2].
@@ -699,14 +695,14 @@ class _PixelCNNNetwork(keras.layers.Layer):
         num_hierarchies: int = 3,
         num_filters: int = 160,
         num_logistic_mix: int = 10,
-        receptive_field_dims: tuple = (3, 3),
+        receptive_field_dims: tuple[int, int] = (3, 3),
         resnet_activation: str = "concat_elu",
         l2_weight: float = 0.0,
         use_weight_norm: bool = True,
         use_data_init: bool = True,
-        dtype=tf.float32,
+        dtype: tf.DType = tf.float32,
     ) -> None:
-        """Initialize the neural network for the Pixel CNN++ distribution.
+        """Initialize the :term:`neural network<Neural Network>` for the Pixel CNN++ distribution.
 
         Parameters
         ----------
@@ -765,7 +761,7 @@ class _PixelCNNNetwork(keras.layers.Layer):
         else:
             self._layer_wrapper = lambda layer: layer
 
-    def build(self, input_shape):
+    def build(self, input_shape: tuple[int, ...]) -> None:
         dtype = self.dtype
         if len(input_shape) == 2:
             batch_image_shape, batch_conditional_shape = input_shape
@@ -1040,7 +1036,7 @@ class _PixelCNNNetwork(keras.layers.Layer):
         self._network = keras.Model(inputs=inputs, outputs=outputs)
         super().build(input_shape)
 
-    def call(self, inputs, training=None):
+    def call(self, inputs: tf.Tensor, training: bool | None = None, mask: tf.Tensor | None = None) -> tf.Tensor:
         """Call the Pixel CNN network model.
 
         Parameters
dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py
CHANGED
@@ -60,7 +60,9 @@ def trainer(
     loss_fn = loss_fn() if isinstance(loss_fn, type) else loss_fn
     optimizer = optimizer() if isinstance(optimizer, type) else optimizer
 
-    train_data = x_train if y_train is None else (x_train, y_train)
+    train_data = (
+        x_train.astype(np.float32) if y_train is None else (x_train.astype(np.float32), y_train.astype(np.float32))
+    )
     dataset = tf.data.Dataset.from_tensor_slices(train_data)
     dataset = dataset.shuffle(buffer_size=buffer_size).batch(batch_size)
     n_minibatch = len(dataset)
dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py
CHANGED
@@ -9,7 +9,7 @@ Licensed under Apache Software License (Apache 2.0)
 from __future__ import annotations
 
 import math
-from typing import Callable, Union, cast
+from typing import Any, Callable, Literal, Union, cast
 
 import numpy as np
 import tensorflow as tf
@@ -26,8 +26,8 @@ from tf_keras.layers import (
     Reshape,
 )
 
-from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
-from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
+from dataeval.utils.tensorflow._internal.autoencoder import AE, AEGMM, VAE, VAEGMM
+from dataeval.utils.tensorflow._internal.pixelcnn import PixelCNN
 
 
 def predict_batch(
@@ -55,7 +55,7 @@ def predict_batch(
 
     Returns
     -------
-    Numpy array, tensorflow tensor or tuples of those with model outputs.
+    :term:`NumPy` array, tensorflow tensor or tuples of those with model outputs.
     """
     n = len(x)
     n_minibatch = int(np.ceil(n / batch_size))
@@ -95,7 +95,7 @@ def predict_batch(
     return out
 
 
-def _get_default_encoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
+def get_default_encoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
     return Sequential(
         [
             InputLayer(input_shape=input_shape),
@@ -108,7 +108,7 @@ def _get_default_encoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
     )
 
 
-def _get_default_decoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
+def get_default_decoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
     return Sequential(
         [
             InputLayer(input_shape=(encoding_dim,)),
@@ -124,18 +124,18 @@ def _get_default_decoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
 
 
 def create_model(
-    model_type: AE | AEGMM | PixelCNN | VAE | VAEGMM,
+    model_type: Literal["AE", "AEGMM", "PixelCNN", "VAE", "VAEGMM"],
     input_shape: tuple[int, int, int],
     encoding_dim: int | None = None,
     n_gmm: int | None = None,
     gmm_latent_dim: int | None = None,
-):
+) -> Any:
     """
     Create a default model for the specified model type.
 
     Parameters
     ----------
-    model_type :
+    model_type : Literal["AE", "AEGMM", "PixelCNN", "VAE", "VAEGMM"]
         The model type to create.
     input_shape : Tuple[int, int, int]
         The input shape of the data used.
@@ -148,20 +148,20 @@ def create_model(
     """
     input_dim = math.prod(input_shape)
     encoding_dim = int(math.pow(2, int(input_dim.bit_length() * 0.8)) if encoding_dim is None else encoding_dim)
-    if model_type == AE:
+    if model_type == "AE":
         return AE(
-            _get_default_encoder_net(input_shape, encoding_dim),
-            _get_default_decoder_net(input_shape, encoding_dim),
+            get_default_encoder_net(input_shape, encoding_dim),
+            get_default_decoder_net(input_shape, encoding_dim),
         )
 
-    if model_type == VAE:
+    if model_type == "VAE":
         return VAE(
-            _get_default_encoder_net(input_shape, encoding_dim),
-            _get_default_decoder_net(input_shape, encoding_dim),
+            get_default_encoder_net(input_shape, encoding_dim),
+            get_default_decoder_net(input_shape, encoding_dim),
             encoding_dim,
         )
 
-    if model_type == AEGMM:
+    if model_type == "AEGMM":
         n_gmm = 2 if n_gmm is None else n_gmm
         gmm_latent_dim = 1 if gmm_latent_dim is None else gmm_latent_dim
         # The outlier detector is an encoder/decoder architecture
@@ -201,7 +201,7 @@ def create_model(
             n_gmm=n_gmm,
         )
 
-    if model_type == VAEGMM:
+    if model_type == "VAEGMM":
         n_gmm = 2 if n_gmm is None else n_gmm
         gmm_latent_dim = 2 if gmm_latent_dim is None else gmm_latent_dim
         # The outlier detector is an encoder/decoder architecture
@@ -242,7 +242,7 @@ def create_model(
             latent_dim=gmm_latent_dim,
        )
 
-    if model_type == PixelCNN:
+    if model_type == "PixelCNN":
         return PixelCNN(
             image_shape=input_shape,
             num_resnet=5,
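`create_model` switches from class objects to string literals for model selection. Under 0.72.2 the call reads as follows (the input shape is an illustrative assumption):

    from dataeval.utils.tensorflow._internal.utils import create_model

    model = create_model("AE", input_shape=(32, 32, 3))  # 0.72.0 equivalent: create_model(AE, ...)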
dataeval/utils/tensorflow/loss/__init__.py
CHANGED
@@ -1,7 +1,11 @@
 from dataeval import _IS_TENSORFLOW_AVAILABLE
-from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
 
 __all__ = []
 
+
 if _IS_TENSORFLOW_AVAILABLE:
-    __all__ += ["Elbo", "LossGMM"]
+    from dataeval.utils.tensorflow._internal.loss import Elbo, LossGMM
+
+    __all__ = ["Elbo", "LossGMM"]
+
+del _IS_TENSORFLOW_AVAILABLE
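With the guarded import above, `Elbo` and `LossGMM` are only bound when TensorFlow is installed, so callers can feature-test via `__all__` instead of catching `ImportError`. A small sketch:

    from dataeval.utils.tensorflow import loss

    if "Elbo" in loss.__all__:  # empty list when TensorFlow is absent
        elbo = loss.Elbo(0.05)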
dataeval/utils/torch/__init__.py
CHANGED
@@ -6,16 +6,20 @@ to create a seamless integration between custom models and DataEval's metrics.
 """
 
 from dataeval import _IS_TORCH_AVAILABLE, _IS_TORCHVISION_AVAILABLE
-from dataeval._internal.utils import read_dataset
 
 __all__ = []
 
 if _IS_TORCH_AVAILABLE:
-    from . import models, trainer
+    from dataeval.utils.torch import models, trainer
+    from dataeval.utils.torch.utils import read_dataset
 
     __all__ += ["read_dataset", "models", "trainer"]
 
 if _IS_TORCHVISION_AVAILABLE:
-    from . import datasets
+    from dataeval.utils.torch import datasets
 
     __all__ += ["datasets"]
+
+
+del _IS_TORCH_AVAILABLE
+del _IS_TORCHVISION_AVAILABLE
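The torch package follows the same pattern as the tensorflow one: `read_dataset` moves from `dataeval._internal.utils` to `dataeval.utils.torch.utils`, submodules are imported by absolute path, and the availability flags are deleted after use. A usage sketch:

    from dataeval.utils import torch as torch_utils

    print(torch_utils.__all__)  # includes "datasets" only when torchvision is installed

    if "read_dataset" in torch_utils.__all__:
        from dataeval.utils.torch.utils import read_dataset  # new public location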