dataeval 0.64.0__py3-none-any.whl → 0.66.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +13 -9
- dataeval/_internal/detectors/clusterer.py +63 -49
- dataeval/_internal/detectors/drift/base.py +248 -51
- dataeval/_internal/detectors/drift/cvm.py +28 -26
- dataeval/_internal/detectors/drift/ks.py +31 -28
- dataeval/_internal/detectors/drift/mmd.py +62 -42
- dataeval/_internal/detectors/drift/torch.py +69 -60
- dataeval/_internal/detectors/drift/uncertainty.py +32 -32
- dataeval/_internal/detectors/duplicates.py +67 -31
- dataeval/_internal/detectors/ood/ae.py +15 -29
- dataeval/_internal/detectors/ood/aegmm.py +33 -27
- dataeval/_internal/detectors/ood/base.py +86 -47
- dataeval/_internal/detectors/ood/llr.py +34 -31
- dataeval/_internal/detectors/ood/vae.py +32 -31
- dataeval/_internal/detectors/ood/vaegmm.py +34 -28
- dataeval/_internal/detectors/{linter.py → outliers.py} +60 -38
- dataeval/_internal/flags.py +44 -21
- dataeval/_internal/interop.py +5 -3
- dataeval/_internal/metrics/balance.py +42 -5
- dataeval/_internal/metrics/ber.py +11 -8
- dataeval/_internal/metrics/coverage.py +15 -8
- dataeval/_internal/metrics/divergence.py +41 -7
- dataeval/_internal/metrics/diversity.py +57 -19
- dataeval/_internal/metrics/parity.py +141 -66
- dataeval/_internal/metrics/stats.py +330 -313
- dataeval/_internal/metrics/uap.py +33 -4
- dataeval/_internal/metrics/utils.py +79 -40
- dataeval/_internal/models/pytorch/autoencoder.py +127 -22
- dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
- dataeval/_internal/models/tensorflow/gmm.py +4 -2
- dataeval/_internal/models/tensorflow/losses.py +17 -13
- dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
- dataeval/_internal/models/tensorflow/trainer.py +10 -7
- dataeval/_internal/models/tensorflow/utils.py +23 -20
- dataeval/_internal/output.py +85 -0
- dataeval/_internal/utils.py +5 -3
- dataeval/_internal/workflows/sufficiency.py +122 -121
- dataeval/detectors/__init__.py +6 -25
- dataeval/detectors/drift/__init__.py +16 -0
- dataeval/detectors/drift/kernels/__init__.py +6 -0
- dataeval/detectors/drift/updates/__init__.py +3 -0
- dataeval/detectors/linters/__init__.py +5 -0
- dataeval/detectors/ood/__init__.py +11 -0
- dataeval/flags/__init__.py +2 -2
- dataeval/metrics/__init__.py +2 -26
- dataeval/metrics/bias/__init__.py +14 -0
- dataeval/metrics/estimators/__init__.py +9 -0
- dataeval/metrics/stats/__init__.py +6 -0
- dataeval/tensorflow/__init__.py +3 -0
- dataeval/tensorflow/loss/__init__.py +3 -0
- dataeval/tensorflow/models/__init__.py +5 -0
- dataeval/tensorflow/recon/__init__.py +3 -0
- dataeval/torch/__init__.py +3 -0
- dataeval/{models/torch → torch/models}/__init__.py +1 -2
- dataeval/torch/trainer/__init__.py +3 -0
- dataeval/utils/__init__.py +3 -6
- dataeval/workflows/__init__.py +2 -4
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
- dataeval-0.66.0.dist-info/RECORD +72 -0
- dataeval/_internal/metrics/base.py +0 -10
- dataeval/models/__init__.py +0 -15
- dataeval/models/tensorflow/__init__.py +0 -6
- dataeval-0.64.0.dist-info/RECORD +0 -60
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0
@@ -8,7 +8,9 @@ Licensed under Apache Software License (Apache 2.0)
|
|
8
8
|
|
9
9
|
# pyright: reportIncompatibleMethodOverride=false
|
10
10
|
|
11
|
-
from
|
11
|
+
from __future__ import annotations
|
12
|
+
|
13
|
+
from typing import Callable, cast
|
12
14
|
|
13
15
|
import keras
|
14
16
|
import tensorflow as tf
|
@@ -56,16 +58,17 @@ def eucl_cosim_features(x: tf.Tensor, y: tf.Tensor, max_eucl: float = 1e2) -> tf
|
|
56
58
|
|
57
59
|
Parameters
|
58
60
|
----------
|
59
|
-
x
|
61
|
+
x : tf.Tensor
|
60
62
|
Tensor used in feature computation.
|
61
|
-
y
|
63
|
+
y : tf.Tensor
|
62
64
|
Tensor used in feature computation.
|
63
|
-
max_eucl
|
65
|
+
max_eucl : float, default 1e2
|
64
66
|
Maximum value to clip relative Euclidean distance by.
|
65
67
|
|
66
68
|
Returns
|
67
69
|
-------
|
68
|
-
Tensor
|
70
|
+
tf.Tensor
|
71
|
+
Tensor concatenating the relative Euclidean distance and cosine similarity features.
|
69
72
|
"""
|
70
73
|
if len(x.shape) > 2 or len(y.shape) > 2:
|
71
74
|
x = cast(tf.Tensor, Flatten()(x))
|
@@ -78,9 +81,9 @@ def eucl_cosim_features(x: tf.Tensor, y: tf.Tensor, max_eucl: float = 1e2) -> tf
|
|
78
81
|
|
79
82
|
|
80
83
|
class Sampling(Layer):
|
81
|
-
"""Reparametrization trick
|
84
|
+
"""Reparametrization trick - Uses (z_mean, z_log_var) to sample the latent vector z."""
|
82
85
|
|
83
|
-
def call(self, inputs:
|
86
|
+
def call(self, inputs: tuple[tf.Tensor, tf.Tensor]) -> tf.Tensor:
|
84
87
|
"""
|
85
88
|
Sample z.
|
86
89
|
|
@@ -138,7 +141,7 @@ class EncoderVAE(Layer):
|
|
138
141
|
self.fc_log_var = Dense(latent_dim, activation=None)
|
139
142
|
self.sampling = Sampling()
|
140
143
|
|
141
|
-
def call(self, x: tf.Tensor) ->
|
144
|
+
def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
|
142
145
|
x = cast(tf.Tensor, self.encoder_net(x))
|
143
146
|
if len(x.shape) > 2:
|
144
147
|
x = cast(tf.Tensor, Flatten()(x))
|
@@ -173,9 +176,9 @@ class AE(keras.Model):
|
|
173
176
|
|
174
177
|
Parameters
|
175
178
|
----------
|
176
|
-
encoder_net
|
179
|
+
encoder_net : keras.Model
|
177
180
|
Layers for the encoder wrapped in a keras.Sequential class.
|
178
|
-
decoder_net
|
181
|
+
decoder_net : keras.Model
|
179
182
|
Layers for the decoder wrapped in a keras.Sequential class.
|
180
183
|
"""
|
181
184
|
|
@@ -196,13 +199,13 @@ class VAE(keras.Model):
|
|
196
199
|
|
197
200
|
Parameters
|
198
201
|
----------
|
199
|
-
encoder_net
|
202
|
+
encoder_net : keras.Model
|
200
203
|
Layers for the encoder wrapped in a keras.Sequential class.
|
201
|
-
decoder_net
|
204
|
+
decoder_net : keras.Model
|
202
205
|
Layers for the decoder wrapped in a keras.Sequential class.
|
203
|
-
latent_dim
|
206
|
+
latent_dim : int
|
204
207
|
Dimensionality of the latent space.
|
205
|
-
beta
|
208
|
+
beta : float, default 1.0
|
206
209
|
Beta parameter for KL-divergence loss term.
|
207
210
|
"""
|
208
211
|
|
@@ -214,7 +217,7 @@ class VAE(keras.Model):
|
|
214
217
|
self.latent_dim = latent_dim
|
215
218
|
|
216
219
|
def call(self, x: tf.Tensor) -> tf.Tensor:
|
217
|
-
z_mean, z_log_var, z = cast(
|
220
|
+
z_mean, z_log_var, z = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
|
218
221
|
x_recon = self.decoder(z)
|
219
222
|
# add KL divergence loss term
|
220
223
|
kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
|
@@ -228,15 +231,15 @@ class AEGMM(keras.Model):
|
|
228
231
|
|
229
232
|
Parameters
|
230
233
|
----------
|
231
|
-
encoder_net
|
234
|
+
encoder_net : keras.Model
|
232
235
|
Layers for the encoder wrapped in a keras.Sequential class.
|
233
|
-
decoder_net
|
236
|
+
decoder_net : keras.Model
|
234
237
|
Layers for the decoder wrapped in a keras.Sequential class.
|
235
|
-
gmm_density_net
|
238
|
+
gmm_density_net : keras.Model
|
236
239
|
Layers for the GMM network wrapped in a keras.Sequential class.
|
237
|
-
n_gmm
|
240
|
+
n_gmm : int
|
238
241
|
Number of components in GMM.
|
239
|
-
recon_features
|
242
|
+
recon_features : Callable, default eucl_cosim_features
|
240
243
|
Function to extract features from the reconstructed instance by the decoder.
|
241
244
|
"""
|
242
245
|
|
@@ -255,7 +258,7 @@ class AEGMM(keras.Model):
|
|
255
258
|
self.n_gmm = n_gmm
|
256
259
|
self.recon_features = recon_features
|
257
260
|
|
258
|
-
def call(self, x: tf.Tensor) ->
|
261
|
+
def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
|
259
262
|
enc = self.encoder(x)
|
260
263
|
x_recon = cast(tf.Tensor, self.decoder(enc))
|
261
264
|
recon_features = self.recon_features(x, x_recon)
|
@@ -270,19 +273,19 @@ class VAEGMM(keras.Model):
|
|
270
273
|
|
271
274
|
Parameters
|
272
275
|
----------
|
273
|
-
encoder_net
|
276
|
+
encoder_net : keras.Model
|
274
277
|
Layers for the encoder wrapped in a keras.Sequential class.
|
275
|
-
decoder_net
|
278
|
+
decoder_net : keras.Model
|
276
279
|
Layers for the decoder wrapped in a keras.Sequential class.
|
277
|
-
gmm_density_net
|
280
|
+
gmm_density_net : keras.Model
|
278
281
|
Layers for the GMM network wrapped in a keras.Sequential class.
|
279
|
-
n_gmm
|
282
|
+
n_gmm : int
|
280
283
|
Number of components in GMM.
|
281
|
-
latent_dim
|
284
|
+
latent_dim : int
|
282
285
|
Dimensionality of the latent space.
|
283
|
-
recon_features
|
286
|
+
recon_features : Callable, default eucl_cosim_features
|
284
287
|
Function to extract features from the reconstructed instance by the decoder.
|
285
|
-
beta
|
288
|
+
beta : float, default 1.0
|
286
289
|
Beta parameter for KL-divergence loss term.
|
287
290
|
"""
|
288
291
|
|
@@ -305,8 +308,8 @@ class VAEGMM(keras.Model):
|
|
305
308
|
self.recon_features = recon_features
|
306
309
|
self.beta = beta
|
307
310
|
|
308
|
-
def call(self, x: tf.Tensor) ->
|
309
|
-
enc_mean, enc_log_var, enc = cast(
|
311
|
+
def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
|
312
|
+
enc_mean, enc_log_var, enc = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
|
310
313
|
x_recon = cast(tf.Tensor, self.decoder(enc))
|
311
314
|
recon_features = self.recon_features(x, x_recon)
|
312
315
|
z = cast(tf.Tensor, tf.concat([enc, recon_features], -1))
|
@@ -6,7 +6,9 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
|
|
6
6
|
Licensed under Apache Software License (Apache 2.0)
|
7
7
|
"""
|
8
8
|
|
9
|
-
from
|
9
|
+
from __future__ import annotations
|
10
|
+
|
11
|
+
from typing import NamedTuple
|
10
12
|
|
11
13
|
import numpy as np
|
12
14
|
import tensorflow as tf
|
@@ -75,7 +77,7 @@ def gmm_energy(
|
|
75
77
|
z: tf.Tensor,
|
76
78
|
params: GaussianMixtureModelParams,
|
77
79
|
return_mean: bool = True,
|
78
|
-
) ->
|
80
|
+
) -> tuple[tf.Tensor, tf.Tensor]:
|
79
81
|
"""
|
80
82
|
Compute sample energy from Gaussian Mixture Model.
|
81
83
|
|
@@ -6,11 +6,13 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
|
|
6
6
|
Licensed under Apache Software License (Apache 2.0)
|
7
7
|
"""
|
8
8
|
|
9
|
-
from
|
9
|
+
from __future__ import annotations
|
10
|
+
|
11
|
+
from typing import Literal, cast
|
10
12
|
|
11
|
-
import numpy as np
|
12
13
|
import tensorflow as tf
|
13
14
|
from keras.layers import Flatten
|
15
|
+
from numpy.typing import NDArray
|
14
16
|
from tensorflow_probability.python.distributions.mvn_diag import MultivariateNormalDiag
|
15
17
|
from tensorflow_probability.python.distributions.mvn_tril import MultivariateNormalTriL
|
16
18
|
from tensorflow_probability.python.stats import covariance
|
@@ -20,27 +22,29 @@ from dataeval._internal.models.tensorflow.gmm import gmm_energy, gmm_params
|
|
20
22
|
|
21
23
|
class Elbo:
|
22
24
|
"""
|
23
|
-
Compute ELBO loss.
|
25
|
+
Compute ELBO loss.
|
26
|
+
|
27
|
+
The covariance matrix can be specified by passing the full covariance matrix, the matrix
|
24
28
|
diagonal, or a scale identity multiplier. Only one of these should be specified. If none are specified, the
|
25
29
|
identity matrix is used.
|
26
30
|
|
27
31
|
Parameters
|
28
32
|
----------
|
29
|
-
cov_type
|
33
|
+
cov_type : Union[Literal["cov_full", "cov_diag"], float], default 1.0
|
30
34
|
Full covariance matrix, diagonal variance matrix, or scale identity multiplier.
|
31
|
-
x
|
35
|
+
x : ArrayLike, optional - default None
|
32
36
|
Dataset used to calculate the covariance matrix. Required for full and diagonal covariance matrix types.
|
33
37
|
"""
|
34
38
|
|
35
39
|
def __init__(
|
36
40
|
self,
|
37
|
-
cov_type:
|
38
|
-
x:
|
41
|
+
cov_type: Literal["cov_full", "cov_diag"] | float = 1.0,
|
42
|
+
x: tf.Tensor | NDArray | None = None,
|
39
43
|
):
|
40
44
|
if isinstance(cov_type, float):
|
41
45
|
self.cov = ("sim", cov_type)
|
42
46
|
elif cov_type in ["cov_full", "cov_diag"]:
|
43
|
-
x_np:
|
47
|
+
x_np: NDArray = x.numpy() if tf.is_tensor(x) else x # type: ignore
|
44
48
|
cov = covariance(x_np.reshape(x_np.shape[0], -1)) # type: ignore py38
|
45
49
|
if cov_type == "cov_diag": # infer standard deviation from covariance matrix
|
46
50
|
cov = tf.math.sqrt(tf.linalg.diag_part(cov))
|
@@ -67,13 +71,13 @@ class LossGMM:
|
|
67
71
|
|
68
72
|
Parameters
|
69
73
|
----------
|
70
|
-
w_recon
|
74
|
+
w_recon : float, default 1e-7
|
71
75
|
Weight on elbo loss term.
|
72
|
-
w_energy
|
76
|
+
w_energy : float, default 0.1
|
73
77
|
Weight on sample energy loss term.
|
74
|
-
w_cov_diag
|
78
|
+
w_cov_diag : float, default 0.005
|
75
79
|
Weight on covariance regularizing loss term.
|
76
|
-
elbo
|
80
|
+
elbo : Elbo, optional - default None
|
77
81
|
ELBO loss function used to calculate w_recon.
|
78
82
|
"""
|
79
83
|
|
@@ -82,7 +86,7 @@ class LossGMM:
|
|
82
86
|
w_recon: float = 1e-7,
|
83
87
|
w_energy: float = 0.1,
|
84
88
|
w_cov_diag: float = 0.005,
|
85
|
-
elbo:
|
89
|
+
elbo: Elbo | None = None,
|
86
90
|
):
|
87
91
|
self.w_recon = w_recon
|
88
92
|
self.w_energy = w_energy
|
@@ -8,9 +8,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
|
|
8
8
|
Licensed under Apache Software License (Apache 2.0)
|
9
9
|
"""
|
10
10
|
|
11
|
+
from __future__ import annotations
|
12
|
+
|
11
13
|
import functools
|
12
14
|
import warnings
|
13
|
-
from typing import Optional
|
14
15
|
|
15
16
|
import keras
|
16
17
|
import numpy as np
|
@@ -238,47 +239,47 @@ class PixelCNN(distribution.Distribution):
|
|
238
239
|
|
239
240
|
Parameters
|
240
241
|
----------
|
241
|
-
image_shape
|
242
|
+
image_shape : tuple
|
242
243
|
3D `TensorShape` or tuple for the `[height, width, channels]` dimensions of the image.
|
243
|
-
conditional_shape
|
244
|
+
conditional_shape : tuple, optional - default None
|
244
245
|
`TensorShape` or tuple for the shape of the conditional input, or `None` if there is no conditional input.
|
245
|
-
num_resnet
|
246
|
+
num_resnet : int, default 5
|
246
247
|
The number of layers (shown in Figure 2 of [2]) within each highest-level block of Figure 2 of [1].
|
247
|
-
num_hierarchies
|
248
|
+
num_hierarchies : int, default 3
|
248
249
|
The number of highest-level blocks (separated by expansions/contractions of dimensions in Figure 2 of [1].)
|
249
|
-
num_filters
|
250
|
+
num_filters : int, default 160
|
250
251
|
The number of convolutional filters.
|
251
|
-
num_logistic_mix
|
252
|
+
num_logistic_mix : int, default 10
|
252
253
|
Number of components in the logistic mixture distribution.
|
253
|
-
receptive_field_dims
|
254
|
+
receptive_field_dims : tuple, default (3, 3)
|
254
255
|
Height and width in pixels of the receptive field of the convolutional layers above and to the left
|
255
256
|
of a given pixel. The width (second element of the tuple) should be odd. Figure 1 (middle) of [2]
|
256
257
|
shows a receptive field of (3, 5) (the row containing the current pixel is included in the height).
|
257
258
|
The default of (3, 3) was used to produce the results in [1].
|
258
|
-
dropout_p
|
259
|
+
dropout_p : float, default 0.0
|
259
260
|
The dropout probability. Should be between 0 and 1.
|
260
|
-
resnet_activation
|
261
|
+
resnet_activation : str, default "concat_elu"
|
261
262
|
The type of activation to use in the resnet blocks. May be 'concat_elu', 'elu', or 'relu'.
|
262
|
-
l2_weight
|
263
|
+
l2_weight : float, default 0.0
|
263
264
|
The L2 regularization weight.
|
264
|
-
use_weight_norm
|
265
|
+
use_weight_norm : bool, default True
|
265
266
|
If `True` then use weight normalization (works only in Eager mode).
|
266
|
-
use_data_init
|
267
|
+
use_data_init : bool, default True
|
267
268
|
If `True` then use data-dependent initialization (has no effect if `use_weight_norm` is `False`).
|
268
|
-
high
|
269
|
+
high : int, default 255
|
269
270
|
The maximum value of the input data (255 for an 8-bit image).
|
270
|
-
low
|
271
|
+
low : int, default 0
|
271
272
|
The minimum value of the input data.
|
272
|
-
dtype
|
273
|
+
dtype : tensorflow dtype, default tf.float32
|
273
274
|
Data type of the `Distribution`.
|
274
|
-
name
|
275
|
+
name : str, default "PixelCNN"
|
275
276
|
The name of the `Distribution`.
|
276
277
|
"""
|
277
278
|
|
278
279
|
def __init__(
|
279
280
|
self,
|
280
281
|
image_shape: tuple,
|
281
|
-
conditional_shape:
|
282
|
+
conditional_shape: tuple | None = None,
|
282
283
|
num_resnet: int = 5,
|
283
284
|
num_hierarchies: int = 3,
|
284
285
|
num_filters: int = 160,
|
@@ -6,20 +6,23 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
|
|
6
6
|
Licensed under Apache Software License (Apache 2.0)
|
7
7
|
"""
|
8
8
|
|
9
|
-
from
|
9
|
+
from __future__ import annotations
|
10
|
+
|
11
|
+
from typing import Callable, Iterable, cast
|
10
12
|
|
11
13
|
import keras
|
12
14
|
import numpy as np
|
13
15
|
import tensorflow as tf
|
16
|
+
from numpy.typing import NDArray
|
14
17
|
|
15
18
|
|
16
19
|
def trainer(
|
17
20
|
model: keras.Model,
|
18
|
-
x_train:
|
19
|
-
y_train:
|
20
|
-
loss_fn:
|
21
|
+
x_train: NDArray,
|
22
|
+
y_train: NDArray | None = None,
|
23
|
+
loss_fn: Callable[..., tf.Tensor] | None = None,
|
21
24
|
optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
|
22
|
-
preprocess_fn:
|
25
|
+
preprocess_fn: Callable[[tf.Tensor], tf.Tensor] | None = None,
|
23
26
|
epochs: int = 20,
|
24
27
|
reg_loss_fn: Callable[[keras.Model], tf.Tensor] = (lambda _: cast(tf.Tensor, tf.Variable(0, dtype=tf.float32))),
|
25
28
|
batch_size: int = 64,
|
@@ -69,14 +72,14 @@ def trainer(
|
|
69
72
|
dataset.on_epoch_end() # type: ignore py39
|
70
73
|
loss_val_ma = 0.0
|
71
74
|
for step, data in enumerate(dataset):
|
72
|
-
x, y =
|
75
|
+
x, y = data if isinstance(data, tuple) else (data, None)
|
73
76
|
if isinstance(preprocess_fn, Callable):
|
74
77
|
x = preprocess_fn(x)
|
75
78
|
with tf.GradientTape() as tape:
|
76
79
|
y_hat = model(x)
|
77
80
|
y = x if y is None else y
|
78
81
|
if isinstance(loss_fn, Callable):
|
79
|
-
args = [y] + list(y_hat) if isinstance(y_hat,
|
82
|
+
args = [y] + list(y_hat) if isinstance(y_hat, tuple) else [y, y_hat]
|
80
83
|
loss = loss_fn(*args)
|
81
84
|
else:
|
82
85
|
loss = cast(tf.Tensor, tf.constant(0.0, dtype=tf.float32))
|
@@ -6,8 +6,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
|
|
6
6
|
Licensed under Apache Software License (Apache 2.0)
|
7
7
|
"""
|
8
8
|
|
9
|
+
from __future__ import annotations
|
10
|
+
|
9
11
|
import math
|
10
|
-
from typing import Callable,
|
12
|
+
from typing import Callable, Union, cast
|
11
13
|
|
12
14
|
import keras as keras
|
13
15
|
import numpy as np
|
@@ -21,6 +23,7 @@ from keras.layers import (
|
|
21
23
|
InputLayer,
|
22
24
|
Reshape,
|
23
25
|
)
|
26
|
+
from numpy.typing import NDArray
|
24
27
|
from tensorflow._api.v2.nn import relu, softmax, tanh
|
25
28
|
|
26
29
|
from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
|
@@ -28,12 +31,12 @@ from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
|
|
28
31
|
|
29
32
|
|
30
33
|
def predict_batch(
|
31
|
-
x:
|
32
|
-
model:
|
34
|
+
x: list | NDArray | tf.Tensor,
|
35
|
+
model: Callable | keras.Model,
|
33
36
|
batch_size: int = int(1e10),
|
34
|
-
preprocess_fn:
|
35
|
-
dtype:
|
36
|
-
) ->
|
37
|
+
preprocess_fn: Callable | None = None,
|
38
|
+
dtype: type[np.generic] | tf.DType = np.float32,
|
39
|
+
) -> NDArray | tf.Tensor | tuple | list:
|
37
40
|
"""
|
38
41
|
Make batch predictions on a model.
|
39
42
|
|
@@ -58,7 +61,7 @@ def predict_batch(
|
|
58
61
|
n_minibatch = int(np.ceil(n / batch_size))
|
59
62
|
return_np = not isinstance(dtype, tf.DType)
|
60
63
|
return_list = False
|
61
|
-
preds:
|
64
|
+
preds: list | tuple = []
|
62
65
|
for i in range(n_minibatch):
|
63
66
|
istart, istop = i * batch_size, min((i + 1) * batch_size, n)
|
64
67
|
x_batch = x[istart:istop] # type: ignore
|
@@ -80,7 +83,7 @@ def predict_batch(
|
|
80
83
|
else:
|
81
84
|
raise TypeError(
|
82
85
|
f"Model output type {type(preds_tmp)} not supported. The model output "
|
83
|
-
f"type needs to be one of list, tuple,
|
86
|
+
f"type needs to be one of list, tuple, NDArray or tf.Tensor."
|
84
87
|
)
|
85
88
|
concat = np.concatenate if return_np else tf.concat
|
86
89
|
out = cast(
|
@@ -92,7 +95,7 @@ def predict_batch(
|
|
92
95
|
return out
|
93
96
|
|
94
97
|
|
95
|
-
def _get_default_encoder_net(input_shape:
|
98
|
+
def _get_default_encoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
|
96
99
|
return Sequential(
|
97
100
|
[
|
98
101
|
InputLayer(input_shape=input_shape),
|
@@ -105,7 +108,7 @@ def _get_default_encoder_net(input_shape: Tuple[int, int, int], encoding_dim: in
|
|
105
108
|
)
|
106
109
|
|
107
110
|
|
108
|
-
def _get_default_decoder_net(input_shape:
|
111
|
+
def _get_default_decoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
|
109
112
|
return Sequential(
|
110
113
|
[
|
111
114
|
InputLayer(input_shape=(encoding_dim,)),
|
@@ -121,26 +124,26 @@ def _get_default_decoder_net(input_shape: Tuple[int, int, int], encoding_dim: in
|
|
121
124
|
|
122
125
|
|
123
126
|
def create_model(
|
124
|
-
model_type:
|
125
|
-
input_shape:
|
126
|
-
encoding_dim:
|
127
|
-
n_gmm:
|
128
|
-
gmm_latent_dim:
|
127
|
+
model_type: AE | AEGMM | PixelCNN | VAE | VAEGMM,
|
128
|
+
input_shape: tuple[int, int, int],
|
129
|
+
encoding_dim: int | None = None,
|
130
|
+
n_gmm: int | None = None,
|
131
|
+
gmm_latent_dim: int | None = None,
|
129
132
|
):
|
130
133
|
"""
|
131
134
|
Create a default model for the specified model type.
|
132
135
|
|
133
136
|
Parameters
|
134
137
|
----------
|
135
|
-
model_type
|
138
|
+
model_type : Union[AE, AEGMM, PixelCNN, VAE, VAEGMM]
|
136
139
|
The model type to create.
|
137
|
-
input_shape
|
140
|
+
input_shape : Tuple[int, int, int]
|
138
141
|
The input shape of the data used.
|
139
|
-
encoding_dim
|
142
|
+
encoding_dim : int, optional - default None
|
140
143
|
The target encoding dimensionality.
|
141
|
-
n_gmm
|
144
|
+
n_gmm : int, optional - default None
|
142
145
|
Number of components used in the GMM layer.
|
143
|
-
gmm_latent_dim
|
146
|
+
gmm_latent_dim : int, optional - default None
|
144
147
|
Latent dimensionality of the GMM layer.
|
145
148
|
"""
|
146
149
|
input_dim = math.prod(input_shape)
|
@@ -0,0 +1,85 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import inspect
|
4
|
+
from datetime import datetime, timezone
|
5
|
+
from functools import wraps
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
from dataeval import __version__
|
10
|
+
|
11
|
+
|
12
|
+
class OutputMetadata:
|
13
|
+
_name: str
|
14
|
+
_execution_time: str
|
15
|
+
_execution_duration: float
|
16
|
+
_arguments: dict[str, str]
|
17
|
+
_state: dict[str, str]
|
18
|
+
_version: str
|
19
|
+
|
20
|
+
def dict(self) -> dict:
|
21
|
+
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
|
22
|
+
|
23
|
+
def meta(self) -> dict:
|
24
|
+
return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
|
25
|
+
|
26
|
+
|
27
|
+
def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
|
28
|
+
def decorator(fn):
|
29
|
+
@wraps(fn)
|
30
|
+
def wrapper(*args, **kwargs):
|
31
|
+
def fmt(v):
|
32
|
+
if np.isscalar(v):
|
33
|
+
return v
|
34
|
+
if hasattr(v, "shape"):
|
35
|
+
return f"{v.__class__.__name__}: shape={getattr(v, 'shape')}"
|
36
|
+
if hasattr(v, "__len__"):
|
37
|
+
return f"{v.__class__.__name__}: len={len(v)}"
|
38
|
+
return f"{v.__class__.__name__}"
|
39
|
+
|
40
|
+
time = datetime.now(timezone.utc)
|
41
|
+
result = fn(*args, **kwargs)
|
42
|
+
duration = (datetime.now(timezone.utc) - time).total_seconds()
|
43
|
+
fn_params = inspect.signature(fn).parameters
|
44
|
+
# set all params with defaults then update params with mapped arguments and explicit keyword args
|
45
|
+
arguments = {k: None if v.default is inspect.Parameter.empty else v.default for k, v in fn_params.items()}
|
46
|
+
arguments.update(zip(fn_params, args))
|
47
|
+
arguments.update(kwargs)
|
48
|
+
arguments = {k: fmt(v) for k, v in arguments.items()}
|
49
|
+
state = (
|
50
|
+
{k: fmt(getattr(args[0], k)) for k in state_attr if "self" in arguments}
|
51
|
+
if "self" in arguments and state_attr
|
52
|
+
else {}
|
53
|
+
)
|
54
|
+
name = args[0].__class__.__name__ if "self" in arguments else fn.__name__
|
55
|
+
metadata = {
|
56
|
+
"_name": f"{module_name}.{name}",
|
57
|
+
"_execution_time": time,
|
58
|
+
"_execution_duration": duration,
|
59
|
+
"_arguments": {k: v for k, v in arguments.items() if k != "self"},
|
60
|
+
"_state": state,
|
61
|
+
"_version": __version__,
|
62
|
+
}
|
63
|
+
for k, v in metadata.items():
|
64
|
+
object.__setattr__(result, k, v)
|
65
|
+
return result
|
66
|
+
|
67
|
+
return wrapper
|
68
|
+
|
69
|
+
return decorator
|
70
|
+
|
71
|
+
|
72
|
+
def populate_defaults(d: dict, c: type) -> dict:
|
73
|
+
def default(t):
|
74
|
+
t = (
|
75
|
+
t if isinstance(t, str) else t._name if hasattr(t, "_name") else t.__name__
|
76
|
+
).lower() # py3.9 : _name, py3.10 : __name__
|
77
|
+
if t.startswith("dict"):
|
78
|
+
return {}
|
79
|
+
if t.startswith("list"):
|
80
|
+
return []
|
81
|
+
if t.startswith("ndarray"):
|
82
|
+
return np.array([])
|
83
|
+
raise TypeError("Unrecognized annotation type")
|
84
|
+
|
85
|
+
return {k: d[k] if k in d else default(t) for k, t in c.__annotations__.items()}
|
dataeval/_internal/utils.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
from collections import defaultdict
|
2
|
-
from typing import Any
|
4
|
+
from typing import Any
|
3
5
|
|
4
6
|
from torch.utils.data import Dataset
|
5
7
|
|
6
8
|
|
7
|
-
def read_dataset(dataset: Dataset) ->
|
9
|
+
def read_dataset(dataset: Dataset) -> list[list[Any]]:
|
8
10
|
"""
|
9
11
|
Extract information from a dataset at each index into individual lists, one per information position
|
10
12
|
|
@@ -51,7 +53,7 @@ def read_dataset(dataset: Dataset) -> List[List[Any]]:
|
|
51
53
|
True
|
52
54
|
"""
|
53
55
|
|
54
|
-
ddict:
|
56
|
+
ddict: dict[int, list] = defaultdict(list)
|
55
57
|
|
56
58
|
for data in dataset:
|
57
59
|
# Convert to tuple if single return (e.g. images only)
|