dataeval 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +13 -9
- dataeval/_internal/detectors/clusterer.py +24 -22
- dataeval/_internal/detectors/drift/base.py +206 -26
- dataeval/_internal/detectors/drift/cvm.py +25 -23
- dataeval/_internal/detectors/drift/ks.py +28 -25
- dataeval/_internal/detectors/drift/mmd.py +30 -29
- dataeval/_internal/detectors/drift/torch.py +66 -58
- dataeval/_internal/detectors/drift/uncertainty.py +28 -28
- dataeval/_internal/detectors/duplicates.py +28 -18
- dataeval/_internal/detectors/ood/ae.py +15 -29
- dataeval/_internal/detectors/ood/aegmm.py +33 -27
- dataeval/_internal/detectors/ood/base.py +61 -43
- dataeval/_internal/detectors/ood/llr.py +27 -24
- dataeval/_internal/detectors/ood/vae.py +32 -31
- dataeval/_internal/detectors/ood/vaegmm.py +34 -28
- dataeval/_internal/detectors/{linter.py → outliers.py} +33 -27
- dataeval/_internal/flags.py +5 -3
- dataeval/_internal/interop.py +4 -2
- dataeval/_internal/metrics/balance.py +33 -4
- dataeval/_internal/metrics/ber.py +6 -4
- dataeval/_internal/metrics/diversity.py +45 -12
- dataeval/_internal/metrics/parity.py +114 -26
- dataeval/_internal/metrics/stats.py +154 -16
- dataeval/_internal/metrics/uap.py +28 -2
- dataeval/_internal/metrics/utils.py +20 -18
- dataeval/_internal/models/pytorch/autoencoder.py +127 -22
- dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
- dataeval/_internal/models/tensorflow/gmm.py +4 -2
- dataeval/_internal/models/tensorflow/losses.py +15 -11
- dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
- dataeval/_internal/models/tensorflow/trainer.py +8 -6
- dataeval/_internal/models/tensorflow/utils.py +21 -19
- dataeval/_internal/output.py +13 -10
- dataeval/_internal/utils.py +5 -3
- dataeval/_internal/workflows/sufficiency.py +42 -30
- dataeval/detectors/__init__.py +6 -25
- dataeval/detectors/drift/__init__.py +16 -0
- dataeval/detectors/drift/kernels/__init__.py +6 -0
- dataeval/detectors/drift/updates/__init__.py +3 -0
- dataeval/detectors/linters/__init__.py +5 -0
- dataeval/detectors/ood/__init__.py +11 -0
- dataeval/metrics/__init__.py +2 -26
- dataeval/metrics/bias/__init__.py +14 -0
- dataeval/metrics/estimators/__init__.py +9 -0
- dataeval/metrics/stats/__init__.py +6 -0
- dataeval/tensorflow/__init__.py +3 -0
- dataeval/tensorflow/loss/__init__.py +3 -0
- dataeval/tensorflow/models/__init__.py +5 -0
- dataeval/tensorflow/recon/__init__.py +3 -0
- dataeval/torch/__init__.py +3 -0
- dataeval/{models/torch → torch/models}/__init__.py +1 -2
- dataeval/torch/trainer/__init__.py +3 -0
- dataeval/utils/__init__.py +3 -6
- dataeval/workflows/__init__.py +2 -4
- {dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
- dataeval-0.66.0.dist-info/RECORD +72 -0
- dataeval/models/__init__.py +0 -15
- dataeval/models/tensorflow/__init__.py +0 -6
- dataeval-0.65.0.dist-info/RECORD +0 -60
- {dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0
@@ -6,8 +6,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
|
|
6
6
|
Licensed under Apache Software License (Apache 2.0)
|
7
7
|
"""
|
8
8
|
|
9
|
+
from __future__ import annotations
|
10
|
+
|
9
11
|
import math
|
10
|
-
from typing import Callable,
|
12
|
+
from typing import Callable, Union, cast
|
11
13
|
|
12
14
|
import keras as keras
|
13
15
|
import numpy as np
|
@@ -29,12 +31,12 @@ from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
|
|
29
31
|
|
30
32
|
|
31
33
|
def predict_batch(
|
32
|
-
x:
|
33
|
-
model:
|
34
|
+
x: list | NDArray | tf.Tensor,
|
35
|
+
model: Callable | keras.Model,
|
34
36
|
batch_size: int = int(1e10),
|
35
|
-
preprocess_fn:
|
36
|
-
dtype:
|
37
|
-
) ->
|
37
|
+
preprocess_fn: Callable | None = None,
|
38
|
+
dtype: type[np.generic] | tf.DType = np.float32,
|
39
|
+
) -> NDArray | tf.Tensor | tuple | list:
|
38
40
|
"""
|
39
41
|
Make batch predictions on a model.
|
40
42
|
|
@@ -59,7 +61,7 @@ def predict_batch(
|
|
59
61
|
n_minibatch = int(np.ceil(n / batch_size))
|
60
62
|
return_np = not isinstance(dtype, tf.DType)
|
61
63
|
return_list = False
|
62
|
-
preds:
|
64
|
+
preds: list | tuple = []
|
63
65
|
for i in range(n_minibatch):
|
64
66
|
istart, istop = i * batch_size, min((i + 1) * batch_size, n)
|
65
67
|
x_batch = x[istart:istop] # type: ignore
|
@@ -93,7 +95,7 @@ def predict_batch(
|
|
93
95
|
return out
|
94
96
|
|
95
97
|
|
96
|
-
def _get_default_encoder_net(input_shape:
|
98
|
+
def _get_default_encoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
|
97
99
|
return Sequential(
|
98
100
|
[
|
99
101
|
InputLayer(input_shape=input_shape),
|
@@ -106,7 +108,7 @@ def _get_default_encoder_net(input_shape: Tuple[int, int, int], encoding_dim: in
|
|
106
108
|
)
|
107
109
|
|
108
110
|
|
109
|
-
def _get_default_decoder_net(input_shape:
|
111
|
+
def _get_default_decoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
|
110
112
|
return Sequential(
|
111
113
|
[
|
112
114
|
InputLayer(input_shape=(encoding_dim,)),
|
@@ -122,26 +124,26 @@ def _get_default_decoder_net(input_shape: Tuple[int, int, int], encoding_dim: in
|
|
122
124
|
|
123
125
|
|
124
126
|
def create_model(
|
125
|
-
model_type:
|
126
|
-
input_shape:
|
127
|
-
encoding_dim:
|
128
|
-
n_gmm:
|
129
|
-
gmm_latent_dim:
|
127
|
+
model_type: AE | AEGMM | PixelCNN | VAE | VAEGMM,
|
128
|
+
input_shape: tuple[int, int, int],
|
129
|
+
encoding_dim: int | None = None,
|
130
|
+
n_gmm: int | None = None,
|
131
|
+
gmm_latent_dim: int | None = None,
|
130
132
|
):
|
131
133
|
"""
|
132
134
|
Create a default model for the specified model type.
|
133
135
|
|
134
136
|
Parameters
|
135
137
|
----------
|
136
|
-
model_type
|
138
|
+
model_type : Union[AE, AEGMM, PixelCNN, VAE, VAEGMM]
|
137
139
|
The model type to create.
|
138
|
-
input_shape
|
140
|
+
input_shape : Tuple[int, int, int]
|
139
141
|
The input shape of the data used.
|
140
|
-
encoding_dim
|
142
|
+
encoding_dim : int, optional - default None
|
141
143
|
The target encoding dimensionality.
|
142
|
-
n_gmm
|
144
|
+
n_gmm : int, optional - default None
|
143
145
|
Number of components used in the GMM layer.
|
144
|
-
gmm_latent_dim
|
146
|
+
gmm_latent_dim : int, optional - default None
|
145
147
|
Latent dimensionality of the GMM layer.
|
146
148
|
"""
|
147
149
|
input_dim = math.prod(input_shape)
|
dataeval/_internal/output.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import inspect
|
2
4
|
from datetime import datetime, timezone
|
3
5
|
from functools import wraps
|
4
|
-
from typing import Dict, List, Optional
|
5
6
|
|
6
7
|
import numpy as np
|
7
8
|
|
@@ -12,18 +13,18 @@ class OutputMetadata:
|
|
12
13
|
_name: str
|
13
14
|
_execution_time: str
|
14
15
|
_execution_duration: float
|
15
|
-
_arguments:
|
16
|
-
_state:
|
16
|
+
_arguments: dict[str, str]
|
17
|
+
_state: dict[str, str]
|
17
18
|
_version: str
|
18
19
|
|
19
|
-
def dict(self) ->
|
20
|
+
def dict(self) -> dict:
|
20
21
|
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
|
21
22
|
|
22
|
-
def meta(self) ->
|
23
|
+
def meta(self) -> dict:
|
23
24
|
return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
|
24
25
|
|
25
26
|
|
26
|
-
def set_metadata(module_name: str = "", state_attr:
|
27
|
+
def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
|
27
28
|
def decorator(fn):
|
28
29
|
@wraps(fn)
|
29
30
|
def wrapper(*args, **kwargs):
|
@@ -70,12 +71,14 @@ def set_metadata(module_name: str = "", state_attr: Optional[List[str]] = None):
|
|
70
71
|
|
71
72
|
def populate_defaults(d: dict, c: type) -> dict:
|
72
73
|
def default(t):
|
73
|
-
|
74
|
-
|
74
|
+
t = (
|
75
|
+
t if isinstance(t, str) else t._name if hasattr(t, "_name") else t.__name__
|
76
|
+
).lower() # py3.9 : _name, py3.10 : __name__
|
77
|
+
if t.startswith("dict"):
|
75
78
|
return {}
|
76
|
-
if
|
79
|
+
if t.startswith("list"):
|
77
80
|
return []
|
78
|
-
if
|
81
|
+
if t.startswith("ndarray"):
|
79
82
|
return np.array([])
|
80
83
|
raise TypeError("Unrecognized annotation type")
|
81
84
|
|
dataeval/_internal/utils.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
from collections import defaultdict
|
2
|
-
from typing import Any
|
4
|
+
from typing import Any
|
3
5
|
|
4
6
|
from torch.utils.data import Dataset
|
5
7
|
|
6
8
|
|
7
|
-
def read_dataset(dataset: Dataset) ->
|
9
|
+
def read_dataset(dataset: Dataset) -> list[list[Any]]:
|
8
10
|
"""
|
9
11
|
Extract information from a dataset at each index into individual lists, one for each information position
|
10
12
|
|
@@ -51,7 +53,7 @@ def read_dataset(dataset: Dataset) -> List[List[Any]]:
|
|
51
53
|
True
|
52
54
|
"""
|
53
55
|
|
54
|
-
ddict:
|
56
|
+
ddict: dict[int, list] = defaultdict(list)
|
55
57
|
|
56
58
|
for data in dataset:
|
57
59
|
# Convert to tuple if single return (e.g. images only)
|
@@ -1,6 +1,8 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import warnings
|
2
4
|
from dataclasses import dataclass
|
3
|
-
from typing import Any, Callable,
|
5
|
+
from typing import Any, Callable, Sequence, cast
|
4
6
|
|
5
7
|
import matplotlib.pyplot as plt
|
6
8
|
import numpy as np
|
@@ -19,17 +21,17 @@ class SufficiencyOutput(OutputMetadata):
|
|
19
21
|
"""
|
20
22
|
Attributes
|
21
23
|
----------
|
22
|
-
steps : NDArray
|
24
|
+
steps : NDArray
|
23
25
|
Array of sample sizes
|
24
|
-
params : Dict[str, NDArray
|
26
|
+
params : Dict[str, NDArray]
|
25
27
|
Inverse power curve coefficients for the line of best fit for each measure
|
26
|
-
measures : Dict[str, NDArray
|
28
|
+
measures : Dict[str, NDArray]
|
27
29
|
Average of values observed for each sample size step for each measure
|
28
30
|
"""
|
29
31
|
|
30
32
|
steps: NDArray[np.uint32]
|
31
|
-
params:
|
32
|
-
measures:
|
33
|
+
params: dict[str, NDArray[np.float64]]
|
34
|
+
measures: dict[str, NDArray[np.float64]]
|
33
35
|
|
34
36
|
def __post_init__(self):
|
35
37
|
c = len(self.steps)
|
@@ -73,7 +75,7 @@ def f_inv_out(y_i: NDArray, x: NDArray) -> NDArray[np.uint64]:
|
|
73
75
|
|
74
76
|
Returns
|
75
77
|
-------
|
76
|
-
NDArray
|
78
|
+
NDArray
|
77
79
|
Array of sample sizes
|
78
80
|
"""
|
79
81
|
n_i = ((y_i - x[2]) / x[0]) ** (-1 / x[1])
|
@@ -183,7 +185,7 @@ def inv_project_steps(params: NDArray, targets: NDArray) -> NDArray[np.uint64]:
|
|
183
185
|
|
184
186
|
Returns
|
185
187
|
-------
|
186
|
-
NDArray
|
188
|
+
NDArray
|
187
189
|
Array of sample sizes, or 0 if overflow
|
188
190
|
"""
|
189
191
|
steps = f_inv_out(1 - np.array(targets), params)
|
@@ -191,7 +193,7 @@ def inv_project_steps(params: NDArray, targets: NDArray) -> NDArray[np.uint64]:
|
|
191
193
|
return np.ceil(steps)
|
192
194
|
|
193
195
|
|
194
|
-
def get_curve_params(measures:
|
196
|
+
def get_curve_params(measures: dict[str, NDArray], ranges: NDArray, niter: int) -> dict[str, NDArray]:
|
195
197
|
"""Calculates and aggregates parameters for both single and multi-class metrics"""
|
196
198
|
output = {}
|
197
199
|
for name, measure in measures.items():
|
@@ -246,15 +248,15 @@ class Sufficiency:
|
|
246
248
|
----------
|
247
249
|
model : nn.Module
|
248
250
|
Model that will be trained for each subset of data
|
249
|
-
train_ds : Dataset
|
251
|
+
train_ds : torch.Dataset
|
250
252
|
Full training data that will be split for each run
|
251
|
-
test_ds : Dataset
|
253
|
+
test_ds : torch.Dataset
|
252
254
|
Data that will be used for every run's evaluation
|
253
255
|
train_fn : Callable[[nn.Module, Dataset, Sequence[int]], None]
|
254
256
|
Function which takes a model (torch.nn.Module), a dataset
|
255
257
|
(torch.utils.data.Dataset), indices to train on and executes model
|
256
258
|
training against the data.
|
257
|
-
eval_fn : Callable[[nn.Module, Dataset], Dict[str, float]]
|
259
|
+
eval_fn : Callable[[nn.Module, Dataset], Dict[str, float | NDArray]]
|
258
260
|
Function which takes a model (torch.nn.Module), a dataset
|
259
261
|
(torch.utils.data.Dataset) and returns a dictionary of metric
|
260
262
|
values (Dict[str, float | NDArray]) which are used to assess model performance
|
@@ -263,9 +265,9 @@ class Sufficiency:
|
|
263
265
|
Number of models to run over all subsets
|
264
266
|
substeps : int, default 5
|
265
267
|
Total number of dataset partitions that each model will train on
|
266
|
-
train_kwargs : Dict
|
268
|
+
train_kwargs : Dict | None, default None
|
267
269
|
Additional arguments required for custom training function
|
268
|
-
eval_kwargs : Dict
|
270
|
+
eval_kwargs : Dict | None, default None
|
269
271
|
Additional arguments required for custom evaluation function
|
270
272
|
"""
|
271
273
|
|
@@ -275,11 +277,11 @@ class Sufficiency:
|
|
275
277
|
train_ds: Dataset,
|
276
278
|
test_ds: Dataset,
|
277
279
|
train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
|
278
|
-
eval_fn: Callable[[nn.Module, Dataset],
|
280
|
+
eval_fn: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
|
279
281
|
runs: int = 1,
|
280
282
|
substeps: int = 5,
|
281
|
-
train_kwargs:
|
282
|
-
eval_kwargs:
|
283
|
+
train_kwargs: dict[str, Any] | None = None,
|
284
|
+
eval_kwargs: dict[str, Any] | None = None,
|
283
285
|
):
|
284
286
|
self.model = model
|
285
287
|
self.train_ds = train_ds
|
@@ -322,42 +324,42 @@ class Sufficiency:
|
|
322
324
|
@property
|
323
325
|
def eval_fn(
|
324
326
|
self,
|
325
|
-
) -> Callable[[nn.Module, Dataset],
|
327
|
+
) -> Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]]:
|
326
328
|
return self._eval_fn
|
327
329
|
|
328
330
|
@eval_fn.setter
|
329
331
|
def eval_fn(
|
330
332
|
self,
|
331
|
-
value: Callable[[nn.Module, Dataset],
|
333
|
+
value: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
|
332
334
|
):
|
333
335
|
if not callable(value):
|
334
336
|
raise TypeError("Must provide a callable for eval_fn.")
|
335
337
|
self._eval_fn = value
|
336
338
|
|
337
339
|
@property
|
338
|
-
def train_kwargs(self) ->
|
340
|
+
def train_kwargs(self) -> dict[str, Any]:
|
339
341
|
return self._train_kwargs
|
340
342
|
|
341
343
|
@train_kwargs.setter
|
342
|
-
def train_kwargs(self, value:
|
344
|
+
def train_kwargs(self, value: dict[str, Any] | None):
|
343
345
|
self._train_kwargs = {} if value is None else value
|
344
346
|
|
345
347
|
@property
|
346
|
-
def eval_kwargs(self) ->
|
348
|
+
def eval_kwargs(self) -> dict[str, Any]:
|
347
349
|
return self._eval_kwargs
|
348
350
|
|
349
351
|
@eval_kwargs.setter
|
350
|
-
def eval_kwargs(self, value:
|
352
|
+
def eval_kwargs(self, value: dict[str, Any] | None):
|
351
353
|
self._eval_kwargs = {} if value is None else value
|
352
354
|
|
353
355
|
@set_metadata("dataeval.workflows", ["runs", "substeps"])
|
354
|
-
def evaluate(self, eval_at:
|
356
|
+
def evaluate(self, eval_at: NDArray | None = None, niter: int = 1000) -> SufficiencyOutput:
|
355
357
|
"""
|
356
358
|
Creates data indices, trains models, and returns plotting data
|
357
359
|
|
358
360
|
Parameters
|
359
361
|
----------
|
360
|
-
eval_at :
|
362
|
+
eval_at : NDArray | None, default None
|
361
363
|
Specify this to collect accuracies over a specific set of dataset lengths, rather
|
362
364
|
than letting Sufficiency internally create the lengths to evaluate at.
|
363
365
|
niter : int, default 1000
|
@@ -367,7 +369,15 @@ class Sufficiency:
|
|
367
369
|
-------
|
368
370
|
SufficiencyOutput
|
369
371
|
Dataclass containing the average of each measure per substep
|
370
|
-
|
372
|
+
|
373
|
+
Examples
|
374
|
+
--------
|
375
|
+
>>> suff = Sufficiency(
|
376
|
+
... model=model, train_ds=train_ds, test_ds=test_ds, train_fn=train_fn, eval_fn=eval_fn, runs=3, substeps=5
|
377
|
+
... )
|
378
|
+
>>> suff.evaluate()
|
379
|
+
SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
|
380
|
+
""" # noqa: E501
|
371
381
|
if eval_at is not None:
|
372
382
|
ranges = eval_at
|
373
383
|
else:
|
@@ -416,7 +426,7 @@ class Sufficiency:
|
|
416
426
|
def project(
|
417
427
|
cls,
|
418
428
|
data: SufficiencyOutput,
|
419
|
-
projection:
|
429
|
+
projection: int | Sequence[int] | NDArray[np.uint],
|
420
430
|
) -> SufficiencyOutput:
|
421
431
|
"""Projects the measures for each value of X
|
422
432
|
|
@@ -424,7 +434,7 @@ class Sufficiency:
|
|
424
434
|
----------
|
425
435
|
data : SufficiencyOutput
|
426
436
|
Dataclass containing the average of each measure per substep
|
427
|
-
projection :
|
437
|
+
projection : int | Sequence[int] | NDArray[np.uint]
|
428
438
|
Step or steps to project
|
429
439
|
|
430
440
|
Returns
|
@@ -456,13 +466,15 @@ class Sufficiency:
|
|
456
466
|
return SufficiencyOutput(projection, data.params, output)
|
457
467
|
|
458
468
|
@classmethod
|
459
|
-
def plot(cls, data: SufficiencyOutput, class_names:
|
469
|
+
def plot(cls, data: SufficiencyOutput, class_names: Sequence[str] | None = None) -> list[Figure]:
|
460
470
|
"""Plotting function for data sufficiency tasks
|
461
471
|
|
462
472
|
Parameters
|
463
473
|
----------
|
464
474
|
data : SufficiencyOutput
|
465
475
|
Dataclass containing the average of each measure per substep
|
476
|
+
class_names : Sequence[str] | None, default None
|
477
|
+
List of class names
|
466
478
|
|
467
479
|
Returns
|
468
480
|
-------
|
@@ -505,7 +517,7 @@ class Sufficiency:
|
|
505
517
|
return plots
|
506
518
|
|
507
519
|
@classmethod
|
508
|
-
def inv_project(cls, targets:
|
520
|
+
def inv_project(cls, targets: dict[str, NDArray], data: SufficiencyOutput) -> dict[str, NDArray]:
|
509
521
|
"""
|
510
522
|
Calculate training samples needed to achieve target model metric values.
|
511
523
|
|
dataeval/detectors/__init__.py
CHANGED
@@ -1,29 +1,10 @@
|
|
1
|
-
from
|
1
|
+
from dataeval import _IS_TENSORFLOW_AVAILABLE
|
2
2
|
|
3
|
-
from
|
4
|
-
from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
|
5
|
-
from dataeval._internal.detectors.drift.cvm import DriftCVM
|
6
|
-
from dataeval._internal.detectors.drift.ks import DriftKS
|
7
|
-
from dataeval._internal.detectors.duplicates import Duplicates
|
8
|
-
from dataeval._internal.detectors.linter import Linter
|
3
|
+
from . import drift, linters
|
9
4
|
|
10
|
-
__all__ = ["
|
5
|
+
__all__ = ["drift", "linters"]
|
11
6
|
|
12
|
-
if
|
13
|
-
from
|
14
|
-
from dataeval._internal.detectors.drift.torch import GaussianRBF, preprocess_drift
|
15
|
-
from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
|
7
|
+
if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
|
8
|
+
from . import ood
|
16
9
|
|
17
|
-
__all__ += ["
|
18
|
-
|
19
|
-
if find_spec("tensorflow") is not None and find_spec("tensorflow_probability") is not None: # pragma: no cover
|
20
|
-
from dataeval._internal.detectors.ood.ae import OOD_AE
|
21
|
-
from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
|
22
|
-
from dataeval._internal.detectors.ood.base import OODScore
|
23
|
-
from dataeval._internal.detectors.ood.llr import OOD_LLR
|
24
|
-
from dataeval._internal.detectors.ood.vae import OOD_VAE
|
25
|
-
from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
|
26
|
-
|
27
|
-
__all__ += ["OOD_AE", "OOD_AEGMM", "OOD_LLR", "OODScore", "OOD_VAE", "OOD_VAEGMM"]
|
28
|
-
|
29
|
-
del find_spec
|
10
|
+
__all__ += ["ood"]
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from dataeval import _IS_TORCH_AVAILABLE
|
2
|
+
from dataeval._internal.detectors.drift.cvm import DriftCVM
|
3
|
+
from dataeval._internal.detectors.drift.ks import DriftKS
|
4
|
+
|
5
|
+
from . import updates
|
6
|
+
|
7
|
+
__all__ = ["DriftCVM", "DriftKS", "updates"]
|
8
|
+
|
9
|
+
if _IS_TORCH_AVAILABLE: # pragma: no cover
|
10
|
+
from dataeval._internal.detectors.drift.mmd import DriftMMD
|
11
|
+
from dataeval._internal.detectors.drift.torch import preprocess_drift
|
12
|
+
from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
|
13
|
+
|
14
|
+
from . import kernels
|
15
|
+
|
16
|
+
__all__ += ["DriftMMD", "DriftUncertainty", "kernels", "preprocess_drift"]
|
@@ -0,0 +1,11 @@
|
|
1
|
+
from dataeval import _IS_TENSORFLOW_AVAILABLE
|
2
|
+
|
3
|
+
if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
|
4
|
+
from dataeval._internal.detectors.ood.ae import OOD_AE
|
5
|
+
from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
|
6
|
+
from dataeval._internal.detectors.ood.base import OODOutput, OODScore
|
7
|
+
from dataeval._internal.detectors.ood.llr import OOD_LLR
|
8
|
+
from dataeval._internal.detectors.ood.vae import OOD_VAE
|
9
|
+
from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
|
10
|
+
|
11
|
+
__all__ = ["OODOutput", "OODScore", "OOD_AE", "OOD_AEGMM", "OOD_LLR", "OOD_VAE", "OOD_VAEGMM"]
|
dataeval/metrics/__init__.py
CHANGED
@@ -1,27 +1,3 @@
|
|
1
|
-
from
|
1
|
+
from . import bias, estimators, stats
|
2
2
|
|
3
|
-
__all__
|
4
|
-
|
5
|
-
from dataeval._internal.metrics.balance import balance, balance_classwise
|
6
|
-
from dataeval._internal.metrics.ber import ber
|
7
|
-
from dataeval._internal.metrics.coverage import coverage
|
8
|
-
from dataeval._internal.metrics.divergence import divergence
|
9
|
-
from dataeval._internal.metrics.diversity import diversity, diversity_classwise
|
10
|
-
from dataeval._internal.metrics.parity import parity, parity_metadata
|
11
|
-
from dataeval._internal.metrics.stats import channelstats, imagestats
|
12
|
-
from dataeval._internal.metrics.uap import uap
|
13
|
-
|
14
|
-
__all__ += [
|
15
|
-
"balance",
|
16
|
-
"balance_classwise",
|
17
|
-
"ber",
|
18
|
-
"channelstats",
|
19
|
-
"coverage",
|
20
|
-
"divergence",
|
21
|
-
"diversity",
|
22
|
-
"diversity_classwise",
|
23
|
-
"imagestats",
|
24
|
-
"parity",
|
25
|
-
"parity_metadata",
|
26
|
-
"uap",
|
27
|
-
]
|
3
|
+
__all__ = ["bias", "estimators", "stats"]
|
@@ -0,0 +1,14 @@
|
|
1
|
+
from dataeval._internal.metrics.balance import balance, balance_classwise
|
2
|
+
from dataeval._internal.metrics.coverage import coverage
|
3
|
+
from dataeval._internal.metrics.diversity import diversity, diversity_classwise
|
4
|
+
from dataeval._internal.metrics.parity import label_parity, parity
|
5
|
+
|
6
|
+
__all__ = [
|
7
|
+
"balance",
|
8
|
+
"balance_classwise",
|
9
|
+
"coverage",
|
10
|
+
"diversity",
|
11
|
+
"diversity_classwise",
|
12
|
+
"label_parity",
|
13
|
+
"parity",
|
14
|
+
]
|
@@ -0,0 +1,5 @@
|
|
1
|
+
from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
|
2
|
+
from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
|
3
|
+
from dataeval._internal.models.tensorflow.utils import create_model
|
4
|
+
|
5
|
+
__all__ = ["create_model", "AE", "AEGMM", "PixelCNN", "VAE", "VAEGMM"]
|
dataeval/utils/__init__.py
CHANGED
@@ -1,9 +1,6 @@
|
|
1
|
-
from
|
2
|
-
from typing import List
|
1
|
+
from dataeval import _IS_TORCH_AVAILABLE
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
if find_spec("torch") is not None: # pragma: no cover
|
3
|
+
if _IS_TORCH_AVAILABLE: # pragma: no cover
|
7
4
|
from dataeval._internal.utils import read_dataset
|
8
5
|
|
9
|
-
__all__
|
6
|
+
__all__ = ["read_dataset"]
|
dataeval/workflows/__init__.py
CHANGED
@@ -1,8 +1,6 @@
|
|
1
|
-
from
|
1
|
+
from dataeval import _IS_TORCH_AVAILABLE
|
2
2
|
|
3
|
-
if
|
3
|
+
if _IS_TORCH_AVAILABLE: # pragma: no cover
|
4
4
|
from dataeval._internal.workflows.sufficiency import Sufficiency
|
5
5
|
|
6
6
|
__all__ = ["Sufficiency"]
|
7
|
-
|
8
|
-
del find_spec
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: dataeval
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.66.0
|
4
4
|
Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
|
5
5
|
Home-page: https://dataeval.ai/
|
6
6
|
License: MIT
|