dataeval 0.64.0__py3-none-any.whl → 0.66.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +13 -9
- dataeval/_internal/detectors/clusterer.py +63 -49
- dataeval/_internal/detectors/drift/base.py +248 -51
- dataeval/_internal/detectors/drift/cvm.py +28 -26
- dataeval/_internal/detectors/drift/ks.py +31 -28
- dataeval/_internal/detectors/drift/mmd.py +62 -42
- dataeval/_internal/detectors/drift/torch.py +69 -60
- dataeval/_internal/detectors/drift/uncertainty.py +32 -32
- dataeval/_internal/detectors/duplicates.py +67 -31
- dataeval/_internal/detectors/ood/ae.py +15 -29
- dataeval/_internal/detectors/ood/aegmm.py +33 -27
- dataeval/_internal/detectors/ood/base.py +86 -47
- dataeval/_internal/detectors/ood/llr.py +34 -31
- dataeval/_internal/detectors/ood/vae.py +32 -31
- dataeval/_internal/detectors/ood/vaegmm.py +34 -28
- dataeval/_internal/detectors/{linter.py → outliers.py} +60 -38
- dataeval/_internal/flags.py +44 -21
- dataeval/_internal/interop.py +5 -3
- dataeval/_internal/metrics/balance.py +42 -5
- dataeval/_internal/metrics/ber.py +11 -8
- dataeval/_internal/metrics/coverage.py +15 -8
- dataeval/_internal/metrics/divergence.py +41 -7
- dataeval/_internal/metrics/diversity.py +57 -19
- dataeval/_internal/metrics/parity.py +141 -66
- dataeval/_internal/metrics/stats.py +330 -313
- dataeval/_internal/metrics/uap.py +33 -4
- dataeval/_internal/metrics/utils.py +79 -40
- dataeval/_internal/models/pytorch/autoencoder.py +127 -22
- dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
- dataeval/_internal/models/tensorflow/gmm.py +4 -2
- dataeval/_internal/models/tensorflow/losses.py +17 -13
- dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
- dataeval/_internal/models/tensorflow/trainer.py +10 -7
- dataeval/_internal/models/tensorflow/utils.py +23 -20
- dataeval/_internal/output.py +85 -0
- dataeval/_internal/utils.py +5 -3
- dataeval/_internal/workflows/sufficiency.py +122 -121
- dataeval/detectors/__init__.py +6 -25
- dataeval/detectors/drift/__init__.py +16 -0
- dataeval/detectors/drift/kernels/__init__.py +6 -0
- dataeval/detectors/drift/updates/__init__.py +3 -0
- dataeval/detectors/linters/__init__.py +5 -0
- dataeval/detectors/ood/__init__.py +11 -0
- dataeval/flags/__init__.py +2 -2
- dataeval/metrics/__init__.py +2 -26
- dataeval/metrics/bias/__init__.py +14 -0
- dataeval/metrics/estimators/__init__.py +9 -0
- dataeval/metrics/stats/__init__.py +6 -0
- dataeval/tensorflow/__init__.py +3 -0
- dataeval/tensorflow/loss/__init__.py +3 -0
- dataeval/tensorflow/models/__init__.py +5 -0
- dataeval/tensorflow/recon/__init__.py +3 -0
- dataeval/torch/__init__.py +3 -0
- dataeval/{models/torch → torch/models}/__init__.py +1 -2
- dataeval/torch/trainer/__init__.py +3 -0
- dataeval/utils/__init__.py +3 -6
- dataeval/workflows/__init__.py +2 -4
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
- dataeval-0.66.0.dist-info/RECORD +72 -0
- dataeval/_internal/metrics/base.py +0 -10
- dataeval/models/__init__.py +0 -15
- dataeval/models/tensorflow/__init__.py +0 -6
- dataeval-0.64.0.dist-info/RECORD +0 -60
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0
@@ -1,62 +1,88 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import warnings
|
2
|
-
from
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from typing import Any, Callable, Sequence, cast
|
3
6
|
|
4
7
|
import matplotlib.pyplot as plt
|
5
8
|
import numpy as np
|
6
9
|
import torch
|
7
10
|
import torch.nn as nn
|
8
11
|
from matplotlib.figure import Figure
|
12
|
+
from numpy.typing import NDArray
|
9
13
|
from scipy.optimize import basinhopping
|
10
14
|
from torch.utils.data import Dataset
|
11
15
|
|
12
|
-
from dataeval._internal.
|
16
|
+
from dataeval._internal.output import OutputMetadata, set_metadata
|
17
|
+
|
13
18
|
|
14
|
-
|
15
|
-
|
19
|
+
@dataclass(frozen=True)
|
20
|
+
class SufficiencyOutput(OutputMetadata):
|
21
|
+
"""
|
22
|
+
Attributes
|
23
|
+
----------
|
24
|
+
steps : NDArray
|
25
|
+
Array of sample sizes
|
26
|
+
params : Dict[str, NDArray]
|
27
|
+
Inverse power curve coefficients for the line of best fit for each measure
|
28
|
+
measures : Dict[str, NDArray]
|
29
|
+
Average of values observed for each sample size step for each measure
|
30
|
+
"""
|
16
31
|
|
17
|
-
|
32
|
+
steps: NDArray[np.uint32]
|
33
|
+
params: dict[str, NDArray[np.float64]]
|
34
|
+
measures: dict[str, NDArray[np.float64]]
|
18
35
|
|
36
|
+
def __post_init__(self):
|
37
|
+
c = len(self.steps)
|
38
|
+
if set(self.params) != set(self.measures):
|
39
|
+
raise ValueError("params and measures have a key mismatch")
|
40
|
+
for m, v in self.measures.items():
|
41
|
+
c_v = v.shape[1] if v.ndim > 1 else len(v)
|
42
|
+
if c != c_v:
|
43
|
+
raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
|
19
44
|
|
20
|
-
|
45
|
+
|
46
|
+
def f_out(n_i: NDArray, x: NDArray) -> NDArray:
|
21
47
|
"""
|
22
48
|
Calculates the line of best fit based on its free parameters
|
23
49
|
|
24
50
|
Parameters
|
25
51
|
----------
|
26
|
-
n_i :
|
52
|
+
n_i : NDArray
|
27
53
|
Array of sample sizes
|
28
|
-
x :
|
54
|
+
x : NDArray
|
29
55
|
Array of inverse power curve coefficients
|
30
56
|
|
31
57
|
Returns
|
32
58
|
-------
|
33
|
-
|
59
|
+
NDArray
|
34
60
|
Data points for the line of best fit
|
35
61
|
"""
|
36
62
|
return x[0] * n_i ** (-x[1]) + x[2]
|
37
63
|
|
38
64
|
|
39
|
-
def f_inv_out(y_i:
|
65
|
+
def f_inv_out(y_i: NDArray, x: NDArray) -> NDArray[np.uint64]:
|
40
66
|
"""
|
41
67
|
Inverse function for f_out()
|
42
68
|
|
43
69
|
Parameters
|
44
70
|
----------
|
45
|
-
y_i :
|
71
|
+
y_i : NDArray
|
46
72
|
Data points for the line of best fit
|
47
|
-
x :
|
73
|
+
x : NDArray
|
48
74
|
Array of inverse power curve coefficients
|
49
75
|
|
50
76
|
Returns
|
51
77
|
-------
|
52
|
-
|
78
|
+
NDArray
|
53
79
|
Array of sample sizes
|
54
80
|
"""
|
55
81
|
n_i = ((y_i - x[2]) / x[0]) ** (-1 / x[1])
|
56
|
-
return n_i
|
82
|
+
return np.asarray(n_i, dtype=np.uint64)
|
57
83
|
|
58
84
|
|
59
|
-
def calc_params(p_i:
|
85
|
+
def calc_params(p_i: NDArray, n_i: NDArray, niter: int) -> NDArray:
|
60
86
|
"""
|
61
87
|
Retrieves the inverse power curve coefficients for the line of best fit.
|
62
88
|
Global minimization is done via basin hopping. More info on this algorithm
|
@@ -64,9 +90,9 @@ def calc_params(p_i: np.ndarray, n_i: np.ndarray, niter: int) -> np.ndarray:
|
|
64
90
|
|
65
91
|
Parameters
|
66
92
|
----------
|
67
|
-
p_i :
|
93
|
+
p_i : NDArray
|
68
94
|
Array of corresponding losses
|
69
|
-
n_i :
|
95
|
+
n_i : NDArray
|
70
96
|
Array of sample sizes
|
71
97
|
niter : int
|
72
98
|
Number of iterations to perform in the basin-hopping
|
@@ -74,7 +100,7 @@ def calc_params(p_i: np.ndarray, n_i: np.ndarray, niter: int) -> np.ndarray:
|
|
74
100
|
|
75
101
|
Returns
|
76
102
|
-------
|
77
|
-
|
103
|
+
NDArray
|
78
104
|
Array of parameters to recreate line of best fit
|
79
105
|
"""
|
80
106
|
|
@@ -128,60 +154,46 @@ def validate_dataset_len(dataset: Dataset) -> int:
|
|
128
154
|
return length
|
129
155
|
|
130
156
|
|
131
|
-
def
|
132
|
-
"""Ensure the sufficiency data used is not malformed"""
|
133
|
-
if not all(key in data for key in [STEPS_KEY, PARAMS_KEY]):
|
134
|
-
raise KeyError(f"{STEPS_KEY} and {PARAMS_KEY} are required keys for Sufficiency output.")
|
135
|
-
c = len(data[STEPS_KEY])
|
136
|
-
for m, v in data.items():
|
137
|
-
if m in [STEPS_KEY, PARAMS_KEY]:
|
138
|
-
continue
|
139
|
-
v = cast(np.ndarray, v)
|
140
|
-
c_v = v.shape[1] if v.ndim > 1 else len(v)
|
141
|
-
if c != c_v:
|
142
|
-
raise ValueError("f{m} does not contain the expected number ({c}) of data points.")
|
143
|
-
|
144
|
-
|
145
|
-
def project_steps(params: np.ndarray, projection: np.ndarray) -> np.ndarray:
|
157
|
+
def project_steps(params: NDArray, projection: NDArray) -> NDArray:
|
146
158
|
"""Projects the measures for each value of X
|
147
159
|
|
148
160
|
Parameters
|
149
161
|
----------
|
150
|
-
params :
|
162
|
+
params : NDArray
|
151
163
|
Inverse power curve coefficients used to calculate projection
|
152
|
-
projection :
|
164
|
+
projection : NDArray
|
153
165
|
Steps to extrapolate
|
154
166
|
|
155
167
|
Returns
|
156
168
|
-------
|
157
|
-
|
169
|
+
NDArray
|
158
170
|
Extrapolated measure values at each projection step
|
159
171
|
|
160
172
|
"""
|
161
173
|
return 1 - f_out(projection, params)
|
162
174
|
|
163
175
|
|
164
|
-
def inv_project_steps(params:
|
176
|
+
def inv_project_steps(params: NDArray, targets: NDArray) -> NDArray[np.uint64]:
|
165
177
|
"""Inverse function for project_steps()
|
166
178
|
|
167
179
|
Parameters
|
168
180
|
----------
|
169
|
-
params :
|
181
|
+
params : NDArray
|
170
182
|
Inverse power curve coefficients used to calculate projection
|
171
|
-
targets :
|
183
|
+
targets : NDArray
|
172
184
|
Desired measure values
|
173
185
|
|
174
186
|
Returns
|
175
187
|
-------
|
176
|
-
|
188
|
+
NDArray
|
177
189
|
Array of sample sizes, or 0 if overflow
|
178
190
|
"""
|
179
191
|
steps = f_inv_out(1 - np.array(targets), params)
|
180
192
|
steps[np.isnan(steps)] = 0
|
181
|
-
return np.ceil(steps)
|
193
|
+
return np.ceil(steps)
|
182
194
|
|
183
195
|
|
184
|
-
def get_curve_params(measures:
|
196
|
+
def get_curve_params(measures: dict[str, NDArray], ranges: NDArray, niter: int) -> dict[str, NDArray]:
|
185
197
|
"""Calculates and aggregates parameters for both single and multi-class metrics"""
|
186
198
|
output = {}
|
187
199
|
for name, measure in measures.items():
|
@@ -198,10 +210,10 @@ def get_curve_params(measures: Dict[str, np.ndarray], ranges: np.ndarray, niter:
|
|
198
210
|
|
199
211
|
def plot_measure(
|
200
212
|
name: str,
|
201
|
-
steps:
|
202
|
-
measure:
|
203
|
-
params:
|
204
|
-
projection:
|
213
|
+
steps: NDArray,
|
214
|
+
measure: NDArray,
|
215
|
+
params: NDArray,
|
216
|
+
projection: NDArray,
|
205
217
|
) -> Figure:
|
206
218
|
fig = plt.figure()
|
207
219
|
fig = cast(Figure, fig)
|
@@ -228,7 +240,7 @@ def plot_measure(
|
|
228
240
|
return fig
|
229
241
|
|
230
242
|
|
231
|
-
class Sufficiency
|
243
|
+
class Sufficiency:
|
232
244
|
"""
|
233
245
|
Project dataset sufficiency using given a model and evaluation criteria
|
234
246
|
|
@@ -236,15 +248,15 @@ class Sufficiency(EvaluateMixin):
|
|
236
248
|
----------
|
237
249
|
model : nn.Module
|
238
250
|
Model that will be trained for each subset of data
|
239
|
-
train_ds : Dataset
|
251
|
+
train_ds : torch.Dataset
|
240
252
|
Full training data that will be split for each run
|
241
|
-
test_ds : Dataset
|
253
|
+
test_ds : torch.Dataset
|
242
254
|
Data that will be used for every run's evaluation
|
243
255
|
train_fn : Callable[[nn.Module, Dataset, Sequence[int]], None]
|
244
256
|
Function which takes a model (torch.nn.Module), a dataset
|
245
257
|
(torch.utils.data.Dataset), indices to train on and executes model
|
246
258
|
training against the data.
|
247
|
-
eval_fn : Callable[[nn.Module, Dataset], Dict[str, float]]
|
259
|
+
eval_fn : Callable[[nn.Module, Dataset], Dict[str, float | NDArray]]
|
248
260
|
Function which takes a model (torch.nn.Module), a dataset
|
249
261
|
(torch.utils.data.Dataset) and returns a dictionary of metric
|
250
262
|
values (Dict[str, float]) which is used to assess model performance
|
@@ -253,9 +265,9 @@ class Sufficiency(EvaluateMixin):
|
|
253
265
|
Number of models to run over all subsets
|
254
266
|
substeps : int, default 5
|
255
267
|
Total number of dataset partitions that each model will train on
|
256
|
-
train_kwargs : Dict
|
268
|
+
train_kwargs : Dict | None, default None
|
257
269
|
Additional arguments required for custom training function
|
258
|
-
eval_kwargs : Dict
|
270
|
+
eval_kwargs : Dict | None, default None
|
259
271
|
Additional arguments required for custom evaluation function
|
260
272
|
"""
|
261
273
|
|
@@ -265,11 +277,11 @@ class Sufficiency(EvaluateMixin):
|
|
265
277
|
train_ds: Dataset,
|
266
278
|
test_ds: Dataset,
|
267
279
|
train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
|
268
|
-
eval_fn: Callable[[nn.Module, Dataset],
|
280
|
+
eval_fn: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
|
269
281
|
runs: int = 1,
|
270
282
|
substeps: int = 5,
|
271
|
-
train_kwargs:
|
272
|
-
eval_kwargs:
|
283
|
+
train_kwargs: dict[str, Any] | None = None,
|
284
|
+
eval_kwargs: dict[str, Any] | None = None,
|
273
285
|
):
|
274
286
|
self.model = model
|
275
287
|
self.train_ds = train_ds
|
@@ -312,41 +324,42 @@ class Sufficiency(EvaluateMixin):
|
|
312
324
|
@property
|
313
325
|
def eval_fn(
|
314
326
|
self,
|
315
|
-
) -> Callable[[nn.Module, Dataset],
|
327
|
+
) -> Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]]:
|
316
328
|
return self._eval_fn
|
317
329
|
|
318
330
|
@eval_fn.setter
|
319
331
|
def eval_fn(
|
320
332
|
self,
|
321
|
-
value: Callable[[nn.Module, Dataset],
|
333
|
+
value: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
|
322
334
|
):
|
323
335
|
if not callable(value):
|
324
336
|
raise TypeError("Must provide a callable for eval_fn.")
|
325
337
|
self._eval_fn = value
|
326
338
|
|
327
339
|
@property
|
328
|
-
def train_kwargs(self) ->
|
340
|
+
def train_kwargs(self) -> dict[str, Any]:
|
329
341
|
return self._train_kwargs
|
330
342
|
|
331
343
|
@train_kwargs.setter
|
332
|
-
def train_kwargs(self, value:
|
344
|
+
def train_kwargs(self, value: dict[str, Any] | None):
|
333
345
|
self._train_kwargs = {} if value is None else value
|
334
346
|
|
335
347
|
@property
|
336
|
-
def eval_kwargs(self) ->
|
348
|
+
def eval_kwargs(self) -> dict[str, Any]:
|
337
349
|
return self._eval_kwargs
|
338
350
|
|
339
351
|
@eval_kwargs.setter
|
340
|
-
def eval_kwargs(self, value:
|
352
|
+
def eval_kwargs(self, value: dict[str, Any] | None):
|
341
353
|
self._eval_kwargs = {} if value is None else value
|
342
354
|
|
343
|
-
|
355
|
+
@set_metadata("dataeval.workflows", ["runs", "substeps"])
|
356
|
+
def evaluate(self, eval_at: NDArray | None = None, niter: int = 1000) -> SufficiencyOutput:
|
344
357
|
"""
|
345
358
|
Creates data indices, trains models, and returns plotting data
|
346
359
|
|
347
360
|
Parameters
|
348
361
|
----------
|
349
|
-
eval_at :
|
362
|
+
eval_at : NDArray | None, default None
|
350
363
|
Specify this to collect accuracies over a specific set of dataset lengths, rather
|
351
364
|
than letting Sufficiency internally create the lengths to evaluate at.
|
352
365
|
niter : int, default 1000
|
@@ -354,9 +367,17 @@ class Sufficiency(EvaluateMixin):
|
|
354
367
|
|
355
368
|
Returns
|
356
369
|
-------
|
357
|
-
|
358
|
-
|
359
|
-
|
370
|
+
SufficiencyOutput
|
371
|
+
Dataclass containing the average of each measure per substep
|
372
|
+
|
373
|
+
Examples
|
374
|
+
--------
|
375
|
+
>>> suff = Sufficiency(
|
376
|
+
... model=model, train_ds=train_ds, test_ds=test_ds, train_fn=train_fn, eval_fn=eval_fn, runs=3, substeps=5
|
377
|
+
... )
|
378
|
+
>>> suff.evaluate()
|
379
|
+
SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
|
380
|
+
""" # noqa: E501
|
360
381
|
if eval_at is not None:
|
361
382
|
ranges = eval_at
|
362
383
|
else:
|
@@ -365,7 +386,7 @@ class Sufficiency(EvaluateMixin):
|
|
365
386
|
self._length,
|
366
387
|
self.substeps,
|
367
388
|
) # Start, Stop, Num steps
|
368
|
-
ranges = np.geomspace(*geomshape
|
389
|
+
ranges = np.geomspace(*geomshape, dtype=np.uint32)
|
369
390
|
substeps = len(ranges)
|
370
391
|
measures = {}
|
371
392
|
|
@@ -381,7 +402,7 @@ class Sufficiency(EvaluateMixin):
|
|
381
402
|
self.train_fn(
|
382
403
|
model,
|
383
404
|
self.train_ds,
|
384
|
-
indices[:substep].tolist(),
|
405
|
+
indices[: int(substep)].tolist(),
|
385
406
|
**self.train_kwargs,
|
386
407
|
)
|
387
408
|
|
@@ -390,9 +411,6 @@ class Sufficiency(EvaluateMixin):
|
|
390
411
|
|
391
412
|
# Keep track of each measures values
|
392
413
|
for name, value in measure.items():
|
393
|
-
if name in [STEPS_KEY, PARAMS_KEY]:
|
394
|
-
raise KeyError(f"Cannot use reserved name '{name}' as a metric name.")
|
395
|
-
|
396
414
|
# Sum result into current substep iteration to be averaged later
|
397
415
|
value = np.array(value).ravel()
|
398
416
|
if name not in measures:
|
@@ -402,66 +420,61 @@ class Sufficiency(EvaluateMixin):
|
|
402
420
|
# The mean for each measure must be calculated before being returned
|
403
421
|
measures = {k: (v / self.runs).T for k, v in measures.items()}
|
404
422
|
params_output = get_curve_params(measures, ranges, niter)
|
405
|
-
|
406
|
-
output.update(measures)
|
407
|
-
return output
|
423
|
+
return SufficiencyOutput(ranges, params_output, measures)
|
408
424
|
|
409
425
|
@classmethod
|
410
426
|
def project(
|
411
427
|
cls,
|
412
428
|
data: SufficiencyOutput,
|
413
|
-
projection:
|
414
|
-
) ->
|
429
|
+
projection: int | Sequence[int] | NDArray[np.uint],
|
430
|
+
) -> SufficiencyOutput:
|
415
431
|
"""Projects the measures for each value of X
|
416
432
|
|
417
433
|
Parameters
|
418
434
|
----------
|
419
|
-
data :
|
435
|
+
data : SufficiencyOutput
|
420
436
|
Dataclass containing the average of each measure per substep
|
421
|
-
|
437
|
+
projection : int | Sequence[int] | NDArray[np.uint]
|
422
438
|
Step or steps to project
|
423
|
-
|
424
|
-
|
425
|
-
|
439
|
+
|
440
|
+
Returns
|
441
|
+
-------
|
442
|
+
SufficiencyOutput
|
443
|
+
Dataclass containing the projected measures per projection
|
426
444
|
|
427
445
|
Raises
|
428
446
|
------
|
429
|
-
KeyError
|
430
|
-
If STEPS_KEY or measure is not a valid key
|
431
447
|
ValueError
|
432
448
|
If the length of data points in the measures do not match
|
433
449
|
If the steps are not int, Sequence[int] or an ndarray
|
434
450
|
"""
|
435
|
-
validate_output(data)
|
436
451
|
projection = [projection] if isinstance(projection, int) else projection
|
437
452
|
projection = np.array(projection) if isinstance(projection, Sequence) else projection
|
438
453
|
if not isinstance(projection, np.ndarray):
|
439
454
|
raise ValueError("'steps' must be an int, Sequence[int] or ndarray")
|
440
455
|
|
441
456
|
output = {}
|
442
|
-
|
443
|
-
for name, measures in data.items():
|
444
|
-
if name in [STEPS_KEY, PARAMS_KEY]:
|
445
|
-
continue
|
446
|
-
measures = cast(np.ndarray, measures)
|
457
|
+
for name, measures in data.measures.items():
|
447
458
|
if measures.ndim > 1:
|
448
459
|
result = []
|
449
460
|
for i in range(len(measures)):
|
450
|
-
projected = project_steps(data[
|
461
|
+
projected = project_steps(data.params[name][i], projection)
|
451
462
|
result.append(projected)
|
452
|
-
output[name] = np.array(result)
|
463
|
+
output[name] = np.array(result)
|
453
464
|
else:
|
454
|
-
output[name] = project_steps(data[
|
455
|
-
return output
|
465
|
+
output[name] = project_steps(data.params[name], projection)
|
466
|
+
return SufficiencyOutput(projection, data.params, output)
|
456
467
|
|
457
468
|
@classmethod
|
458
|
-
def plot(cls, data: SufficiencyOutput, class_names:
|
469
|
+
def plot(cls, data: SufficiencyOutput, class_names: Sequence[str] | None = None) -> list[Figure]:
|
459
470
|
"""Plotting function for data sufficiency tasks
|
460
471
|
|
461
472
|
Parameters
|
462
473
|
----------
|
463
|
-
data :
|
474
|
+
data : SufficiencyOutput
|
464
475
|
Dataclass containing the average of each measure per substep
|
476
|
+
class_names : Sequence[str] | None, default None
|
477
|
+
List of class names
|
465
478
|
|
466
479
|
Returns
|
467
480
|
-------
|
@@ -470,29 +483,19 @@ class Sufficiency(EvaluateMixin):
|
|
470
483
|
|
471
484
|
Raises
|
472
485
|
------
|
473
|
-
KeyError
|
474
|
-
If STEPS_KEY or measure is not a valid key
|
475
486
|
ValueError
|
476
487
|
If the length of data points in the measures do not match
|
477
488
|
"""
|
478
|
-
validate_output(data)
|
479
|
-
|
480
|
-
# X, y data
|
481
|
-
steps = cast(np.ndarray, data[STEPS_KEY])
|
482
|
-
|
483
489
|
# Extrapolation parameters
|
484
|
-
last_X = steps[-1]
|
485
|
-
geomshape = (0.01 * last_X, last_X * 4, len(steps))
|
490
|
+
last_X = data.steps[-1]
|
491
|
+
geomshape = (0.01 * last_X, last_X * 4, len(data.steps))
|
486
492
|
extrapolated = np.geomspace(*geomshape).astype(np.int64)
|
487
493
|
|
488
494
|
# Stores all plots
|
489
495
|
plots = []
|
490
496
|
|
491
497
|
# Create a plot for each measure on one figure
|
492
|
-
for name, measures in data.items():
|
493
|
-
if name in [STEPS_KEY, PARAMS_KEY]:
|
494
|
-
continue
|
495
|
-
measures = cast(np.ndarray, measures)
|
498
|
+
for name, measures in data.measures.items():
|
496
499
|
if measures.ndim > 1:
|
497
500
|
if class_names is not None and len(measures) != len(class_names):
|
498
501
|
raise IndexError("Class name count does not align with measures")
|
@@ -500,56 +503,54 @@ class Sufficiency(EvaluateMixin):
|
|
500
503
|
class_name = str(i) if class_names is None else class_names[i]
|
501
504
|
fig = plot_measure(
|
502
505
|
f"{name}_{class_name}",
|
503
|
-
steps,
|
506
|
+
data.steps,
|
504
507
|
measure,
|
505
|
-
data[
|
508
|
+
data.params[name][i],
|
506
509
|
extrapolated,
|
507
510
|
)
|
508
511
|
plots.append(fig)
|
509
512
|
|
510
513
|
else:
|
511
|
-
fig = plot_measure(name, steps, measures, data[
|
514
|
+
fig = plot_measure(name, data.steps, measures, data.params[name], extrapolated)
|
512
515
|
plots.append(fig)
|
513
516
|
|
514
517
|
return plots
|
515
518
|
|
516
519
|
@classmethod
|
517
|
-
def inv_project(cls, targets:
|
520
|
+
def inv_project(cls, targets: dict[str, NDArray], data: SufficiencyOutput) -> dict[str, NDArray]:
|
518
521
|
"""
|
519
522
|
Calculate training samples needed to achieve target model metric values.
|
520
523
|
|
521
524
|
Parameters
|
522
525
|
----------
|
523
|
-
targets : Dict[str,
|
526
|
+
targets : Dict[str, NDArray]
|
524
527
|
Dictionary of target metric scores (from 0.0 to 1.0) that we want
|
525
528
|
to achieve, where the key is the name of the metric.
|
526
529
|
|
527
|
-
data :
|
530
|
+
data : SufficiencyOutput
|
528
531
|
Dataclass containing the average of each measure per substep
|
529
532
|
|
530
533
|
Returns
|
531
534
|
-------
|
532
|
-
Dict[str,
|
535
|
+
Dict[str, NDArray]
|
533
536
|
List of the number of training samples needed to achieve each
|
534
537
|
corresponding entry in targets
|
535
538
|
"""
|
536
539
|
|
537
|
-
validate_output(data)
|
538
|
-
|
539
540
|
projection = {}
|
540
541
|
|
541
542
|
for name, target in targets.items():
|
542
|
-
if name not in data:
|
543
|
+
if name not in data.measures:
|
543
544
|
continue
|
544
545
|
|
545
|
-
measure =
|
546
|
+
measure = data.measures[name]
|
546
547
|
if measure.ndim > 1:
|
547
548
|
projection[name] = np.zeros((len(measure), len(target)))
|
548
549
|
for i in range(len(measure)):
|
549
550
|
projection[name][i] = inv_project_steps(
|
550
|
-
data[
|
551
|
+
data.params[name][i], target[i] if target.ndim == measure.ndim else target
|
551
552
|
)
|
552
553
|
else:
|
553
|
-
projection[name] = inv_project_steps(data[
|
554
|
+
projection[name] = inv_project_steps(data.params[name], target)
|
554
555
|
|
555
556
|
return projection
|
dataeval/detectors/__init__.py
CHANGED
@@ -1,29 +1,10 @@
|
|
1
|
-
from
|
1
|
+
from dataeval import _IS_TENSORFLOW_AVAILABLE
|
2
2
|
|
3
|
-
from
|
4
|
-
from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
|
5
|
-
from dataeval._internal.detectors.drift.cvm import DriftCVM
|
6
|
-
from dataeval._internal.detectors.drift.ks import DriftKS
|
7
|
-
from dataeval._internal.detectors.duplicates import Duplicates
|
8
|
-
from dataeval._internal.detectors.linter import Linter
|
3
|
+
from . import drift, linters
|
9
4
|
|
10
|
-
__all__ = ["
|
5
|
+
__all__ = ["drift", "linters"]
|
11
6
|
|
12
|
-
if
|
13
|
-
from
|
14
|
-
from dataeval._internal.detectors.drift.torch import GaussianRBF, preprocess_drift
|
15
|
-
from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
|
7
|
+
if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
|
8
|
+
from . import ood
|
16
9
|
|
17
|
-
__all__ += ["
|
18
|
-
|
19
|
-
if find_spec("tensorflow") is not None and find_spec("tensorflow_probability") is not None: # pragma: no cover
|
20
|
-
from dataeval._internal.detectors.ood.ae import OOD_AE
|
21
|
-
from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
|
22
|
-
from dataeval._internal.detectors.ood.base import OODScore
|
23
|
-
from dataeval._internal.detectors.ood.llr import OOD_LLR
|
24
|
-
from dataeval._internal.detectors.ood.vae import OOD_VAE
|
25
|
-
from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
|
26
|
-
|
27
|
-
__all__ += ["OOD_AE", "OOD_AEGMM", "OOD_LLR", "OODScore", "OOD_VAE", "OOD_VAEGMM"]
|
28
|
-
|
29
|
-
del find_spec
|
10
|
+
__all__ += ["ood"]
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from dataeval import _IS_TORCH_AVAILABLE
|
2
|
+
from dataeval._internal.detectors.drift.cvm import DriftCVM
|
3
|
+
from dataeval._internal.detectors.drift.ks import DriftKS
|
4
|
+
|
5
|
+
from . import updates
|
6
|
+
|
7
|
+
__all__ = ["DriftCVM", "DriftKS", "updates"]
|
8
|
+
|
9
|
+
if _IS_TORCH_AVAILABLE: # pragma: no cover
|
10
|
+
from dataeval._internal.detectors.drift.mmd import DriftMMD
|
11
|
+
from dataeval._internal.detectors.drift.torch import preprocess_drift
|
12
|
+
from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
|
13
|
+
|
14
|
+
from . import kernels
|
15
|
+
|
16
|
+
__all__ += ["DriftMMD", "DriftUncertainty", "kernels", "preprocess_drift"]
|
@@ -0,0 +1,11 @@
|
|
1
|
+
from dataeval import _IS_TENSORFLOW_AVAILABLE
|
2
|
+
|
3
|
+
if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
|
4
|
+
from dataeval._internal.detectors.ood.ae import OOD_AE
|
5
|
+
from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
|
6
|
+
from dataeval._internal.detectors.ood.base import OODOutput, OODScore
|
7
|
+
from dataeval._internal.detectors.ood.llr import OOD_LLR
|
8
|
+
from dataeval._internal.detectors.ood.vae import OOD_VAE
|
9
|
+
from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
|
10
|
+
|
11
|
+
__all__ = ["OODOutput", "OODScore", "OOD_AE", "OOD_AEGMM", "OOD_LLR", "OOD_VAE", "OOD_VAEGMM"]
|
dataeval/flags/__init__.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
from dataeval._internal.flags import
|
1
|
+
from dataeval._internal.flags import ImageStat
|
2
2
|
|
3
|
-
__all__ = ["
|
3
|
+
__all__ = ["ImageStat"]
|
dataeval/metrics/__init__.py
CHANGED
@@ -1,27 +1,3 @@
|
|
1
|
-
from
|
1
|
+
from . import bias, estimators, stats
|
2
2
|
|
3
|
-
__all__
|
4
|
-
|
5
|
-
from dataeval._internal.metrics.balance import balance, balance_classwise
|
6
|
-
from dataeval._internal.metrics.ber import ber
|
7
|
-
from dataeval._internal.metrics.coverage import coverage
|
8
|
-
from dataeval._internal.metrics.divergence import divergence
|
9
|
-
from dataeval._internal.metrics.diversity import diversity, diversity_classwise
|
10
|
-
from dataeval._internal.metrics.parity import parity, parity_metadata
|
11
|
-
from dataeval._internal.metrics.stats import ChannelStats, ImageStats
|
12
|
-
from dataeval._internal.metrics.uap import uap
|
13
|
-
|
14
|
-
__all__ += [
|
15
|
-
"balance",
|
16
|
-
"balance_classwise",
|
17
|
-
"ber",
|
18
|
-
"coverage",
|
19
|
-
"divergence",
|
20
|
-
"diversity",
|
21
|
-
"diversity_classwise",
|
22
|
-
"parity",
|
23
|
-
"parity_metadata",
|
24
|
-
"ChannelStats",
|
25
|
-
"ImageStats",
|
26
|
-
"uap",
|
27
|
-
]
|
3
|
+
__all__ = ["bias", "estimators", "stats"]
|