dataeval-0.64.0-py3-none-any.whl → dataeval-0.66.0-py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. dataeval/__init__.py +13 -9
  2. dataeval/_internal/detectors/clusterer.py +63 -49
  3. dataeval/_internal/detectors/drift/base.py +248 -51
  4. dataeval/_internal/detectors/drift/cvm.py +28 -26
  5. dataeval/_internal/detectors/drift/ks.py +31 -28
  6. dataeval/_internal/detectors/drift/mmd.py +62 -42
  7. dataeval/_internal/detectors/drift/torch.py +69 -60
  8. dataeval/_internal/detectors/drift/uncertainty.py +32 -32
  9. dataeval/_internal/detectors/duplicates.py +67 -31
  10. dataeval/_internal/detectors/ood/ae.py +15 -29
  11. dataeval/_internal/detectors/ood/aegmm.py +33 -27
  12. dataeval/_internal/detectors/ood/base.py +86 -47
  13. dataeval/_internal/detectors/ood/llr.py +34 -31
  14. dataeval/_internal/detectors/ood/vae.py +32 -31
  15. dataeval/_internal/detectors/ood/vaegmm.py +34 -28
  16. dataeval/_internal/detectors/{linter.py → outliers.py} +60 -38
  17. dataeval/_internal/flags.py +44 -21
  18. dataeval/_internal/interop.py +5 -3
  19. dataeval/_internal/metrics/balance.py +42 -5
  20. dataeval/_internal/metrics/ber.py +11 -8
  21. dataeval/_internal/metrics/coverage.py +15 -8
  22. dataeval/_internal/metrics/divergence.py +41 -7
  23. dataeval/_internal/metrics/diversity.py +57 -19
  24. dataeval/_internal/metrics/parity.py +141 -66
  25. dataeval/_internal/metrics/stats.py +330 -313
  26. dataeval/_internal/metrics/uap.py +33 -4
  27. dataeval/_internal/metrics/utils.py +79 -40
  28. dataeval/_internal/models/pytorch/autoencoder.py +127 -22
  29. dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
  30. dataeval/_internal/models/tensorflow/gmm.py +4 -2
  31. dataeval/_internal/models/tensorflow/losses.py +17 -13
  32. dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
  33. dataeval/_internal/models/tensorflow/trainer.py +10 -7
  34. dataeval/_internal/models/tensorflow/utils.py +23 -20
  35. dataeval/_internal/output.py +85 -0
  36. dataeval/_internal/utils.py +5 -3
  37. dataeval/_internal/workflows/sufficiency.py +122 -121
  38. dataeval/detectors/__init__.py +6 -25
  39. dataeval/detectors/drift/__init__.py +16 -0
  40. dataeval/detectors/drift/kernels/__init__.py +6 -0
  41. dataeval/detectors/drift/updates/__init__.py +3 -0
  42. dataeval/detectors/linters/__init__.py +5 -0
  43. dataeval/detectors/ood/__init__.py +11 -0
  44. dataeval/flags/__init__.py +2 -2
  45. dataeval/metrics/__init__.py +2 -26
  46. dataeval/metrics/bias/__init__.py +14 -0
  47. dataeval/metrics/estimators/__init__.py +9 -0
  48. dataeval/metrics/stats/__init__.py +6 -0
  49. dataeval/tensorflow/__init__.py +3 -0
  50. dataeval/tensorflow/loss/__init__.py +3 -0
  51. dataeval/tensorflow/models/__init__.py +5 -0
  52. dataeval/tensorflow/recon/__init__.py +3 -0
  53. dataeval/torch/__init__.py +3 -0
  54. dataeval/{models/torch → torch/models}/__init__.py +1 -2
  55. dataeval/torch/trainer/__init__.py +3 -0
  56. dataeval/utils/__init__.py +3 -6
  57. dataeval/workflows/__init__.py +2 -4
  58. {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
  59. dataeval-0.66.0.dist-info/RECORD +72 -0
  60. dataeval/_internal/metrics/base.py +0 -10
  61. dataeval/models/__init__.py +0 -15
  62. dataeval/models/tensorflow/__init__.py +0 -6
  63. dataeval-0.64.0.dist-info/RECORD +0 -60
  64. {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
  65. {dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0
@@ -1,62 +1,88 @@
1
+ from __future__ import annotations
2
+
1
3
  import warnings
2
- from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
4
+ from dataclasses import dataclass
5
+ from typing import Any, Callable, Sequence, cast
3
6
 
4
7
  import matplotlib.pyplot as plt
5
8
  import numpy as np
6
9
  import torch
7
10
  import torch.nn as nn
8
11
  from matplotlib.figure import Figure
12
+ from numpy.typing import NDArray
9
13
  from scipy.optimize import basinhopping
10
14
  from torch.utils.data import Dataset
11
15
 
12
- from dataeval._internal.metrics.base import EvaluateMixin
16
+ from dataeval._internal.output import OutputMetadata, set_metadata
17
+
13
18
 
14
- STEPS_KEY = "_STEPS_"
15
- PARAMS_KEY = "_CURVE_PARAMS_"
19
+ @dataclass(frozen=True)
20
+ class SufficiencyOutput(OutputMetadata):
21
+ """
22
+ Attributes
23
+ ----------
24
+ steps : NDArray
25
+ Array of sample sizes
26
+ params : Dict[str, NDArray]
27
+ Inverse power curve coefficients for the line of best fit for each measure
28
+ measures : Dict[str, NDArray]
29
+ Average of values observed for each sample size step for each measure
30
+ """
16
31
 
17
- SufficiencyOutput = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
32
+ steps: NDArray[np.uint32]
33
+ params: dict[str, NDArray[np.float64]]
34
+ measures: dict[str, NDArray[np.float64]]
18
35
 
36
+ def __post_init__(self):
37
+ c = len(self.steps)
38
+ if set(self.params) != set(self.measures):
39
+ raise ValueError("params and measures have a key mismatch")
40
+ for m, v in self.measures.items():
41
+ c_v = v.shape[1] if v.ndim > 1 else len(v)
42
+ if c != c_v:
43
+ raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
19
44
 
20
- def f_out(n_i: np.ndarray, x: np.ndarray) -> np.ndarray:
45
+
46
+ def f_out(n_i: NDArray, x: NDArray) -> NDArray:
21
47
  """
22
48
  Calculates the line of best fit based on its free parameters
23
49
 
24
50
  Parameters
25
51
  ----------
26
- n_i : np.ndarray
52
+ n_i : NDArray
27
53
  Array of sample sizes
28
- x : np.ndarray
54
+ x : NDArray
29
55
  Array of inverse power curve coefficients
30
56
 
31
57
  Returns
32
58
  -------
33
- np.ndarray
59
+ NDArray
34
60
  Data points for the line of best fit
35
61
  """
36
62
  return x[0] * n_i ** (-x[1]) + x[2]
37
63
 
38
64
 
39
- def f_inv_out(y_i: np.ndarray, x: np.ndarray) -> np.ndarray:
65
+ def f_inv_out(y_i: NDArray, x: NDArray) -> NDArray[np.uint64]:
40
66
  """
41
67
  Inverse function for f_out()
42
68
 
43
69
  Parameters
44
70
  ----------
45
- y_i : np.ndarray
71
+ y_i : NDArray
46
72
  Data points for the line of best fit
47
- x : np.ndarray
73
+ x : NDArray
48
74
  Array of inverse power curve coefficients
49
75
 
50
76
  Returns
51
77
  -------
52
- np.ndarray
78
+ NDArray
53
79
  Array of sample sizes
54
80
  """
55
81
  n_i = ((y_i - x[2]) / x[0]) ** (-1 / x[1])
56
- return n_i
82
+ return np.asarray(n_i, dtype=np.uint64)
57
83
 
58
84
 
59
- def calc_params(p_i: np.ndarray, n_i: np.ndarray, niter: int) -> np.ndarray:
85
+ def calc_params(p_i: NDArray, n_i: NDArray, niter: int) -> NDArray:
60
86
  """
61
87
  Retrieves the inverse power curve coefficients for the line of best fit.
62
88
  Global minimization is done via basin hopping. More info on this algorithm
@@ -64,9 +90,9 @@ def calc_params(p_i: np.ndarray, n_i: np.ndarray, niter: int) -> np.ndarray:
64
90
 
65
91
  Parameters
66
92
  ----------
67
- p_i : np.ndarray
93
+ p_i : NDArray
68
94
  Array of corresponding losses
69
- n_i : np.ndarray
95
+ n_i : NDArray
70
96
  Array of sample sizes
71
97
  niter : int
72
98
  Number of iterations to perform in the basin-hopping
@@ -74,7 +100,7 @@ def calc_params(p_i: np.ndarray, n_i: np.ndarray, niter: int) -> np.ndarray:
74
100
 
75
101
  Returns
76
102
  -------
77
- np.ndarray
103
+ NDArray
78
104
  Array of parameters to recreate line of best fit
79
105
  """
80
106
 
@@ -128,60 +154,46 @@ def validate_dataset_len(dataset: Dataset) -> int:
128
154
  return length
129
155
 
130
156
 
131
- def validate_output(data: SufficiencyOutput):
132
- """Ensure the sufficiency data used is not malformed"""
133
- if not all(key in data for key in [STEPS_KEY, PARAMS_KEY]):
134
- raise KeyError(f"{STEPS_KEY} and {PARAMS_KEY} are required keys for Sufficiency output.")
135
- c = len(data[STEPS_KEY])
136
- for m, v in data.items():
137
- if m in [STEPS_KEY, PARAMS_KEY]:
138
- continue
139
- v = cast(np.ndarray, v)
140
- c_v = v.shape[1] if v.ndim > 1 else len(v)
141
- if c != c_v:
142
- raise ValueError("f{m} does not contain the expected number ({c}) of data points.")
143
-
144
-
145
- def project_steps(params: np.ndarray, projection: np.ndarray) -> np.ndarray:
157
+ def project_steps(params: NDArray, projection: NDArray) -> NDArray:
146
158
  """Projects the measures for each value of X
147
159
 
148
160
  Parameters
149
161
  ----------
150
- params : np.ndarray
162
+ params : NDArray
151
163
  Inverse power curve coefficients used to calculate projection
152
- projection : np.ndarray
164
+ projection : NDArray
153
165
  Steps to extrapolate
154
166
 
155
167
  Returns
156
168
  -------
157
- np.ndarray
169
+ NDArray
158
170
  Extrapolated measure values at each projection step
159
171
 
160
172
  """
161
173
  return 1 - f_out(projection, params)
162
174
 
163
175
 
164
- def inv_project_steps(params: np.ndarray, targets: np.ndarray) -> np.ndarray:
176
+ def inv_project_steps(params: NDArray, targets: NDArray) -> NDArray[np.uint64]:
165
177
  """Inverse function for project_steps()
166
178
 
167
179
  Parameters
168
180
  ----------
169
- params : np.ndarray
181
+ params : NDArray
170
182
  Inverse power curve coefficients used to calculate projection
171
- targets : np.ndarray
183
+ targets : NDArray
172
184
  Desired measure values
173
185
 
174
186
  Returns
175
187
  -------
176
- np.ndarray
188
+ NDArray
177
189
  Array of sample sizes, or 0 if overflow
178
190
  """
179
191
  steps = f_inv_out(1 - np.array(targets), params)
180
192
  steps[np.isnan(steps)] = 0
181
- return np.ceil(steps).astype(np.int64)
193
+ return np.ceil(steps)
182
194
 
183
195
 
184
- def get_curve_params(measures: Dict[str, np.ndarray], ranges: np.ndarray, niter: int) -> Dict[str, np.ndarray]:
196
+ def get_curve_params(measures: dict[str, NDArray], ranges: NDArray, niter: int) -> dict[str, NDArray]:
185
197
  """Calculates and aggregates parameters for both single and multi-class metrics"""
186
198
  output = {}
187
199
  for name, measure in measures.items():
@@ -198,10 +210,10 @@ def get_curve_params(measures: Dict[str, np.ndarray], ranges: np.ndarray, niter:
198
210
 
199
211
  def plot_measure(
200
212
  name: str,
201
- steps: np.ndarray,
202
- measure: np.ndarray,
203
- params: np.ndarray,
204
- projection: np.ndarray,
213
+ steps: NDArray,
214
+ measure: NDArray,
215
+ params: NDArray,
216
+ projection: NDArray,
205
217
  ) -> Figure:
206
218
  fig = plt.figure()
207
219
  fig = cast(Figure, fig)
@@ -228,7 +240,7 @@ def plot_measure(
228
240
  return fig
229
241
 
230
242
 
231
- class Sufficiency(EvaluateMixin):
243
+ class Sufficiency:
232
244
  """
233
245
  Project dataset sufficiency using given a model and evaluation criteria
234
246
 
@@ -236,15 +248,15 @@ class Sufficiency(EvaluateMixin):
236
248
  ----------
237
249
  model : nn.Module
238
250
  Model that will be trained for each subset of data
239
- train_ds : Dataset
251
+ train_ds : torch.Dataset
240
252
  Full training data that will be split for each run
241
- test_ds : Dataset
253
+ test_ds : torch.Dataset
242
254
  Data that will be used for every run's evaluation
243
255
  train_fn : Callable[[nn.Module, Dataset, Sequence[int]], None]
244
256
  Function which takes a model (torch.nn.Module), a dataset
245
257
  (torch.utils.data.Dataset), indices to train on and executes model
246
258
  training against the data.
247
- eval_fn : Callable[[nn.Module, Dataset], Dict[str, float]]
259
+ eval_fn : Callable[[nn.Module, Dataset], Dict[str, float | NDArray]]
248
260
  Function which takes a model (torch.nn.Module), a dataset
249
261
  (torch.utils.data.Dataset) and returns a dictionary of metric
250
262
  values (Dict[str, float]) which is used to assess model performance
@@ -253,9 +265,9 @@ class Sufficiency(EvaluateMixin):
253
265
  Number of models to run over all subsets
254
266
  substeps : int, default 5
255
267
  Total number of dataset partitions that each model will train on
256
- train_kwargs : Dict[str, Any] | None, default None
268
+ train_kwargs : Dict | None, default None
257
269
  Additional arguments required for custom training function
258
- eval_kwargs : Dict[str, Any] | None, default None
270
+ eval_kwargs : Dict | None, default None
259
271
  Additional arguments required for custom evaluation function
260
272
  """
261
273
 
@@ -265,11 +277,11 @@ class Sufficiency(EvaluateMixin):
265
277
  train_ds: Dataset,
266
278
  test_ds: Dataset,
267
279
  train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
268
- eval_fn: Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, np.ndarray]]],
280
+ eval_fn: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
269
281
  runs: int = 1,
270
282
  substeps: int = 5,
271
- train_kwargs: Optional[Dict[str, Any]] = None,
272
- eval_kwargs: Optional[Dict[str, Any]] = None,
283
+ train_kwargs: dict[str, Any] | None = None,
284
+ eval_kwargs: dict[str, Any] | None = None,
273
285
  ):
274
286
  self.model = model
275
287
  self.train_ds = train_ds
@@ -312,41 +324,42 @@ class Sufficiency(EvaluateMixin):
312
324
  @property
313
325
  def eval_fn(
314
326
  self,
315
- ) -> Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, np.ndarray]]]:
327
+ ) -> Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]]:
316
328
  return self._eval_fn
317
329
 
318
330
  @eval_fn.setter
319
331
  def eval_fn(
320
332
  self,
321
- value: Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, np.ndarray]]],
333
+ value: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
322
334
  ):
323
335
  if not callable(value):
324
336
  raise TypeError("Must provide a callable for eval_fn.")
325
337
  self._eval_fn = value
326
338
 
327
339
  @property
328
- def train_kwargs(self) -> Dict[str, Any]:
340
+ def train_kwargs(self) -> dict[str, Any]:
329
341
  return self._train_kwargs
330
342
 
331
343
  @train_kwargs.setter
332
- def train_kwargs(self, value: Optional[Dict[str, Any]]):
344
+ def train_kwargs(self, value: dict[str, Any] | None):
333
345
  self._train_kwargs = {} if value is None else value
334
346
 
335
347
  @property
336
- def eval_kwargs(self) -> Dict[str, Any]:
348
+ def eval_kwargs(self) -> dict[str, Any]:
337
349
  return self._eval_kwargs
338
350
 
339
351
  @eval_kwargs.setter
340
- def eval_kwargs(self, value: Optional[Dict[str, Any]]):
352
+ def eval_kwargs(self, value: dict[str, Any] | None):
341
353
  self._eval_kwargs = {} if value is None else value
342
354
 
343
- def evaluate(self, eval_at: Optional[np.ndarray] = None, niter: int = 1000) -> SufficiencyOutput:
355
+ @set_metadata("dataeval.workflows", ["runs", "substeps"])
356
+ def evaluate(self, eval_at: NDArray | None = None, niter: int = 1000) -> SufficiencyOutput:
344
357
  """
345
358
  Creates data indices, trains models, and returns plotting data
346
359
 
347
360
  Parameters
348
361
  ----------
349
- eval_at : Optional[np.ndarray]
362
+ eval_at : NDArray | None, default None
350
363
  Specify this to collect accuracies over a specific set of dataset lengths, rather
351
364
  than letting Sufficiency internally create the lengths to evaluate at.
352
365
  niter : int, default 1000
@@ -354,9 +367,17 @@ class Sufficiency(EvaluateMixin):
354
367
 
355
368
  Returns
356
369
  -------
357
- Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
358
- Dictionary containing the average of each measure per substep
359
- """
370
+ SufficiencyOutput
371
+ Dataclass containing the average of each measure per substep
372
+
373
+ Examples
374
+ --------
375
+ >>> suff = Sufficiency(
376
+ ... model=model, train_ds=train_ds, test_ds=test_ds, train_fn=train_fn, eval_fn=eval_fn, runs=3, substeps=5
377
+ ... )
378
+ >>> suff.evaluate()
379
+ SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
380
+ """ # noqa: E501
360
381
  if eval_at is not None:
361
382
  ranges = eval_at
362
383
  else:
@@ -365,7 +386,7 @@ class Sufficiency(EvaluateMixin):
365
386
  self._length,
366
387
  self.substeps,
367
388
  ) # Start, Stop, Num steps
368
- ranges = np.geomspace(*geomshape).astype(np.int64)
389
+ ranges = np.geomspace(*geomshape, dtype=np.uint32)
369
390
  substeps = len(ranges)
370
391
  measures = {}
371
392
 
@@ -381,7 +402,7 @@ class Sufficiency(EvaluateMixin):
381
402
  self.train_fn(
382
403
  model,
383
404
  self.train_ds,
384
- indices[:substep].tolist(),
405
+ indices[: int(substep)].tolist(),
385
406
  **self.train_kwargs,
386
407
  )
387
408
 
@@ -390,9 +411,6 @@ class Sufficiency(EvaluateMixin):
390
411
 
391
412
  # Keep track of each measures values
392
413
  for name, value in measure.items():
393
- if name in [STEPS_KEY, PARAMS_KEY]:
394
- raise KeyError(f"Cannot use reserved name '{name}' as a metric name.")
395
-
396
414
  # Sum result into current substep iteration to be averaged later
397
415
  value = np.array(value).ravel()
398
416
  if name not in measures:
@@ -402,66 +420,61 @@ class Sufficiency(EvaluateMixin):
402
420
  # The mean for each measure must be calculated before being returned
403
421
  measures = {k: (v / self.runs).T for k, v in measures.items()}
404
422
  params_output = get_curve_params(measures, ranges, niter)
405
- output = {STEPS_KEY: ranges, PARAMS_KEY: params_output}
406
- output.update(measures)
407
- return output
423
+ return SufficiencyOutput(ranges, params_output, measures)
408
424
 
409
425
  @classmethod
410
426
  def project(
411
427
  cls,
412
428
  data: SufficiencyOutput,
413
- projection: Union[int, Sequence[int], np.ndarray],
414
- ) -> Dict[str, np.ndarray]:
429
+ projection: int | Sequence[int] | NDArray[np.uint],
430
+ ) -> SufficiencyOutput:
415
431
  """Projects the measures for each value of X
416
432
 
417
433
  Parameters
418
434
  ----------
419
- data : Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
435
+ data : SufficiencyOutput
420
436
  Dataclass containing the average of each measure per substep
421
- steps : Union[int, np.ndarray]
437
+ projection : int | Sequence[int] | NDArray[np.uint]
422
438
  Step or steps to project
423
- niter : int, default 200
424
- Number of iterations to perform in the basin-hopping
425
- numerical process to curve-fit data
439
+
440
+ Returns
441
+ -------
442
+ SufficiencyOutput
443
+ Dataclass containing the projected measures per projection
426
444
 
427
445
  Raises
428
446
  ------
429
- KeyError
430
- If STEPS_KEY or measure is not a valid key
431
447
  ValueError
432
448
  If the length of data points in the measures do not match
433
449
  If the steps are not int, Sequence[int] or an ndarray
434
450
  """
435
- validate_output(data)
436
451
  projection = [projection] if isinstance(projection, int) else projection
437
452
  projection = np.array(projection) if isinstance(projection, Sequence) else projection
438
453
  if not isinstance(projection, np.ndarray):
439
454
  raise ValueError("'steps' must be an int, Sequence[int] or ndarray")
440
455
 
441
456
  output = {}
442
- output[STEPS_KEY] = projection
443
- for name, measures in data.items():
444
- if name in [STEPS_KEY, PARAMS_KEY]:
445
- continue
446
- measures = cast(np.ndarray, measures)
457
+ for name, measures in data.measures.items():
447
458
  if measures.ndim > 1:
448
459
  result = []
449
460
  for i in range(len(measures)):
450
- projected = project_steps(data[PARAMS_KEY][name][i], projection)
461
+ projected = project_steps(data.params[name][i], projection)
451
462
  result.append(projected)
452
- output[name] = np.array(result).T
463
+ output[name] = np.array(result)
453
464
  else:
454
- output[name] = project_steps(data[PARAMS_KEY][name], projection)
455
- return output
465
+ output[name] = project_steps(data.params[name], projection)
466
+ return SufficiencyOutput(projection, data.params, output)
456
467
 
457
468
  @classmethod
458
- def plot(cls, data: SufficiencyOutput, class_names: Optional[Sequence[str]] = None) -> List[Figure]:
469
+ def plot(cls, data: SufficiencyOutput, class_names: Sequence[str] | None = None) -> list[Figure]:
459
470
  """Plotting function for data sufficiency tasks
460
471
 
461
472
  Parameters
462
473
  ----------
463
- data : Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
474
+ data : SufficiencyOutput
464
475
  Dataclass containing the average of each measure per substep
476
+ class_names : Sequence[str] | None, default None
477
+ List of class names
465
478
 
466
479
  Returns
467
480
  -------
@@ -470,29 +483,19 @@ class Sufficiency(EvaluateMixin):
470
483
 
471
484
  Raises
472
485
  ------
473
- KeyError
474
- If STEPS_KEY or measure is not a valid key
475
486
  ValueError
476
487
  If the length of data points in the measures do not match
477
488
  """
478
- validate_output(data)
479
-
480
- # X, y data
481
- steps = cast(np.ndarray, data[STEPS_KEY])
482
-
483
489
  # Extrapolation parameters
484
- last_X = steps[-1]
485
- geomshape = (0.01 * last_X, last_X * 4, len(steps))
490
+ last_X = data.steps[-1]
491
+ geomshape = (0.01 * last_X, last_X * 4, len(data.steps))
486
492
  extrapolated = np.geomspace(*geomshape).astype(np.int64)
487
493
 
488
494
  # Stores all plots
489
495
  plots = []
490
496
 
491
497
  # Create a plot for each measure on one figure
492
- for name, measures in data.items():
493
- if name in [STEPS_KEY, PARAMS_KEY]:
494
- continue
495
- measures = cast(np.ndarray, measures)
498
+ for name, measures in data.measures.items():
496
499
  if measures.ndim > 1:
497
500
  if class_names is not None and len(measures) != len(class_names):
498
501
  raise IndexError("Class name count does not align with measures")
@@ -500,56 +503,54 @@ class Sufficiency(EvaluateMixin):
500
503
  class_name = str(i) if class_names is None else class_names[i]
501
504
  fig = plot_measure(
502
505
  f"{name}_{class_name}",
503
- steps,
506
+ data.steps,
504
507
  measure,
505
- data[PARAMS_KEY][name][i],
508
+ data.params[name][i],
506
509
  extrapolated,
507
510
  )
508
511
  plots.append(fig)
509
512
 
510
513
  else:
511
- fig = plot_measure(name, steps, measures, data[PARAMS_KEY][name], extrapolated)
514
+ fig = plot_measure(name, data.steps, measures, data.params[name], extrapolated)
512
515
  plots.append(fig)
513
516
 
514
517
  return plots
515
518
 
516
519
  @classmethod
517
- def inv_project(cls, targets: Dict[str, np.ndarray], data: SufficiencyOutput) -> Dict[str, np.ndarray]:
520
+ def inv_project(cls, targets: dict[str, NDArray], data: SufficiencyOutput) -> dict[str, NDArray]:
518
521
  """
519
522
  Calculate training samples needed to achieve target model metric values.
520
523
 
521
524
  Parameters
522
525
  ----------
523
- targets : Dict[str, np.ndarray]
526
+ targets : Dict[str, NDArray]
524
527
  Dictionary of target metric scores (from 0.0 to 1.0) that we want
525
528
  to achieve, where the key is the name of the metric.
526
529
 
527
- data : Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
530
+ data : SufficiencyOutput
528
531
  Dataclass containing the average of each measure per substep
529
532
 
530
533
  Returns
531
534
  -------
532
- Dict[str, np.ndarray]
535
+ Dict[str, NDArray]
533
536
  List of the number of training samples needed to achieve each
534
537
  corresponding entry in targets
535
538
  """
536
539
 
537
- validate_output(data)
538
-
539
540
  projection = {}
540
541
 
541
542
  for name, target in targets.items():
542
- if name not in data:
543
+ if name not in data.measures:
543
544
  continue
544
545
 
545
- measure = cast(np.ndarray, data[name])
546
+ measure = data.measures[name]
546
547
  if measure.ndim > 1:
547
548
  projection[name] = np.zeros((len(measure), len(target)))
548
549
  for i in range(len(measure)):
549
550
  projection[name][i] = inv_project_steps(
550
- data[PARAMS_KEY][name][i], target[i] if target.ndim == measure.ndim else target
551
+ data.params[name][i], target[i] if target.ndim == measure.ndim else target
551
552
  )
552
553
  else:
553
- projection[name] = inv_project_steps(data[PARAMS_KEY][name], target)
554
+ projection[name] = inv_project_steps(data.params[name], target)
554
555
 
555
556
  return projection
@@ -1,29 +1,10 @@
1
- from importlib.util import find_spec
1
+ from dataeval import _IS_TENSORFLOW_AVAILABLE
2
2
 
3
- from dataeval._internal.detectors.clusterer import Clusterer
4
- from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
5
- from dataeval._internal.detectors.drift.cvm import DriftCVM
6
- from dataeval._internal.detectors.drift.ks import DriftKS
7
- from dataeval._internal.detectors.duplicates import Duplicates
8
- from dataeval._internal.detectors.linter import Linter
3
+ from . import drift, linters
9
4
 
10
- __all__ = ["Clusterer", "Duplicates", "Linter", "DriftCVM", "DriftKS", "LastSeenUpdate", "ReservoirSamplingUpdate"]
5
+ __all__ = ["drift", "linters"]
11
6
 
12
- if find_spec("torch") is not None: # pragma: no cover
13
- from dataeval._internal.detectors.drift.mmd import DriftMMD
14
- from dataeval._internal.detectors.drift.torch import GaussianRBF, preprocess_drift
15
- from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
7
+ if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
8
+ from . import ood
16
9
 
17
- __all__ += ["DriftMMD", "GaussianRBF", "DriftUncertainty", "preprocess_drift"]
18
-
19
- if find_spec("tensorflow") is not None and find_spec("tensorflow_probability") is not None: # pragma: no cover
20
- from dataeval._internal.detectors.ood.ae import OOD_AE
21
- from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
22
- from dataeval._internal.detectors.ood.base import OODScore
23
- from dataeval._internal.detectors.ood.llr import OOD_LLR
24
- from dataeval._internal.detectors.ood.vae import OOD_VAE
25
- from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
26
-
27
- __all__ += ["OOD_AE", "OOD_AEGMM", "OOD_LLR", "OODScore", "OOD_VAE", "OOD_VAEGMM"]
28
-
29
- del find_spec
10
+ __all__ += ["ood"]
@@ -0,0 +1,16 @@
1
+ from dataeval import _IS_TORCH_AVAILABLE
2
+ from dataeval._internal.detectors.drift.cvm import DriftCVM
3
+ from dataeval._internal.detectors.drift.ks import DriftKS
4
+
5
+ from . import updates
6
+
7
+ __all__ = ["DriftCVM", "DriftKS", "updates"]
8
+
9
+ if _IS_TORCH_AVAILABLE: # pragma: no cover
10
+ from dataeval._internal.detectors.drift.mmd import DriftMMD
11
+ from dataeval._internal.detectors.drift.torch import preprocess_drift
12
+ from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
13
+
14
+ from . import kernels
15
+
16
+ __all__ += ["DriftMMD", "DriftUncertainty", "kernels", "preprocess_drift"]
@@ -0,0 +1,6 @@
1
+ from dataeval import _IS_TORCH_AVAILABLE
2
+
3
+ if _IS_TORCH_AVAILABLE: # pragma: no cover
4
+ from dataeval._internal.detectors.drift.torch import GaussianRBF
5
+
6
+ __all__ = ["GaussianRBF"]
@@ -0,0 +1,3 @@
1
+ from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
2
+
3
+ __all__ = ["LastSeenUpdate", "ReservoirSamplingUpdate"]
@@ -0,0 +1,5 @@
1
+ from dataeval._internal.detectors.clusterer import Clusterer
2
+ from dataeval._internal.detectors.duplicates import Duplicates
3
+ from dataeval._internal.detectors.outliers import Outliers
4
+
5
+ __all__ = ["Clusterer", "Duplicates", "Outliers"]
@@ -0,0 +1,11 @@
1
+ from dataeval import _IS_TENSORFLOW_AVAILABLE
2
+
3
+ if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
4
+ from dataeval._internal.detectors.ood.ae import OOD_AE
5
+ from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
6
+ from dataeval._internal.detectors.ood.base import OODOutput, OODScore
7
+ from dataeval._internal.detectors.ood.llr import OOD_LLR
8
+ from dataeval._internal.detectors.ood.vae import OOD_VAE
9
+ from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
10
+
11
+ __all__ = ["OODOutput", "OODScore", "OOD_AE", "OOD_AEGMM", "OOD_LLR", "OOD_VAE", "OOD_VAEGMM"]
@@ -1,3 +1,3 @@
1
- from dataeval._internal.flags import ImageHash, ImageProperty, ImageStatistics, ImageVisuals
1
+ from dataeval._internal.flags import ImageStat
2
2
 
3
- __all__ = ["ImageHash", "ImageProperty", "ImageStatistics", "ImageVisuals"]
3
+ __all__ = ["ImageStat"]
@@ -1,27 +1,3 @@
1
- from typing import List
1
+ from . import bias, estimators, stats
2
2
 
3
- __all__: List[str] = []
4
-
5
- from dataeval._internal.metrics.balance import balance, balance_classwise
6
- from dataeval._internal.metrics.ber import ber
7
- from dataeval._internal.metrics.coverage import coverage
8
- from dataeval._internal.metrics.divergence import divergence
9
- from dataeval._internal.metrics.diversity import diversity, diversity_classwise
10
- from dataeval._internal.metrics.parity import parity, parity_metadata
11
- from dataeval._internal.metrics.stats import ChannelStats, ImageStats
12
- from dataeval._internal.metrics.uap import uap
13
-
14
- __all__ += [
15
- "balance",
16
- "balance_classwise",
17
- "ber",
18
- "coverage",
19
- "divergence",
20
- "diversity",
21
- "diversity_classwise",
22
- "parity",
23
- "parity_metadata",
24
- "ChannelStats",
25
- "ImageStats",
26
- "uap",
27
- ]
3
+ __all__ = ["bias", "estimators", "stats"]