dataeval 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. dataeval/__init__.py +13 -9
  2. dataeval/_internal/detectors/clusterer.py +24 -22
  3. dataeval/_internal/detectors/drift/base.py +206 -26
  4. dataeval/_internal/detectors/drift/cvm.py +25 -23
  5. dataeval/_internal/detectors/drift/ks.py +28 -25
  6. dataeval/_internal/detectors/drift/mmd.py +30 -29
  7. dataeval/_internal/detectors/drift/torch.py +66 -58
  8. dataeval/_internal/detectors/drift/uncertainty.py +28 -28
  9. dataeval/_internal/detectors/duplicates.py +28 -18
  10. dataeval/_internal/detectors/ood/ae.py +15 -29
  11. dataeval/_internal/detectors/ood/aegmm.py +33 -27
  12. dataeval/_internal/detectors/ood/base.py +61 -43
  13. dataeval/_internal/detectors/ood/llr.py +27 -24
  14. dataeval/_internal/detectors/ood/vae.py +32 -31
  15. dataeval/_internal/detectors/ood/vaegmm.py +34 -28
  16. dataeval/_internal/detectors/{linter.py → outliers.py} +33 -27
  17. dataeval/_internal/flags.py +5 -3
  18. dataeval/_internal/interop.py +4 -2
  19. dataeval/_internal/metrics/balance.py +33 -4
  20. dataeval/_internal/metrics/ber.py +6 -4
  21. dataeval/_internal/metrics/diversity.py +45 -12
  22. dataeval/_internal/metrics/parity.py +114 -26
  23. dataeval/_internal/metrics/stats.py +154 -16
  24. dataeval/_internal/metrics/uap.py +28 -2
  25. dataeval/_internal/metrics/utils.py +20 -18
  26. dataeval/_internal/models/pytorch/autoencoder.py +127 -22
  27. dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
  28. dataeval/_internal/models/tensorflow/gmm.py +4 -2
  29. dataeval/_internal/models/tensorflow/losses.py +15 -11
  30. dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
  31. dataeval/_internal/models/tensorflow/trainer.py +8 -6
  32. dataeval/_internal/models/tensorflow/utils.py +21 -19
  33. dataeval/_internal/output.py +13 -10
  34. dataeval/_internal/utils.py +5 -3
  35. dataeval/_internal/workflows/sufficiency.py +42 -30
  36. dataeval/detectors/__init__.py +6 -25
  37. dataeval/detectors/drift/__init__.py +16 -0
  38. dataeval/detectors/drift/kernels/__init__.py +6 -0
  39. dataeval/detectors/drift/updates/__init__.py +3 -0
  40. dataeval/detectors/linters/__init__.py +5 -0
  41. dataeval/detectors/ood/__init__.py +11 -0
  42. dataeval/metrics/__init__.py +2 -26
  43. dataeval/metrics/bias/__init__.py +14 -0
  44. dataeval/metrics/estimators/__init__.py +9 -0
  45. dataeval/metrics/stats/__init__.py +6 -0
  46. dataeval/tensorflow/__init__.py +3 -0
  47. dataeval/tensorflow/loss/__init__.py +3 -0
  48. dataeval/tensorflow/models/__init__.py +5 -0
  49. dataeval/tensorflow/recon/__init__.py +3 -0
  50. dataeval/torch/__init__.py +3 -0
  51. dataeval/{models/torch → torch/models}/__init__.py +1 -2
  52. dataeval/torch/trainer/__init__.py +3 -0
  53. dataeval/utils/__init__.py +3 -6
  54. dataeval/workflows/__init__.py +2 -4
  55. {dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
  56. dataeval-0.66.0.dist-info/RECORD +72 -0
  57. dataeval/models/__init__.py +0 -15
  58. dataeval/models/tensorflow/__init__.py +0 -6
  59. dataeval-0.65.0.dist-info/RECORD +0 -60
  60. {dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
  61. {dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0
dataeval/_internal/models/tensorflow/utils.py

@@ -6,8 +6,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
  Licensed under Apache Software License (Apache 2.0)
  """
 
+ from __future__ import annotations
+
  import math
- from typing import Callable, Optional, Tuple, Type, Union, cast
+ from typing import Callable, Union, cast
 
  import keras as keras
  import numpy as np
@@ -29,12 +31,12 @@ from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
 
 
  def predict_batch(
- x: Union[list, NDArray, tf.Tensor],
- model: Union[Callable, keras.Model],
+ x: list | NDArray | tf.Tensor,
+ model: Callable | keras.Model,
  batch_size: int = int(1e10),
- preprocess_fn: Optional[Callable] = None,
- dtype: Union[Type[np.generic], tf.DType] = np.float32,
- ) -> Union[NDArray, tf.Tensor, tuple, list]:
+ preprocess_fn: Callable | None = None,
+ dtype: type[np.generic] | tf.DType = np.float32,
+ ) -> NDArray | tf.Tensor | tuple | list:
  """
  Make batch predictions on a model.
 
@@ -59,7 +61,7 @@ def predict_batch(
  n_minibatch = int(np.ceil(n / batch_size))
  return_np = not isinstance(dtype, tf.DType)
  return_list = False
- preds: Union[list, tuple] = []
+ preds: list | tuple = []
  for i in range(n_minibatch):
  istart, istop = i * batch_size, min((i + 1) * batch_size, n)
  x_batch = x[istart:istop] # type: ignore
@@ -93,7 +95,7 @@ def predict_batch(
  return out
 
 
- def _get_default_encoder_net(input_shape: Tuple[int, int, int], encoding_dim: int):
+ def _get_default_encoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
  return Sequential(
  [
  InputLayer(input_shape=input_shape),
@@ -106,7 +108,7 @@ def _get_default_encoder_net(input_shape: Tuple[int, int, int], encoding_dim: in
  )
 
 
- def _get_default_decoder_net(input_shape: Tuple[int, int, int], encoding_dim: int):
+ def _get_default_decoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
  return Sequential(
  [
  InputLayer(input_shape=(encoding_dim,)),
@@ -122,26 +124,26 @@ def _get_default_decoder_net(input_shape: Tuple[int, int, int], encoding_dim: in
 
 
  def create_model(
- model_type: Union[AE, AEGMM, PixelCNN, VAE, VAEGMM],
- input_shape: Tuple[int, int, int],
- encoding_dim: Optional[int] = None,
- n_gmm: Optional[int] = None,
- gmm_latent_dim: Optional[int] = None,
+ model_type: AE | AEGMM | PixelCNN | VAE | VAEGMM,
+ input_shape: tuple[int, int, int],
+ encoding_dim: int | None = None,
+ n_gmm: int | None = None,
+ gmm_latent_dim: int | None = None,
  ):
  """
  Create a default model for the specified model type.
 
  Parameters
  ----------
- model_type
+ model_type : Union[AE, AEGMM, PixelCNN, VAE, VAEGMM]
  The model type to create.
- input_shape
+ input_shape : Tuple[int, int, int]
  The input shape of the data used.
- encoding_dim
+ encoding_dim : int, optional - default None
  The target encoding dimensionality.
- n_gmm
+ n_gmm : int, optional - default None
  Number of components used in the GMM layer.
- gmm_latent_dim
+ gmm_latent_dim : int, optional - default None
  Latent dimensionality of the GMM layer.
  """
  input_dim = math.prod(input_shape)
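For orientation, a minimal sketch (not from the package) of building a default model and running batched inference with the updated signatures. The public re-export of create_model under dataeval.tensorflow.models appears later in this diff, predict_batch remains internal, and the input shape, encoding size, and the convention of passing the model class to create_model are illustrative assumptions:

import numpy as np

from dataeval.tensorflow.models import AE, create_model
from dataeval._internal.models.tensorflow.utils import predict_batch

# Build a default autoencoder; (32, 32, 3) and encoding_dim=128 are arbitrary example values,
# and passing the class follows the docstring's "model type to create" (assumed convention).
model = create_model(AE, input_shape=(32, 32, 3), encoding_dim=128)

# Batched inference; a non-TensorFlow dtype such as np.float32 makes predict_batch return NumPy output.
x = np.random.rand(64, 32, 32, 3).astype(np.float32)
recon = predict_batch(x, model, batch_size=16, dtype=np.float32)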
dataeval/_internal/output.py

@@ -1,7 +1,8 @@
+ from __future__ import annotations
+
  import inspect
  from datetime import datetime, timezone
  from functools import wraps
- from typing import Dict, List, Optional
 
  import numpy as np
 
@@ -12,18 +13,18 @@ class OutputMetadata:
  _name: str
  _execution_time: str
  _execution_duration: float
- _arguments: Dict[str, str]
- _state: Dict[str, str]
+ _arguments: dict[str, str]
+ _state: dict[str, str]
  _version: str
 
- def dict(self) -> Dict:
+ def dict(self) -> dict:
  return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
 
- def meta(self) -> Dict:
+ def meta(self) -> dict:
  return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
 
 
- def set_metadata(module_name: str = "", state_attr: Optional[List[str]] = None):
+ def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
  def decorator(fn):
  @wraps(fn)
  def wrapper(*args, **kwargs):
@@ -70,12 +71,14 @@ def set_metadata(module_name: str = "", state_attr: Optional[List[str]] = None):
 
  def populate_defaults(d: dict, c: type) -> dict:
  def default(t):
- name = t._name if hasattr(t, "_name") else t.__name__ # py3.9 : _name, py3.10 : __name__
- if name == "Dict":
+ t = (
+ t if isinstance(t, str) else t._name if hasattr(t, "_name") else t.__name__
+ ).lower() # py3.9 : _name, py3.10 : __name__
+ if t.startswith("dict"):
  return {}
- if name == "List":
+ if t.startswith("list"):
  return []
- if name == "ndarray":
+ if t.startswith("ndarray"):
  return np.array([])
  raise TypeError("Unrecognized annotation type")
 
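The set_metadata decorator records execution metadata onto the returned output object. A hypothetical sketch of the intended flow; the ExampleOutput dataclass and example_metric function are made up for illustration, and the exact metadata fields named in the comments are assumptions:

from dataclasses import dataclass

from dataeval._internal.output import OutputMetadata, set_metadata

@dataclass
class ExampleOutput(OutputMetadata):  # hypothetical output type
    score: float

@set_metadata("dataeval.examples")  # assumed to record name, arguments, timing, and version on the result
def example_metric(threshold: float = 0.5) -> ExampleOutput:
    return ExampleOutput(score=1.0 - threshold)

out = example_metric(0.25)
out.dict()  # public fields only, e.g. {"score": 0.75}
out.meta()  # underscored metadata with the prefix stripped (name, execution_time, arguments, ...)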
dataeval/_internal/utils.py

@@ -1,10 +1,12 @@
+ from __future__ import annotations
+
  from collections import defaultdict
- from typing import Any, Dict, List
+ from typing import Any
 
  from torch.utils.data import Dataset
 
 
- def read_dataset(dataset: Dataset) -> List[List[Any]]:
+ def read_dataset(dataset: Dataset) -> list[list[Any]]:
  """
  Extract information from a dataset at each index into a individual lists of each information position
 
@@ -51,7 +53,7 @@ def read_dataset(dataset: Dataset) -> List[List[Any]]:
  True
  """
 
- ddict: Dict[int, List] = defaultdict(list)
+ ddict: dict[int, list] = defaultdict(list)
 
  for data in dataset:
  # Convert to tuple if single return (e.g. images only)
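Per the docstring, read_dataset transposes a dataset's per-index tuples into one list per position. A small sketch with a made-up map-style dataset:

from torch.utils.data import Dataset

from dataeval._internal.utils import read_dataset  # re-exported as dataeval.utils.read_dataset when torch is installed

class PairDataset(Dataset):  # hypothetical (image, label) dataset for illustration
    def __init__(self, images, labels):
        self.images, self.labels = images, labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

ds = PairDataset(images=[[0.0], [1.0], [2.0]], labels=[0, 1, 0])
images, labels = read_dataset(ds)
# images -> [[0.0], [1.0], [2.0]]
# labels -> [0, 1, 0]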
dataeval/_internal/workflows/sufficiency.py

@@ -1,6 +1,8 @@
+ from __future__ import annotations
+
  import warnings
  from dataclasses import dataclass
- from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
+ from typing import Any, Callable, Sequence, cast
 
  import matplotlib.pyplot as plt
  import numpy as np
@@ -19,17 +21,17 @@ class SufficiencyOutput(OutputMetadata):
  """
  Attributes
  ----------
- steps : NDArray[np.uint32]
+ steps : NDArray
  Array of sample sizes
- params : Dict[str, NDArray[np.float64]]
+ params : Dict[str, NDArray]
  Inverse power curve coefficients for the line of best fit for each measure
- measures : Dict[str, NDArray[np.float64]]
+ measures : Dict[str, NDArray]
  Average of values observed for each sample size step for each measure
  """
 
  steps: NDArray[np.uint32]
- params: Dict[str, NDArray[np.float64]]
- measures: Dict[str, NDArray[np.float64]]
+ params: dict[str, NDArray[np.float64]]
+ measures: dict[str, NDArray[np.float64]]
 
  def __post_init__(self):
  c = len(self.steps)
@@ -73,7 +75,7 @@ def f_inv_out(y_i: NDArray, x: NDArray) -> NDArray[np.uint64]:
 
  Returns
  -------
- NDArray[np.uint64]
+ NDArray
  Array of sample sizes
  """
  n_i = ((y_i - x[2]) / x[0]) ** (-1 / x[1])
@@ -183,7 +185,7 @@ def inv_project_steps(params: NDArray, targets: NDArray) -> NDArray[np.uint64]:
 
  Returns
  -------
- NDArray[np.uint64]
+ NDArray
  Array of sample sizes, or 0 if overflow
  """
  steps = f_inv_out(1 - np.array(targets), params)
@@ -191,7 +193,7 @@ def inv_project_steps(params: NDArray, targets: NDArray) -> NDArray[np.uint64]:
  return np.ceil(steps)
 
 
- def get_curve_params(measures: Dict[str, NDArray], ranges: NDArray, niter: int) -> Dict[str, NDArray]:
+ def get_curve_params(measures: dict[str, NDArray], ranges: NDArray, niter: int) -> dict[str, NDArray]:
  """Calculates and aggregates parameters for both single and multi-class metrics"""
  output = {}
  for name, measure in measures.items():
@@ -246,15 +248,15 @@ class Sufficiency:
  ----------
  model : nn.Module
  Model that will be trained for each subset of data
- train_ds : Dataset
+ train_ds : torch.Dataset
  Full training data that will be split for each run
- test_ds : Dataset
+ test_ds : torch.Dataset
  Data that will be used for every run's evaluation
  train_fn : Callable[[nn.Module, Dataset, Sequence[int]], None]
  Function which takes a model (torch.nn.Module), a dataset
  (torch.utils.data.Dataset), indices to train on and executes model
  training against the data.
- eval_fn : Callable[[nn.Module, Dataset], Dict[str, float]]
+ eval_fn : Callable[[nn.Module, Dataset], Dict[str, float | NDArray]]
  Function which takes a model (torch.nn.Module), a dataset
  (torch.utils.data.Dataset) and returns a dictionary of metric
  values (Dict[str, float]) which is used to assess model performance
@@ -263,9 +265,9 @@ class Sufficiency:
  Number of models to run over all subsets
  substeps : int, default 5
  Total number of dataset partitions that each model will train on
- train_kwargs : Dict[str, Any] | None, default None
+ train_kwargs : Dict | None, default None
  Additional arguments required for custom training function
- eval_kwargs : Dict[str, Any] | None, default None
+ eval_kwargs : Dict | None, default None
  Additional arguments required for custom evaluation function
  """
 
@@ -275,11 +277,11 @@ class Sufficiency:
  train_ds: Dataset,
  test_ds: Dataset,
  train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
- eval_fn: Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, NDArray]]],
+ eval_fn: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
  runs: int = 1,
  substeps: int = 5,
- train_kwargs: Optional[Dict[str, Any]] = None,
- eval_kwargs: Optional[Dict[str, Any]] = None,
+ train_kwargs: dict[str, Any] | None = None,
+ eval_kwargs: dict[str, Any] | None = None,
  ):
  self.model = model
  self.train_ds = train_ds
@@ -322,42 +324,42 @@ class Sufficiency:
  @property
  def eval_fn(
  self,
- ) -> Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, NDArray]]]:
+ ) -> Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]]:
  return self._eval_fn
 
  @eval_fn.setter
  def eval_fn(
  self,
- value: Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, NDArray]]],
+ value: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
  ):
  if not callable(value):
  raise TypeError("Must provide a callable for eval_fn.")
  self._eval_fn = value
 
  @property
- def train_kwargs(self) -> Dict[str, Any]:
+ def train_kwargs(self) -> dict[str, Any]:
  return self._train_kwargs
 
  @train_kwargs.setter
- def train_kwargs(self, value: Optional[Dict[str, Any]]):
+ def train_kwargs(self, value: dict[str, Any] | None):
  self._train_kwargs = {} if value is None else value
 
  @property
- def eval_kwargs(self) -> Dict[str, Any]:
+ def eval_kwargs(self) -> dict[str, Any]:
  return self._eval_kwargs
 
  @eval_kwargs.setter
- def eval_kwargs(self, value: Optional[Dict[str, Any]]):
+ def eval_kwargs(self, value: dict[str, Any] | None):
  self._eval_kwargs = {} if value is None else value
 
  @set_metadata("dataeval.workflows", ["runs", "substeps"])
- def evaluate(self, eval_at: Optional[NDArray] = None, niter: int = 1000) -> SufficiencyOutput:
+ def evaluate(self, eval_at: NDArray | None = None, niter: int = 1000) -> SufficiencyOutput:
  """
  Creates data indices, trains models, and returns plotting data
 
  Parameters
  ----------
- eval_at : Optional[NDArray]
+ eval_at : NDArray | None, default None
  Specify this to collect accuracies over a specific set of dataset lengths, rather
  than letting Sufficiency internally create the lengths to evaluate at.
  niter : int, default 1000
@@ -367,7 +369,15 @@ class Sufficiency:
  -------
  SufficiencyOutput
  Dataclass containing the average of each measure per substep
- """
+
+ Examples
+ --------
+ >>> suff = Sufficiency(
+ ... model=model, train_ds=train_ds, test_ds=test_ds, train_fn=train_fn, eval_fn=eval_fn, runs=3, substeps=5
+ ... )
+ >>> suff.evaluate()
+ SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
+ """ # noqa: E501
  if eval_at is not None:
  ranges = eval_at
  else:
@@ -416,7 +426,7 @@ class Sufficiency:
  def project(
  cls,
  data: SufficiencyOutput,
- projection: Union[int, Sequence[int], NDArray[np.uint]],
+ projection: int | Sequence[int] | NDArray[np.uint],
  ) -> SufficiencyOutput:
  """Projects the measures for each value of X
 
@@ -424,7 +434,7 @@ class Sufficiency:
  ----------
  data : SufficiencyOutput
  Dataclass containing the average of each measure per substep
- projection : Union[int, Sequence[int], NDArray[np.uint]]
+ projection : int | Sequence[int] | NDArray[np.uint]
  Step or steps to project
 
  Returns
@@ -456,13 +466,15 @@ class Sufficiency:
  return SufficiencyOutput(projection, data.params, output)
 
  @classmethod
- def plot(cls, data: SufficiencyOutput, class_names: Optional[Sequence[str]] = None) -> List[Figure]:
+ def plot(cls, data: SufficiencyOutput, class_names: Sequence[str] | None = None) -> list[Figure]:
  """Plotting function for data sufficiency tasks
 
  Parameters
  ----------
  data : SufficiencyOutput
  Dataclass containing the average of each measure per substep
+ class_names : Sequence[str] | None, default None
+ List of class names
 
  Returns
  -------
@@ -505,7 +517,7 @@ class Sufficiency:
  return plots
 
  @classmethod
- def inv_project(cls, targets: Dict[str, NDArray], data: SufficiencyOutput) -> Dict[str, NDArray]:
+ def inv_project(cls, targets: dict[str, NDArray], data: SufficiencyOutput) -> dict[str, NDArray]:
  """
  Calculate training samples needed to achieve target model metric values.
 
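Beyond the new doctest above, a runnable-style sketch of the callback signatures documented for Sufficiency; the toy dataset, model, and constant metric below are placeholders, not part of the package:

from typing import Sequence

import torch
import torch.nn as nn
from torch.utils.data import Dataset, TensorDataset

from dataeval.workflows import Sufficiency

def train_fn(model: nn.Module, dataset: Dataset, indices: Sequence[int]) -> None:
    # Placeholder: train `model` on the subset of `dataset` selected by `indices`.
    pass

def eval_fn(model: nn.Module, dataset: Dataset) -> dict[str, float]:
    # Placeholder: return one or more named metric values for the trained model.
    return {"accuracy": 1.0}

train_ds = TensorDataset(torch.zeros(100, 4), torch.zeros(100, dtype=torch.long))
test_ds = TensorDataset(torch.zeros(20, 4), torch.zeros(20, dtype=torch.long))

suff = Sufficiency(
    model=nn.Linear(4, 2),
    train_ds=train_ds,
    test_ds=test_ds,
    train_fn=train_fn,
    eval_fn=eval_fn,
    runs=3,
    substeps=5,
)
output = suff.evaluate()                       # SufficiencyOutput with steps, params, measures
projected = Sufficiency.project(output, 1000)  # extrapolate each measure out to 1000 samples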
dataeval/detectors/__init__.py

@@ -1,29 +1,10 @@
- from importlib.util import find_spec
+ from dataeval import _IS_TENSORFLOW_AVAILABLE
 
- from dataeval._internal.detectors.clusterer import Clusterer
- from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
- from dataeval._internal.detectors.drift.cvm import DriftCVM
- from dataeval._internal.detectors.drift.ks import DriftKS
- from dataeval._internal.detectors.duplicates import Duplicates
- from dataeval._internal.detectors.linter import Linter
+ from . import drift, linters
 
- __all__ = ["Clusterer", "Duplicates", "Linter", "DriftCVM", "DriftKS", "LastSeenUpdate", "ReservoirSamplingUpdate"]
+ __all__ = ["drift", "linters"]
 
- if find_spec("torch") is not None: # pragma: no cover
- from dataeval._internal.detectors.drift.mmd import DriftMMD
- from dataeval._internal.detectors.drift.torch import GaussianRBF, preprocess_drift
- from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
+ if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
+ from . import ood
 
- __all__ += ["DriftMMD", "GaussianRBF", "DriftUncertainty", "preprocess_drift"]
-
- if find_spec("tensorflow") is not None and find_spec("tensorflow_probability") is not None: # pragma: no cover
- from dataeval._internal.detectors.ood.ae import OOD_AE
- from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
- from dataeval._internal.detectors.ood.base import OODScore
- from dataeval._internal.detectors.ood.llr import OOD_LLR
- from dataeval._internal.detectors.ood.vae import OOD_VAE
- from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
-
- __all__ += ["OOD_AE", "OOD_AEGMM", "OOD_LLR", "OODScore", "OOD_VAE", "OOD_VAEGMM"]
-
- del find_spec
+ __all__ += ["ood"]
dataeval/detectors/drift/__init__.py

@@ -0,0 +1,16 @@
+ from dataeval import _IS_TORCH_AVAILABLE
+ from dataeval._internal.detectors.drift.cvm import DriftCVM
+ from dataeval._internal.detectors.drift.ks import DriftKS
+
+ from . import updates
+
+ __all__ = ["DriftCVM", "DriftKS", "updates"]
+
+ if _IS_TORCH_AVAILABLE: # pragma: no cover
+ from dataeval._internal.detectors.drift.mmd import DriftMMD
+ from dataeval._internal.detectors.drift.torch import preprocess_drift
+ from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
+
+ from . import kernels
+
+ __all__ += ["DriftMMD", "DriftUncertainty", "kernels", "preprocess_drift"]

dataeval/detectors/drift/kernels/__init__.py

@@ -0,0 +1,6 @@
+ from dataeval import _IS_TORCH_AVAILABLE
+
+ if _IS_TORCH_AVAILABLE: # pragma: no cover
+ from dataeval._internal.detectors.drift.torch import GaussianRBF
+
+ __all__ = ["GaussianRBF"]

dataeval/detectors/drift/updates/__init__.py

@@ -0,0 +1,3 @@
+ from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
+
+ __all__ = ["LastSeenUpdate", "ReservoirSamplingUpdate"]

dataeval/detectors/linters/__init__.py

@@ -0,0 +1,5 @@
+ from dataeval._internal.detectors.clusterer import Clusterer
+ from dataeval._internal.detectors.duplicates import Duplicates
+ from dataeval._internal.detectors.outliers import Outliers
+
+ __all__ = ["Clusterer", "Duplicates", "Outliers"]

dataeval/detectors/ood/__init__.py

@@ -0,0 +1,11 @@
+ from dataeval import _IS_TENSORFLOW_AVAILABLE
+
+ if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
+ from dataeval._internal.detectors.ood.ae import OOD_AE
+ from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
+ from dataeval._internal.detectors.ood.base import OODOutput, OODScore
+ from dataeval._internal.detectors.ood.llr import OOD_LLR
+ from dataeval._internal.detectors.ood.vae import OOD_VAE
+ from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
+
+ __all__ = ["OODOutput", "OODScore", "OOD_AE", "OOD_AEGMM", "OOD_LLR", "OOD_VAE", "OOD_VAEGMM"]
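Taken together with the dataeval/detectors/__init__.py rewrite above, these new subpackages change the public import paths for the detectors. A before/after sketch based only on the import lines in this diff:

# dataeval 0.65.0
from dataeval.detectors import Clusterer, Duplicates, Linter, DriftCVM, DriftKS, LastSeenUpdate

# dataeval 0.66.0
from dataeval.detectors.linters import Clusterer, Duplicates, Outliers      # Linter is now Outliers
from dataeval.detectors.drift import DriftCVM, DriftKS, DriftMMD, preprocess_drift  # MMD/preprocess need torch
from dataeval.detectors.drift.updates import LastSeenUpdate, ReservoirSamplingUpdate
from dataeval.detectors.drift.kernels import GaussianRBF                    # requires torch
from dataeval.detectors.ood import OOD_AE, OODOutput, OODScore              # requires tensorflow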
dataeval/metrics/__init__.py

@@ -1,27 +1,3 @@
- from typing import List
+ from . import bias, estimators, stats
 
- __all__: List[str] = []
-
- from dataeval._internal.metrics.balance import balance, balance_classwise
- from dataeval._internal.metrics.ber import ber
- from dataeval._internal.metrics.coverage import coverage
- from dataeval._internal.metrics.divergence import divergence
- from dataeval._internal.metrics.diversity import diversity, diversity_classwise
- from dataeval._internal.metrics.parity import parity, parity_metadata
- from dataeval._internal.metrics.stats import channelstats, imagestats
- from dataeval._internal.metrics.uap import uap
-
- __all__ += [
- "balance",
- "balance_classwise",
- "ber",
- "channelstats",
- "coverage",
- "divergence",
- "diversity",
- "diversity_classwise",
- "imagestats",
- "parity",
- "parity_metadata",
- "uap",
- ]
+ __all__ = ["bias", "estimators", "stats"]

dataeval/metrics/bias/__init__.py

@@ -0,0 +1,14 @@
+ from dataeval._internal.metrics.balance import balance, balance_classwise
+ from dataeval._internal.metrics.coverage import coverage
+ from dataeval._internal.metrics.diversity import diversity, diversity_classwise
+ from dataeval._internal.metrics.parity import label_parity, parity
+
+ __all__ = [
+ "balance",
+ "balance_classwise",
+ "coverage",
+ "diversity",
+ "diversity_classwise",
+ "label_parity",
+ "parity",
+ ]

dataeval/metrics/estimators/__init__.py

@@ -0,0 +1,9 @@
+ from dataeval._internal.metrics.ber import ber
+ from dataeval._internal.metrics.divergence import divergence
+ from dataeval._internal.metrics.uap import uap
+
+ __all__ = [
+ "ber",
+ "divergence",
+ "uap",
+ ]

dataeval/metrics/stats/__init__.py

@@ -0,0 +1,6 @@
+ from dataeval._internal.metrics.stats import channelstats, imagestats
+
+ __all__ = [
+ "channelstats",
+ "imagestats",
+ ]
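The flat dataeval.metrics namespace is likewise split into bias, estimators, and stats. A before/after sketch of the imports shown above; how the old parity/parity_metadata map onto the new parity/label_parity is governed by the parity.py changes, which are not reproduced in this diff:

# dataeval 0.65.0
from dataeval.metrics import balance, ber, coverage, divergence, imagestats, parity, uap

# dataeval 0.66.0
from dataeval.metrics.bias import balance, coverage, diversity, label_parity, parity
from dataeval.metrics.estimators import ber, divergence, uap
from dataeval.metrics.stats import channelstats, imagestats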
dataeval/tensorflow/__init__.py

@@ -0,0 +1,3 @@
+ from . import loss, models, recon
+
+ __all__ = ["loss", "models", "recon"]

dataeval/tensorflow/loss/__init__.py

@@ -0,0 +1,3 @@
+ from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
+
+ __all__ = ["Elbo", "LossGMM"]

dataeval/tensorflow/models/__init__.py

@@ -0,0 +1,5 @@
+ from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
+ from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
+ from dataeval._internal.models.tensorflow.utils import create_model
+
+ __all__ = ["create_model", "AE", "AEGMM", "PixelCNN", "VAE", "VAEGMM"]

dataeval/tensorflow/recon/__init__.py

@@ -0,0 +1,3 @@
+ from dataeval._internal.models.tensorflow.autoencoder import eucl_cosim_features
+
+ __all__ = ["eucl_cosim_features"]

dataeval/torch/__init__.py

@@ -0,0 +1,3 @@
+ from . import models, trainer
+
+ __all__ = ["models", "trainer"]

dataeval/{models/torch → torch/models}/__init__.py

@@ -1,8 +1,7 @@
  from dataeval._internal.models.pytorch.autoencoder import (
- AETrainer,
  AriaAutoencoder,
  Decoder,
  Encoder,
  )
 
- __all__ = ["AETrainer", "AriaAutoencoder", "Decoder", "Encoder"]
+ __all__ = ["AriaAutoencoder", "Decoder", "Encoder"]

dataeval/torch/trainer/__init__.py

@@ -0,0 +1,3 @@
+ from dataeval._internal.models.pytorch.autoencoder import AETrainer
+
+ __all__ = ["AETrainer"]
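The framework-specific models move the same way: the old dataeval.models.* packages are replaced by dataeval.tensorflow.* and dataeval.torch.*, with AETrainer split out into its own trainer subpackage. A before/after sketch built from the import lines above:

# dataeval 0.65.0
from dataeval.models.torch import AETrainer, AriaAutoencoder, Decoder, Encoder

# dataeval 0.66.0
from dataeval.torch.models import AriaAutoencoder, Decoder, Encoder
from dataeval.torch.trainer import AETrainer
from dataeval.tensorflow.models import AE, AEGMM, PixelCNN, VAE, VAEGMM, create_model
from dataeval.tensorflow.loss import Elbo, LossGMM
from dataeval.tensorflow.recon import eucl_cosim_features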
dataeval/utils/__init__.py

@@ -1,9 +1,6 @@
- from importlib.util import find_spec
- from typing import List
+ from dataeval import _IS_TORCH_AVAILABLE
 
- __all__: List[str] = []
-
- if find_spec("torch") is not None: # pragma: no cover
+ if _IS_TORCH_AVAILABLE: # pragma: no cover
  from dataeval._internal.utils import read_dataset
 
- __all__ += ["read_dataset"]
+ __all__ = ["read_dataset"]

dataeval/workflows/__init__.py

@@ -1,8 +1,6 @@
- from importlib.util import find_spec
+ from dataeval import _IS_TORCH_AVAILABLE
 
- if find_spec("torch") is not None: # pragma: no cover
+ if _IS_TORCH_AVAILABLE: # pragma: no cover
  from dataeval._internal.workflows.sufficiency import Sufficiency
 
  __all__ = ["Sufficiency"]
-
- del find_spec

{dataeval-0.65.0.dist-info → dataeval-0.66.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dataeval
- Version: 0.65.0
+ Version: 0.66.0
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
  Home-page: https://dataeval.ai/
  License: MIT