dataeval 0.70.0__py3-none-any.whl → 0.70.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. dataeval/__init__.py +6 -6
  2. dataeval/_internal/datasets.py +235 -131
  3. dataeval/_internal/detectors/clusterer.py +2 -0
  4. dataeval/_internal/detectors/drift/base.py +2 -2
  5. dataeval/_internal/detectors/drift/mmd.py +1 -1
  6. dataeval/_internal/detectors/duplicates.py +2 -0
  7. dataeval/_internal/detectors/ood/ae.py +5 -3
  8. dataeval/_internal/detectors/ood/aegmm.py +6 -4
  9. dataeval/_internal/detectors/ood/base.py +12 -7
  10. dataeval/_internal/detectors/ood/llr.py +6 -4
  11. dataeval/_internal/detectors/ood/vae.py +5 -3
  12. dataeval/_internal/detectors/ood/vaegmm.py +6 -4
  13. dataeval/_internal/detectors/outliers.py +4 -2
  14. dataeval/_internal/metrics/balance.py +4 -2
  15. dataeval/_internal/metrics/ber.py +2 -0
  16. dataeval/_internal/metrics/coverage.py +4 -0
  17. dataeval/_internal/metrics/divergence.py +6 -2
  18. dataeval/_internal/metrics/diversity.py +8 -6
  19. dataeval/_internal/metrics/parity.py +8 -6
  20. dataeval/_internal/metrics/stats/base.py +2 -2
  21. dataeval/_internal/metrics/stats/datasetstats.py +2 -0
  22. dataeval/_internal/metrics/stats/dimensionstats.py +2 -0
  23. dataeval/_internal/metrics/stats/hashstats.py +2 -0
  24. dataeval/_internal/metrics/stats/labelstats.py +1 -1
  25. dataeval/_internal/metrics/stats/pixelstats.py +4 -2
  26. dataeval/_internal/metrics/stats/visualstats.py +4 -2
  27. dataeval/_internal/metrics/uap.py +6 -2
  28. dataeval/_internal/metrics/utils.py +2 -2
  29. dataeval/_internal/models/pytorch/autoencoder.py +5 -5
  30. dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
  31. dataeval/_internal/utils.py +11 -16
  32. dataeval/_internal/workflows/sufficiency.py +44 -33
  33. dataeval/detectors/__init__.py +4 -0
  34. dataeval/detectors/drift/__init__.py +8 -3
  35. dataeval/detectors/drift/kernels/__init__.py +4 -0
  36. dataeval/detectors/drift/updates/__init__.py +4 -0
  37. dataeval/detectors/linters/__init__.py +15 -4
  38. dataeval/detectors/ood/__init__.py +14 -2
  39. dataeval/metrics/__init__.py +5 -0
  40. dataeval/metrics/bias/__init__.py +13 -4
  41. dataeval/metrics/estimators/__init__.py +8 -8
  42. dataeval/metrics/stats/__init__.py +17 -6
  43. dataeval/utils/__init__.py +16 -3
  44. dataeval/utils/tensorflow/__init__.py +11 -0
  45. dataeval/utils/torch/__init__.py +12 -0
  46. dataeval/utils/torch/datasets/__init__.py +7 -0
  47. dataeval/workflows/__init__.py +4 -0
  48. {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/METADATA +10 -2
  49. dataeval-0.70.1.dist-info/RECORD +80 -0
  50. dataeval/tensorflow/__init__.py +0 -3
  51. dataeval/torch/__init__.py +0 -3
  52. dataeval-0.70.0.dist-info/RECORD +0 -79
  53. /dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
  54. /dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
  55. /dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
  56. /dataeval/{torch → utils/torch}/models/__init__.py +0 -0
  57. /dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
  58. {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/LICENSE.txt +0 -0
  59. {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/WHEEL +0 -0
@@ -2,23 +2,26 @@ from __future__ import annotations
2
2
 
3
3
  import warnings
4
4
  from dataclasses import dataclass
5
- from typing import Any, Callable, Sequence, cast
5
+ from typing import Any, Callable, Iterable, Mapping, Sequence, cast
6
6
 
7
7
  import matplotlib.pyplot as plt
8
8
  import numpy as np
9
9
  import torch
10
10
  import torch.nn as nn
11
11
  from matplotlib.figure import Figure
12
- from numpy.typing import NDArray
12
+ from numpy.typing import ArrayLike, NDArray
13
13
  from scipy.optimize import basinhopping
14
14
  from torch.utils.data import Dataset
15
15
 
16
+ from dataeval._internal.interop import as_numpy
16
17
  from dataeval._internal.output import OutputMetadata, set_metadata
17
18
 
18
19
 
19
20
  @dataclass(frozen=True)
20
21
  class SufficiencyOutput(OutputMetadata):
21
22
  """
23
+ Output class for :class:`Sufficiency` workflow
24
+
22
25
  Attributes
23
26
  ----------
24
27
  steps : NDArray
@@ -45,13 +48,13 @@ class SufficiencyOutput(OutputMetadata):
45
48
  @set_metadata("dataeval.workflows.SufficiencyOutput")
46
49
  def project(
47
50
  self,
48
- projection: int | Sequence[int] | NDArray[np.uint],
51
+ projection: int | Iterable[int],
49
52
  ) -> SufficiencyOutput:
50
53
  """Projects the measures for each value of X
51
54
 
52
55
  Parameters
53
56
  ----------
54
- projection : int | Sequence[int] | NDArray[np.uint]
57
+ projection : int | Iterable[int]
55
58
  Step or steps to project
56
59
 
57
60
  Returns
@@ -63,12 +66,12 @@ class SufficiencyOutput(OutputMetadata):
63
66
  ------
64
67
  ValueError
65
68
  If the length of data points in the measures do not match
66
- If the steps are not int, Sequence[int] or an ndarray
69
+ If `projection` is not numerical
67
70
  """
68
- projection = [projection] if isinstance(projection, int) else projection
69
- projection = np.array(projection) if isinstance(projection, Sequence) else projection
70
- if not isinstance(projection, np.ndarray):
71
- raise ValueError("'steps' must be an int, Sequence[int] or ndarray")
71
+ projection = np.asarray(list(projection) if isinstance(projection, Iterable) else [projection])
72
+
73
+ if not np.issubdtype(projection.dtype, np.number):
74
+ raise ValueError("'projection' must consist of numerical values")
72
75
 
73
76
  output = {}
74
77
  for name, measures in self.measures.items():
@@ -92,7 +95,7 @@ class SufficiencyOutput(OutputMetadata):
92
95
 
93
96
  Returns
94
97
  -------
95
- List[plt.Figure]
98
+ list[plt.Figure]
96
99
  List of Figures for each measure
97
100
 
98
101
  Raises
@@ -130,19 +133,19 @@ class SufficiencyOutput(OutputMetadata):
130
133
 
131
134
  return plots
132
135
 
133
- def inv_project(self, targets: dict[str, NDArray]) -> dict[str, NDArray]:
136
+ def inv_project(self, targets: Mapping[str, ArrayLike]) -> dict[str, NDArray[np.float64]]:
134
137
  """
135
138
  Calculate training samples needed to achieve target model metric values.
136
139
 
137
140
  Parameters
138
141
  ----------
139
- targets : Dict[str, NDArray]
140
- Dictionary of target metric scores (from 0.0 to 1.0) that we want
142
+ targets : Mapping[str, ArrayLike]
143
+ Mapping of target metric scores (from 0.0 to 1.0) that we want
141
144
  to achieve, where the key is the name of the metric.
142
145
 
143
146
  Returns
144
147
  -------
145
- Dict[str, NDArray]
148
+ dict[str, NDArray]
146
149
  For each metric name, the number of training samples needed to achieve each
147
150
  corresponding entry in targets
148
151
  """
@@ -150,18 +153,19 @@ class SufficiencyOutput(OutputMetadata):
150
153
  projection = {}
151
154
 
152
155
  for name, target in targets.items():
156
+ tarray = as_numpy(target)
153
157
  if name not in self.measures:
154
158
  continue
155
159
 
156
160
  measure = self.measures[name]
157
161
  if measure.ndim > 1:
158
- projection[name] = np.zeros((len(measure), len(target)))
162
+ projection[name] = np.zeros((len(measure), len(tarray)))
159
163
  for i in range(len(measure)):
160
164
  projection[name][i] = inv_project_steps(
161
- self.params[name][i], target[i] if target.ndim == measure.ndim else target
165
+ self.params[name][i], tarray[i] if tarray.ndim == measure.ndim else tarray
162
166
  )
163
167
  else:
164
- projection[name] = inv_project_steps(self.params[name], target)
168
+ projection[name] = inv_project_steps(self.params[name], tarray)
165
169
 
166
170
  return projection
167
171
 
@@ -379,18 +383,18 @@ class Sufficiency:
379
383
  Function which takes a model (torch.nn.Module), a dataset
380
384
  (torch.utils.data.Dataset), indices to train on and executes model
381
385
  training against the data.
382
- eval_fn : Callable[[nn.Module, Dataset], Dict[str, float | NDArray]]
386
+ eval_fn : Callable[[nn.Module, Dataset], Mapping[str, float | ArrayLike]]
383
387
  Function which takes a model (torch.nn.Module), a dataset
384
388
  (torch.utils.data.Dataset) and returns a dictionary of metric
385
- values (Dict[str, float]) which is used to assess model performance
389
+ values (Mapping[str, float]) which is used to assess model performance
386
390
  given the model and data.
387
391
  runs : int, default 1
388
392
  Number of models to run over all subsets
389
393
  substeps : int, default 5
390
394
  Total number of dataset partitions that each model will train on
391
- train_kwargs : Dict | None, default None
395
+ train_kwargs : Mapping | None, default None
392
396
  Additional arguments required for custom training function
393
- eval_kwargs : Dict | None, default None
397
+ eval_kwargs : Mapping | None, default None
394
398
  Additional arguments required for custom evaluation function
395
399
  """
396
400
 
@@ -400,11 +404,11 @@ class Sufficiency:
400
404
  train_ds: Dataset,
401
405
  test_ds: Dataset,
402
406
  train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
403
- eval_fn: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
407
+ eval_fn: Callable[[nn.Module, Dataset], Mapping[str, float] | Mapping[str, ArrayLike]],
404
408
  runs: int = 1,
405
409
  substeps: int = 5,
406
- train_kwargs: dict[str, Any] | None = None,
407
- eval_kwargs: dict[str, Any] | None = None,
410
+ train_kwargs: Mapping[str, Any] | None = None,
411
+ eval_kwargs: Mapping[str, Any] | None = None,
408
412
  ):
409
413
  self.model = model
410
414
  self.train_ds = train_ds
@@ -447,42 +451,42 @@ class Sufficiency:
447
451
  @property
448
452
  def eval_fn(
449
453
  self,
450
- ) -> Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]]:
454
+ ) -> Callable[[nn.Module, Dataset], dict[str, float] | Mapping[str, ArrayLike]]:
451
455
  return self._eval_fn
452
456
 
453
457
  @eval_fn.setter
454
458
  def eval_fn(
455
459
  self,
456
- value: Callable[[nn.Module, Dataset], dict[str, float] | dict[str, NDArray]],
460
+ value: Callable[[nn.Module, Dataset], dict[str, float] | Mapping[str, ArrayLike]],
457
461
  ):
458
462
  if not callable(value):
459
463
  raise TypeError("Must provide a callable for eval_fn.")
460
464
  self._eval_fn = value
461
465
 
462
466
  @property
463
- def train_kwargs(self) -> dict[str, Any]:
467
+ def train_kwargs(self) -> Mapping[str, Any]:
464
468
  return self._train_kwargs
465
469
 
466
470
  @train_kwargs.setter
467
- def train_kwargs(self, value: dict[str, Any] | None):
471
+ def train_kwargs(self, value: Mapping[str, Any] | None):
468
472
  self._train_kwargs = {} if value is None else value
469
473
 
470
474
  @property
471
- def eval_kwargs(self) -> dict[str, Any]:
475
+ def eval_kwargs(self) -> Mapping[str, Any]:
472
476
  return self._eval_kwargs
473
477
 
474
478
  @eval_kwargs.setter
475
- def eval_kwargs(self, value: dict[str, Any] | None):
479
+ def eval_kwargs(self, value: Mapping[str, Any] | None):
476
480
  self._eval_kwargs = {} if value is None else value
477
481
 
478
482
  @set_metadata("dataeval.workflows", ["runs", "substeps"])
479
- def evaluate(self, eval_at: NDArray | None = None, niter: int = 1000) -> SufficiencyOutput:
483
+ def evaluate(self, eval_at: int | Iterable[int] | None = None, niter: int = 1000) -> SufficiencyOutput:
480
484
  """
481
485
  Creates data indices, trains models, and returns plotting data
482
486
 
483
487
  Parameters
484
488
  ----------
485
- eval_at : NDArray | None, default None
489
+ eval_at : int | Iterable[int] | None, default None
486
490
  Specify this to collect accuracies over a specific set of dataset lengths, rather
487
491
  than letting Sufficiency internally create the lengths to evaluate at.
488
492
  niter : int, default 1000
@@ -493,6 +497,11 @@ class Sufficiency:
493
497
  SufficiencyOutput
494
498
  Dataclass containing the average of each measure per substep
495
499
 
500
+ Raises
501
+ ------
502
+ ValueError
503
+ If `eval_at` is not numerical
504
+
496
505
  Examples
497
506
  --------
498
507
  >>> suff = Sufficiency(
@@ -502,7 +511,9 @@ class Sufficiency:
502
511
  SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
503
512
  """ # noqa: E501
504
513
  if eval_at is not None:
505
- ranges = eval_at
514
+ ranges = np.asarray(list(eval_at) if isinstance(eval_at, Iterable) else [eval_at])
515
+ if not np.issubdtype(ranges.dtype, np.number):
516
+ raise ValueError("'eval_at' must consist of numerical values")
506
517
  else:
507
518
  geomshape = (
508
519
  0.01 * self._length,
@@ -1,3 +1,7 @@
1
+ """
2
+ Detectors can determine if a dataset or individual images in a dataset are indicative of a specific issue.
3
+ """
4
+
1
5
  from dataeval import _IS_TENSORFLOW_AVAILABLE
2
6
 
3
7
  from . import drift, linters
@@ -1,16 +1,21 @@
1
+ """
2
+ Drift detectors identify if the statistical properties of the data have changed.
3
+ """
4
+
1
5
  from dataeval import _IS_TORCH_AVAILABLE
6
+ from dataeval._internal.detectors.drift.base import DriftOutput
2
7
  from dataeval._internal.detectors.drift.cvm import DriftCVM
3
8
  from dataeval._internal.detectors.drift.ks import DriftKS
4
9
 
5
10
  from . import updates
6
11
 
7
- __all__ = ["DriftCVM", "DriftKS", "updates"]
12
+ __all__ = ["DriftCVM", "DriftKS", "DriftOutput", "updates"]
8
13
 
9
14
  if _IS_TORCH_AVAILABLE: # pragma: no cover
10
- from dataeval._internal.detectors.drift.mmd import DriftMMD
15
+ from dataeval._internal.detectors.drift.mmd import DriftMMD, DriftMMDOutput
11
16
  from dataeval._internal.detectors.drift.torch import preprocess_drift
12
17
  from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
13
18
 
14
19
  from . import kernels
15
20
 
16
- __all__ += ["DriftMMD", "DriftUncertainty", "kernels", "preprocess_drift"]
21
+ __all__ += ["DriftMMD", "DriftMMDOutput", "DriftUncertainty", "kernels", "preprocess_drift"]
@@ -1,3 +1,7 @@
1
+ """
2
+ Kernels are used to map non-linear data to a higher dimensional space.
3
+ """
4
+
1
5
  from dataeval import _IS_TORCH_AVAILABLE
2
6
 
3
7
  if _IS_TORCH_AVAILABLE: # pragma: no cover
@@ -1,3 +1,7 @@
1
+ """
2
+ Update strategies inform how the drift detector classes update the reference data when monitoring for drift.
3
+ """
4
+
1
5
  from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
2
6
 
3
7
  __all__ = ["LastSeenUpdate", "ReservoirSamplingUpdate"]
@@ -1,5 +1,16 @@
1
- from dataeval._internal.detectors.clusterer import Clusterer
2
- from dataeval._internal.detectors.duplicates import Duplicates
3
- from dataeval._internal.detectors.outliers import Outliers
1
+ """
2
+ Linters help identify potential issues in training and test data and are an important aspect of data cleaning.
3
+ """
4
4
 
5
- __all__ = ["Clusterer", "Duplicates", "Outliers"]
5
+ from dataeval._internal.detectors.clusterer import Clusterer, ClustererOutput
6
+ from dataeval._internal.detectors.duplicates import Duplicates, DuplicatesOutput
7
+ from dataeval._internal.detectors.outliers import Outliers, OutliersOutput
8
+
9
+ __all__ = [
10
+ "Clusterer",
11
+ "ClustererOutput",
12
+ "Duplicates",
13
+ "DuplicatesOutput",
14
+ "Outliers",
15
+ "OutliersOutput",
16
+ ]
@@ -1,11 +1,23 @@
1
+ """
2
+ Out-of-distribution detectors identify data that is different from the data used to train a particular model.
3
+ """
4
+
1
5
  from dataeval import _IS_TENSORFLOW_AVAILABLE
2
6
 
3
7
  if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
4
8
  from dataeval._internal.detectors.ood.ae import OOD_AE
5
9
  from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
6
- from dataeval._internal.detectors.ood.base import OODOutput, OODScore
10
+ from dataeval._internal.detectors.ood.base import OODOutput, OODScoreOutput
7
11
  from dataeval._internal.detectors.ood.llr import OOD_LLR
8
12
  from dataeval._internal.detectors.ood.vae import OOD_VAE
9
13
  from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
10
14
 
11
- __all__ = ["OODOutput", "OODScore", "OOD_AE", "OOD_AEGMM", "OOD_LLR", "OOD_VAE", "OOD_VAEGMM"]
15
+ __all__ = [
16
+ "OOD_AE",
17
+ "OOD_AEGMM",
18
+ "OOD_LLR",
19
+ "OOD_VAE",
20
+ "OOD_VAEGMM",
21
+ "OODOutput",
22
+ "OODScoreOutput",
23
+ ]
@@ -1,3 +1,8 @@
1
+ """
2
+ Metrics are a way to measure the performance of your models or datasets that
3
+ can then be analyzed in the context of a given problem.
4
+ """
5
+
1
6
  from . import bias, estimators, stats
2
7
 
3
8
  __all__ = ["bias", "estimators", "stats"]
@@ -1,7 +1,12 @@
1
- from dataeval._internal.metrics.balance import balance
2
- from dataeval._internal.metrics.coverage import coverage
3
- from dataeval._internal.metrics.diversity import diversity
4
- from dataeval._internal.metrics.parity import label_parity, parity
1
+ """
2
+ Bias metrics check for skewed or imbalanced datasets and incomplete feature
3
+ representation which may impact model performance.
4
+ """
5
+
6
+ from dataeval._internal.metrics.balance import BalanceOutput, balance
7
+ from dataeval._internal.metrics.coverage import CoverageOutput, coverage
8
+ from dataeval._internal.metrics.diversity import DiversityOutput, diversity
9
+ from dataeval._internal.metrics.parity import ParityOutput, label_parity, parity
5
10
 
6
11
  __all__ = [
7
12
  "balance",
@@ -9,4 +14,8 @@ __all__ = [
9
14
  "diversity",
10
15
  "label_parity",
11
16
  "parity",
17
+ "BalanceOutput",
18
+ "CoverageOutput",
19
+ "DiversityOutput",
20
+ "ParityOutput",
12
21
  ]
@@ -1,9 +1,9 @@
1
- from dataeval._internal.metrics.ber import ber
2
- from dataeval._internal.metrics.divergence import divergence
3
- from dataeval._internal.metrics.uap import uap
1
+ """
2
+ Estimators calculate performance bounds and the statistical distance between datasets.
3
+ """
4
4
 
5
- __all__ = [
6
- "ber",
7
- "divergence",
8
- "uap",
9
- ]
5
+ from dataeval._internal.metrics.ber import BEROutput, ber
6
+ from dataeval._internal.metrics.divergence import DivergenceOutput, divergence
7
+ from dataeval._internal.metrics.uap import UAPOutput, uap
8
+
9
+ __all__ = ["ber", "divergence", "uap", "BEROutput", "DivergenceOutput", "UAPOutput"]
@@ -1,10 +1,15 @@
1
+ """
2
+ Statistics metrics calculate a variety of image properties and pixel statistics
3
+ and label statistics against the images and labels of a dataset.
4
+ """
5
+
1
6
  from dataeval._internal.metrics.stats.boxratiostats import boxratiostats
2
- from dataeval._internal.metrics.stats.datasetstats import datasetstats
3
- from dataeval._internal.metrics.stats.dimensionstats import dimensionstats
4
- from dataeval._internal.metrics.stats.hashstats import hashstats
5
- from dataeval._internal.metrics.stats.labelstats import labelstats
6
- from dataeval._internal.metrics.stats.pixelstats import pixelstats
7
- from dataeval._internal.metrics.stats.visualstats import visualstats
7
+ from dataeval._internal.metrics.stats.datasetstats import DatasetStatsOutput, datasetstats
8
+ from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
9
+ from dataeval._internal.metrics.stats.hashstats import HashStatsOutput, hashstats
10
+ from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
11
+ from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
12
+ from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, visualstats
8
13
 
9
14
  __all__ = [
10
15
  "boxratiostats",
@@ -14,4 +19,10 @@ __all__ = [
14
19
  "labelstats",
15
20
  "pixelstats",
16
21
  "visualstats",
22
+ "DatasetStatsOutput",
23
+ "DimensionStatsOutput",
24
+ "HashStatsOutput",
25
+ "LabelStatsOutput",
26
+ "PixelStatsOutput",
27
+ "VisualStatsOutput",
17
28
  ]
@@ -1,6 +1,19 @@
1
- from dataeval import _IS_TORCH_AVAILABLE
1
+ """
2
+ The utility classes and functions are provided by DataEval to assist users
3
+ in setting up architectures that are guaranteed to work with applicable DataEval
4
+ metrics. Currently DataEval supports both TensorFlow and PyTorch backends.
5
+ """
6
+
7
+ from dataeval import _IS_TENSORFLOW_AVAILABLE, _IS_TORCH_AVAILABLE
8
+
9
+ __all__ = []
2
10
 
3
11
  if _IS_TORCH_AVAILABLE: # pragma: no cover
4
- from dataeval._internal.utils import read_dataset
12
+ from . import torch
13
+
14
+ __all__ += ["torch"]
15
+
16
+ if _IS_TENSORFLOW_AVAILABLE: # pragma: no cover
17
+ from . import tensorflow
5
18
 
6
- __all__ = ["read_dataset"]
19
+ __all__ += ["tensorflow"]
@@ -0,0 +1,11 @@
1
+ """
2
+ TensorFlow models are used in out-of-distribution detectors in the :mod:`dataeval.detectors.ood` module.
3
+
4
+ DataEval provides both basic default models through the utility :func:`dataeval.utils.tensorflow.models.create_model`
5
+ as well as constructors which allow for customization of the encoder, decoder and any other applicable
6
+ layers used by the model.
7
+ """
8
+
9
+ from . import loss, models, recon
10
+
11
+ __all__ = ["loss", "models", "recon"]
@@ -0,0 +1,12 @@
1
+ """
2
+ PyTorch is the primary backend for metrics that require neural networks.
3
+
4
+ While these metrics can take in custom models, DataEval provides utility classes
5
+ to create a seamless integration between custom models and DataEval's metrics.
6
+ """
7
+
8
+ from dataeval._internal.utils import read_dataset
9
+
10
+ from . import models, trainer
11
+
12
+ __all__ = ["read_dataset", "models", "trainer"]
@@ -0,0 +1,7 @@
1
+ """
2
+ Provide access to common Torch datasets used for computer vision
3
+ """
4
+
5
+ from dataeval._internal.datasets import CIFAR10, MNIST, VOCDetection
6
+
7
+ __all__ = ["CIFAR10", "MNIST", "VOCDetection"]
@@ -1,3 +1,7 @@
1
+ """
2
+ Workflows perform a sequence of actions to analyze the dataset and make predictions.
3
+ """
4
+
1
5
  from dataeval import _IS_TORCH_AVAILABLE
2
6
 
3
7
  if _IS_TORCH_AVAILABLE: # pragma: no cover
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.70.0
3
+ Version: 0.70.1
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -30,7 +30,6 @@ Requires-Dist: pillow (>=10.3.0)
30
30
  Requires-Dist: scikit-learn (>=1.5.0)
31
31
  Requires-Dist: scipy (>=1.10)
32
32
  Requires-Dist: tensorflow (>=2.14.1,<2.16) ; extra == "tensorflow" or extra == "all"
33
- Requires-Dist: tensorflow-io-gcs-filesystem (>=0.35.0,<0.37) ; extra == "tensorflow" or extra == "all"
34
33
  Requires-Dist: tensorflow_probability (>=0.22.1,<0.24) ; extra == "tensorflow" or extra == "all"
35
34
  Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
36
35
  Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
@@ -75,6 +74,15 @@ You can install DataEval directly from pypi.org using the following command. Th
75
74
  pip install dataeval[all]
76
75
  ```
77
76
 
77
+ ### Installing DataEval in Conda/Mamba
78
+
79
+ DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. As some dependencies
80
+ are installed from the `pytorch` channel, the channel is specified in the below example.
81
+
82
+ ```
83
+ micromamba create -f environment/environment.yaml -c pytorch
84
+ ```
85
+
78
86
  ### Installing DataEval from GitHub
79
87
 
80
88
  To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
@@ -0,0 +1,80 @@
1
+ dataeval/__init__.py,sha256=AIHxRS7PYlqg4s7fZJTPKuTtyWFWoVROw4knVoSBH6E,555
2
+ dataeval/_internal/datasets.py,sha256=KbXSR-vOAzFamfXHRnI9mhhqUzEPyGpK47fZsirQn1I,14638
3
+ dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ dataeval/_internal/detectors/clusterer.py,sha256=srqTHzh9kIy7Ty4VYaptwuQlBh8emFeiEAeS_mYFKro,20750
5
+ dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ dataeval/_internal/detectors/drift/base.py,sha256=9EIb4eHVnZ8j6ms_kxRs6fOWbHkYKeHPPHWVGD4ikZo,15984
7
+ dataeval/_internal/detectors/drift/cvm.py,sha256=xiyZlf0rAQGG8Z6ZBLPVri805aPRkERrUySwRN8cTZQ,4010
8
+ dataeval/_internal/detectors/drift/ks.py,sha256=aoDx7ps-5vrSI8Q9ii6cwmKnAyaD8tjG69wI-7R3MVQ,4098
9
+ dataeval/_internal/detectors/drift/mmd.py,sha256=ztQSdSlpD66z9xFKqvNo3QHR1vEvf6X-m0LvxNckQgc,7517
10
+ dataeval/_internal/detectors/drift/torch.py,sha256=YhIN85MbUV3C4IJcRvqYdXSWLj5lUeEOb05T5DgB3xo,11552
11
+ dataeval/_internal/detectors/drift/uncertainty.py,sha256=Ot8L42AnFbkij4J3Tis7VzXLv3hfBxoOWBP4UoCEnVs,5125
12
+ dataeval/_internal/detectors/duplicates.py,sha256=wggaIl3uFxihNBQhPv5JcreZbhVaFKoMAJMv_9-aaHU,5324
13
+ dataeval/_internal/detectors/merged_stats.py,sha256=okXGrqAgsqfANMxfIjiUQlZWlaIh5TVvIB9UPsOJZ7k,1351
14
+ dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ dataeval/_internal/detectors/ood/ae.py,sha256=AIffh11UKZX-3oocDDp8-t-uDUm8aQKvrV0nIE3BLQo,2140
16
+ dataeval/_internal/detectors/ood/aegmm.py,sha256=q2kRXZM5X0PoA10mRsi8Gh-W5FdFzEsnM1yDq3GFzn0,2518
17
+ dataeval/_internal/detectors/ood/base.py,sha256=qYSmcN74x5-qL0_I7uNo4eQ8X8pr7M4cwjs2qvkJt5g,8561
18
+ dataeval/_internal/detectors/ood/llr.py,sha256=VgZtMrMgI8zDVUzsqm2l4tqsULFIhdQeDd4lzdo_G7U,10217
19
+ dataeval/_internal/detectors/ood/vae.py,sha256=iXEltu5pATWr42-28hZ3ARZavJrptLwUM5P9c8omA_s,3030
20
+ dataeval/_internal/detectors/ood/vaegmm.py,sha256=ujp6UN0wpZcmPDPkVfTHZxgka5kuTOSzgXMmbKdmK2U,2947
21
+ dataeval/_internal/detectors/outliers.py,sha256=JmAXoMO0Od7tc9RVFGJsDyOnByciPFG5FdS54Iu0BII,10396
22
+ dataeval/_internal/interop.py,sha256=FLXJY-5hwJcKCtruyvaarqynXCMfcLbQSFvGnrWQDPo,1338
23
+ dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ dataeval/_internal/metrics/balance.py,sha256=8KwuR5HvytJtS1YW9KkNrCu2dPn_gP4FSbXrxQ-9kK8,6182
25
+ dataeval/_internal/metrics/ber.py,sha256=RLRC3ouKYCLYJsA0sqS1gLjE_WFLq7dHElRfVVabvWA,4719
26
+ dataeval/_internal/metrics/coverage.py,sha256=jxvzWpVQrfmN3S9rpXvyb35vpRn1ovV2IdXdB6aU2-w,3560
27
+ dataeval/_internal/metrics/divergence.py,sha256=gKQt9rxmhW8RnODCoLgFSPnCUWEMjmNIPlCV2w6E6oU,4211
28
+ dataeval/_internal/metrics/diversity.py,sha256=ZSlq1KBvkMRVAvlznils2QmlPC73TTpHs1ux7PoFrio,7664
29
+ dataeval/_internal/metrics/parity.py,sha256=eTjltNBJOTFH6T_t9V9-1EFr_U0vqlU642o3x2RWgz0,16527
30
+ dataeval/_internal/metrics/stats/base.py,sha256=HyjgHTQZqgkkCWDzOF-aNZBr88IAjnao8VSbHC5ZtbI,8554
31
+ dataeval/_internal/metrics/stats/boxratiostats.py,sha256=Ac6nB41q43xHCJRDEXHNgsJF80VE8MpH8_kySxA84BE,6342
32
+ dataeval/_internal/metrics/stats/datasetstats.py,sha256=6DFl3TE7t2ggDD8WBVgPH7F2bRvae7NR2PVoEWL92dw,3759
33
+ dataeval/_internal/metrics/stats/dimensionstats.py,sha256=MUQJgrWmRoQFap7gPf8vTFXJ_z7G7bAQpZ7kCPRtNkA,3847
34
+ dataeval/_internal/metrics/stats/hashstats.py,sha256=xH0k_wOeGO5UC7-0fhAIg4WV2fO8fnF0Jdn18gYhW88,2087
35
+ dataeval/_internal/metrics/stats/labelstats.py,sha256=BNxI2flvKhSps2o4-TPbN9nf52ctatI2SuDZ07hah5E,4058
36
+ dataeval/_internal/metrics/stats/pixelstats.py,sha256=LxoDQ6afsNuzB0WnOgmzkEUV7s534MrAYkzS6Be7PPQ,4419
37
+ dataeval/_internal/metrics/stats/visualstats.py,sha256=3uET0N3WgV5dcxst8Xb9DhcATiNfAXsx1OKbPz2mU4Q,4712
38
+ dataeval/_internal/metrics/uap.py,sha256=RumSQey6vNoz9CtOG2_Inb-TurKJrAHqwhkyWBirxhk,2128
39
+ dataeval/_internal/metrics/utils.py,sha256=vW3mQHjF0AvYlml27X5dZgd0YBk3zyBvvztLEfdRkvI,13475
40
+ dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ dataeval/_internal/models/pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ dataeval/_internal/models/pytorch/autoencoder.py,sha256=nPyLjLZrPNla-lMnym3fUW-O1F00JbIrVUrUFU4C4UQ,8364
43
+ dataeval/_internal/models/pytorch/blocks.py,sha256=pm2xwsDZjZJYXrhhiz8husvh2vHmrkFMSYEn-EDUD5Q,1354
44
+ dataeval/_internal/models/pytorch/utils.py,sha256=Qgwym1PxGuwxbXCKUT-8r6Iyrxqm7x94oj45Vf5_CjE,1675
45
+ dataeval/_internal/models/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ dataeval/_internal/models/tensorflow/autoencoder.py,sha256=Ryn11jDbpZJOM5De-kMGPdbJBQEdwip6B20ajS8HqpE,10354
47
+ dataeval/_internal/models/tensorflow/gmm.py,sha256=QoEgbeax1GETqRmUF7A2ih9uFOZfFAjGzgH2ljExlAc,3669
48
+ dataeval/_internal/models/tensorflow/losses.py,sha256=pZH5RnlM9R0RrBde9Lgq32muwAp7_PWc56Mu4u8RVvo,3976
49
+ dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=keI1gTNjBk18YD91Cp4exfuGYWU9lt-wapvhSazhcVs,48319
50
+ dataeval/_internal/models/tensorflow/trainer.py,sha256=xNY0Iw7Qa1TnCuy9N1b77_VduFoW_BhbZjfQCxOVby4,4082
51
+ dataeval/_internal/models/tensorflow/utils.py,sha256=l6jXKMWyQAEI4LpAONq95Xwr7CPgrs408ypf9TuNxkY,8732
52
+ dataeval/_internal/output.py,sha256=qVbOi41dvfQICQ4uxysHPWBRKo1XR61kXHPL_vKOPm0,2545
53
+ dataeval/_internal/utils.py,sha256=jo6bGJZAgyuZqRpAAC4gwhAHYE12316na19ZuFwMqes,1504
54
+ dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
+ dataeval/_internal/workflows/sufficiency.py,sha256=5N07nV5Oi4kUCm_3rJoj0SeIy1iRC3LciEgrR7E9m7Q,18240
56
+ dataeval/detectors/__init__.py,sha256=8nJk2U99_SQD7RtEKjyS4WJct8gX1HgjEm4oHTnRhTI,320
57
+ dataeval/detectors/drift/__init__.py,sha256=9TbJok7fH3mcXcM7c2vT3WZnZr2wanY_8TUwu-8JX58,800
58
+ dataeval/detectors/drift/kernels/__init__.py,sha256=djIbmvYoHWpWxfdYtiouEC2KqzvgmtEqlg1i5p-UCgM,266
59
+ dataeval/detectors/drift/updates/__init__.py,sha256=tiYSA1-AsTiFgC3LuxM8iYFsWUX0Fr8hElzWvU8ovig,267
60
+ dataeval/detectors/linters/__init__.py,sha256=m5F5JgGBcqGb3J_qXQ3PBkKyePjOklrYbM9dGUsgxFA,489
61
+ dataeval/detectors/ood/__init__.py,sha256=K5QrSJg2QePs6Pa3Cg80ZwXu7BELLrSlbEpTdxuL3Ys,777
62
+ dataeval/metrics/__init__.py,sha256=U0sRw5eiqeeDLbLPxT_rznZsvtNwONVxKVwfC0qVOgo,223
63
+ dataeval/metrics/bias/__init__.py,sha256=Wn1Ui_g-9cR4c4IS7RFKJ6UH5DLXKjEBoXTuEYPXSBc,619
64
+ dataeval/metrics/estimators/__init__.py,sha256=4VFMKLPsJdaWiflf84bXGQ2k8ertFQ4WEPhyWqjFFvE,377
65
+ dataeval/metrics/stats/__init__.py,sha256=UcD41gFwFhcQMtqwWkPQlg6cFA2_gdj6yGRCDrKYXM8,1055
66
+ dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
+ dataeval/utils/__init__.py,sha256=cW_5-DIZG2OFRs3FVLOz0uCv4JWdaoVO7C9rOlR7ZEA,526
68
+ dataeval/utils/tensorflow/__init__.py,sha256=XgjqrMtI00ERPPpdokbO1lDyc_H3CZ1TTqUXIj0B6PI,435
69
+ dataeval/utils/tensorflow/loss/__init__.py,sha256=E9eB87LNh0o5nUCqssB027EXBsOfEayNHPcNW0QGFdA,101
70
+ dataeval/utils/tensorflow/models/__init__.py,sha256=OVpmHF8itDcgOlfw6N9jr7IphZPbMJoiu7OdqYhU9fs,291
71
+ dataeval/utils/tensorflow/recon/__init__.py,sha256=xe6gAQqK9tyAoDQTtaJAxIPK1humt5QzsG_9NPsqx58,116
72
+ dataeval/utils/torch/__init__.py,sha256=bYUm-nNlNVU3bqDz7dQHFmaRWgLy3lLrD4cSDumDlxQ,373
73
+ dataeval/utils/torch/datasets/__init__.py,sha256=S6C4OaxEjJJaIpHSZcZfkl4U5iS5YtZ9N5GYHqvbzvM,191
74
+ dataeval/utils/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uTB09E,162
75
+ dataeval/utils/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
76
+ dataeval/workflows/__init__.py,sha256=VFeJyMhZxvj8WnU5Un32mwO8lNfBQOBjD9IdOqexnAE,320
77
+ dataeval-0.70.1.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
78
+ dataeval-0.70.1.dist-info/METADATA,sha256=B2slR1eY_xRR4QcUTpV8EJh5Z_plWmHFqTT5j4r2Vvk,4502
79
+ dataeval-0.70.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
80
+ dataeval-0.70.1.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- from . import loss, models, recon
2
-
3
- __all__ = ["loss", "models", "recon"]
@@ -1,3 +0,0 @@
1
- from . import models, trainer
2
-
3
- __all__ = ["models", "trainer"]