dataeval 0.74.2__py3-none-any.whl → 0.76.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. dataeval/__init__.py +27 -23
  2. dataeval/detectors/__init__.py +2 -2
  3. dataeval/detectors/drift/__init__.py +14 -12
  4. dataeval/detectors/drift/base.py +3 -3
  5. dataeval/detectors/drift/cvm.py +1 -1
  6. dataeval/detectors/drift/ks.py +3 -2
  7. dataeval/detectors/drift/mmd.py +9 -7
  8. dataeval/detectors/drift/torch.py +12 -12
  9. dataeval/detectors/drift/uncertainty.py +5 -4
  10. dataeval/detectors/drift/updates.py +1 -1
  11. dataeval/detectors/linters/__init__.py +4 -4
  12. dataeval/detectors/linters/clusterer.py +5 -9
  13. dataeval/detectors/linters/duplicates.py +10 -14
  14. dataeval/detectors/linters/outliers.py +100 -5
  15. dataeval/detectors/ood/__init__.py +4 -11
  16. dataeval/detectors/ood/{ae_torch.py → ae.py} +6 -4
  17. dataeval/detectors/ood/base.py +47 -160
  18. dataeval/detectors/ood/metadata_ks_compare.py +34 -42
  19. dataeval/detectors/ood/metadata_least_likely.py +3 -3
  20. dataeval/detectors/ood/metadata_ood_mi.py +6 -5
  21. dataeval/detectors/ood/mixin.py +146 -0
  22. dataeval/detectors/ood/output.py +63 -0
  23. dataeval/interop.py +7 -6
  24. dataeval/{logging.py → log.py} +2 -0
  25. dataeval/metrics/__init__.py +3 -3
  26. dataeval/metrics/bias/__init__.py +10 -13
  27. dataeval/metrics/bias/balance.py +13 -11
  28. dataeval/metrics/bias/coverage.py +53 -5
  29. dataeval/metrics/bias/diversity.py +56 -24
  30. dataeval/metrics/bias/parity.py +20 -17
  31. dataeval/metrics/estimators/__init__.py +2 -2
  32. dataeval/metrics/estimators/ber.py +7 -4
  33. dataeval/metrics/estimators/divergence.py +4 -4
  34. dataeval/metrics/estimators/uap.py +4 -4
  35. dataeval/metrics/stats/__init__.py +19 -19
  36. dataeval/metrics/stats/base.py +28 -12
  37. dataeval/metrics/stats/boxratiostats.py +13 -14
  38. dataeval/metrics/stats/datasetstats.py +49 -20
  39. dataeval/metrics/stats/dimensionstats.py +8 -8
  40. dataeval/metrics/stats/hashstats.py +14 -10
  41. dataeval/metrics/stats/labelstats.py +94 -11
  42. dataeval/metrics/stats/pixelstats.py +11 -14
  43. dataeval/metrics/stats/visualstats.py +10 -13
  44. dataeval/output.py +23 -14
  45. dataeval/utils/__init__.py +5 -14
  46. dataeval/utils/dataset/__init__.py +7 -0
  47. dataeval/utils/{torch → dataset}/datasets.py +2 -0
  48. dataeval/utils/dataset/read.py +63 -0
  49. dataeval/utils/{split_dataset.py → dataset/split.py} +38 -30
  50. dataeval/utils/image.py +2 -2
  51. dataeval/utils/metadata.py +317 -14
  52. dataeval/{metrics/bias/metadata_utils.py → utils/plot.py} +91 -71
  53. dataeval/utils/torch/__init__.py +2 -17
  54. dataeval/utils/torch/gmm.py +29 -6
  55. dataeval/utils/torch/{utils.py → internal.py} +82 -58
  56. dataeval/utils/torch/models.py +10 -8
  57. dataeval/utils/torch/trainer.py +6 -85
  58. dataeval/workflows/__init__.py +2 -5
  59. dataeval/workflows/sufficiency.py +18 -8
  60. {dataeval-0.74.2.dist-info → dataeval-0.76.0.dist-info}/LICENSE.txt +2 -2
  61. dataeval-0.76.0.dist-info/METADATA +137 -0
  62. dataeval-0.76.0.dist-info/RECORD +67 -0
  63. dataeval/detectors/ood/base_torch.py +0 -109
  64. dataeval/metrics/bias/metadata_preprocessing.py +0 -285
  65. dataeval/utils/gmm.py +0 -26
  66. dataeval-0.74.2.dist-info/METADATA +0 -120
  67. dataeval-0.74.2.dist-info/RECORD +0 -66
  68. {dataeval-0.74.2.dist-info → dataeval-0.76.0.dist-info}/WHEEL +0 -0
dataeval/utils/torch/{utils.py → internal.py}
@@ -1,70 +1,15 @@
  from __future__ import annotations

- __all__ = ["read_dataset"]
+ __all__ = []

- from collections import defaultdict
  from functools import partial
  from typing import Any, Callable

  import numpy as np
  import torch
  from numpy.typing import NDArray
- from torch.utils.data import Dataset
-
-
- def read_dataset(dataset: Dataset[Any]) -> list[list[Any]]:
-     """
-     Extract information from a dataset at each index into individual lists of each information position
-
-     Parameters
-     ----------
-     dataset : torch.utils.data.Dataset
-         Input dataset
-
-     Returns
-     -------
-     List[List[Any]]
-         All objects in individual lists based on return position from dataset
-
-     Warning
-     -------
-     No type checking is done between lists or data inside lists
-
-     See Also
-     --------
-     torch.utils.data.Dataset
-
-     Examples
-     --------
-     >>> import numpy as np
-     >>> data = np.ones((10, 1, 3, 3))
-     >>> labels = np.ones((10,))
-     >>> class ICDataset:
-     ...     def __init__(self, data, labels):
-     ...         self.data = data
-     ...         self.labels = labels
-     ...
-     ...     def __getitem__(self, idx):
-     ...         return self.data[idx], self.labels[idx]
-
-     >>> ds = ICDataset(data, labels)
-
-     >>> result = read_dataset(ds)
-     >>> len(result) # images and labels
-     2
-     >>> np.asarray(result[0]).shape # images
-     (10, 1, 3, 3)
-     >>> np.asarray(result[1]).shape # labels
-     (10,)
-     """
-
-     ddict: dict[int, list[Any]] = defaultdict(list[Any])
-
-     for data in dataset:
-         for i, d in enumerate(data if isinstance(data, tuple) else (data,)):
-             ddict[i].append(d)
-
-     return list(ddict.values())
+ from torch.utils.data import DataLoader, TensorDataset
+ from tqdm import tqdm


  def get_device(device: str | torch.device | None = None) -> torch.device:
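Note that `read_dataset` was relocated rather than dropped: file 48 in the list above adds `dataeval/utils/dataset/read.py`. Assuming the function's signature and behavior are unchanged from the deleted version, usage would look like this sketch (adapted from the deleted doctest):

```python
# Sketch assuming read_dataset moved unchanged to dataeval.utils.dataset.read.
import numpy as np

from dataeval.utils.dataset.read import read_dataset


class ICDataset:
    """Toy dataset returning (image, label) tuples, as in the old doctest."""

    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __getitem__(self, idx):
        # Raises IndexError past the end, which terminates plain iteration.
        return self.data[idx], self.labels[idx]


ds = ICDataset(np.ones((10, 1, 3, 3)), np.ones((10,)))
images, labels = read_dataset(ds)  # one list per return position
print(np.asarray(images).shape)  # (10, 1, 3, 3)
print(np.asarray(labels).shape)  # (10,)
```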
@@ -167,3 +112,82 @@ def predict_batch(
          tuple(concat(p) for p in preds) if isinstance(preds, tuple) else concat(preds) # type: ignore
      )
      return out
+
+
+ def trainer(
+     model: torch.nn.Module,
+     x_train: NDArray[Any],
+     y_train: NDArray[Any] | None,
+     loss_fn: Callable[..., torch.Tensor | torch.nn.Module] | None,
+     optimizer: torch.optim.Optimizer | None,
+     preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None,
+     epochs: int,
+     batch_size: int,
+     device: torch.device,
+     verbose: bool,
+ ) -> None:
+     """
+     Train Pytorch model.
+
+     Parameters
+     ----------
+     model
+         Model to train.
+     loss_fn
+         Loss function used for training.
+     x_train
+         Training data.
+     y_train
+         Training labels.
+     optimizer
+         Optimizer used for training.
+     preprocess_fn
+         Preprocessing function applied to each training batch.
+     epochs
+         Number of training epochs.
+     reg_loss_fn
+         Allows an additional regularisation term to be defined as reg_loss_fn(model)
+     batch_size
+         Batch size used for training.
+     buffer_size
+         Maximum number of elements that will be buffered when prefetching.
+     verbose
+         Whether to print training progress.
+     """
+     if optimizer is None:
+         optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
+
+     if y_train is None:
+         dataset = TensorDataset(torch.from_numpy(x_train).to(torch.float32))
+
+     else:
+         dataset = TensorDataset(
+             torch.from_numpy(x_train).to(torch.float32), torch.from_numpy(y_train).to(torch.float32)
+         )
+
+     loader = DataLoader(dataset=dataset)
+
+     model = model.to(device)
+
+     # iterate over epochs
+     loss = torch.nan
+     disable_tqdm = not verbose
+     for epoch in (pbar := tqdm(range(epochs), disable=disable_tqdm)):
+         epoch_loss = loss
+         for step, data in enumerate(loader):
+             if step % 250 == 0:
+                 pbar.set_description(f"Epoch: {epoch} ({epoch_loss:.3f}), loss: {loss:.3f}")
+
+             x, y = [d.to(device) for d in data] if len(data) > 1 else (data[0].to(device), None)
+
+             if isinstance(preprocess_fn, Callable):
+                 x = preprocess_fn(x)
+
+             y_hat = model(x)
+             y = x if y is None else y
+
+             loss = loss_fn(y, y_hat) # type: ignore
+
+             optimizer.zero_grad()
+             loss.backward()
+             optimizer.step()
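Although `trainer` moved into an internal module, the hunk above documents its full signature. A hedged sketch of a call with a toy autoencoder-style setup (the model, data, and hyperparameters are illustrative, not DataEval defaults):

```python
# Illustrative call of the internal trainer helper shown above.
import numpy as np
import torch

from dataeval.utils.torch.internal import trainer

model = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.ReLU(), torch.nn.Linear(8, 4))
x_train = np.random.rand(64, 4).astype(np.float32)

trainer(
    model=model,
    x_train=x_train,
    y_train=None,                   # None -> autoencoder-style: loss_fn(x, model(x))
    loss_fn=torch.nn.MSELoss(),
    optimizer=None,                 # None -> Adam(lr=0.001), per the code above
    preprocess_fn=None,
    epochs=5,
    batch_size=32,                  # note: not forwarded to DataLoader in this version
    device=torch.device("cpu"),
    verbose=False,
)
```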
dataeval/utils/torch/models.py
@@ -1,6 +1,8 @@
+ """Simple PyTorch model architectures used by DataEval."""
+
  from __future__ import annotations

- __all__ = ["AriaAutoencoder", "Encoder", "Decoder"]
+ __all__ = ["Autoencoder", "Encoder", "Decoder"]

  import math
  from typing import Any
@@ -9,7 +11,7 @@ import torch
  import torch.nn as nn


- class AriaAutoencoder(nn.Module):
+ class Autoencoder(nn.Module):
      """
      An autoencoder model with a separate encoder and decoder.

@@ -63,7 +65,7 @@ class Encoder(nn.Module):
      """
      A simple encoder to be used in an autoencoder model.

-     This is the encoder used by the AriaAutoencoder model.
+     This is the encoder used by the Autoencoder model.

      Parameters
      ----------
@@ -104,7 +106,7 @@ class Decoder(nn.Module):
      """
      A simple decoder to be used in an autoencoder model.

-     This is the decoder used by the AriaAutoencoder model.
+     This is the decoder used by the Autoencoder model.

      Parameters
      ----------
@@ -142,14 +144,14 @@ class Decoder(nn.Module):

  class AE(nn.Module):
      """
-     An autoencoder model with a separate encoder and decoder. Meant to replace the TensorFlow model called AE, which we
-     used as the core of an autoencoder-based OOD detector, i.e. as an argument to OOD_AE().
+     An autoencoder model with a separate encoder and decoder used as the core of an autoencoder-based
+     OOD detector, i.e. as an argument to OOD_AE().

      Parameters
      ----------
      input_shape : tuple[int, int, int]
          Number of input channels, number of rows, number of columns. (Number of examples per batch will be inferred
-     at runtime.)
+         at runtime.)
      """

      def __init__(self, input_shape: tuple[int, int, int]) -> None:
@@ -279,7 +281,7 @@ class Decoder_AE(nn.Module):
      """
      A simple decoder to be used in an autoencoder model.

-     This is the decoder used by the AriaAutoencoder model.
+     This is the decoder used by the Autoencoder model.

      Parameters
      ----------
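Taken together, the models.py changes rename AriaAutoencoder to Autoencoder and reword the AE docstrings. A minimal sketch of constructing the AE backbone using the `input_shape` signature shown above; the shape values, and the assumption that calling the module returns a reconstruction, are illustrative:

```python
# Sketch: build the AE backbone used by an autoencoder-based OOD detector.
# (1, 32, 32) = channels, rows, cols; batch size is inferred at runtime.
import torch

from dataeval.utils.torch.models import AE

backbone = AE(input_shape=(1, 32, 32))
x = torch.rand(8, 1, 32, 32)
x_hat = backbone(x)  # assumed to return a reconstruction of x
```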
dataeval/utils/torch/trainer.py
@@ -1,15 +1,15 @@
+ """Utility classes for training PyTorch models."""
+
  from __future__ import annotations

- from typing import Any, Callable
+ __all__ = ["AETrainer"]
+
+ from typing import Any

  import torch
  import torch.nn as nn
- from numpy.typing import NDArray
  from torch.optim import Adam
- from torch.utils.data import DataLoader, Dataset, TensorDataset
- from tqdm import tqdm
-
- __all__ = ["AETrainer", "trainer"]
+ from torch.utils.data import DataLoader, Dataset


  def get_images_from_batch(batch: Any) -> Any:
@@ -176,82 +176,3 @@ class AETrainer:
              encodings = torch.vstack((encodings, embeddings)) if len(encodings) else embeddings

          return encodings
-
-
- def trainer(
-     model: torch.nn.Module,
-     x_train: NDArray[Any],
-     y_train: NDArray[Any] | None,
-     loss_fn: Callable[..., torch.Tensor | torch.nn.Module] | None,
-     optimizer: torch.optim.Optimizer | None,
-     preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None,
-     epochs: int,
-     batch_size: int,
-     device: torch.device,
-     verbose: bool,
- ) -> None:
-     """
-     Train Pytorch model.
-
-     Parameters
-     ----------
-     model
-         Model to train.
-     loss_fn
-         Loss function used for training.
-     x_train
-         Training data.
-     y_train
-         Training labels.
-     optimizer
-         Optimizer used for training.
-     preprocess_fn
-         Preprocessing function applied to each training batch.
-     epochs
-         Number of training epochs.
-     reg_loss_fn
-         Allows an additional regularisation term to be defined as reg_loss_fn(model)
-     batch_size
-         Batch size used for training.
-     buffer_size
-         Maximum number of elements that will be buffered when prefetching.
-     verbose
-         Whether to print training progress.
-     """
-     if optimizer is None:
-         optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-
-     if y_train is None:
-         dataset = TensorDataset(torch.from_numpy(x_train).to(torch.float32))
-
-     else:
-         dataset = TensorDataset(
-             torch.from_numpy(x_train).to(torch.float32), torch.from_numpy(y_train).to(torch.float32)
-         )
-
-     loader = DataLoader(dataset=dataset)
-
-     model = model.to(device)
-
-     # iterate over epochs
-     loss = torch.nan
-     disable_tqdm = not verbose
-     for epoch in (pbar := tqdm(range(epochs), disable=disable_tqdm)):
-         epoch_loss = loss
-         for step, data in enumerate(loader):
-             if step % 250 == 0:
-                 pbar.set_description(f"Epoch: {epoch} ({epoch_loss:.3f}), loss: {loss:.3f}")
-
-             x, y = [d.to(device) for d in data] if len(data) > 1 else (data[0].to(device), None)
-
-             if isinstance(preprocess_fn, Callable):
-                 x = preprocess_fn(x)
-
-             y_hat = model(x)
-             y = x if y is None else y
-
-             loss = loss_fn(y, y_hat) # type: ignore
-
-             optimizer.zero_grad()
-             loss.backward()
-             optimizer.step()
dataeval/workflows/__init__.py
@@ -2,9 +2,6 @@
  Workflows perform a sequence of actions to analyze the dataset and make predictions.
  """

- from dataeval import _IS_TORCH_AVAILABLE
+ __all__ = ["Sufficiency", "SufficiencyOutput"]

- if _IS_TORCH_AVAILABLE:
-     from dataeval.workflows.sufficiency import Sufficiency, SufficiencyOutput
-
-     __all__ = ["Sufficiency", "SufficiencyOutput"]
+ from dataeval.workflows.sufficiency import Sufficiency, SufficiencyOutput
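Because torch (>=2.2.0) is an unconditional requirement in 0.76.0 (see the METADATA added below), the `_IS_TORCH_AVAILABLE` guard is gone and the workflow symbols import unconditionally. A quick smoke test might look like:

```python
# With the availability guard removed, these imports succeed whenever
# dataeval itself is installed, since torch is now a hard dependency.
from dataeval.workflows import Sufficiency, SufficiencyOutput

assert {"Sufficiency", "SufficiencyOutput"} <= set(dir())
```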
dataeval/workflows/sufficiency.py
@@ -1,16 +1,15 @@
  from __future__ import annotations

- __all__ = ["SufficiencyOutput", "Sufficiency"]
+ __all__ = []

+ import contextlib
  import warnings
  from dataclasses import dataclass
  from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, TypeVar, cast

- import matplotlib.pyplot as plt
  import numpy as np
  import torch
  import torch.nn as nn
- from matplotlib.figure import Figure
  from numpy.typing import ArrayLike, NDArray
  from scipy.optimize import basinhopping
  from torch.utils.data import Dataset
@@ -18,11 +17,14 @@ from torch.utils.data import Dataset
  from dataeval.interop import as_numpy
  from dataeval.output import Output, set_metadata

+ with contextlib.suppress(ImportError):
+     from matplotlib.figure import Figure
+

  @dataclass(frozen=True)
  class SufficiencyOutput(Output):
      """
-     Output class for :class:`Sufficiency` workflow
+     Output class for :class:`Sufficiency` workflow.

      Attributes
      ----------
@@ -97,7 +99,7 @@

          Returns
          -------
-         list[plt.Figure]
+         list[Figure]
              List of Figures for each measure

          Raises
@@ -344,7 +346,9 @@ def plot_measure(
      params: NDArray[Any],
      projection: NDArray[Any],
  ) -> Figure:
-     fig = plt.figure()
+     import matplotlib.pyplot
+
+     fig = matplotlib.pyplot.figure()
      fig = cast(Figure, fig)
      fig.tight_layout()

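These two hunks make matplotlib a soft dependency: the `Figure` type import is wrapped in `contextlib.suppress(ImportError)` at module scope, and `pyplot` is imported lazily inside the plotting function. A generic sketch of the same pattern (`render_curve` is a hypothetical function, not DataEval API):

```python
from __future__ import annotations

import contextlib

# Module import never fails when matplotlib is absent; Figure is only
# needed for the return annotation, which is lazily evaluated.
with contextlib.suppress(ImportError):
    from matplotlib.figure import Figure


def render_curve(values: list[float]) -> Figure:
    # Defer the heavy pyplot import until a plot is actually requested.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.plot(values)
    return fig
```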
@@ -374,7 +378,7 @@ T = TypeVar("T")

  class Sufficiency(Generic[T]):
      """
-     Project dataset :term:`sufficiency<Sufficiency>` using given a model and evaluation criteria
+     Project dataset :term:`sufficiency<Sufficiency>` using given a model and evaluation criteria.

      Parameters
      ----------
@@ -510,7 +514,13 @@
          Examples
          --------
          >>> suff = Sufficiency(
-         ...     model=model, train_ds=train_ds, test_ds=test_ds, train_fn=train_fn, eval_fn=eval_fn, runs=3, substeps=5
+         ...     model=model,
+         ...     train_ds=train_ds,
+         ...     test_ds=test_ds,
+         ...     train_fn=train_fn,
+         ...     eval_fn=eval_fn,
+         ...     runs=3,
+         ...     substeps=5,
          ... )
          >>> suff.evaluate()
          SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
{dataeval-0.74.2.dist-info → dataeval-0.76.0.dist-info}/LICENSE.txt
@@ -1,6 +1,6 @@
  MIT License

- Copyright (c) 2024 ARiA
+ Copyright (c) 2025 ARiA

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
+ SOFTWARE.
dataeval-0.76.0.dist-info/METADATA
@@ -0,0 +1,137 @@
+ Metadata-Version: 2.1
+ Name: dataeval
+ Version: 0.76.0
+ Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
+ Home-page: https://dataeval.ai/
+ License: MIT
+ Author: Andrew Weng
+ Author-email: andrew.weng@ariacoustics.com
+ Maintainer: ARiA
+ Maintainer-email: dataeval@ariacoustics.com
+ Requires-Python: >=3.9,<3.13
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Topic :: Scientific/Engineering
+ Provides-Extra: all
+ Requires-Dist: matplotlib ; extra == "all"
+ Requires-Dist: numpy (>=1.24.2)
+ Requires-Dist: pillow (>=10.3.0)
+ Requires-Dist: requests
+ Requires-Dist: scikit-learn (>=1.5.0)
+ Requires-Dist: scipy (>=1.10)
+ Requires-Dist: torch (>=2.2.0)
+ Requires-Dist: torchvision (>=0.17.0)
+ Requires-Dist: tqdm
+ Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "4.0"
+ Requires-Dist: xxhash (>=3.3)
+ Project-URL: Documentation, https://dataeval.readthedocs.io/
+ Project-URL: Repository, https://github.com/aria-ml/dataeval/
+ Description-Content-Type: text/markdown
+
+ # DataEval
+
+ To view our extensive collection of tutorials, how-tos, explanation guides, and reference material, please visit our documentation on **[Read the Docs](https://dataeval.readthedocs.io/)**.
+
+ ## About DataEval
+
+ <!-- start tagline -->
+
+ DataEval curates datasets to train and test performant, robust, unbiased and reliable AI models and monitors for data shifts that impact performance of deployed models.
+
+ <!-- end tagline -->
+
+ ### Our mission
+
+ <!-- start needs -->
+
+ DataEval is an effective, powerful, and reliable set of tools for any T&E engineer. Throughout all stages of the machine learning lifecycle, DataEval supports model development, data analysis, and monitoring with state-of-the-art algorithms to help you solve difficult problems. With a focus on computer vision tasks, DataEval provides simple but effective metrics for performance estimation, bias detection, and dataset linting.
+
+ <!-- end needs -->
+
+ <!-- start JATIC interop -->
+ DataEval is easy to install, supports a wide range of Python versions, and is compatible with many of the most popular packages in the scientific and T&E communities.
+ DataEval also has native interoperability with JATIC's suite of tools when using MAITE-compliant datasets and models.
+ <!-- end JATIC interop -->
+
+ ## Getting Started
+
+ **Python versions:** 3.9 - 3.12
+
+ **Supported packages**: *NumPy*, *Pandas*, *scikit-learn*, *MAITE*, *NRTK*, *Gradient*
+
+ Choose your preferred method of installation below or follow our [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
+
+ * [Installing with pip](#installing-with-pip)
+ * [Installing with conda/mamba](#installing-with-conda)
+ * [Installing from GitHub](#installing-from-github)
+
+ ### **Installing with pip**
+
+ You can install DataEval directly from pypi.org using the following command. DataEval's optional dependencies are grouped under the `all` extra.
+
+ ```bash
+ pip install dataeval[all]
+ ```
+
+ ### **Installing with conda**
+
+ DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. As some dependencies
+ are installed from the `pytorch` channel, the channel is specified in the below example.
+
+ ```bash
+ micromamba create -f environment/environment.yaml -c pytorch
+ ```
+
+ ### **Installing from GitHub**
+
+ To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
+
+ ```bash
+ sudo apt-get install git-lfs
+ pip install poetry
+ ```
+
+ Pull the source down and change to the DataEval project directory.
+
+ ```bash
+ git clone https://github.com/aria-ml/dataeval.git
+ cd dataeval
+ ```
+
+ Install DataEval with optional dependencies for development.
+
+ ```bash
+ poetry install --all-extras --with dev
+ ```
+
+ Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
+
+ ```bash
+ poetry shell
+ ```
+
+ ## Contact Us
+
+ If you have any questions, feel free to reach out to the people below:
+
+ * **POC**: Scott Swan @scott.swan
+ * **DPOC**: Andrew Weng @aweng
+
+ ## Acknowledgement
+
+ <!-- start acknowledgement -->
+
+ ### CDAO Funding Acknowledgement
+
+ This material is based upon work supported by the Chief Digital and Artificial Intelligence Office under Contract No. W519TC-23-9-2033. The views and conclusions contained herein are those of the author(s) and should not be interpreted as necessarily representing the official policies or endorsements, either expressed or implied, of the U.S. Government.
+
+ <!-- end acknowledgement -->
+
dataeval-0.76.0.dist-info/RECORD
@@ -0,0 +1,67 @@
+ dataeval/__init__.py,sha256=TSINwIPlGIGiYd66kY8gnBnEpBhcgWm7_029htFBgv8,1474
+ dataeval/detectors/__init__.py,sha256=iifG-Z08mH5B4QhkKtAieDGJBKldKvmCXpDQJD9qVY8,206
+ dataeval/detectors/drift/__init__.py,sha256=wO294Oz--l0GuZTAkBpyGwZphbQsot57HoiEX6kjNOc,652
+ dataeval/detectors/drift/base.py,sha256=8zHUnUpmgpWMzDv5C-tUX61lbpDjhJ-eAIiNxaNvWP8,14469
+ dataeval/detectors/drift/cvm.py,sha256=TATS6IOE0INO1pkyRkesgrhDawD_kITsRsOOGVRs420,4132
+ dataeval/detectors/drift/ks.py,sha256=SAd2T9CdytXD7DegCzAX1pWYJdPuttyL97KAQYF4j7Y,4265
+ dataeval/detectors/drift/mmd.py,sha256=z7JPFbW4fmHJhR-Qe1OQ4mM8kW6dNxnd3uHD9oXMETE,7599
+ dataeval/detectors/drift/torch.py,sha256=ykD-Nggys5T9FTGXXbYYOi2WRKwEzEjXhL8ZueVmTxU,7659
+ dataeval/detectors/drift/uncertainty.py,sha256=zkrqz5euJJtYFKsDiRqFfTnDjVOTbqpZWgZiGMrYxvI,5351
+ dataeval/detectors/drift/updates.py,sha256=nKsF4xrMFZd2X84GJ5XnGylUuketX_RcH7UpcdlonIo,1781
+ dataeval/detectors/linters/__init__.py,sha256=CZV5naeYQYL3sHXO_CXB26AXkyTeKHI-TMaewtEs8Ag,483
+ dataeval/detectors/linters/clusterer.py,sha256=V-bNs4ut2E6SIqU4MR1Y96WBZcs4cavQhvXBB0vFZPw,20937
+ dataeval/detectors/linters/duplicates.py,sha256=Ba-Nmbjqg_HDMlEBqlWW1aFO_BA-HSc-uWHc3cmI394,5620
+ dataeval/detectors/linters/merged_stats.py,sha256=X-bDTwjyR8RuVmzxLaHZmQ5nI3oOWvsqVlitdSncapk,1355
+ dataeval/detectors/linters/outliers.py,sha256=aGGGOJKs0FTObQtj1m-ench0MHADOhrhC8idf1wRB0s,13786
+ dataeval/detectors/ood/__init__.py,sha256=Ws6_un4pFWNknki7Bp7qjrslZVB9pYNE-K72u2lF65k,291
+ dataeval/detectors/ood/ae.py,sha256=SL8oKTERhMwaZTQWwDhQQ6H07UKj8ozXqEWO3TaOAos,2151
+ dataeval/detectors/ood/base.py,sha256=-ApcC9lyZJAgk-joMpLXF20sJqtvlAugg-W18TcAsEw,3010
+ dataeval/detectors/ood/metadata_ks_compare.py,sha256=-hEhDNXFC7X8wmFeoigO7A7Qn90vRLroN_nKDwNgjnE,5204
+ dataeval/detectors/ood/metadata_least_likely.py,sha256=rb8GOgsrlrEzc6fxccdmyZQ5PC7HtTsTY8U97D-h5OU,5088
+ dataeval/detectors/ood/metadata_ood_mi.py,sha256=7_Sdzf7-x1TlrIQvSyOIB98C8_UQhUwmwFQmZ9_q1Uc,4042
+ dataeval/detectors/ood/mixin.py,sha256=Ia-rJF6rtGhE8uavijdbzOha3ueFk2CFfA0Ah_mnF40,4976
+ dataeval/detectors/ood/output.py,sha256=yygnsjaIQB6v6sXh7glqX2aoqWdf3_YLINqx7BGKMtk,1710
+ dataeval/interop.py,sha256=P9Kwe-vOVgbn1ng60y4giCnJYmHjIOpyGpccuIA7P1g,2322
+ dataeval/log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
+ dataeval/metrics/__init__.py,sha256=OMntcHmmrsOfIlRsJTZQQaF5qXEuP61Li-ElKy7Ysbk,240
+ dataeval/metrics/bias/__init__.py,sha256=SIg4Qxza9BqXyKNQLIY0bpqoFvZfK5-GaejpTH6efVc,601
+ dataeval/metrics/bias/balance.py,sha256=B1sPackyodiBct9Hs88BR4nJde_R61JyjwSBIG_CFug,9171
+ dataeval/metrics/bias/coverage.py,sha256=igVDWJSrO2MvaTEiDUhVzVWPGNB1QOZvngCi8UF0RwA,5746
+ dataeval/metrics/bias/diversity.py,sha256=nF1y2FaQIU0yHQtckoddjqoty2hsVVMqwaXWHRdGfqA,8521
+ dataeval/metrics/bias/parity.py,sha256=rzi7Z0Z6injCaj2vkbSsZvbKMfk1EN648oKinv5y5Dk,12760
+ dataeval/metrics/estimators/__init__.py,sha256=oY_9jX7V-Kg7-4KpvMNB4rUhsk8QTA0DIoM8d2VtVIg,380
+ dataeval/metrics/estimators/ber.py,sha256=vcndXr0PNLRlYz7u7K74f-B5g3DnUkaTO_WigGdj0cg,5012
+ dataeval/metrics/estimators/divergence.py,sha256=joqqlH0AQFibJkHCCb7i7dMJIGF28fmZIR-tGupQQJQ,4247
+ dataeval/metrics/estimators/uap.py,sha256=ZAQUjJCbdulftWk6yjILCbnXGOE8RuDqEINZRtMW3tc,2143
+ dataeval/metrics/stats/__init__.py,sha256=pUT84sOxDiCHW6xz6Ml1Mf1bFszQrtd3qPG0Ja3boxA,1088
+ dataeval/metrics/stats/base.py,sha256=1ejjwlA0FmllcAw7J9Yv1r7GMmBYKvuGPzmDk9ktASM,12613
+ dataeval/metrics/stats/boxratiostats.py,sha256=PS1wvWwhTCMJX56erfPW-BZymXrevvXnKl2PkE0qmLk,6315
+ dataeval/metrics/stats/datasetstats.py,sha256=mt5t5WhlVI7mo56dmhqgnk1eH8oBV7dahgmqkFDcKo0,7387
+ dataeval/metrics/stats/dimensionstats.py,sha256=AlPor23dUH718jFNiVNedHQVaQzN-6OKQEVDQbnGE50,4027
+ dataeval/metrics/stats/hashstats.py,sha256=5nNSJ3Tl8gPqpYlWpxl7EHfW6pJd1BtbXYUiuGgH4Eo,5070
+ dataeval/metrics/stats/labelstats.py,sha256=v9EAg-9h0OtuoU0r3K5TJbHj87fjmnWnNdtg0EPp8co,7030
+ dataeval/metrics/stats/pixelstats.py,sha256=tfvu0tYPgDS0jCCSY2sZ2Ice5r1nNuKx-LYXxZQCw7s,4220
+ dataeval/metrics/stats/visualstats.py,sha256=pEQnAPFg-zQ1U5orwF0-U7kfHuZGjMJDsdEMAoDZd4I,4634
+ dataeval/output.py,sha256=Dyfv1xlrwSbCe7HdDyq8t-kiIRJbBeaMEmMROr1FrVQ,4034
+ dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/utils/__init__.py,sha256=WW9e_1RbtkvLDRqu1NpDw3-V4su4mA8yJ_P3bgd_7Ho,283
+ dataeval/utils/dataset/__init__.py,sha256=IvRauQaa0CzJ5nZrfTSjGoaaKelyJcQDe3OPRw0-NXs,332
+ dataeval/utils/dataset/datasets.py,sha256=7tSqN3d8UncqmXh4eiEwarXgVxc4sMuIKPTqBCE0pN8,15080
+ dataeval/utils/dataset/read.py,sha256=Q_RaNTFXhkMsx3PrgJEIySdHAA-QxGuih6eq6mnJv-4,1524
+ dataeval/utils/dataset/split.py,sha256=1vNy5I1zZx-LIf8B0y57dUaO_UdVd1hyJggUANkwNtM,18958
+ dataeval/utils/image.py,sha256=AQljELyMFkYsf2AoNOH5dZG8DYE4hPw0MCk85eIXqAw,1926
+ dataeval/utils/metadata.py,sha256=SjYPXvM7x_3OyQbdfn4WsViqMplEjRxTdz8tjSJEP3E,22497
+ dataeval/utils/plot.py,sha256=YyFL1KoJgnl2Bip7m73WVBJa6zbsBnn5c1b3skFfUrA,7068
+ dataeval/utils/shared.py,sha256=xvF3VLfyheVwJtdtDrneOobkKf7t-JTmf_w91FWXmqo,3616
+ dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
+ dataeval/utils/torch/blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
+ dataeval/utils/torch/gmm.py,sha256=fQ8CBO4Bf6i9N1CZdeJ8VJP25fsPjgMextQkondwgvo,3693
+ dataeval/utils/torch/internal.py,sha256=qAzQTwTI9Qy6f01Olw3d1TIJ4HoWGf0gQzgWVcdD2x4,6653
+ dataeval/utils/torch/models.py,sha256=Df3B_9x5uu-Y5ZOyhRZYpKJnDvxt0hgMeJLy1E4oxpU,8519
+ dataeval/utils/torch/trainer.py,sha256=Qay0LK63RuyoGYiJ5zI2C5BVym309ORvp6shhpcrIU4,5589
+ dataeval/workflows/__init__.py,sha256=L9yfBipNFGnYuN2JbMknIHDvziwfa2XAGFnOwifZbls,216
+ dataeval/workflows/sufficiency.py,sha256=jf53J1PAlfRHSjGpMCWRJzImitLtCQvTMCaMm28ZuPM,18675
+ dataeval-0.76.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+ dataeval-0.76.0.dist-info/METADATA,sha256=zk12Bkp0R6Glx-VSrG7ip45aTU4y6i_P_mPw2c_SQ6w,5140
+ dataeval-0.76.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ dataeval-0.76.0.dist-info/RECORD,,