dataeval 0.76.0__py3-none-any.whl → 0.81.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. dataeval/__init__.py +3 -3
  2. dataeval/{output.py → _output.py} +14 -0
  3. dataeval/config.py +77 -0
  4. dataeval/detectors/__init__.py +1 -1
  5. dataeval/detectors/drift/__init__.py +6 -6
  6. dataeval/detectors/drift/{base.py → _base.py} +41 -30
  7. dataeval/detectors/drift/{cvm.py → _cvm.py} +21 -28
  8. dataeval/detectors/drift/{ks.py → _ks.py} +20 -26
  9. dataeval/detectors/drift/{mmd.py → _mmd.py} +33 -19
  10. dataeval/detectors/drift/{torch.py → _torch.py} +2 -1
  11. dataeval/detectors/drift/{uncertainty.py → _uncertainty.py} +23 -7
  12. dataeval/detectors/drift/updates.py +1 -1
  13. dataeval/detectors/linters/__init__.py +0 -3
  14. dataeval/detectors/linters/duplicates.py +17 -8
  15. dataeval/detectors/linters/outliers.py +52 -43
  16. dataeval/detectors/ood/ae.py +29 -8
  17. dataeval/detectors/ood/base.py +5 -4
  18. dataeval/detectors/ood/metadata_ks_compare.py +1 -1
  19. dataeval/detectors/ood/mixin.py +20 -5
  20. dataeval/detectors/ood/output.py +1 -1
  21. dataeval/detectors/ood/vae.py +73 -0
  22. dataeval/metadata/__init__.py +5 -0
  23. dataeval/metadata/_ood.py +238 -0
  24. dataeval/metrics/__init__.py +1 -1
  25. dataeval/metrics/bias/__init__.py +5 -4
  26. dataeval/metrics/bias/{balance.py → _balance.py} +67 -17
  27. dataeval/metrics/bias/{coverage.py → _coverage.py} +41 -35
  28. dataeval/metrics/bias/{diversity.py → _diversity.py} +17 -12
  29. dataeval/metrics/bias/{parity.py → _parity.py} +89 -63
  30. dataeval/metrics/estimators/__init__.py +14 -4
  31. dataeval/metrics/estimators/{ber.py → _ber.py} +42 -11
  32. dataeval/metrics/estimators/_clusterer.py +104 -0
  33. dataeval/metrics/estimators/{divergence.py → _divergence.py} +18 -13
  34. dataeval/metrics/estimators/{uap.py → _uap.py} +4 -4
  35. dataeval/metrics/stats/__init__.py +7 -7
  36. dataeval/metrics/stats/{base.py → _base.py} +52 -16
  37. dataeval/metrics/stats/{boxratiostats.py → _boxratiostats.py} +6 -9
  38. dataeval/metrics/stats/{datasetstats.py → _datasetstats.py} +10 -14
  39. dataeval/metrics/stats/{dimensionstats.py → _dimensionstats.py} +6 -5
  40. dataeval/metrics/stats/{hashstats.py → _hashstats.py} +6 -6
  41. dataeval/metrics/stats/{labelstats.py → _labelstats.py} +25 -25
  42. dataeval/metrics/stats/{pixelstats.py → _pixelstats.py} +5 -4
  43. dataeval/metrics/stats/{visualstats.py → _visualstats.py} +9 -8
  44. dataeval/typing.py +54 -0
  45. dataeval/utils/__init__.py +2 -2
  46. dataeval/utils/_array.py +169 -0
  47. dataeval/utils/_bin.py +199 -0
  48. dataeval/utils/_clusterer.py +144 -0
  49. dataeval/utils/_fast_mst.py +189 -0
  50. dataeval/utils/{image.py → _image.py} +6 -4
  51. dataeval/utils/_method.py +18 -0
  52. dataeval/utils/{shared.py → _mst.py} +3 -65
  53. dataeval/utils/{plot.py → _plot.py} +4 -4
  54. dataeval/utils/data/__init__.py +22 -0
  55. dataeval/utils/data/_embeddings.py +105 -0
  56. dataeval/utils/data/_images.py +65 -0
  57. dataeval/utils/data/_metadata.py +352 -0
  58. dataeval/utils/data/_selection.py +119 -0
  59. dataeval/utils/{dataset/split.py → data/_split.py} +13 -14
  60. dataeval/utils/data/_targets.py +73 -0
  61. dataeval/utils/data/_types.py +58 -0
  62. dataeval/utils/data/collate.py +103 -0
  63. dataeval/utils/data/datasets/__init__.py +17 -0
  64. dataeval/utils/data/datasets/_base.py +254 -0
  65. dataeval/utils/data/datasets/_cifar10.py +134 -0
  66. dataeval/utils/data/datasets/_fileio.py +168 -0
  67. dataeval/utils/data/datasets/_milco.py +153 -0
  68. dataeval/utils/data/datasets/_mixin.py +56 -0
  69. dataeval/utils/data/datasets/_mnist.py +183 -0
  70. dataeval/utils/data/datasets/_ships.py +123 -0
  71. dataeval/utils/data/datasets/_voc.py +352 -0
  72. dataeval/utils/data/selections/__init__.py +15 -0
  73. dataeval/utils/data/selections/_classfilter.py +60 -0
  74. dataeval/utils/data/selections/_indices.py +26 -0
  75. dataeval/utils/data/selections/_limit.py +26 -0
  76. dataeval/utils/data/selections/_reverse.py +18 -0
  77. dataeval/utils/data/selections/_shuffle.py +29 -0
  78. dataeval/utils/metadata.py +198 -376
  79. dataeval/utils/torch/{gmm.py → _gmm.py} +4 -2
  80. dataeval/utils/torch/{internal.py → _internal.py} +21 -51
  81. dataeval/utils/torch/models.py +43 -2
  82. dataeval/workflows/sufficiency.py +10 -9
  83. {dataeval-0.76.0.dist-info → dataeval-0.81.0.dist-info}/METADATA +44 -15
  84. dataeval-0.81.0.dist-info/RECORD +94 -0
  85. dataeval/detectors/linters/clusterer.py +0 -512
  86. dataeval/detectors/linters/merged_stats.py +0 -49
  87. dataeval/detectors/ood/metadata_least_likely.py +0 -119
  88. dataeval/interop.py +0 -69
  89. dataeval/utils/dataset/__init__.py +0 -7
  90. dataeval/utils/dataset/datasets.py +0 -412
  91. dataeval/utils/dataset/read.py +0 -63
  92. dataeval-0.76.0.dist-info/RECORD +0 -67
  93. /dataeval/{log.py → _log.py} +0 -0
  94. /dataeval/utils/torch/{blocks.py → _blocks.py} +0 -0
  95. {dataeval-0.76.0.dist-info → dataeval-0.81.0.dist-info}/LICENSE.txt +0 -0
  96. {dataeval-0.76.0.dist-info → dataeval-0.81.0.dist-info}/WHEEL +0 -0
@@ -10,7 +10,8 @@ from __future__ import annotations
10
10
 
11
11
  __all__ = []
12
12
 
13
- from typing import NamedTuple, TypeVar
13
+ from dataclasses import dataclass
14
+ from typing import TypeVar
14
15
 
15
16
  import numpy as np
16
17
  import torch
@@ -18,7 +19,8 @@ import torch
18
19
  TGMMData = TypeVar("TGMMData")
19
20
 
20
21
 
21
- class GaussianMixtureModelParams(NamedTuple):
22
+ @dataclass
23
+ class GaussianMixtureModelParams:
22
24
  """
23
25
  phi : torch.Tensor
24
26
  Mixture component distribution weights.
@@ -11,30 +11,7 @@ from numpy.typing import NDArray
11
11
  from torch.utils.data import DataLoader, TensorDataset
12
12
  from tqdm import tqdm
13
13
 
14
-
15
- def get_device(device: str | torch.device | None = None) -> torch.device:
16
- """
17
- Instantiates a PyTorch device object.
18
-
19
- Parameters
20
- ----------
21
- device : str | torch.device | None, default None
22
- Either ``None``, a str ('gpu' or 'cpu') indicating the device to choose, or an
23
- already instantiated device object. If ``None``, the GPU is selected if it is
24
- detected, otherwise the CPU is used as a fallback.
25
-
26
- Returns
27
- -------
28
- The instantiated device object.
29
- """
30
- if isinstance(device, torch.device): # Already a torch device
31
- return device
32
- else: # Instantiate device
33
- if device is None or device.lower() in ["gpu", "cuda"]:
34
- torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
35
- else:
36
- torch_device = torch.device("cpu")
37
- return torch_device
14
+ from dataeval.config import get_device
38
15
 
39
16
 
40
17
  def predict_batch(
@@ -42,7 +19,7 @@ def predict_batch(
42
19
  model: Callable | torch.nn.Module | torch.nn.Sequential,
43
20
  device: torch.device | None = None,
44
21
  batch_size: int = int(1e10),
45
- preprocess_fn: Callable | None = None,
22
+ preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
46
23
  dtype: type[np.generic] | torch.dtype = np.float32,
47
24
  ) -> NDArray[Any] | torch.Tensor | tuple[Any, ...]:
48
25
  """
@@ -71,11 +48,12 @@ def predict_batch(
71
48
  """
72
49
  device = get_device(device)
73
50
  if isinstance(x, np.ndarray):
74
- x = torch.from_numpy(x).to(device)
51
+ x = torch.tensor(x, device=device)
75
52
  n = len(x)
76
53
  n_minibatch = int(np.ceil(n / batch_size))
77
54
  return_np = not isinstance(dtype, torch.dtype)
78
- preds = []
55
+ preds_tuple = None
56
+ preds_array = []
79
57
  with torch.no_grad():
80
58
  for i in range(n_minibatch):
81
59
  istart, istop = i * batch_size, min((i + 1) * batch_size, n)
@@ -83,23 +61,17 @@ def predict_batch(
83
61
  if isinstance(preprocess_fn, Callable):
84
62
  x_batch = preprocess_fn(x_batch)
85
63
 
86
- preds_tmp = model(x_batch.to(torch.float32).to(device))
64
+ preds_tmp = model(x_batch.to(dtype=torch.float32))
87
65
  if isinstance(preds_tmp, (list, tuple)):
88
- if len(preds) == 0: # init tuple with lists to store predictions
89
- preds = tuple([] for _ in range(len(preds_tmp)))
66
+ if preds_tuple is None: # init tuple with lists to store predictions
67
+ preds_tuple = tuple([] for _ in range(len(preds_tmp)))
90
68
  for j, p in enumerate(preds_tmp):
91
- if isinstance(p, torch.Tensor):
92
- p = p.cpu()
93
- preds[j].append(p if not return_np or isinstance(p, np.ndarray) else p.numpy())
69
+ p = p.cpu() if isinstance(p, torch.Tensor) else p
70
+ preds_tuple[j].append(p if not return_np or isinstance(p, np.ndarray) else p.numpy())
94
71
  elif isinstance(preds_tmp, (np.ndarray, torch.Tensor)):
95
- if isinstance(preds_tmp, torch.Tensor):
96
- preds_tmp = preds_tmp.cpu()
97
- if isinstance(preds, tuple):
98
- preds = list(preds)
99
- preds.append(
100
- preds_tmp
101
- if not return_np or isinstance(preds_tmp, np.ndarray) # type: ignore
102
- else preds_tmp.numpy()
72
+ preds_tmp = preds_tmp.cpu() if isinstance(preds_tmp, torch.Tensor) else preds_tmp
73
+ preds_array.append(
74
+ preds_tmp if not return_np or isinstance(preds_tmp, np.ndarray) else preds_tmp.numpy()
103
75
  )
104
76
  else:
105
77
  raise TypeError(
@@ -108,9 +80,7 @@ def predict_batch(
108
80
  torch.Tensor."
109
81
  )
110
82
  concat = partial(np.concatenate, axis=0) if return_np else partial(torch.cat, dim=0)
111
- out: tuple | np.ndarray | torch.Tensor = (
112
- tuple(concat(p) for p in preds) if isinstance(preds, tuple) else concat(preds) # type: ignore
113
- )
83
+ out = tuple(concat(p) for p in preds_tuple) if preds_tuple is not None else concat(preds_array)
114
84
  return out
115
85
 
116
86
 
@@ -154,18 +124,18 @@ def trainer(
154
124
  verbose
155
125
  Whether to print training progress.
156
126
  """
127
+ if loss_fn is None:
128
+ loss_fn = torch.nn.MSELoss()
129
+
157
130
  if optimizer is None:
158
131
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
159
132
 
160
133
  if y_train is None:
161
- dataset = TensorDataset(torch.from_numpy(x_train).to(torch.float32))
162
-
134
+ dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float32))
163
135
  else:
164
- dataset = TensorDataset(
165
- torch.from_numpy(x_train).to(torch.float32), torch.from_numpy(y_train).to(torch.float32)
166
- )
136
+ dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
167
137
 
168
- loader = DataLoader(dataset=dataset)
138
+ loader = DataLoader(dataset=dataset, batch_size=batch_size)
169
139
 
170
140
  model = model.to(device)
171
141
 
@@ -186,7 +156,7 @@ def trainer(
186
156
  y_hat = model(x)
187
157
  y = x if y is None else y
188
158
 
189
- loss = loss_fn(y, y_hat) # type: ignore
159
+ loss = loss_fn(y, *y_hat) if isinstance(y_hat, tuple) else loss_fn(y, y_hat) # type: ignore
190
160
 
191
161
  optimizer.zero_grad()
192
162
  loss.backward()
@@ -2,13 +2,19 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- __all__ = ["Autoencoder", "Encoder", "Decoder"]
5
+ __all__ = ["Autoencoder", "Encoder", "Decoder", "ResNet18"]
6
6
 
7
7
  import math
8
- from typing import Any
8
+ from typing import Any, Protocol, runtime_checkable
9
9
 
10
10
  import torch
11
11
  import torch.nn as nn
12
+ from torchvision.models import ResNet18_Weights, resnet18
13
+
14
+
15
+ @runtime_checkable
16
+ class SupportsEncode(Protocol):
17
+ def encode(self, x: Any) -> Any: ...
12
18
 
13
19
 
14
20
  class Autoencoder(nn.Module):
@@ -330,3 +336,38 @@ class Decoder_AE(nn.Module):
330
336
  x = self.decoder(x)
331
337
  x = x.reshape((-1, *self.input_shape))
332
338
  return x
339
+
340
+
341
+ class ResNet18(nn.Module):
342
+ """
343
+ A wrapper class for the torchvision.models.resnet18 model
344
+
345
+
346
+ Note
347
+ ----
348
+ This class is provided for the use of DataEval documentation and excludes many features
349
+ of the torchvision implementation.
350
+
351
+ Warning
352
+ -------
353
+ This class has been thoroughly tested for the purposes
354
+ of DataEval's documentation but not for operational use.
355
+ Please use with caution if deploying this class or subclasses.
356
+ """
357
+
358
+ def __init__(self, embedding_size: int = 128):
359
+ super().__init__()
360
+ self.model: nn.Module = resnet18(weights=ResNet18_Weights.DEFAULT, progress=False)
361
+ self.model.fc = nn.Linear(self.model.fc.in_features, embedding_size)
362
+
363
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
364
+ return self.model(x)
365
+
366
+ @staticmethod
367
+ def transforms() -> Any:
368
+ """(Returns) the default ResNet18 IMAGENET1K_V1 transforms"""
369
+
370
+ return ResNet18_Weights.DEFAULT.transforms()
371
+
372
+ def __str__(self) -> str:
373
+ return str(self.model)
@@ -5,17 +5,18 @@ __all__ = []
5
5
  import contextlib
6
6
  import warnings
7
7
  from dataclasses import dataclass
8
- from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, TypeVar, cast
8
+ from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, Sized, TypeVar, cast
9
9
 
10
10
  import numpy as np
11
11
  import torch
12
12
  import torch.nn as nn
13
- from numpy.typing import ArrayLike, NDArray
13
+ from numpy.typing import NDArray
14
14
  from scipy.optimize import basinhopping
15
15
  from torch.utils.data import Dataset
16
16
 
17
- from dataeval.interop import as_numpy
18
- from dataeval.output import Output, set_metadata
17
+ from dataeval._output import Output, set_metadata
18
+ from dataeval.typing import ArrayLike
19
+ from dataeval.utils._array import as_numpy
19
20
 
20
21
  with contextlib.suppress(ImportError):
21
22
  from matplotlib.figure import Figure
@@ -24,7 +25,7 @@ with contextlib.suppress(ImportError):
24
25
  @dataclass(frozen=True)
25
26
  class SufficiencyOutput(Output):
26
27
  """
27
- Output class for :class:`Sufficiency` workflow.
28
+ Output class for :class:`.Sufficiency` workflow.
28
29
 
29
30
  Attributes
30
31
  ----------
@@ -277,9 +278,9 @@ def reset_parameters(model: nn.Module) -> nn.Module:
277
278
 
278
279
 
279
280
  def validate_dataset_len(dataset: Dataset[Any]) -> int:
280
- if not hasattr(dataset, "__len__"):
281
+ if not isinstance(dataset, Sized):
281
282
  raise TypeError("Must provide a dataset with a length attribute")
282
- length: int = dataset.__len__() # type: ignore
283
+ length: int = len(dataset)
283
284
  if length <= 0:
284
285
  raise ValueError("Dataset length must be greater than 0")
285
286
  return length
@@ -460,13 +461,13 @@ class Sufficiency(Generic[T]):
460
461
  @property
461
462
  def eval_fn(
462
463
  self,
463
- ) -> Callable[[nn.Module, Dataset[T]], dict[str, float] | Mapping[str, ArrayLike]]:
464
+ ) -> Callable[[nn.Module, Dataset[T]], Mapping[str, float] | Mapping[str, ArrayLike]]:
464
465
  return self._eval_fn
465
466
 
466
467
  @eval_fn.setter
467
468
  def eval_fn(
468
469
  self,
469
- value: Callable[[nn.Module, Dataset[T]], dict[str, float] | Mapping[str, ArrayLike]],
470
+ value: Callable[[nn.Module, Dataset[T]], Mapping[str, float] | Mapping[str, ArrayLike]],
470
471
  ) -> None:
471
472
  if not callable(value):
472
473
  raise TypeError("Must provide a callable for eval_fn.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.76.0
3
+ Version: 0.81.0
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -21,8 +21,12 @@ Classifier: Programming Language :: Python :: 3.12
21
21
  Classifier: Programming Language :: Python :: 3 :: Only
22
22
  Classifier: Topic :: Scientific/Engineering
23
23
  Provides-Extra: all
24
- Requires-Dist: matplotlib ; extra == "all"
24
+ Requires-Dist: defusedxml (>=0.7.1)
25
+ Requires-Dist: fast_hdbscan (==0.2.0)
26
+ Requires-Dist: matplotlib (>=3.7.1) ; extra == "all"
27
+ Requires-Dist: numba (>=0.59.1)
25
28
  Requires-Dist: numpy (>=1.24.2)
29
+ Requires-Dist: pandas (>=2.0) ; extra == "all"
26
30
  Requires-Dist: pillow (>=10.3.0)
27
31
  Requires-Dist: requests
28
32
  Requires-Dist: scikit-learn (>=1.5.0)
@@ -38,13 +42,17 @@ Description-Content-Type: text/markdown
38
42
 
39
43
  # DataEval
40
44
 
41
- To view our extensive collection of tutorials, how-to's, explanation guides, and reference material, please visit our documentation on **[Read the Docs](https://dataeval.readthedocs.io/)**
45
+ To view our extensive collection of tutorials, how-to's, explanation guides,
46
+ and reference material, please visit our documentation on
47
+ **[Read the Docs](https://dataeval.readthedocs.io/)**
42
48
 
43
49
  ## About DataEval
44
50
 
45
51
  <!-- start tagline -->
46
52
 
47
- DataEval curates datasets to train and test performant, robust, unbiased and reliable AI models and monitors for data shifts that impact performance of deployed models.
53
+ DataEval curates datasets to train and test performant, robust, unbiased and
54
+ reliable AI models and monitors for data shifts that impact performance of
55
+ deployed models.
48
56
 
49
57
  <!-- end tagline -->
50
58
 
@@ -52,22 +60,33 @@ DataEval curates datasets to train and test performant, robust, unbiased and rel
52
60
 
53
61
  <!-- start needs -->
54
62
 
55
- DataEval is an effective, powerful, and reliable set of tools for any T&E engineer. Throughout all stages of the machine learning lifecycle, DataEval supports model development, data analysis, and monitoring with state-of-the-art algorithms to help you solve difficult problems. With a focus on computer vision tasks, DataEval provides simple, but effective metrics for performance estimation, bias detection, and dataset linting.
63
+ DataEval is an effective, powerful, and reliable set of tools for any T&E
64
+ engineer. Throughout all stages of the machine learning lifecycle, DataEval
65
+ supports model development, data analysis, and monitoring with state-of-the-art
66
+ algorithms to help you solve difficult problems. With a focus on computer
67
+ vision tasks, DataEval provides simple, but effective metrics for performance
68
+ estimation, bias detection, and dataset linting.
56
69
 
57
70
  <!-- end needs -->
58
71
 
59
72
  <!-- start JATIC interop -->
60
- DataEval is easy to install, supports a wide range of Python versions, and is compatible with many of the most popular packages in the scientific and T&E communities.
61
- DataEval also has native interoperability between JATIC's suite of tools when using MAITE-compliant datasets and models.
73
+ DataEval is easy to install, supports a wide range of Python versions, and is
74
+ compatible with many of the most popular packages in the scientific and T&E
75
+ communities.
76
+
77
+ DataEval also has native interoperability between JATIC's suite of tools when
78
+ using MAITE-compliant datasets and models.
62
79
  <!-- end JATIC interop -->
63
80
 
64
81
  ## Getting Started
65
82
 
66
83
  **Python versions:** 3.9 - 3.12
67
84
 
68
- **Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*, *Gradient*
85
+ **Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*,
86
+ *Gradient*
69
87
 
70
- Choose your preferred method of installation below or follow our [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
88
+ Choose your preferred method of installation below or follow our
89
+ [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
71
90
 
72
91
  * [Installing with pip](#installing-with-pip)
73
92
  * [Installing with conda/mamba](#installing-with-conda)
@@ -75,7 +94,8 @@ Choose your preferred method of installation below or follow our [installation g
75
94
 
76
95
  ### **Installing with pip**
77
96
 
78
- You can install DataEval directly from pypi.org using the following command. The optional dependencies of DataEval are `all`.
97
+ You can install DataEval directly from pypi.org using the following command.
98
+ The optional dependencies of DataEval are `all`.
79
99
 
80
100
  ```bash
81
101
  pip install dataeval[all]
@@ -83,8 +103,9 @@ pip install dataeval[all]
83
103
 
84
104
  ### **Installing with conda**
85
105
 
86
- DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. As some dependencies
87
- are installed from the `pytorch` channel, the channel is specified in the below example.
106
+ DataEval can be installed in a Conda/Mamba environment using the provided
107
+ `environment.yaml` file. As some dependencies are installed from the `pytorch`
108
+ channel, the channel is specified in the below example.
88
109
 
89
110
  ```bash
90
111
  micromamba create -f environment\environment.yaml -c pytorch
@@ -92,7 +113,9 @@ micromamba create -f environment\environment.yaml -c pytorch
92
113
 
93
114
  ### **Installing from GitHub**
94
115
 
95
- To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
116
+ To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
117
+ download larger, binary source files and `poetry` for project dependency
118
+ management.
96
119
 
97
120
  ```bash
98
121
  sudo apt-get install git-lfs
@@ -112,7 +135,9 @@ Install DataEval with optional dependencies for development.
112
135
  poetry install --all-extras --with dev
113
136
  ```
114
137
 
115
- Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
138
+ Now that DataEval is installed, you can run commands in the poetry virtual
139
+ environment by prefixing shell commands with `poetry run`, or activate the
140
+ virtual environment directly in the shell.
116
141
 
117
142
  ```bash
118
143
  poetry shell
@@ -131,7 +156,11 @@ If you have any questions, feel free to reach out to the people below:
131
156
 
132
157
  ### CDAO Funding Acknowledgement
133
158
 
134
- This material is based upon work supported by the Chief Digital and Artificial Intelligence Office under Contract No. W519TC-23-9-2033. The views and conclusions contained herein are those of the author(s) and should not be interpreted as necessarily representing the official policies or endorsements, either expressed or implied, of the U.S. Government.
159
+ This material is based upon work supported by the Chief Digital and Artificial
160
+ Intelligence Office under Contract No. W519TC-23-9-2033. The views and
161
+ conclusions contained herein are those of the author(s) and should not be
162
+ interpreted as necessarily representing the official policies or endorsements,
163
+ either expressed or implied, of the U.S. Government.
135
164
 
136
165
  <!-- end acknowledgement -->
137
166
 
@@ -0,0 +1,94 @@
1
+ dataeval/__init__.py,sha256=XbukGD_taba_kqIskYMUHjfbRwCiS9AatkZlWOKwAyw,1510
2
+ dataeval/_log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
3
+ dataeval/_output.py,sha256=BB_wJJpQX7CaPZFE8x-0KMYBhuO1pramZhkz6LQ4uf0,4281
4
+ dataeval/config.py,sha256=x55jqLFrlHvOcNqPXudVnF24yc3OAaEAu-q9NJZSIq4,2225
5
+ dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
6
+ dataeval/detectors/drift/__init__.py,sha256=nagRw504maFP_129qBLY170RExXy4LNJQCujfPdCopk,658
7
+ dataeval/detectors/drift/_base.py,sha256=MMqm9ysw4ehozQsepR6utPJhvybdOJOk-sjKx8N-Vqw,14782
8
+ dataeval/detectors/drift/_cvm.py,sha256=H2w-I0eMD7yP-CSmpdodeJ0-TYznJT7w_H7JuobESow,3859
9
+ dataeval/detectors/drift/_ks.py,sha256=-5k3RBPA3kadX7oD14Wc52rAqQf1udwFeW7Qf3Sv4Tw,4058
10
+ dataeval/detectors/drift/_mmd.py,sha256=11z6zIUs9NzD2J5j1RtZQZkLOOP0mtLQIzKSSOnPfUU,7931
11
+ dataeval/detectors/drift/_torch.py,sha256=BY-AEqjkzX8fJnLJSBosHnsRsUorL0de_ysJjkZyS0s,7687
12
+ dataeval/detectors/drift/_uncertainty.py,sha256=c86qgGqP-Ig2r0l1cnsNUqzRxRoM13TJafuXsELzdZw,5675
13
+ dataeval/detectors/drift/updates.py,sha256=CKH1aEvo1Ltz1zxsKA-0uXR3mz7CiQHeTx86lxgg0SY,1782
14
+ dataeval/detectors/linters/__init__.py,sha256=x6upwKPxJCBQvFcUyLf_4jTmL-CmTt1G4XeZUcQuhvc,367
15
+ dataeval/detectors/linters/duplicates.py,sha256=KagOkHZcx2YxGc6DqA1Vof1rq7ELZUHW9dRIBrQBJ28,6037
16
+ dataeval/detectors/linters/outliers.py,sha256=vPQRfJeo9npEVGRjg2c5ffuuqf-hyxGQsyVcb5fZ9Kg,14128
17
+ dataeval/detectors/ood/__init__.py,sha256=Ws6_un4pFWNknki7Bp7qjrslZVB9pYNE-K72u2lF65k,291
18
+ dataeval/detectors/ood/ae.py,sha256=3uzPMN1MZlnRXOmgsgkg7V1PpkqCFSSTR2xmfJDFEjk,2962
19
+ dataeval/detectors/ood/base.py,sha256=I2gW8cRWR-eBSI2zwESDrnYUEsMlhRsnWJWVyw4Jgkg,3047
20
+ dataeval/detectors/ood/metadata_ks_compare.py,sha256=4wwf6Nwx8qeCL9AnGP91vMfeMD8wpAF1_XOA15sdXsY,5205
21
+ dataeval/detectors/ood/metadata_ood_mi.py,sha256=7_Sdzf7-x1TlrIQvSyOIB98C8_UQhUwmwFQmZ9_q1Uc,4042
22
+ dataeval/detectors/ood/mixin.py,sha256=hisM-xQgcz2kyC8sPHqCQ_ZaUSLWdsOT0QdouezQwOo,5439
23
+ dataeval/detectors/ood/output.py,sha256=srbcxYehJpMPEwzy9mk0LUEOUNXO2y8rMu7HaxuNXfE,1711
24
+ dataeval/detectors/ood/vae.py,sha256=TEvv1ydHk_URio9Nm9KNb4Ci3jyj-g4l31fakkV3vus,2273
25
+ dataeval/metadata/__init__.py,sha256=_GVidbjjjxOxvdy0H-cTIOAzlwTRaXJR8NN9_9W3SAk,183
26
+ dataeval/metadata/_ood.py,sha256=NRCqG5LLR1CTaj10bI2PUltPoxyYGGdhi8JYX4xDKB4,8408
27
+ dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
28
+ dataeval/metrics/bias/__init__.py,sha256=I8h7QDGpB21HY-mkU0B9wJKnUBr3Kx2xTmeebkpqe_Y,649
29
+ dataeval/metrics/bias/_balance.py,sha256=6W40OWkLO3c5QKcPXMBaUrC1JOiNNmMiWll4VhVK7yE,11219
30
+ dataeval/metrics/bias/_coverage.py,sha256=QU0Pl3u6qdYgjBoLqoec0BJQtHgZ0M_EmGUSYo9unPI,6395
31
+ dataeval/metrics/bias/_diversity.py,sha256=vOnziwmwnEO6v2BLk8C9nKzyDAwjXMzv5God1Le629Y,8566
32
+ dataeval/metrics/bias/_parity.py,sha256=FYsqy8_R0LAPj6-NkSWy9gHGJ1V3wC4KYRRjSFjY6ww,13137
33
+ dataeval/metrics/estimators/__init__.py,sha256=d0b1eqlForaiXAUY7E7HUfITVhJu4mzo0ULe50EBrDE,528
34
+ dataeval/metrics/estimators/_ber.py,sha256=Xtn9YKS7uUlyJkDck9kqSOyEnqI7GFiyq4zrWpTLyAs,5697
35
+ dataeval/metrics/estimators/_clusterer.py,sha256=wqtw4_2kw6tdb1zx7b9vNdK0iQYZ1KESZ2nSx2txvoo,3403
36
+ dataeval/metrics/estimators/_divergence.py,sha256=m2-9oO5Sx6ybFyhm3IwARoPudMz7kKnj6m6p31gGZSw,4426
37
+ dataeval/metrics/estimators/_uap.py,sha256=GyLd7fgyALit3jpPaEzAntOm9ULim2bO64HFEOX2KO4,2153
38
+ dataeval/metrics/stats/__init__.py,sha256=xmoChTCtSgE5ZPPLmXV6VbePGpNp-wryuERC1y10J_I,1095
39
+ dataeval/metrics/stats/_base.py,sha256=KP_1VhgYPS6maDbzLG8xMldjjv-MfFBlC2huP2yadpY,13605
40
+ dataeval/metrics/stats/_boxratiostats.py,sha256=kUEyPF9-6XSlegqNONN-FbnpahJPCPB4-VjMFPtg1Wg,6321
41
+ dataeval/metrics/stats/_datasetstats.py,sha256=N2DAZWUzCx5dF8aWIcQjStE1Vpdzk0Gf8Nbjrptm9hg,7385
42
+ dataeval/metrics/stats/_dimensionstats.py,sha256=qqSqzhpsGT1wGxgqI6PhZSzJQcf0ZQCFNQqDYAIkkao,4058
43
+ dataeval/metrics/stats/_hashstats.py,sha256=aBB-VeNZMxLBSzGoXKps8kqxQbrtJ7z_-6HRENjPo0s,5082
44
+ dataeval/metrics/stats/_labelstats.py,sha256=KCEsVXH6AmVvXtg-uBVicFloWeO5J9oApX9DGpm7Xhw,7002
45
+ dataeval/metrics/stats/_pixelstats.py,sha256=bzi_zEivcNXEKu2xgv3TCoqpAURym35kLwKNM2-8Dnk,4250
46
+ dataeval/metrics/stats/_visualstats.py,sha256=s84iwDYYUp5DWIdaeLs68PIcZkD8Wa52V7Y8tDPD76Q,4689
47
+ dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
+ dataeval/typing.py,sha256=qXAGzxJoaTZ5BBUUsQP8KEiswu8IKt2PRvZIXKvxjcU,1284
49
+ dataeval/utils/__init__.py,sha256=T8F8zJh4ZAeu0wDzfpld92I2zJg9mWBmkGCHrDPU7gk,264
50
+ dataeval/utils/_array.py,sha256=fc04sYShIdsRS4qtG1UCnlGGk-yVRxlOHTNAmW7NpDY,4990
51
+ dataeval/utils/_bin.py,sha256=nylthmsC3vzLHLhlUMACvZs--h7xvAh9Pt75InaQJW8,7322
52
+ dataeval/utils/_clusterer.py,sha256=fw5x-2QN0TIbiodDKHZxRgxKHINedpPcOklzce0Rbjg,5436
53
+ dataeval/utils/_fast_mst.py,sha256=4_7ykVihCL5jWtxcGnrecIsDQo65kUml9SZ1JxgBZYY,7172
54
+ dataeval/utils/_image.py,sha256=capzF_X5H0jy0PmTP3Hf52GFgLqrnfU6gS4tiwck9jo,1939
55
+ dataeval/utils/_method.py,sha256=EplDHf402o8EDlY5PHLCKk1XObK2i-MWZp4BgymNn9A,480
56
+ dataeval/utils/_mst.py,sha256=gXjUUhz9G4wkcCUTqQ-61Ti9sZUFx08hEjlZXWiEmPc,2163
57
+ dataeval/utils/_plot.py,sha256=8w7utZ6sT0flCQi5KNFHNmEIbDvru7SE18r_w9DnEX8,7114
58
+ dataeval/utils/data/__init__.py,sha256=G3PKWyEDG6RPwawPBKEpCVx-cakIHwScGxxCdstpbP4,609
59
+ dataeval/utils/data/_embeddings.py,sha256=SSGaXnsyjF9gozr_nF4WJ8ljY-PSxNu7ITJZyeKy-AA,3574
60
+ dataeval/utils/data/_images.py,sha256=aN_VUZt4mCB_0LGpR79QC-325ZhvmsyANoquWmyhDbg,1764
61
+ dataeval/utils/data/_metadata.py,sha256=7czoekyYY1x2oYaxXQHwpla-RQxLnkX-nGPu54x5nQg,13632
62
+ dataeval/utils/data/_selection.py,sha256=bhvDRd-zyPCshTraDBfvVuOd4tKQ5dhiE0Bqvn7sTao,4019
63
+ dataeval/utils/data/_split.py,sha256=3Fn73qImqtwPQP49W4fQvbiURnMV45xA6DW7lW3nGR0,18872
64
+ dataeval/utils/data/_targets.py,sha256=GZpN0NuvO6i0dXF_I5jDEvcmBWz0yVQ5OFfYQUyc3rs,2581
65
+ dataeval/utils/data/_types.py,sha256=tNhw5a6pvuG0EHpLDwSvNWt7vESYEh48LzR4l6eXC7M,1388
66
+ dataeval/utils/data/collate.py,sha256=Z5nmBnWV_IoJzMp_tj8RCKjMJA9sSCY_zZITqISGixc,3865
67
+ dataeval/utils/data/datasets/__init__.py,sha256=jBrswiERrvBx4pJQJZIq_B5UE-Wy8a2_SBfM2crG8R8,511
68
+ dataeval/utils/data/datasets/_base.py,sha256=1GxwEB_Ql84183SdL_jTwLkUzdfXxNMgX3PLMzJJm4Y,8682
69
+ dataeval/utils/data/datasets/_cifar10.py,sha256=p0IdnHai80kLnA7V5rxdtxOuuBaWGJ5Ymi5xZTLmrgY,5151
70
+ dataeval/utils/data/datasets/_fileio.py,sha256=SixIk5nIlIwJdX9zjNXS10vHA3hL8aaYbqHsDg1xSpY,6447
71
+ dataeval/utils/data/datasets/_milco.py,sha256=KJjmF6IhBZHdSHRvRveD0wsMeixMFS5qgcKudqXmcRc,6059
72
+ dataeval/utils/data/datasets/_mixin.py,sha256=FJgZP_cpJkgAHA3j3ai_j3Wt7aFSEjIMVmt9NpvVXzg,1757
73
+ dataeval/utils/data/datasets/_mnist.py,sha256=_9pOWmTF43Is354kTdT1YAqThUmiFyIbKLncFVBuK_k,7214
74
+ dataeval/utils/data/datasets/_ships.py,sha256=M_46nzlOLv6jk5EOTWIub3MraAMbDQREnCVA0XdGW6Q,4352
75
+ dataeval/utils/data/datasets/_voc.py,sha256=vvpCuahGPN4cRLAGIXldD8a3cYqsx2cFomwx6T9oSaI,13828
76
+ dataeval/utils/data/selections/__init__.py,sha256=RLjkIh2IAvPktLbUmyLv3p-rvDEaBAdWzjiNnnhVtn8,481
77
+ dataeval/utils/data/selections/_classfilter.py,sha256=4AixzXlS8SudKTOTHvx5PlJvhAINAp110Q7RKDqpno4,2334
78
+ dataeval/utils/data/selections/_indices.py,sha256=Z5RTFSRUZqv8vFoo5vPxJOCS5jpdstl8Ru59COl8weY,646
79
+ dataeval/utils/data/selections/_limit.py,sha256=kZFhFOxWBFaZgM0zOFF5lLse3osWlelMDFE9fUO3wJE,528
80
+ dataeval/utils/data/selections/_reverse.py,sha256=sBkuBmkCllqUufdQvjj1Sslg3QzWkTB5BPlo45ONFxE,384
81
+ dataeval/utils/data/selections/_shuffle.py,sha256=3T_F53BKD-S4cVCaj1MQoF6k0Dp64PdIGbPNuOQ38-8,591
82
+ dataeval/utils/metadata.py,sha256=X8Hu4LdCzAaE9uk1hI4BflmFve_VOQCqK9lXq0sk9ow,14196
83
+ dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
84
+ dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
85
+ dataeval/utils/torch/_gmm.py,sha256=XBHNLPTtLGRrzq0B4GI48Sha7YHL-0PpXil3s3exLGE,3714
86
+ dataeval/utils/torch/_internal.py,sha256=5BYibQvvXS-trsHi2x7gjxuaknLwSyj6yWXbOFEdx-M,5790
87
+ dataeval/utils/torch/models.py,sha256=hmroEs6C6jQ5tAoZa71RFeIvXLxfXrTJSFH_jG2LGQU,9749
88
+ dataeval/utils/torch/trainer.py,sha256=Qay0LK63RuyoGYiJ5zI2C5BVym309ORvp6shhpcrIU4,5589
89
+ dataeval/workflows/__init__.py,sha256=L9yfBipNFGnYuN2JbMknIHDvziwfa2XAGFnOwifZbls,216
90
+ dataeval/workflows/sufficiency.py,sha256=flYfHh3NX02MPFzeGmk4s_1WctoQFf2hIsbAHch-lQ0,18700
91
+ dataeval-0.81.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
92
+ dataeval-0.81.0.dist-info/METADATA,sha256=tYTSEqObHVtAexNlrGwq99AWtPxBjmrkOUYUiX31pwE,5302
93
+ dataeval-0.81.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
94
+ dataeval-0.81.0.dist-info/RECORD,,