dataeval 0.76.0__py3-none-any.whl → 0.81.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects only the changes between those published versions.
- dataeval/__init__.py +3 -3
- dataeval/{output.py → _output.py} +14 -0
- dataeval/config.py +77 -0
- dataeval/detectors/__init__.py +1 -1
- dataeval/detectors/drift/__init__.py +6 -6
- dataeval/detectors/drift/{base.py → _base.py} +41 -30
- dataeval/detectors/drift/{cvm.py → _cvm.py} +21 -28
- dataeval/detectors/drift/{ks.py → _ks.py} +20 -26
- dataeval/detectors/drift/{mmd.py → _mmd.py} +33 -19
- dataeval/detectors/drift/{torch.py → _torch.py} +2 -1
- dataeval/detectors/drift/{uncertainty.py → _uncertainty.py} +23 -7
- dataeval/detectors/drift/updates.py +1 -1
- dataeval/detectors/linters/__init__.py +0 -3
- dataeval/detectors/linters/duplicates.py +17 -8
- dataeval/detectors/linters/outliers.py +52 -43
- dataeval/detectors/ood/ae.py +29 -8
- dataeval/detectors/ood/base.py +5 -4
- dataeval/detectors/ood/metadata_ks_compare.py +1 -1
- dataeval/detectors/ood/mixin.py +20 -5
- dataeval/detectors/ood/output.py +1 -1
- dataeval/detectors/ood/vae.py +73 -0
- dataeval/metadata/__init__.py +5 -0
- dataeval/metadata/_ood.py +238 -0
- dataeval/metrics/__init__.py +1 -1
- dataeval/metrics/bias/__init__.py +5 -4
- dataeval/metrics/bias/{balance.py → _balance.py} +67 -17
- dataeval/metrics/bias/{coverage.py → _coverage.py} +41 -35
- dataeval/metrics/bias/{diversity.py → _diversity.py} +17 -12
- dataeval/metrics/bias/{parity.py → _parity.py} +89 -63
- dataeval/metrics/estimators/__init__.py +14 -4
- dataeval/metrics/estimators/{ber.py → _ber.py} +42 -11
- dataeval/metrics/estimators/_clusterer.py +104 -0
- dataeval/metrics/estimators/{divergence.py → _divergence.py} +18 -13
- dataeval/metrics/estimators/{uap.py → _uap.py} +4 -4
- dataeval/metrics/stats/__init__.py +7 -7
- dataeval/metrics/stats/{base.py → _base.py} +52 -16
- dataeval/metrics/stats/{boxratiostats.py → _boxratiostats.py} +6 -9
- dataeval/metrics/stats/{datasetstats.py → _datasetstats.py} +10 -14
- dataeval/metrics/stats/{dimensionstats.py → _dimensionstats.py} +6 -5
- dataeval/metrics/stats/{hashstats.py → _hashstats.py} +6 -6
- dataeval/metrics/stats/{labelstats.py → _labelstats.py} +25 -25
- dataeval/metrics/stats/{pixelstats.py → _pixelstats.py} +5 -4
- dataeval/metrics/stats/{visualstats.py → _visualstats.py} +9 -8
- dataeval/typing.py +54 -0
- dataeval/utils/__init__.py +2 -2
- dataeval/utils/_array.py +169 -0
- dataeval/utils/_bin.py +199 -0
- dataeval/utils/_clusterer.py +144 -0
- dataeval/utils/_fast_mst.py +189 -0
- dataeval/utils/{image.py → _image.py} +6 -4
- dataeval/utils/_method.py +18 -0
- dataeval/utils/{shared.py → _mst.py} +3 -65
- dataeval/utils/{plot.py → _plot.py} +4 -4
- dataeval/utils/data/__init__.py +22 -0
- dataeval/utils/data/_embeddings.py +105 -0
- dataeval/utils/data/_images.py +65 -0
- dataeval/utils/data/_metadata.py +352 -0
- dataeval/utils/data/_selection.py +119 -0
- dataeval/utils/{dataset/split.py → data/_split.py} +13 -14
- dataeval/utils/data/_targets.py +73 -0
- dataeval/utils/data/_types.py +58 -0
- dataeval/utils/data/collate.py +103 -0
- dataeval/utils/data/datasets/__init__.py +17 -0
- dataeval/utils/data/datasets/_base.py +254 -0
- dataeval/utils/data/datasets/_cifar10.py +134 -0
- dataeval/utils/data/datasets/_fileio.py +168 -0
- dataeval/utils/data/datasets/_milco.py +153 -0
- dataeval/utils/data/datasets/_mixin.py +56 -0
- dataeval/utils/data/datasets/_mnist.py +183 -0
- dataeval/utils/data/datasets/_ships.py +123 -0
- dataeval/utils/data/datasets/_voc.py +352 -0
- dataeval/utils/data/selections/__init__.py +15 -0
- dataeval/utils/data/selections/_classfilter.py +60 -0
- dataeval/utils/data/selections/_indices.py +26 -0
- dataeval/utils/data/selections/_limit.py +26 -0
- dataeval/utils/data/selections/_reverse.py +18 -0
- dataeval/utils/data/selections/_shuffle.py +29 -0
- dataeval/utils/metadata.py +198 -376
- dataeval/utils/torch/{gmm.py → _gmm.py} +4 -2
- dataeval/utils/torch/{internal.py → _internal.py} +21 -51
- dataeval/utils/torch/models.py +43 -2
- dataeval/workflows/sufficiency.py +10 -9
- {dataeval-0.76.0.dist-info → dataeval-0.81.0.dist-info}/METADATA +44 -15
- dataeval-0.81.0.dist-info/RECORD +94 -0
- dataeval/detectors/linters/clusterer.py +0 -512
- dataeval/detectors/linters/merged_stats.py +0 -49
- dataeval/detectors/ood/metadata_least_likely.py +0 -119
- dataeval/interop.py +0 -69
- dataeval/utils/dataset/__init__.py +0 -7
- dataeval/utils/dataset/datasets.py +0 -412
- dataeval/utils/dataset/read.py +0 -63
- dataeval-0.76.0.dist-info/RECORD +0 -67
- /dataeval/{log.py → _log.py} +0 -0
- /dataeval/utils/torch/{blocks.py → _blocks.py} +0 -0
- {dataeval-0.76.0.dist-info → dataeval-0.81.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.76.0.dist-info → dataeval-0.81.0.dist-info}/WHEEL +0 -0
dataeval/utils/torch/_gmm.py
CHANGED
@@ -10,7 +10,8 @@ from __future__ import annotations
 
 __all__ = []
 
-from
+from dataclasses import dataclass
+from typing import TypeVar
 
 import numpy as np
 import torch
@@ -18,7 +19,8 @@ import torch
 TGMMData = TypeVar("TGMMData")
 
 
-
+@dataclass
+class GaussianMixtureModelParams:
     """
     phi : torch.Tensor
         Mixture component distribution weights.
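The change above turns `GaussianMixtureModelParams` into a `@dataclass`, so the parameter container gets an auto-generated `__init__`, `__eq__`, and `__repr__`. Below is a minimal sketch of that effect; only the `phi` field is visible in the hunk, so the second field is a hypothetical stand-in.

```python
# Minimal sketch of what the @dataclass conversion provides. Only `phi` appears
# in the hunk above; `mu` is a hypothetical field added for illustration.
from dataclasses import dataclass

import torch


@dataclass
class GMMParamsSketch:
    phi: torch.Tensor  # mixture component distribution weights
    mu: torch.Tensor   # hypothetical: mixture component means


params = GMMParamsSketch(phi=torch.tensor([0.6, 0.4]), mu=torch.zeros(2, 3))
print(params)  # the generated __repr__ shows both fields
```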
dataeval/utils/torch/_internal.py
CHANGED
@@ -11,30 +11,7 @@ from numpy.typing import NDArray
 from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
 
-
-def get_device(device: str | torch.device | None = None) -> torch.device:
-    """
-    Instantiates a PyTorch device object.
-
-    Parameters
-    ----------
-    device : str | torch.device | None, default None
-        Either ``None``, a str ('gpu' or 'cpu') indicating the device to choose, or an
-        already instantiated device object. If ``None``, the GPU is selected if it is
-        detected, otherwise the CPU is used as a fallback.
-
-    Returns
-    -------
-    The instantiated device object.
-    """
-    if isinstance(device, torch.device):  # Already a torch device
-        return device
-    else:  # Instantiate device
-        if device is None or device.lower() in ["gpu", "cuda"]:
-            torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        else:
-            torch_device = torch.device("cpu")
-        return torch_device
+from dataeval.config import get_device
 
 
 def predict_batch(
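The device helper now lives in `dataeval.config`. Assuming the relocated `get_device` keeps the signature and pass-through behavior of the function removed above (a `str`, a `torch.device`, or `None`), usage looks like this sketch:

```python
# Sketch of calling the relocated helper, assuming dataeval.config.get_device
# keeps the removed function's signature (str | torch.device | None).
import torch

from dataeval.config import get_device

device = get_device(None)              # CUDA if available, otherwise CPU
cpu = get_device(torch.device("cpu"))  # an existing device object passes through
print(device, cpu)
```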
@@ -42,7 +19,7 @@ def predict_batch(
     model: Callable | torch.nn.Module | torch.nn.Sequential,
     device: torch.device | None = None,
     batch_size: int = int(1e10),
-    preprocess_fn: Callable | None = None,
+    preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
     dtype: type[np.generic] | torch.dtype = np.float32,
 ) -> NDArray[Any] | torch.Tensor | tuple[Any, ...]:
     """
@@ -71,11 +48,12 @@ def predict_batch(
     """
     device = get_device(device)
     if isinstance(x, np.ndarray):
-        x = torch.
+        x = torch.tensor(x, device=device)
     n = len(x)
     n_minibatch = int(np.ceil(n / batch_size))
     return_np = not isinstance(dtype, torch.dtype)
-
+    preds_tuple = None
+    preds_array = []
     with torch.no_grad():
         for i in range(n_minibatch):
             istart, istop = i * batch_size, min((i + 1) * batch_size, n)
@@ -83,23 +61,17 @@ def predict_batch(
             if isinstance(preprocess_fn, Callable):
                 x_batch = preprocess_fn(x_batch)
 
-            preds_tmp = model(x_batch.to(torch.float32)
+            preds_tmp = model(x_batch.to(dtype=torch.float32))
             if isinstance(preds_tmp, (list, tuple)):
-                if
-
+                if preds_tuple is None:  # init tuple with lists to store predictions
+                    preds_tuple = tuple([] for _ in range(len(preds_tmp)))
                 for j, p in enumerate(preds_tmp):
-                    if isinstance(p, torch.Tensor)
-
-                    preds[j].append(p if not return_np or isinstance(p, np.ndarray) else p.numpy())
+                    p = p.cpu() if isinstance(p, torch.Tensor) else p
+                    preds_tuple[j].append(p if not return_np or isinstance(p, np.ndarray) else p.numpy())
             elif isinstance(preds_tmp, (np.ndarray, torch.Tensor)):
-                if isinstance(preds_tmp, torch.Tensor)
-
-
-                preds = list(preds)
-                preds.append(
-                    preds_tmp
-                    if not return_np or isinstance(preds_tmp, np.ndarray)  # type: ignore
-                    else preds_tmp.numpy()
+                preds_tmp = preds_tmp.cpu() if isinstance(preds_tmp, torch.Tensor) else preds_tmp
+                preds_array.append(
+                    preds_tmp if not return_np or isinstance(preds_tmp, np.ndarray) else preds_tmp.numpy()
                 )
             else:
                 raise TypeError(
@@ -108,9 +80,7 @@ def predict_batch(
                     torch.Tensor."
                 )
     concat = partial(np.concatenate, axis=0) if return_np else partial(torch.cat, dim=0)
-    out
-        tuple(concat(p) for p in preds) if isinstance(preds, tuple) else concat(preds)  # type: ignore
-    )
+    out = tuple(concat(p) for p in preds_tuple) if preds_tuple is not None else concat(preds_array)
    return out
 
 
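The rewrite replaces the old `preds` accumulator with explicit `preds_tuple`/`preds_array` paths and moves tensors to CPU before NumPy conversion. A hedged usage sketch follows, based only on the parameters shown above; the toy two-output model exercises the tuple path, and the import from the private `_internal` module is purely illustrative.

```python
# Usage sketch for predict_batch based on the parameters shown above; the toy
# two-output model exercises the new preds_tuple accumulation path.
import numpy as np
import torch

from dataeval.utils.torch._internal import predict_batch  # private module, shown for illustration only


class TwoHeadModel(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.head_a = torch.nn.Linear(4, 2)
        self.head_b = torch.nn.Linear(4, 3)

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        return self.head_a(x), self.head_b(x)


x = np.random.rand(10, 4).astype(np.float32)
# dtype defaults to np.float32, so NumPy arrays come back (return_np is True)
a, b = predict_batch(x, TwoHeadModel(), batch_size=4)
print(a.shape, b.shape)  # (10, 2) (10, 3)
```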
@@ -154,18 +124,18 @@ def trainer(
     verbose
         Whether to print training progress.
     """
+    if loss_fn is None:
+        loss_fn = torch.nn.MSELoss()
+
     if optimizer is None:
         optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 
     if y_train is None:
-        dataset = TensorDataset(torch.
-
+        dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float32))
     else:
-        dataset = TensorDataset(
-            torch.from_numpy(x_train).to(torch.float32), torch.from_numpy(y_train).to(torch.float32)
-        )
+        dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
 
-    loader = DataLoader(dataset=dataset)
+    loader = DataLoader(dataset=dataset, batch_size=batch_size)
 
     model = model.to(device)
 
@@ -186,7 +156,7 @@ def trainer(
             y_hat = model(x)
             y = x if y is None else y
 
-            loss = loss_fn(y, y_hat)  # type: ignore
+            loss = loss_fn(y, *y_hat) if isinstance(y_hat, tuple) else loss_fn(y, y_hat)  # type: ignore
 
             optimizer.zero_grad()
             loss.backward()
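`trainer` now falls back to `torch.nn.MSELoss()` when no loss function is given and, per the last hunk, unpacks tuple model outputs into the loss call. The sketch below mirrors that dispatch with a hypothetical two-output loss and dummy tensors:

```python
# Sketch of the tuple-aware loss dispatch added above; the loss function and
# tensors here are hypothetical stand-ins.
import torch


def recon_loss(y: torch.Tensor, recon: torch.Tensor, latent: torch.Tensor) -> torch.Tensor:
    return torch.nn.functional.mse_loss(recon, y) + 1e-3 * latent.pow(2).mean()


y = torch.rand(8, 4)
y_hat = (torch.rand(8, 4), torch.rand(8, 2))  # tuple output from a model
loss = recon_loss(y, *y_hat) if isinstance(y_hat, tuple) else recon_loss(y, y_hat)
print(float(loss))
```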
dataeval/utils/torch/models.py
CHANGED
@@ -2,13 +2,19 @@
 
 from __future__ import annotations
 
-__all__ = ["Autoencoder", "Encoder", "Decoder"]
+__all__ = ["Autoencoder", "Encoder", "Decoder", "ResNet18"]
 
 import math
-from typing import Any
+from typing import Any, Protocol, runtime_checkable
 
 import torch
 import torch.nn as nn
+from torchvision.models import ResNet18_Weights, resnet18
+
+
+@runtime_checkable
+class SupportsEncode(Protocol):
+    def encode(self, x: Any) -> Any: ...
 
 
 class Autoencoder(nn.Module):
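The new `SupportsEncode` protocol is marked `runtime_checkable`, so `isinstance()` checks pass for any object that structurally provides an `encode` method. A quick self-contained sketch:

```python
# Quick sketch of the runtime_checkable protocol added above: isinstance()
# only checks for the presence of an encode attribute, not inheritance.
from typing import Any, Protocol, runtime_checkable


@runtime_checkable
class SupportsEncode(Protocol):
    def encode(self, x: Any) -> Any: ...


class TinyEncoder:
    def encode(self, x: Any) -> Any:
        return x


print(isinstance(TinyEncoder(), SupportsEncode))  # True
print(isinstance(object(), SupportsEncode))       # False
```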
@@ -330,3 +336,38 @@ class Decoder_AE(nn.Module):
         x = self.decoder(x)
         x = x.reshape((-1, *self.input_shape))
         return x
+
+
+class ResNet18(nn.Module):
+    """
+    A wrapper class for the torchvision.models.resnet18 model
+
+
+    Note
+    ----
+    This class is provided for the use of DataEval documentation and excludes many features
+    of the torchvision implementation.
+
+    Warning
+    -------
+    This class has been thoroughly tested for the purposes
+    of DataEval's documentation but not for operational use.
+    Please use with caution if deploying this class or subclasses.
+    """
+
+    def __init__(self, embedding_size: int = 128):
+        super().__init__()
+        self.model: nn.Module = resnet18(weights=ResNet18_Weights.DEFAULT, progress=False)
+        self.model.fc = nn.Linear(self.model.fc.in_features, embedding_size)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.model(x)
+
+    @staticmethod
+    def transforms() -> Any:
+        """(Returns) the default ResNet18 IMAGENET1K_V1 transforms"""
+
+        return ResNet18_Weights.DEFAULT.transforms()
+
+    def __str__(self) -> str:
+        return str(self.model)
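A usage sketch for the wrapper added above: instantiating it downloads the pretrained ImageNet weights, so network access is assumed, and the batch below is a dummy tensor standing in for properly preprocessed images.

```python
# Usage sketch for the ResNet18 wrapper defined above; downloading the
# pretrained weights requires network access.
import torch

from dataeval.utils.torch.models import ResNet18

model = ResNet18(embedding_size=64)
print(ResNet18.transforms())  # the default IMAGENET1K_V1 preprocessing pipeline

x = torch.rand(2, 3, 224, 224)  # dummy batch standing in for preprocessed images
with torch.no_grad():
    embeddings = model(x)
print(embeddings.shape)  # torch.Size([2, 64])
```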
dataeval/workflows/sufficiency.py
CHANGED
@@ -5,17 +5,18 @@ __all__ = []
 import contextlib
 import warnings
 from dataclasses import dataclass
-from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, TypeVar, cast
+from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, Sized, TypeVar, cast
 
 import numpy as np
 import torch
 import torch.nn as nn
-from numpy.typing import
+from numpy.typing import NDArray
 from scipy.optimize import basinhopping
 from torch.utils.data import Dataset
 
-from dataeval.
-from dataeval.
+from dataeval._output import Output, set_metadata
+from dataeval.typing import ArrayLike
+from dataeval.utils._array import as_numpy
 
 with contextlib.suppress(ImportError):
     from matplotlib.figure import Figure
@@ -24,7 +25,7 @@ with contextlib.suppress(ImportError):
 @dataclass(frozen=True)
 class SufficiencyOutput(Output):
     """
-    Output class for :class
+    Output class for :class:`.Sufficiency` workflow.
 
     Attributes
     ----------
@@ -277,9 +278,9 @@ def reset_parameters(model: nn.Module) -> nn.Module:
 
 
 def validate_dataset_len(dataset: Dataset[Any]) -> int:
-    if not
+    if not isinstance(dataset, Sized):
         raise TypeError("Must provide a dataset with a length attribute")
-    length: int = dataset
+    length: int = len(dataset)
     if length <= 0:
         raise ValueError("Dataset length must be greater than 0")
     return length
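`validate_dataset_len` now relies on the `Sized` check, so any dataset that defines `__len__` passes the type guard. A tiny sketch with a hypothetical dataset:

```python
# Tiny sketch of the Sized check used above: defining __len__ is enough for
# isinstance(dataset, Sized) to succeed. The dataset here is hypothetical.
from typing import Any, Sized

from torch.utils.data import Dataset


class TenZeros(Dataset[Any]):
    def __len__(self) -> int:
        return 10

    def __getitem__(self, idx: int) -> int:
        return 0


print(isinstance(TenZeros(), Sized))  # True, so validate_dataset_len accepts it
```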
@@ -460,13 +461,13 @@ class Sufficiency(Generic[T]):
     @property
     def eval_fn(
         self,
-    ) -> Callable[[nn.Module, Dataset[T]],
+    ) -> Callable[[nn.Module, Dataset[T]], Mapping[str, float] | Mapping[str, ArrayLike]]:
         return self._eval_fn
 
     @eval_fn.setter
     def eval_fn(
         self,
-        value: Callable[[nn.Module, Dataset[T]],
+        value: Callable[[nn.Module, Dataset[T]], Mapping[str, float] | Mapping[str, ArrayLike]],
     ) -> None:
         if not callable(value):
             raise TypeError("Must provide a callable for eval_fn.")
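The property and setter are now annotated to accept callables returning a `Mapping` of metric name to `float` or `ArrayLike`. A sketch of an evaluation function that satisfies the tightened signature, with the scoring logic left as a placeholder:

```python
# Sketch of an evaluation callable matching the eval_fn annotation above:
# (model, dataset) -> Mapping[str, float]. The scoring logic is a placeholder.
from typing import Any, Mapping

import torch.nn as nn
from torch.utils.data import Dataset


def eval_fn(model: nn.Module, dataset: Dataset[Any]) -> Mapping[str, float]:
    model.eval()
    # ... run inference over `dataset` and compute real metrics here ...
    return {"accuracy": 0.0}
```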
{dataeval-0.76.0.dist-info → dataeval-0.81.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.
+Version: 0.81.0
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
@@ -21,8 +21,12 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Topic :: Scientific/Engineering
 Provides-Extra: all
-Requires-Dist:
+Requires-Dist: defusedxml (>=0.7.1)
+Requires-Dist: fast_hdbscan (==0.2.0)
+Requires-Dist: matplotlib (>=3.7.1) ; extra == "all"
+Requires-Dist: numba (>=0.59.1)
 Requires-Dist: numpy (>=1.24.2)
+Requires-Dist: pandas (>=2.0) ; extra == "all"
 Requires-Dist: pillow (>=10.3.0)
 Requires-Dist: requests
 Requires-Dist: scikit-learn (>=1.5.0)
@@ -38,13 +42,17 @@ Description-Content-Type: text/markdown
 
 # DataEval
 
-To view our extensive collection of tutorials, how-to's, explanation guides,
+To view our extensive collection of tutorials, how-to's, explanation guides,
+and reference material, please visit our documentation on
+**[Read the Docs](https://dataeval.readthedocs.io/)**
 
 ## About DataEval
 
 <!-- start tagline -->
 
-DataEval curates datasets to train and test performant, robust, unbiased and
+DataEval curates datasets to train and test performant, robust, unbiased and
+reliable AI models and monitors for data shifts that impact performance of
+deployed models.
 
 <!-- end tagline -->
 
@@ -52,22 +60,33 @@ DataEval curates datasets to train and test performant, robust, unbiased and rel
 
 <!-- start needs -->
 
-DataEval is an effective, powerful, and reliable set of tools for any T&E
+DataEval is an effective, powerful, and reliable set of tools for any T&E
+engineer. Throughout all stages of the machine learning lifecycle, DataEval
+supports model development, data analysis, and monitoring with state-of-the-art
+algorithms to help you solve difficult problems. With a focus on computer
+vision tasks, DataEval provides simple, but effective metrics for performance
+estimation, bias detection, and dataset linting.
 
 <!-- end needs -->
 
 <!-- start JATIC interop -->
-DataEval is easy to install, supports a wide range of Python versions, and is
-
+DataEval is easy to install, supports a wide range of Python versions, and is
+compatible with many of the most popular packages in the scientific and T&E
+communities.
+
+DataEval also has native interopability between JATIC's suite of tools when
+using MAITE-compliant datasets and models.
 <!-- end JATIC interop -->
 
 ## Getting Started
 
 **Python versions:** 3.9 - 3.12
 
-**Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*,
+**Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*,
+*Gradient*
 
-Choose your preferred method of installation below or follow our
+Choose your preferred method of installation below or follow our
+[installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
 
 * [Installing with pip](#installing-with-pip)
 * [Installing with conda/mamba](#installing-with-conda)
@@ -75,7 +94,8 @@ Choose your preferred method of installation below or follow our [installation g
 
 ### **Installing with pip**
 
-You can install DataEval directly from pypi.org using the following command.
+You can install DataEval directly from pypi.org using the following command.
+The optional dependencies of DataEval are `all`.
 
 ```bash
 pip install dataeval[all]
@@ -83,8 +103,9 @@ pip install dataeval[all]
 
 ### **Installing with conda**
 
-DataEval can be installed in a Conda/Mamba environment using the provided
-are installed from the `pytorch`
+DataEval can be installed in a Conda/Mamba environment using the provided
+`environment.yaml` file. As some dependencies are installed from the `pytorch`
+channel, the channel is specified in the below example.
 
 ```bash
 micromamba create -f environment\environment.yaml -c pytorch
@@ -92,7 +113,9 @@ micromamba create -f environment\environment.yaml -c pytorch
 
 ### **Installing from GitHub**
 
-To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
+To install DataEval from source locally on Ubuntu, you will need `git-lfs` to
+download larger, binary source files and `poetry` for project dependency
+management.
 
 ```bash
 sudo apt-get install git-lfs
@@ -112,7 +135,9 @@ Install DataEval with optional dependencies for development.
 poetry install --all-extras --with dev
 ```
 
-Now that DataEval is installed, you can run commands in the poetry virtual
+Now that DataEval is installed, you can run commands in the poetry virtual
+environment by prefixing shell commands with `poetry run`, or activate the
+virtual environment directly in the shell.
 
 ```bash
 poetry shell
@@ -131,7 +156,11 @@ If you have any questions, feel free to reach out to the people below:
 
 ### CDAO Funding Acknowledgement
 
-This material is based upon work supported by the Chief Digital and Artificial
+This material is based upon work supported by the Chief Digital and Artificial
+Intelligence Office under Contract No. W519TC-23-9-2033. The views and
+conclusions contained herein are those of the author(s) and should not be
+interpreted as necessarily representing the official policies or endorsements,
+either expressed or implied, of the U.S. Government.
 
 <!-- end acknowledgement -->
 
dataeval-0.81.0.dist-info/RECORD
ADDED
@@ -0,0 +1,94 @@
+dataeval/__init__.py,sha256=XbukGD_taba_kqIskYMUHjfbRwCiS9AatkZlWOKwAyw,1510
+dataeval/_log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
+dataeval/_output.py,sha256=BB_wJJpQX7CaPZFE8x-0KMYBhuO1pramZhkz6LQ4uf0,4281
+dataeval/config.py,sha256=x55jqLFrlHvOcNqPXudVnF24yc3OAaEAu-q9NJZSIq4,2225
+dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
+dataeval/detectors/drift/__init__.py,sha256=nagRw504maFP_129qBLY170RExXy4LNJQCujfPdCopk,658
+dataeval/detectors/drift/_base.py,sha256=MMqm9ysw4ehozQsepR6utPJhvybdOJOk-sjKx8N-Vqw,14782
+dataeval/detectors/drift/_cvm.py,sha256=H2w-I0eMD7yP-CSmpdodeJ0-TYznJT7w_H7JuobESow,3859
+dataeval/detectors/drift/_ks.py,sha256=-5k3RBPA3kadX7oD14Wc52rAqQf1udwFeW7Qf3Sv4Tw,4058
+dataeval/detectors/drift/_mmd.py,sha256=11z6zIUs9NzD2J5j1RtZQZkLOOP0mtLQIzKSSOnPfUU,7931
+dataeval/detectors/drift/_torch.py,sha256=BY-AEqjkzX8fJnLJSBosHnsRsUorL0de_ysJjkZyS0s,7687
+dataeval/detectors/drift/_uncertainty.py,sha256=c86qgGqP-Ig2r0l1cnsNUqzRxRoM13TJafuXsELzdZw,5675
+dataeval/detectors/drift/updates.py,sha256=CKH1aEvo1Ltz1zxsKA-0uXR3mz7CiQHeTx86lxgg0SY,1782
+dataeval/detectors/linters/__init__.py,sha256=x6upwKPxJCBQvFcUyLf_4jTmL-CmTt1G4XeZUcQuhvc,367
+dataeval/detectors/linters/duplicates.py,sha256=KagOkHZcx2YxGc6DqA1Vof1rq7ELZUHW9dRIBrQBJ28,6037
+dataeval/detectors/linters/outliers.py,sha256=vPQRfJeo9npEVGRjg2c5ffuuqf-hyxGQsyVcb5fZ9Kg,14128
+dataeval/detectors/ood/__init__.py,sha256=Ws6_un4pFWNknki7Bp7qjrslZVB9pYNE-K72u2lF65k,291
+dataeval/detectors/ood/ae.py,sha256=3uzPMN1MZlnRXOmgsgkg7V1PpkqCFSSTR2xmfJDFEjk,2962
+dataeval/detectors/ood/base.py,sha256=I2gW8cRWR-eBSI2zwESDrnYUEsMlhRsnWJWVyw4Jgkg,3047
+dataeval/detectors/ood/metadata_ks_compare.py,sha256=4wwf6Nwx8qeCL9AnGP91vMfeMD8wpAF1_XOA15sdXsY,5205
+dataeval/detectors/ood/metadata_ood_mi.py,sha256=7_Sdzf7-x1TlrIQvSyOIB98C8_UQhUwmwFQmZ9_q1Uc,4042
+dataeval/detectors/ood/mixin.py,sha256=hisM-xQgcz2kyC8sPHqCQ_ZaUSLWdsOT0QdouezQwOo,5439
+dataeval/detectors/ood/output.py,sha256=srbcxYehJpMPEwzy9mk0LUEOUNXO2y8rMu7HaxuNXfE,1711
+dataeval/detectors/ood/vae.py,sha256=TEvv1ydHk_URio9Nm9KNb4Ci3jyj-g4l31fakkV3vus,2273
+dataeval/metadata/__init__.py,sha256=_GVidbjjjxOxvdy0H-cTIOAzlwTRaXJR8NN9_9W3SAk,183
+dataeval/metadata/_ood.py,sha256=NRCqG5LLR1CTaj10bI2PUltPoxyYGGdhi8JYX4xDKB4,8408
+dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
+dataeval/metrics/bias/__init__.py,sha256=I8h7QDGpB21HY-mkU0B9wJKnUBr3Kx2xTmeebkpqe_Y,649
+dataeval/metrics/bias/_balance.py,sha256=6W40OWkLO3c5QKcPXMBaUrC1JOiNNmMiWll4VhVK7yE,11219
+dataeval/metrics/bias/_coverage.py,sha256=QU0Pl3u6qdYgjBoLqoec0BJQtHgZ0M_EmGUSYo9unPI,6395
+dataeval/metrics/bias/_diversity.py,sha256=vOnziwmwnEO6v2BLk8C9nKzyDAwjXMzv5God1Le629Y,8566
+dataeval/metrics/bias/_parity.py,sha256=FYsqy8_R0LAPj6-NkSWy9gHGJ1V3wC4KYRRjSFjY6ww,13137
+dataeval/metrics/estimators/__init__.py,sha256=d0b1eqlForaiXAUY7E7HUfITVhJu4mzo0ULe50EBrDE,528
+dataeval/metrics/estimators/_ber.py,sha256=Xtn9YKS7uUlyJkDck9kqSOyEnqI7GFiyq4zrWpTLyAs,5697
+dataeval/metrics/estimators/_clusterer.py,sha256=wqtw4_2kw6tdb1zx7b9vNdK0iQYZ1KESZ2nSx2txvoo,3403
+dataeval/metrics/estimators/_divergence.py,sha256=m2-9oO5Sx6ybFyhm3IwARoPudMz7kKnj6m6p31gGZSw,4426
+dataeval/metrics/estimators/_uap.py,sha256=GyLd7fgyALit3jpPaEzAntOm9ULim2bO64HFEOX2KO4,2153
+dataeval/metrics/stats/__init__.py,sha256=xmoChTCtSgE5ZPPLmXV6VbePGpNp-wryuERC1y10J_I,1095
+dataeval/metrics/stats/_base.py,sha256=KP_1VhgYPS6maDbzLG8xMldjjv-MfFBlC2huP2yadpY,13605
+dataeval/metrics/stats/_boxratiostats.py,sha256=kUEyPF9-6XSlegqNONN-FbnpahJPCPB4-VjMFPtg1Wg,6321
+dataeval/metrics/stats/_datasetstats.py,sha256=N2DAZWUzCx5dF8aWIcQjStE1Vpdzk0Gf8Nbjrptm9hg,7385
+dataeval/metrics/stats/_dimensionstats.py,sha256=qqSqzhpsGT1wGxgqI6PhZSzJQcf0ZQCFNQqDYAIkkao,4058
+dataeval/metrics/stats/_hashstats.py,sha256=aBB-VeNZMxLBSzGoXKps8kqxQbrtJ7z_-6HRENjPo0s,5082
+dataeval/metrics/stats/_labelstats.py,sha256=KCEsVXH6AmVvXtg-uBVicFloWeO5J9oApX9DGpm7Xhw,7002
+dataeval/metrics/stats/_pixelstats.py,sha256=bzi_zEivcNXEKu2xgv3TCoqpAURym35kLwKNM2-8Dnk,4250
+dataeval/metrics/stats/_visualstats.py,sha256=s84iwDYYUp5DWIdaeLs68PIcZkD8Wa52V7Y8tDPD76Q,4689
+dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/typing.py,sha256=qXAGzxJoaTZ5BBUUsQP8KEiswu8IKt2PRvZIXKvxjcU,1284
+dataeval/utils/__init__.py,sha256=T8F8zJh4ZAeu0wDzfpld92I2zJg9mWBmkGCHrDPU7gk,264
+dataeval/utils/_array.py,sha256=fc04sYShIdsRS4qtG1UCnlGGk-yVRxlOHTNAmW7NpDY,4990
+dataeval/utils/_bin.py,sha256=nylthmsC3vzLHLhlUMACvZs--h7xvAh9Pt75InaQJW8,7322
+dataeval/utils/_clusterer.py,sha256=fw5x-2QN0TIbiodDKHZxRgxKHINedpPcOklzce0Rbjg,5436
+dataeval/utils/_fast_mst.py,sha256=4_7ykVihCL5jWtxcGnrecIsDQo65kUml9SZ1JxgBZYY,7172
+dataeval/utils/_image.py,sha256=capzF_X5H0jy0PmTP3Hf52GFgLqrnfU6gS4tiwck9jo,1939
+dataeval/utils/_method.py,sha256=EplDHf402o8EDlY5PHLCKk1XObK2i-MWZp4BgymNn9A,480
+dataeval/utils/_mst.py,sha256=gXjUUhz9G4wkcCUTqQ-61Ti9sZUFx08hEjlZXWiEmPc,2163
+dataeval/utils/_plot.py,sha256=8w7utZ6sT0flCQi5KNFHNmEIbDvru7SE18r_w9DnEX8,7114
+dataeval/utils/data/__init__.py,sha256=G3PKWyEDG6RPwawPBKEpCVx-cakIHwScGxxCdstpbP4,609
+dataeval/utils/data/_embeddings.py,sha256=SSGaXnsyjF9gozr_nF4WJ8ljY-PSxNu7ITJZyeKy-AA,3574
+dataeval/utils/data/_images.py,sha256=aN_VUZt4mCB_0LGpR79QC-325ZhvmsyANoquWmyhDbg,1764
+dataeval/utils/data/_metadata.py,sha256=7czoekyYY1x2oYaxXQHwpla-RQxLnkX-nGPu54x5nQg,13632
+dataeval/utils/data/_selection.py,sha256=bhvDRd-zyPCshTraDBfvVuOd4tKQ5dhiE0Bqvn7sTao,4019
+dataeval/utils/data/_split.py,sha256=3Fn73qImqtwPQP49W4fQvbiURnMV45xA6DW7lW3nGR0,18872
+dataeval/utils/data/_targets.py,sha256=GZpN0NuvO6i0dXF_I5jDEvcmBWz0yVQ5OFfYQUyc3rs,2581
+dataeval/utils/data/_types.py,sha256=tNhw5a6pvuG0EHpLDwSvNWt7vESYEh48LzR4l6eXC7M,1388
+dataeval/utils/data/collate.py,sha256=Z5nmBnWV_IoJzMp_tj8RCKjMJA9sSCY_zZITqISGixc,3865
+dataeval/utils/data/datasets/__init__.py,sha256=jBrswiERrvBx4pJQJZIq_B5UE-Wy8a2_SBfM2crG8R8,511
+dataeval/utils/data/datasets/_base.py,sha256=1GxwEB_Ql84183SdL_jTwLkUzdfXxNMgX3PLMzJJm4Y,8682
+dataeval/utils/data/datasets/_cifar10.py,sha256=p0IdnHai80kLnA7V5rxdtxOuuBaWGJ5Ymi5xZTLmrgY,5151
+dataeval/utils/data/datasets/_fileio.py,sha256=SixIk5nIlIwJdX9zjNXS10vHA3hL8aaYbqHsDg1xSpY,6447
+dataeval/utils/data/datasets/_milco.py,sha256=KJjmF6IhBZHdSHRvRveD0wsMeixMFS5qgcKudqXmcRc,6059
+dataeval/utils/data/datasets/_mixin.py,sha256=FJgZP_cpJkgAHA3j3ai_j3Wt7aFSEjIMVmt9NpvVXzg,1757
+dataeval/utils/data/datasets/_mnist.py,sha256=_9pOWmTF43Is354kTdT1YAqThUmiFyIbKLncFVBuK_k,7214
+dataeval/utils/data/datasets/_ships.py,sha256=M_46nzlOLv6jk5EOTWIub3MraAMbDQREnCVA0XdGW6Q,4352
+dataeval/utils/data/datasets/_voc.py,sha256=vvpCuahGPN4cRLAGIXldD8a3cYqsx2cFomwx6T9oSaI,13828
+dataeval/utils/data/selections/__init__.py,sha256=RLjkIh2IAvPktLbUmyLv3p-rvDEaBAdWzjiNnnhVtn8,481
+dataeval/utils/data/selections/_classfilter.py,sha256=4AixzXlS8SudKTOTHvx5PlJvhAINAp110Q7RKDqpno4,2334
+dataeval/utils/data/selections/_indices.py,sha256=Z5RTFSRUZqv8vFoo5vPxJOCS5jpdstl8Ru59COl8weY,646
+dataeval/utils/data/selections/_limit.py,sha256=kZFhFOxWBFaZgM0zOFF5lLse3osWlelMDFE9fUO3wJE,528
+dataeval/utils/data/selections/_reverse.py,sha256=sBkuBmkCllqUufdQvjj1Sslg3QzWkTB5BPlo45ONFxE,384
+dataeval/utils/data/selections/_shuffle.py,sha256=3T_F53BKD-S4cVCaj1MQoF6k0Dp64PdIGbPNuOQ38-8,591
+dataeval/utils/metadata.py,sha256=X8Hu4LdCzAaE9uk1hI4BflmFve_VOQCqK9lXq0sk9ow,14196
+dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
+dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
+dataeval/utils/torch/_gmm.py,sha256=XBHNLPTtLGRrzq0B4GI48Sha7YHL-0PpXil3s3exLGE,3714
+dataeval/utils/torch/_internal.py,sha256=5BYibQvvXS-trsHi2x7gjxuaknLwSyj6yWXbOFEdx-M,5790
+dataeval/utils/torch/models.py,sha256=hmroEs6C6jQ5tAoZa71RFeIvXLxfXrTJSFH_jG2LGQU,9749
+dataeval/utils/torch/trainer.py,sha256=Qay0LK63RuyoGYiJ5zI2C5BVym309ORvp6shhpcrIU4,5589
+dataeval/workflows/__init__.py,sha256=L9yfBipNFGnYuN2JbMknIHDvziwfa2XAGFnOwifZbls,216
+dataeval/workflows/sufficiency.py,sha256=flYfHh3NX02MPFzeGmk4s_1WctoQFf2hIsbAHch-lQ0,18700
+dataeval-0.81.0.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+dataeval-0.81.0.dist-info/METADATA,sha256=tYTSEqObHVtAexNlrGwq99AWtPxBjmrkOUYUiX31pwE,5302
+dataeval-0.81.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.81.0.dist-info/RECORD,,