dataeval 0.83.0__py3-none-any.whl → 0.84.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/config.py +3 -3
- dataeval/detectors/drift/__init__.py +2 -2
- dataeval/detectors/drift/_base.py +55 -203
- dataeval/detectors/drift/_cvm.py +19 -30
- dataeval/detectors/drift/_ks.py +18 -30
- dataeval/detectors/drift/_mmd.py +189 -53
- dataeval/detectors/drift/_uncertainty.py +52 -56
- dataeval/detectors/drift/updates.py +13 -12
- dataeval/detectors/linters/duplicates.py +5 -3
- dataeval/detectors/linters/outliers.py +2 -2
- dataeval/detectors/ood/ae.py +1 -1
- dataeval/metrics/bias/__init__.py +11 -1
- dataeval/metrics/bias/_completeness.py +130 -0
- dataeval/metrics/stats/_base.py +28 -32
- dataeval/metrics/stats/_dimensionstats.py +2 -2
- dataeval/metrics/stats/_hashstats.py +2 -2
- dataeval/metrics/stats/_imagestats.py +4 -4
- dataeval/metrics/stats/_labelstats.py +4 -45
- dataeval/metrics/stats/_pixelstats.py +2 -2
- dataeval/metrics/stats/_visualstats.py +2 -2
- dataeval/outputs/__init__.py +2 -1
- dataeval/outputs/_bias.py +31 -22
- dataeval/outputs/_stats.py +2 -3
- dataeval/typing.py +25 -22
- dataeval/utils/_array.py +43 -7
- dataeval/utils/data/_dataset.py +8 -4
- dataeval/utils/data/_embeddings.py +141 -24
- dataeval/utils/data/_images.py +38 -15
- dataeval/utils/data/_metadata.py +5 -4
- dataeval/utils/data/_selection.py +3 -15
- dataeval/utils/data/_split.py +76 -129
- dataeval/utils/data/datasets/_base.py +7 -4
- dataeval/utils/data/datasets/_cifar10.py +9 -9
- dataeval/utils/data/datasets/_milco.py +42 -14
- dataeval/utils/data/datasets/_mnist.py +9 -5
- dataeval/utils/data/datasets/_ships.py +8 -4
- dataeval/utils/data/datasets/_voc.py +40 -19
- dataeval/utils/data/selections/__init__.py +2 -0
- dataeval/utils/data/selections/_classbalance.py +38 -0
- dataeval/utils/data/selections/_classfilter.py +14 -29
- dataeval/utils/data/selections/_prioritize.py +1 -1
- dataeval/utils/data/selections/_shuffle.py +2 -2
- dataeval/utils/metadata.py +1 -1
- dataeval/utils/torch/_internal.py +12 -35
- {dataeval-0.83.0.dist-info → dataeval-0.84.1.dist-info}/METADATA +2 -3
- {dataeval-0.83.0.dist-info → dataeval-0.84.1.dist-info}/RECORD +49 -48
- dataeval/detectors/drift/_torch.py +0 -222
- {dataeval-0.83.0.dist-info → dataeval-0.84.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.83.0.dist-info → dataeval-0.84.1.dist-info}/WHEEL +0 -0
dataeval/utils/data/datasets/_voc.py CHANGED
@@ -14,6 +14,8 @@ from dataeval.utils.data.datasets._base import (
     BaseODDataset,
     BaseSegDataset,
     DataLocation,
+    _TArray,
+    _TTarget,
 )
 from dataeval.utils.data.datasets._mixin import BaseDatasetNumpyMixin, BaseDatasetTorchMixin
 from dataeval.utils.data.datasets._types import ObjectDetectionTarget, SegmentationTarget
@@ -21,9 +23,6 @@ from dataeval.utils.data.datasets._types import ObjectDetectionTarget, SegmentationTarget
 if TYPE_CHECKING:
     from dataeval.typing import Transform
 
-_TArray = TypeVar("_TArray")
-_TTarget = TypeVar("_TTarget")
-
 VOCClassStringMap = Literal[
     "aeroplane",
     "bicycle",
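The module-local TypeVars are replaced by imports, making `_base` the single definition site for the generic array and target parameters shared across the dataset modules. A minimal sketch of the pattern (class bodies elided, file boundaries marked by comments):

```python
# _base.py -- defines the shared generic parameters once
from typing import TypeVar

_TArray = TypeVar("_TArray")    # backing array type (e.g. NumPy array or torch Tensor)
_TTarget = TypeVar("_TTarget")  # target type (classification scores, boxes, masks, ...)

# _voc.py -- imports them instead of redefining local copies
from dataeval.utils.data.datasets._base import _TArray, _TTarget
```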
@@ -121,19 +120,19 @@ class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str]]):
     def __init__(
         self,
         root: str | Path,
-        year: Literal["2007", "2008", "2009", "2010", "2011", "2012"] = "2012",
         image_set: Literal["train", "val", "test", "base"] = "train",
-        download: bool = False,
+        year: Literal["2007", "2008", "2009", "2010", "2011", "2012"] = "2012",
         transforms: Transform[_TArray] | Sequence[Transform[_TArray]] | None = None,
+        download: bool = False,
         verbose: bool = False,
     ) -> None:
         self.year = year
         self._resource_index = self._get_year_image_set_index(year, image_set)
         super().__init__(
             root,
-            download,
             image_set,
             transforms,
+            download,
             verbose,
         )
 
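Note the parameter reorder: `year` now follows `image_set`, and `download` moved after `transforms`, so positional call sites written against 0.83.0 will bind the wrong arguments. A hedged example of a keyword style that survives the change (the module import path is taken from this diff; everything else is illustrative):

```python
from dataeval.utils.data.datasets._voc import VOCDetection  # path per this diff

# Pass everything after `root` by keyword so the 0.84.1 reorder cannot
# silently swap year/image_set or feed True into transforms.
ds = VOCDetection(
    "./data",
    image_set="train",
    year="2012",
    download=True,  # now second-to-last, after transforms
)
```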
@@ -191,10 +190,14 @@ class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str]]):
         for entry in data:
             file_name = Path(entry).name
             file_stem = Path(entry).stem
-
-
-
-
+            if self.year != "2007":
+                # Remove file extension and split by "_"
+                parts = file_stem.split("_")
+                file_meta["year"].append(parts[0])
+                file_meta["image_id"].append(parts[1])
+            else:
+                file_meta["year"].append(self.year)
+                file_meta["image_id"].append(file_stem)
             file_meta["mask_path"].append(str(seg_folder / file_name))
             annotations.append(str(ann_folder / file_stem) + ".xml")
 
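The added branch exists because VOC2007 file stems are bare image ids while later years use `<year>_<id>` stems, which are split on `_`. A standalone sketch of the same parsing rule, with a hypothetical helper name and illustrative stems:

```python
from pathlib import Path

def parse_voc_stem(entry: str, dataset_year: str) -> tuple[str, str]:
    """Mirror of the diff's year/image_id extraction (helper name is hypothetical)."""
    file_stem = Path(entry).stem
    if dataset_year != "2007":
        parts = file_stem.split("_")  # e.g. "2012_004331" -> ("2012", "004331")
        return parts[0], parts[1]
    return dataset_year, file_stem    # 2007 stems carry no year prefix

assert parse_voc_stem("2012_004331.jpg", "2012") == ("2012", "004331")
assert parse_voc_stem("000032.jpg", "2007") == ("2007", "000032")
```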
@@ -250,9 +253,6 @@ class VOCDetection(
     ----------
     root : str or pathlib.Path
         Root directory of dataset where the ``vocdataset`` folder exists.
-    download : bool, default False
-        If True, downloads the dataset from the internet and puts it in root directory.
-        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
     image_set : "train", "val", "test", or "base", default "train"
         If "test", then dataset year must be "2007".
         If "base", then the combined dataset of "train" and "val" is returned.
@@ -260,6 +260,9 @@ class VOCDetection(
         The dataset year.
     transforms : Transform, Sequence[Transform] or None, default None
         Transform(s) to apply to the data.
+    download : bool, default False
+        If True, downloads the dataset from the internet and puts it in root directory.
+        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
     verbose : bool, default False
         If True, outputs print statements.
 
@@ -267,6 +270,8 @@ class VOCDetection(
     ----------
     path : pathlib.Path
         Location of the folder containing the data.
+    year : "2007", "2008", "2009", "2010", "2011" or "2012"
+        The selected dataset year.
     image_set : "train", "val", "test" or "base"
         The selected image set from the dataset.
     index2label : dict[int, str]
@@ -279,6 +284,10 @@ class VOCDetection(
         The transforms to be applied to the data.
     size : int
         The size of the dataset.
+
+    Note
+    ----
+    Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
     """
 
 
@@ -294,9 +303,6 @@ class VOCDetectionTorch(
     ----------
     root : str or pathlib.Path
         Root directory of dataset where the ``vocdataset`` folder exists.
-    download : bool, default False
-        If True, downloads the dataset from the internet and puts it in root directory.
-        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
     image_set : "train", "val", "test", or "base", default "train"
         If "test", then dataset year must be "2007".
         If "base", then the combined dataset of "train" and "val" is returned.
@@ -304,6 +310,9 @@ class VOCDetectionTorch(
         The dataset year.
     transforms : Transform, Sequence[Transform] or None, default None
         Transform(s) to apply to the data.
+    download : bool, default False
+        If True, downloads the dataset from the internet and puts it in root directory.
+        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
     verbose : bool, default False
         If True, outputs print statements.
 
@@ -311,6 +320,8 @@ class VOCDetectionTorch(
     ----------
     path : pathlib.Path
         Location of the folder containing the data.
+    year : "2007", "2008", "2009", "2010", "2011" or "2012"
+        The selected dataset year.
     image_set : "train", "val", "test" or "base"
         The selected image set from the dataset.
     index2label : dict[int, str]
@@ -323,6 +334,10 @@ class VOCDetectionTorch(
         The transforms to be applied to the data.
     size : int
         The size of the dataset.
+
+    Note
+    ----
+    Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
     """
 
 
@@ -338,9 +353,6 @@ class VOCSegmentation(
     ----------
     root : str or pathlib.Path
         Root directory of dataset where the ``vocdataset`` folder exists.
-    download : bool, default False
-        If True, downloads the dataset from the internet and puts it in root directory.
-        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
     image_set : "train", "val", "test", or "base", default "train"
         If "test", then dataset year must be "2007".
         If "base", then the combined dataset of "train" and "val" is returned.
@@ -348,6 +360,9 @@ class VOCSegmentation(
         The dataset year.
     transforms : Transform, Sequence[Transform] or None, default None
         Transform(s) to apply to the data.
+    download : bool, default False
+        If True, downloads the dataset from the internet and puts it in root directory.
+        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
     verbose : bool, default False
         If True, outputs print statements.
 
@@ -355,6 +370,8 @@ class VOCSegmentation(
     ----------
     path : pathlib.Path
         Location of the folder containing the data.
+    year : "2007", "2008", "2009", "2010", "2011" or "2012"
+        The selected dataset year.
     image_set : "train", "val", "test" or "base"
         The selected image set from the dataset.
     index2label : dict[int, str]
@@ -367,6 +384,10 @@ class VOCSegmentation(
         The transforms to be applied to the data.
     size : int
         The size of the dataset.
+
+    Note
+    ----
+    Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
     """
 
     def _load_data(self) -> tuple[list[str], list[str], dict[str, list[Any]]]:
dataeval/utils/data/selections/__init__.py CHANGED
@@ -1,6 +1,7 @@
 """Provides selection classes for selecting subsets of Computer Vision datasets."""
 
 __all__ = [
+    "ClassBalance",
     "ClassFilter",
     "Indices",
     "Limit",
@@ -9,6 +10,7 @@ __all__ = [
     "Shuffle",
 ]
 
+from dataeval.utils.data.selections._classbalance import ClassBalance
 from dataeval.utils.data.selections._classfilter import ClassFilter
 from dataeval.utils.data.selections._indices import Indices
 from dataeval.utils.data.selections._limit import Limit
dataeval/utils/data/selections/_classbalance.py ADDED
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+__all__ = []
+
+
+import numpy as np
+
+from dataeval.typing import Array, ImageClassificationDatum
+from dataeval.utils._array import as_numpy
+from dataeval.utils.data._selection import Select, Selection, SelectionStage
+
+
+class ClassBalance(Selection[ImageClassificationDatum]):
+    """
+    Balance the dataset by class.
+
+    Note
+    ----
+    The total number of instances of each class will be equalized which may result
+    in a lower total number of instances than specified by the selection limit.
+    """
+
+    stage = SelectionStage.FILTER
+
+    def __call__(self, dataset: Select[ImageClassificationDatum]) -> None:
+        class_indices: dict[int, list[int]] = {}
+        for i, idx in enumerate(dataset._selection):
+            target = dataset._dataset[idx][1]
+            if isinstance(target, Array):
+                label = int(np.argmax(as_numpy(target)))
+            else:
+                # ObjectDetectionTarget and SegmentationTarget not supported yet
+                raise TypeError("ClassFilter only supports classification targets as an array of confidence scores.")
+            class_indices.setdefault(label, []).append(i)
+
+        per_class_limit = min(min(len(c) for c in class_indices.values()), dataset._size_limit // len(class_indices))
+        subselection = sorted([i for v in class_indices.values() for i in v[:per_class_limit]])
+        dataset._selection = [dataset._selection[i] for i in subselection]
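The balancing cap is `min(smallest class count, size_limit // n_classes)`, so a `Limit` ahead of `ClassBalance` bounds the total while the selection equalizes per-class counts. (The `TypeError` message still says "ClassFilter", carried over from the class this logic was split out of.) A usage sketch, with the `Select` and `Limit` constructor shapes assumed rather than taken from this diff:

```python
# A sketch, assuming `train_dataset` yields (image, confidence-score array) pairs.
from dataeval.utils.data import Select  # import path assumed
from dataeval.utils.data.selections import ClassBalance, Limit

# With 10 classes and a limit of 1000, each class contributes
# min(smallest class count, 1000 // 10) = up to 100 items.
balanced = Select(train_dataset, selections=[Limit(1000), ClassBalance()])
```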
dataeval/utils/data/selections/_classfilter.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 __all__ = []
 
-from typing import Sequence, TypeVar
+from typing import Sequence
 
 import numpy as np
 
@@ -10,50 +10,35 @@ from dataeval.typing import Array, ImageClassificationDatum
 from dataeval.utils._array import as_numpy
 from dataeval.utils.data._selection import Select, Selection, SelectionStage
 
-TImageClassificationDatum = TypeVar("TImageClassificationDatum", bound=ImageClassificationDatum)
 
-
-class ClassFilter(Selection[TImageClassificationDatum]):
+class ClassFilter(Selection[ImageClassificationDatum]):
     """
-    Filter
+    Filter the dataset by class.
 
     Parameters
     ----------
-    classes : Sequence[int]
-        The classes to filter by.
-    balance : bool, default False
-        Whether to balance the classes.
-
-    Note
-    ----
-    If `balance` is True, the total number of instances of each class will
-    be equalized. This may result in a lower total number of instances.
+    classes : Sequence[int]
+        The classes to filter by.
     """
 
     stage = SelectionStage.FILTER
 
-    def __init__(self, classes: Sequence[int]
+    def __init__(self, classes: Sequence[int]) -> None:
         self.classes = classes
-        self.balance = balance
 
-    def __call__(self, dataset: Select[
-        if
+    def __call__(self, dataset: Select[ImageClassificationDatum]) -> None:
+        if not self.classes:
             return
 
-
-
-        for i, idx in enumerate(dataset._selection):
+        selection = []
+        for idx in dataset._selection:
             target = dataset._dataset[idx][1]
             if isinstance(target, Array):
                 label = int(np.argmax(as_numpy(target)))
             else:
                 # ObjectDetectionTarget and SegmentationTarget not supported yet
                 raise TypeError("ClassFilter only supports classification targets as an array of confidence scores.")
-            if
-
-
-
-
-        per_class_limit = min(len(c) for c in class_indices.values()) if self.balance else dataset._size_limit
-        subselection = sorted([i for v in class_indices.values() for i in v[:per_class_limit]])
-        dataset._selection = [dataset._selection[i] for i in subselection]
+            if label in self.classes:
+                selection.append(idx)
+
+        dataset._selection = selection
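With `balance` gone, filtering is a plain pass over the selection that keeps indices whose argmax label is in `classes`; balancing now composes in separately. A hedged before/after sketch (the `Select` wiring is assumed, not shown in this diff):

```python
from dataeval.utils.data import Select  # import path assumed
from dataeval.utils.data.selections import ClassBalance, ClassFilter

# 0.83.0:  ClassFilter(classes=[3, 5], balance=True)
# 0.84.1:  filtering and balancing are separate, composable selections.
subset = Select(train_dataset, selections=[ClassFilter([3, 5]), ClassBalance()])
```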
dataeval/utils/data/selections/_prioritize.py CHANGED
@@ -272,7 +272,7 @@ class Prioritize(Selection[Any]):
         return _KMeansComplexitySorter(samples, self._c)
 
     def _to_normalized_ndarray(self, embeddings: Embeddings, selection: list[int] | None = None) -> NDArray[Any]:
-        emb: NDArray[Any] = embeddings.
+        emb: NDArray[Any] = embeddings.to_numpy(selection)
         emb /= max(np.max(np.linalg.norm(emb, axis=1)), EPSILON)
         return emb
 
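Only the array source changed (now `Embeddings.to_numpy(selection)`); the normalization itself divides every row by the largest row L2 norm, floored at `EPSILON` to avoid division by zero. A self-contained sketch of that scaling, with an assumed `EPSILON` value:

```python
import numpy as np

EPSILON = 1e-12  # assumed; the module's actual constant is not shown in this diff

def normalize_rows(emb: np.ndarray) -> np.ndarray:
    """Scale all rows by the largest row L2 norm, as in _to_normalized_ndarray."""
    return emb / max(np.max(np.linalg.norm(emb, axis=1)), EPSILON)

emb = np.array([[3.0, 4.0], [0.6, 0.8]])
print(normalize_rows(emb))  # max norm is 5.0 -> [[0.6, 0.8], [0.12, 0.16]]
```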
dataeval/utils/data/selections/_shuffle.py CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 from numpy.random import BitGenerator, Generator, SeedSequence
 from numpy.typing import NDArray
 
-from dataeval.typing import Array
+from dataeval.typing import Array
 from dataeval.utils._array import as_numpy
 from dataeval.utils.data._selection import Select, Selection, SelectionStage
 
@@ -30,7 +30,7 @@ class Shuffle(Selection[Any]):
     seed: int | NDArray[Any] | SeedSequence | BitGenerator | Generator | None
     stage = SelectionStage.ORDER
 
-    def __init__(self, seed: int |
+    def __init__(self, seed: int | Sequence[int] | Array | SeedSequence | BitGenerator | Generator | None = None):
         self.seed = as_numpy(seed) if isinstance(seed, (Sequence, Array)) else seed
 
     def __call__(self, dataset: Select[Any]) -> None:
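The widened annotation matches what the body already handled: sequences and arrays go through `as_numpy`, while ints and NumPy seed objects pass straight to the RNG machinery. A quick sketch of the accepted forms:

```python
import numpy as np
from dataeval.utils.data.selections import Shuffle

Shuffle(seed=42)                          # plain int
Shuffle(seed=[1, 2, 3])                   # Sequence[int], converted via as_numpy
Shuffle(seed=np.random.SeedSequence(7))   # SeedSequence / BitGenerator / Generator
Shuffle()                                 # None (the default)
```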
dataeval/utils/torch/_internal.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
 
 __all__ = []
 
-from functools import partial
 from typing import Any, Callable
 
 import numpy as np
@@ -12,16 +11,16 @@ from torch.utils.data import DataLoader, TensorDataset
 from tqdm import tqdm
 
 from dataeval.config import DeviceLike, get_device
+from dataeval.typing import Array
 
 
 def predict_batch(
-    x:
-    model:
+    x: Array,
+    model: torch.nn.Module,
     device: DeviceLike | None = None,
     batch_size: int = int(1e10),
     preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None = None,
-    dtype: np.dtype | torch.dtype = np.float32,
-) -> NDArray[Any] | torch.Tensor | tuple[Any, ...]:
+) -> torch.Tensor:
     """
     Make batch predictions on a model.
 
@@ -29,7 +28,7 @@ def predict_batch(
     ----------
     x : np.ndarray | torch.Tensor
         Batch of instances.
-    model :
+    model : nn.Module
         PyTorch model.
     device : DeviceLike or None, default None
         The hardware device to use if specified, otherwise uses the DataEval
@@ -38,21 +37,18 @@ def predict_batch(
         Batch size used during prediction.
     preprocess_fn : Callable | None, default None
         Optional preprocessing function for each batch.
-    dtype : np.dtype | torch.dtype, default np.float32
-        Model output type, either a :term:`NumPy` or torch dtype, e.g. np.float32 or torch.float32.
 
     Returns
     -------
-
-
+    torch.Tensor
+        PyTorch tensor with model outputs.
     """
     device = get_device(device)
-    if isinstance(
-
+    if isinstance(model, torch.nn.Module):
+        model = model.to(device).eval()
+    x = torch.tensor(x, device=device)
     n = len(x)
     n_minibatch = int(np.ceil(n / batch_size))
-    return_np = not isinstance(dtype, torch.dtype)
-    preds_tuple = None
     preds_array = []
     with torch.no_grad():
         for i in range(n_minibatch):
@@ -60,28 +56,9 @@ def predict_batch(
             x_batch = x[istart:istop]
             if isinstance(preprocess_fn, Callable):
                 x_batch = preprocess_fn(x_batch)
+            preds_array.append(model(x_batch.to(dtype=torch.float32)).cpu())
 
-
-            if isinstance(preds_tmp, (list, tuple)):
-                if preds_tuple is None:  # init tuple with lists to store predictions
-                    preds_tuple = tuple([] for _ in range(len(preds_tmp)))
-                for j, p in enumerate(preds_tmp):
-                    p = p.cpu() if isinstance(p, torch.Tensor) else p
-                    preds_tuple[j].append(p if not return_np or isinstance(p, np.ndarray) else p.numpy())
-            elif isinstance(preds_tmp, (np.ndarray, torch.Tensor)):
-                preds_tmp = preds_tmp.cpu() if isinstance(preds_tmp, torch.Tensor) else preds_tmp
-                preds_array.append(
-                    preds_tmp if not return_np or isinstance(preds_tmp, np.ndarray) else preds_tmp.numpy()
-                )
-            else:
-                raise TypeError(
-                    f"Model output type {type(preds_tmp)} not supported. The model \
-                    output type needs to be one of list, tuple, NDArray or \
-                    torch.Tensor."
-                )
-    concat = partial(np.concatenate, axis=0) if return_np else partial(torch.cat, dim=0)
-    out = tuple(concat(p) for p in preds_tuple) if preds_tuple is not None else concat(preds_array)
-    return out
+    return torch.cat(preds_array, dim=0)
 
 
 def trainer(
{dataeval-0.83.0.dist-info → dataeval-0.84.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.83.0
+Version: 0.84.1
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
@@ -82,8 +82,7 @@ using MAITE-compliant datasets and models.
 
 **Python versions:** 3.9 - 3.12
 
-**Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*,
-*Gradient*
+**Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*
 
 Choose your preferred method of installation below or follow our
 [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
{dataeval-0.83.0.dist-info → dataeval-0.84.1.dist-info}/RECORD CHANGED
@@ -1,20 +1,19 @@
-dataeval/__init__.py,sha256=
+dataeval/__init__.py,sha256=QzrctVrymZuLN8tnHcF1wp0RTXYM3WSWMozX3NOzIos,1636
 dataeval/_log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
-dataeval/config.py,sha256=
+dataeval/config.py,sha256=lD1YDH8HosFeRU5rQEYRBcmXMZy-csWaMlJTRZGd9iU,3582
 dataeval/detectors/__init__.py,sha256=3Sg-XWlwr75zEEH3hZKA4nWMtGvaRlnfzTWvZG_Ak6U,189
-dataeval/detectors/drift/__init__.py,sha256=
-dataeval/detectors/drift/_base.py,sha256=
-dataeval/detectors/drift/_cvm.py,sha256=
-dataeval/detectors/drift/_ks.py,sha256
-dataeval/detectors/drift/_mmd.py,sha256=
-dataeval/detectors/drift/_torch.py,sha256=
-dataeval/detectors/drift/_uncertainty.py,sha256=
-dataeval/detectors/drift/updates.py,sha256=Btu2iaZW7fbO59G1w5v3ykFot0YPzy2U6VjF0d440VE,2195
+dataeval/detectors/drift/__init__.py,sha256=gD8aY5PotS-S2ot7iB_z_zzSOjIbQLw5znFBNj0jtHE,646
+dataeval/detectors/drift/_base.py,sha256=PdWyEuYqExFdyxvyOh7Q8yXnjNm0D3KfpDUn0bUixtY,7580
+dataeval/detectors/drift/_cvm.py,sha256=CSEyNN9u1MzUI6QmCSlexTUSlHzK1kYh36Nv2L72WbY,3016
+dataeval/detectors/drift/_ks.py,sha256=ifFb_0JcyykJyF9DAVkQqWCXc-3aA0AC8c8to_oOPKo,3198
+dataeval/detectors/drift/_mmd.py,sha256=DOHBNyNNxosR67yM9HTxbvqp1IZ8_KSvTVlX-JtKtjM,11601
+dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
+dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
 dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
-dataeval/detectors/linters/duplicates.py,sha256=
-dataeval/detectors/linters/outliers.py,sha256=
+dataeval/detectors/linters/duplicates.py,sha256=x36-0EAlO_AuOttvElJOZCa0R3VzrlII0NxjwhdkrpE,4969
+dataeval/detectors/linters/outliers.py,sha256=Z0Sbtluu2im1IRGsjhXF2AhrShKDrEkF8BWzAZyPwlA,9054
 dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
-dataeval/detectors/ood/ae.py,sha256=
+dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,2944
 dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
 dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
 dataeval/detectors/ood/vae.py,sha256=Fcq0-WbLhzYCgYOAJPBklHm7yuXmFJuEpBkhgwM5kiA,2291
@@ -23,8 +22,9 @@ dataeval/metadata/_distance.py,sha256=xsXMMg1pJkHcEZ-KIlqv9YOGYVID3ELjt3-fr1QVnO
 dataeval/metadata/_ood.py,sha256=HbS5MusWl62hjixUAd-xaaT0KXkYY1M-MlnUaAI_-8M,12751
 dataeval/metadata/_utils.py,sha256=r8qBJT83RblobD5W5zyTVi6vYi51Dwkqswizdbzss-M,1169
 dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
-dataeval/metrics/bias/__init__.py,sha256=
+dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
 dataeval/metrics/bias/_balance.py,sha256=UnUgbPk2ybFfS5qxv8e_uim7RxamWj0UQP71x3omGs0,6158
+dataeval/metrics/bias/_completeness.py,sha256=BysXU2Jpw33n5dl3acJFEqF3mFGiJLsfG4n5Q2fkTaY,4608
 dataeval/metrics/bias/_coverage.py,sha256=PeUoOiaghUEdn6Ov8z2-am7-fnBVIPcFbJK7Ty5JObA,3647
 dataeval/metrics/bias/_diversity.py,sha256=U_l4oYjH39rON2Io0BdCIwJxxob0cKTW8bZNufG0CWs,5820
 dataeval/metrics/bias/_parity.py,sha256=8JRZv4wLpxN9zTvMDlcpKgz-2nO-9eVjqccODcf2nbw,11535
@@ -34,29 +34,29 @@ dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1R
 dataeval/metrics/estimators/_divergence.py,sha256=QDWl1lyAYoO9D3Ho7qOHSk6ud8Gi2MGuXEsYwO1HxvA,4043
 dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
 dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
-dataeval/metrics/stats/_base.py,sha256=
+dataeval/metrics/stats/_base.py,sha256=YIfOVGd7E19B4dpAnzDYRQkaikvRRyJIpznJNfVtPdw,10750
 dataeval/metrics/stats/_boxratiostats.py,sha256=8Kd2FTZ5PLNYZfdAjU_R385gb0Z16JY0L9H_d5ZhgQs,6341
-dataeval/metrics/stats/_dimensionstats.py,sha256=
-dataeval/metrics/stats/_hashstats.py,sha256=
-dataeval/metrics/stats/_imagestats.py,sha256=
-dataeval/metrics/stats/_labelstats.py,sha256=
-dataeval/metrics/stats/_pixelstats.py,sha256=
-dataeval/metrics/stats/_visualstats.py,sha256=
-dataeval/outputs/__init__.py,sha256=
+dataeval/metrics/stats/_dimensionstats.py,sha256=73mFP-Myxne0peFliwvTntc0kk4cpq0krzMvSLDSIMM,2702
+dataeval/metrics/stats/_hashstats.py,sha256=gp9X_pnTT3mPH9YNrWLdn2LQPK_epJ3dQRoyOCwmKlg,4758
+dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
+dataeval/metrics/stats/_labelstats.py,sha256=WbvXZ831a5BDfm58HF8Z8i5JUV1tgw7tcfzUh8pOXSo,2825
+dataeval/metrics/stats/_pixelstats.py,sha256=SfergRbjNJE4h0xqe-0c8RnKtZmEkZ9MwExdipLSGvg,3247
+dataeval/metrics/stats/_visualstats.py,sha256=cq4AbF2B50Ihbzb86FphcnKQ1TSwNnP3PsnbpiPQZWw,3698
+dataeval/outputs/__init__.py,sha256=ciK-RdXgtn_s7MSCUW1UXvrXltMbltqbpfe9_V7xGrI,1701
 dataeval/outputs/_base.py,sha256=aZFbgybnZSQ3ws7QYRLTbDFqUfBFRVtIwX2LZfeGFUA,5703
-dataeval/outputs/_bias.py,sha256=
+dataeval/outputs/_bias.py,sha256=GwbjLdppUODOeudYb_7ki2ejDmAYthlRKGijVwgVePE,12407
 dataeval/outputs/_drift.py,sha256=gOiu2C-ERTWiRqlP0auMYxPBGdm9HecWPqWfg7I4tZg,2015
 dataeval/outputs/_estimators.py,sha256=a2oAIxxEDZ9WLGfMWH8KD-BVUS_SnULRPR-iI9hFPoQ,3047
 dataeval/outputs/_linters.py,sha256=YOdjrfm8ypdRrqYOaPM9nc6wVJI3-ita3Haj7LHDNaw,6416
 dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
 dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
-dataeval/outputs/_stats.py,sha256=
+dataeval/outputs/_stats.py,sha256=c73Yc3Kkrl-MN6BGKe1V0Yr6Ix2Yp_DZZfFSp8fZMZ0,13180
 dataeval/outputs/_utils.py,sha256=HHlGC7sk416m_3Bgn075Qdblz_aPup_UOafJpB0RuXY,893
 dataeval/outputs/_workflows.py,sha256=MkRD6ubI4NCBXb9v3kjXy64cUGs3G-JKkBdOpRD9XVE,10750
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/typing.py,sha256=
+dataeval/typing.py,sha256=zn6smomSdcO7EeZpeeSP5-8sknTdgUuU7TKe-3UFVrg,6550
 dataeval/utils/__init__.py,sha256=T8F8zJh4ZAeu0wDzfpld92I2zJg9mWBmkGCHrDPU7gk,264
-dataeval/utils/_array.py,sha256=
+dataeval/utils/_array.py,sha256=KqAdXEMjcXYvdWdYEEoEbigwQJ4S9VYxQS3sRFeY5XY,5929
 dataeval/utils/_bin.py,sha256=nylthmsC3vzLHLhlUMACvZs--h7xvAh9Pt75InaQJW8,7322
 dataeval/utils/_clusterer.py,sha256=fw5x-2QN0TIbiodDKHZxRgxKHINedpPcOklzce0Rbjg,5436
 dataeval/utils/_fast_mst.py,sha256=4_7ykVihCL5jWtxcGnrecIsDQo65kUml9SZ1JxgBZYY,7172
@@ -65,41 +65,42 @@ dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
 dataeval/utils/_mst.py,sha256=f0vXytTUjlOS6AyL7c6PkXmaHuuGUK-vMLpq-5xMgxk,2183
 dataeval/utils/_plot.py,sha256=mTRQNbJsA42QMiOwZbJaH8sNYgP996QFDEGVVE9HSgY,7076
 dataeval/utils/data/__init__.py,sha256=vldQ2ZXl8gnI3s4vAGqUUVi6dc_R58F3JMSpbCOyFRI,820
-dataeval/utils/data/_dataset.py,sha256=
-dataeval/utils/data/_embeddings.py,sha256=
-dataeval/utils/data/_images.py,sha256=
-dataeval/utils/data/_metadata.py,sha256=
-dataeval/utils/data/_selection.py,sha256=
-dataeval/utils/data/_split.py,sha256=
+dataeval/utils/data/_dataset.py,sha256=MHY582yRm4FxQkkLWUhKZBb7ZyvWypM6ldUG89vd3uE,7936
+dataeval/utils/data/_embeddings.py,sha256=iDtfLJY1uHoTP4UdQoOt-3wopc6kSOXH_4CVNnmXXA4,8356
+dataeval/utils/data/_images.py,sha256=WF9XJRka8ohUdyI2IKBMAy3JoJhOm1iC-8tbYl8woRM,2642
+dataeval/utils/data/_metadata.py,sha256=62z_qHjoGjiMdpuT36QpNhbWy2UClHWUcjHHlIWT470,14464
+dataeval/utils/data/_selection.py,sha256=2c6DjyeDIJapbI7xL36eBxFnJHIP8Yxt3oU3rBGMqLk,3948
+dataeval/utils/data/_split.py,sha256=q-2RwllJgazwuyxB_GoBqK_nLkqIjyTVr2SQKj_7lhw,16767
 dataeval/utils/data/_targets.py,sha256=ws5d9wRiDkIuOV7GSAKNxzgSm6AWTgb0BFroQK5nAmM,3057
 dataeval/utils/data/collate.py,sha256=Z5nmBnWV_IoJzMp_tj8RCKjMJA9sSCY_zZITqISGixc,3865
 dataeval/utils/data/datasets/__init__.py,sha256=jBrswiERrvBx4pJQJZIq_B5UE-Wy8a2_SBfM2crG8R8,511
-dataeval/utils/data/datasets/_base.py,sha256=
-dataeval/utils/data/datasets/_cifar10.py,sha256=
+dataeval/utils/data/datasets/_base.py,sha256=827nSVhZ-tqeHw1HQ7Qj060CSDd90fEWZomN6FaWnQA,8872
+dataeval/utils/data/datasets/_cifar10.py,sha256=R7QgcCHowAkqhEXOvUhybXTmMlA4BJXkTuAeV9uDgfU,5449
 dataeval/utils/data/datasets/_fileio.py,sha256=SixIk5nIlIwJdX9zjNXS10vHA3hL8aaYbqHsDg1xSpY,6447
-dataeval/utils/data/datasets/_milco.py,sha256=
+dataeval/utils/data/datasets/_milco.py,sha256=bVVDl5W8TdTPU2RiwoPXrfFDM1rKyb-LslwTThBXEr0,7583
 dataeval/utils/data/datasets/_mixin.py,sha256=FJgZP_cpJkgAHA3j3ai_j3Wt7aFSEjIMVmt9NpvVXzg,1757
-dataeval/utils/data/datasets/_mnist.py,sha256=
-dataeval/utils/data/datasets/_ships.py,sha256=
+dataeval/utils/data/datasets/_mnist.py,sha256=kNDJw0oyqa6QgU1y9lg-3AzStavK1BB8iHnDOdv9nyE,8112
+dataeval/utils/data/datasets/_ships.py,sha256=rsyIoRAIk40liFgaEb2dg0lYB7__bAGd9zh9ouzjFKg,4880
 dataeval/utils/data/datasets/_types.py,sha256=iSKyHXRlGuomXs0FHK6md8lXLQrQQ4fxgVOwr4o81bo,1089
-dataeval/utils/data/datasets/_voc.py,sha256=
-dataeval/utils/data/selections/__init__.py,sha256=
-dataeval/utils/data/selections/_classfilter.py,sha256=
+dataeval/utils/data/datasets/_voc.py,sha256=QUtpbh2EpiBoicsmOo-YIfwRwPXyHj-zB2hFn7tlz0Y,15580
+dataeval/utils/data/selections/__init__.py,sha256=iUbMZRDuBXwY3SNAtZTdCVu7SI4zbCyaL6ItXnnq1yI,655
+dataeval/utils/data/selections/_classbalance.py,sha256=hHq9frdwzFLCUmfeJq977Sot_SXhuGANlSsetokhRDc,1465
+dataeval/utils/data/selections/_classfilter.py,sha256=xdR5uX7W5Yivf-mE_CikbRi2fGrZLFrPYun3TeQHTA0,1267
 dataeval/utils/data/selections/_indices.py,sha256=QdLgXN7GABCvGPYe28PV1RAc_RSP_nZOyCvEpKRBdWg,636
 dataeval/utils/data/selections/_limit.py,sha256=ECvHRsp7OF4LZw2tE4sGqqJ085kjC-hd2c7QDMfvXr8,518
-dataeval/utils/data/selections/_prioritize.py,sha256=
+dataeval/utils/data/selections/_prioritize.py,sha256=uRQjeQiAc-vvwHMH4CQtXTGJCfjj_h5dgGlhQYFMz1c,11318
 dataeval/utils/data/selections/_reverse.py,sha256=6SWpELC9Wgx-kPqzhDrPNn4NKU6FqDJveLrxV4D2Ypk,374
-dataeval/utils/data/selections/_shuffle.py,sha256=
-dataeval/utils/metadata.py,sha256=
+dataeval/utils/data/selections/_shuffle.py,sha256=_jwms0qcwrknf2Fx84cCXyNOJyhE_V8rcnDOTDn1S2A,1179
+dataeval/utils/metadata.py,sha256=1XeGYj_e97-nJ_IrWEHPhWICmouYU5qbXWbp7uhZrIE,14171
 dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
 dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
 dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
-dataeval/utils/torch/_internal.py,sha256=
+dataeval/utils/torch/_internal.py,sha256=vHy-DzPhmvE8h3wmWc3aciBJ8nDGzQ1z1jTZgGjmDyM,4154
 dataeval/utils/torch/models.py,sha256=hmroEs6C6jQ5tAoZa71RFeIvXLxfXrTJSFH_jG2LGQU,9749
 dataeval/utils/torch/trainer.py,sha256=iUotX4OdirH8-ZtjdpU8gbJavkYW9YY9qpA2mAlFy1Y,5520
 dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
 dataeval/workflows/sufficiency.py,sha256=mjKmfRrAjShLUFIARv5o8yT5fnFvDsS5Qu6ujIPUgQg,8497
-dataeval-0.83.0.dist-info/LICENSE.txt,sha256=
-dataeval-0.83.0.dist-info/METADATA,sha256=
-dataeval-0.83.0.dist-info/WHEEL,sha256=
-dataeval-0.83.0.dist-info/RECORD,,
+dataeval-0.84.1.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+dataeval-0.84.1.dist-info/METADATA,sha256=F7L5PSWHV3z0_4pwA-JSgucW2A4bEv_dtvIMzCTGLZ8,5308
+dataeval-0.84.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.84.1.dist-info/RECORD,,