dataeval 0.84.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/data/__init__.py +19 -0
- dataeval/data/_embeddings.py +345 -0
- dataeval/{utils/data → data}/_images.py +2 -2
- dataeval/{utils/data → data}/_metadata.py +8 -7
- dataeval/{utils/data → data}/_selection.py +22 -9
- dataeval/{utils/data → data}/_split.py +1 -1
- dataeval/data/selections/__init__.py +19 -0
- dataeval/data/selections/_classbalance.py +37 -0
- dataeval/data/selections/_classfilter.py +109 -0
- dataeval/{utils/data → data}/selections/_indices.py +1 -1
- dataeval/{utils/data → data}/selections/_limit.py +1 -1
- dataeval/{utils/data → data}/selections/_prioritize.py +3 -3
- dataeval/{utils/data → data}/selections/_reverse.py +1 -1
- dataeval/{utils/data → data}/selections/_shuffle.py +3 -3
- dataeval/detectors/drift/__init__.py +2 -2
- dataeval/detectors/drift/_base.py +55 -203
- dataeval/detectors/drift/_cvm.py +19 -30
- dataeval/detectors/drift/_ks.py +18 -30
- dataeval/detectors/drift/_mmd.py +189 -53
- dataeval/detectors/drift/_uncertainty.py +52 -56
- dataeval/detectors/drift/updates.py +13 -12
- dataeval/detectors/linters/duplicates.py +6 -4
- dataeval/detectors/linters/outliers.py +3 -3
- dataeval/detectors/ood/ae.py +1 -1
- dataeval/metadata/_distance.py +1 -1
- dataeval/metadata/_ood.py +4 -4
- dataeval/metrics/bias/_balance.py +1 -1
- dataeval/metrics/bias/_diversity.py +1 -1
- dataeval/metrics/bias/_parity.py +1 -1
- dataeval/metrics/stats/_base.py +7 -7
- dataeval/metrics/stats/_dimensionstats.py +2 -2
- dataeval/metrics/stats/_hashstats.py +2 -2
- dataeval/metrics/stats/_imagestats.py +4 -4
- dataeval/metrics/stats/_labelstats.py +2 -2
- dataeval/metrics/stats/_pixelstats.py +2 -2
- dataeval/metrics/stats/_visualstats.py +2 -2
- dataeval/outputs/_bias.py +1 -1
- dataeval/typing.py +53 -19
- dataeval/utils/__init__.py +2 -2
- dataeval/utils/_array.py +18 -7
- dataeval/utils/data/__init__.py +5 -20
- dataeval/utils/data/_dataset.py +6 -4
- dataeval/utils/data/collate.py +2 -0
- dataeval/utils/datasets/__init__.py +17 -0
- dataeval/utils/{data/datasets → datasets}/_base.py +10 -7
- dataeval/utils/{data/datasets → datasets}/_cifar10.py +11 -11
- dataeval/utils/{data/datasets → datasets}/_milco.py +44 -16
- dataeval/utils/{data/datasets → datasets}/_mnist.py +11 -7
- dataeval/utils/{data/datasets → datasets}/_ships.py +10 -6
- dataeval/utils/{data/datasets → datasets}/_voc.py +43 -22
- dataeval/utils/torch/_internal.py +12 -35
- {dataeval-0.84.0.dist-info → dataeval-1.0.0.dist-info}/METADATA +2 -3
- dataeval-1.0.0.dist-info/RECORD +107 -0
- dataeval/detectors/drift/_torch.py +0 -222
- dataeval/utils/data/_embeddings.py +0 -186
- dataeval/utils/data/datasets/__init__.py +0 -17
- dataeval/utils/data/selections/__init__.py +0 -17
- dataeval/utils/data/selections/_classfilter.py +0 -59
- dataeval-0.84.0.dist-info/RECORD +0 -106
- /dataeval/{utils/data → data}/_targets.py +0 -0
- /dataeval/utils/{metadata.py → data/metadata.py} +0 -0
- /dataeval/utils/{data/datasets → datasets}/_fileio.py +0 -0
- /dataeval/utils/{data/datasets → datasets}/_mixin.py +0 -0
- /dataeval/utils/{data/datasets → datasets}/_types.py +0 -0
- {dataeval-0.84.0.dist-info → dataeval-1.0.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.84.0.dist-info → dataeval-1.0.0.dist-info}/WHEEL +0 -0
dataeval/utils/data/_dataset.py
CHANGED
@@ -52,10 +52,12 @@ def _validate_data(
|
|
52
52
|
|
53
53
|
|
54
54
|
def _find_max(arr: ArrayLike) -> Any:
|
55
|
-
if isinstance(arr
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
if isinstance(arr, (Iterable, Sequence, Array)):
|
56
|
+
if isinstance(arr[0], (Iterable, Sequence, Array)):
|
57
|
+
return max([_find_max(x) for x in arr]) # type: ignore
|
58
|
+
else:
|
59
|
+
return max(arr)
|
60
|
+
return arr
|
59
61
|
|
60
62
|
|
61
63
|
_TLabels = TypeVar("_TLabels", Sequence[int], Sequence[Sequence[int]])
|
dataeval/utils/data/collate.py
CHANGED
@@ -4,6 +4,8 @@ Collate functions used with a PyTorch DataLoader to load data from MAITE complia
|
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
6
|
|
7
|
+
__all__ = ["list_collate_fn", "numpy_collate_fn", "torch_collate_fn"]
|
8
|
+
|
7
9
|
from typing import Any, Iterable, Sequence, TypeVar
|
8
10
|
|
9
11
|
import numpy as np
|
@@ -0,0 +1,17 @@
|
|
1
|
+
"""Provides access to common Computer Vision datasets."""
|
2
|
+
|
3
|
+
from dataeval.utils.datasets._cifar10 import CIFAR10
|
4
|
+
from dataeval.utils.datasets._milco import MILCO
|
5
|
+
from dataeval.utils.datasets._mnist import MNIST
|
6
|
+
from dataeval.utils.datasets._ships import Ships
|
7
|
+
from dataeval.utils.datasets._voc import VOCDetection, VOCDetectionTorch, VOCSegmentation
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"MNIST",
|
11
|
+
"Ships",
|
12
|
+
"CIFAR10",
|
13
|
+
"MILCO",
|
14
|
+
"VOCDetection",
|
15
|
+
"VOCDetectionTorch",
|
16
|
+
"VOCSegmentation",
|
17
|
+
]
|
@@ -6,9 +6,9 @@ from abc import abstractmethod
|
|
6
6
|
from pathlib import Path
|
7
7
|
from typing import TYPE_CHECKING, Any, Generic, Iterator, Literal, NamedTuple, Sequence, TypeVar
|
8
8
|
|
9
|
-
from dataeval.utils.
|
10
|
-
from dataeval.utils.
|
11
|
-
from dataeval.utils.
|
9
|
+
from dataeval.utils.datasets._fileio import _ensure_exists
|
10
|
+
from dataeval.utils.datasets._mixin import BaseDatasetMixin
|
11
|
+
from dataeval.utils.datasets._types import (
|
12
12
|
AnnotatedDataset,
|
13
13
|
DatasetMetadata,
|
14
14
|
ImageClassificationDataset,
|
@@ -19,9 +19,12 @@ from dataeval.utils.data.datasets._types import (
|
|
19
19
|
)
|
20
20
|
|
21
21
|
if TYPE_CHECKING:
|
22
|
-
from dataeval.typing import Transform
|
22
|
+
from dataeval.typing import Array, Transform
|
23
|
+
|
24
|
+
_TArray = TypeVar("_TArray", bound=Array)
|
25
|
+
else:
|
26
|
+
_TArray = TypeVar("_TArray")
|
23
27
|
|
24
|
-
_TArray = TypeVar("_TArray")
|
25
28
|
_TTarget = TypeVar("_TTarget")
|
26
29
|
_TRawTarget = TypeVar("_TRawTarget", list[int], list[str])
|
27
30
|
|
@@ -51,9 +54,9 @@ class BaseDataset(AnnotatedDataset[tuple[_TArray, _TTarget, dict[str, Any]]], Ge
|
|
51
54
|
def __init__(
|
52
55
|
self,
|
53
56
|
root: str | Path,
|
54
|
-
|
55
|
-
image_set: Literal["train", "val", "test", "base"] = "train",
|
57
|
+
image_set: Literal["train", "val", "test", "operational", "base"] = "train",
|
56
58
|
transforms: Transform[_TArray] | Sequence[Transform[_TArray]] | None = None,
|
59
|
+
download: bool = False,
|
57
60
|
verbose: bool = False,
|
58
61
|
) -> None:
|
59
62
|
self._root: Path = root.absolute() if isinstance(root, Path) else Path(root).absolute()
|
@@ -9,8 +9,8 @@ import numpy as np
|
|
9
9
|
from numpy.typing import NDArray
|
10
10
|
from PIL import Image
|
11
11
|
|
12
|
-
from dataeval.utils.
|
13
|
-
from dataeval.utils.
|
12
|
+
from dataeval.utils.datasets._base import BaseICDataset, DataLocation
|
13
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
16
|
from dataeval.typing import Transform
|
@@ -27,13 +27,13 @@ class CIFAR10(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
27
27
|
----------
|
28
28
|
root : str or pathlib.Path
|
29
29
|
Root directory of dataset where the ``mnist`` folder exists.
|
30
|
-
download : bool, default False
|
31
|
-
If True, downloads the dataset from the internet and puts it in root directory.
|
32
|
-
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
33
30
|
image_set : "train", "test" or "base", default "train"
|
34
31
|
If "base", returns all of the data to allow the user to create their own splits.
|
35
32
|
transforms : Transform, Sequence[Transform] or None, default None
|
36
33
|
Transform(s) to apply to the data.
|
34
|
+
download : bool, default False
|
35
|
+
If True, downloads the dataset from the internet and puts it in root directory.
|
36
|
+
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
37
37
|
verbose : bool, default False
|
38
38
|
If True, outputs print statements.
|
39
39
|
|
@@ -43,16 +43,16 @@ class CIFAR10(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
43
43
|
Location of the folder containing the data.
|
44
44
|
image_set : "train", "test" or "base"
|
45
45
|
The selected image set from the dataset.
|
46
|
+
transforms : Sequence[Transform]
|
47
|
+
The transforms to be applied to the data.
|
48
|
+
size : int
|
49
|
+
The size of the dataset.
|
46
50
|
index2label : dict[int, str]
|
47
51
|
Dictionary which translates from class integers to the associated class strings.
|
48
52
|
label2index : dict[str, int]
|
49
53
|
Dictionary which translates from class strings to the associated class integers.
|
50
54
|
metadata : DatasetMetadata
|
51
55
|
Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
|
52
|
-
transforms : Sequence[Transform]
|
53
|
-
The transforms to be applied to the data.
|
54
|
-
size : int
|
55
|
-
The size of the dataset.
|
56
56
|
"""
|
57
57
|
|
58
58
|
_resources = [
|
@@ -80,16 +80,16 @@ class CIFAR10(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
80
80
|
def __init__(
|
81
81
|
self,
|
82
82
|
root: str | Path,
|
83
|
-
download: bool = False,
|
84
83
|
image_set: Literal["train", "test", "base"] = "train",
|
85
84
|
transforms: Transform[NDArray[Any]] | Sequence[Transform[NDArray[Any]]] | None = None,
|
85
|
+
download: bool = False,
|
86
86
|
verbose: bool = False,
|
87
87
|
) -> None:
|
88
88
|
super().__init__(
|
89
89
|
root,
|
90
|
-
download,
|
91
90
|
image_set,
|
92
91
|
transforms,
|
92
|
+
download,
|
93
93
|
verbose,
|
94
94
|
)
|
95
95
|
|
@@ -3,12 +3,12 @@ from __future__ import annotations
|
|
3
3
|
__all__ = []
|
4
4
|
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import TYPE_CHECKING, Any, Sequence
|
6
|
+
from typing import TYPE_CHECKING, Any, Literal, Sequence
|
7
7
|
|
8
8
|
from numpy.typing import NDArray
|
9
9
|
|
10
|
-
from dataeval.utils.
|
11
|
-
from dataeval.utils.
|
10
|
+
from dataeval.utils.datasets._base import BaseODDataset, DataLocation
|
11
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
12
12
|
|
13
13
|
if TYPE_CHECKING:
|
14
14
|
from dataeval.typing import Transform
|
@@ -16,21 +16,20 @@ if TYPE_CHECKING:
|
|
16
16
|
|
17
17
|
class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
18
18
|
"""
|
19
|
-
A side-scan sonar dataset focused on mine
|
19
|
+
A side-scan sonar dataset focused on mine-like object detection.
|
20
20
|
|
21
21
|
The dataset comes from the paper
|
22
22
|
`Side-scan sonar imaging data of underwater vehicles for mine detection <https://doi.org/10.1016/j.dib.2024.110132>`_
|
23
23
|
by N.P. Santos et al. (2024).
|
24
24
|
|
25
|
-
This class only accesses a portion of the above dataset due to size constraints.
|
26
25
|
The full dataset contains 1170 side-scan sonar images collected using a 900-1800 kHz Marine Sonic
|
27
26
|
dual frequency side-scan sonar of a Teledyne Marine Gavia Autonomous Underwater Vehicle.
|
28
27
|
All the images were carefully analyzed and annotated, including the image coordinates of the
|
29
28
|
Bounding Box (BB) of the detected objects divided into NOn-Mine-like BOttom Objects (NOMBO)
|
30
29
|
and MIne-Like COntacts (MILCO) classes.
|
31
30
|
|
32
|
-
This dataset is consists of
|
33
|
-
In these
|
31
|
+
This dataset consists of 345 images from 2010, 120 images from 2015, 93 images from 2017, 564 images from 2018,
|
32
|
+
and 48 images from 2021. In these 1170 images, there are 432 MILCO objects, and 235 NOMBO objects.
|
34
33
|
The class “0” corresponds to a MILCO object and the class “1” corresponds to a NOMBO object.
|
35
34
|
The raw BB coordinates provided in the downloaded text files are (x, y, w, h),
|
36
35
|
given as percentages of the image (x_BB = x/img_width, y_BB = y/img_height, etc.).
|
@@ -40,11 +39,17 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
40
39
|
----------
|
41
40
|
root : str or pathlib.Path
|
42
41
|
Root directory of dataset where the ``milco`` folder exists.
|
42
|
+
image_set : "train", "operational" or "base", default "train"
|
43
|
+
If "train", then the images from 2015, 2017 and 2021 are selected,
|
44
|
+
resulting in 315 MILCO objects and 177 NOMBO objects.
|
45
|
+
If "operational", then the images from 2010 and 2018 are selected,
|
46
|
+
resulting in 117 MILCO objects and 58 NOMBO objects.
|
47
|
+
If "base", then the full dataset is selected.
|
48
|
+
transforms : Transform, Sequence[Transform] or None, default None
|
49
|
+
Transform(s) to apply to the data.
|
43
50
|
download : bool, default False
|
44
51
|
If True, downloads the dataset from the internet and puts it in root directory.
|
45
52
|
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
46
|
-
transforms : Transform, Sequence[Transform] or None, default None
|
47
|
-
Transform(s) to apply to the data.
|
48
53
|
verbose : bool, default False
|
49
54
|
If True, outputs print statements.
|
50
55
|
|
@@ -52,8 +57,8 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
52
57
|
----------
|
53
58
|
path : pathlib.Path
|
54
59
|
Location of the folder containing the data.
|
55
|
-
image_set : "base"
|
56
|
-
The
|
60
|
+
image_set : "train", "operational" or "base"
|
61
|
+
The selected image set from the dataset.
|
57
62
|
index2label : dict[int, str]
|
58
63
|
Dictionary which translates from class integers to the associated class strings.
|
59
64
|
label2index : dict[str, int]
|
@@ -64,6 +69,10 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
64
69
|
The transforms to be applied to the data.
|
65
70
|
size : int
|
66
71
|
The size of the dataset.
|
72
|
+
|
73
|
+
Note
|
74
|
+
----
|
75
|
+
Data License: `CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_
|
67
76
|
"""
|
68
77
|
|
69
78
|
_resources = [
|
@@ -85,6 +94,18 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
85
94
|
md5=True,
|
86
95
|
checksum="b84749b21fa95a4a4c7de3741db78bc7",
|
87
96
|
),
|
97
|
+
DataLocation(
|
98
|
+
url="https://figshare.com/ndownloader/files/43169008",
|
99
|
+
filename="2010.zip",
|
100
|
+
md5=True,
|
101
|
+
checksum="43347a0cc383c0d3dbe0d24ae56f328d",
|
102
|
+
),
|
103
|
+
DataLocation(
|
104
|
+
url="https://figshare.com/ndownloader/files/43169011",
|
105
|
+
filename="2018.zip",
|
106
|
+
md5=True,
|
107
|
+
checksum="25d091044a10c78674fedad655023e3b",
|
108
|
+
),
|
88
109
|
]
|
89
110
|
|
90
111
|
index2label: dict[int, str] = {
|
@@ -95,15 +116,16 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
95
116
|
def __init__(
|
96
117
|
self,
|
97
118
|
root: str | Path,
|
98
|
-
|
119
|
+
image_set: Literal["train", "operational", "base"] = "train",
|
99
120
|
transforms: Transform[NDArray[Any]] | Sequence[Transform[NDArray[Any]]] | None = None,
|
121
|
+
download: bool = False,
|
100
122
|
verbose: bool = False,
|
101
123
|
) -> None:
|
102
124
|
super().__init__(
|
103
125
|
root,
|
104
|
-
|
105
|
-
"base",
|
126
|
+
image_set,
|
106
127
|
transforms,
|
128
|
+
download,
|
107
129
|
verbose,
|
108
130
|
)
|
109
131
|
|
@@ -112,10 +134,16 @@ class MILCO(BaseODDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
112
134
|
targets: list[str] = []
|
113
135
|
datum_metadata: dict[str, list[Any]] = {}
|
114
136
|
metadata_list: list[dict[str, Any]] = []
|
137
|
+
image_sets: dict[str, list[int]] = {
|
138
|
+
"base": list(range(len(self._resources))),
|
139
|
+
"train": list(range(3)),
|
140
|
+
"operational": list(range(3, len(self._resources))),
|
141
|
+
}
|
115
142
|
|
116
143
|
# Load the data
|
117
|
-
|
118
|
-
|
144
|
+
resource_indices = image_sets[self.image_set]
|
145
|
+
for idx in resource_indices:
|
146
|
+
self._resource = self._resources[idx]
|
119
147
|
filepath, target, metadata = super()._load_data()
|
120
148
|
filepaths.extend(filepath)
|
121
149
|
targets.extend(target)
|
@@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar
|
|
8
8
|
import numpy as np
|
9
9
|
from numpy.typing import NDArray
|
10
10
|
|
11
|
-
from dataeval.utils.
|
12
|
-
from dataeval.utils.
|
11
|
+
from dataeval.utils.datasets._base import BaseICDataset, DataLocation
|
12
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
13
13
|
|
14
14
|
if TYPE_CHECKING:
|
15
15
|
from dataeval.typing import Transform
|
@@ -49,9 +49,6 @@ class MNIST(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
49
49
|
----------
|
50
50
|
root : str or pathlib.Path
|
51
51
|
Root directory of dataset where the ``mnist`` folder exists.
|
52
|
-
download : bool, default False
|
53
|
-
If True, downloads the dataset from the internet and puts it in root directory.
|
54
|
-
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
55
52
|
image_set : "train", "test" or "base", default "train"
|
56
53
|
If "base", returns all of the data to allow the user to create their own splits.
|
57
54
|
corruption : "identity", "shot_noise", "impulse_noise", "glass_blur", "motion_blur", \
|
@@ -60,6 +57,9 @@ class MNIST(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
60
57
|
Corruption to apply to the data.
|
61
58
|
transforms : Transform, Sequence[Transform] or None, default None
|
62
59
|
Transform(s) to apply to the data.
|
60
|
+
download : bool, default False
|
61
|
+
If True, downloads the dataset from the internet and puts it in root directory.
|
62
|
+
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
63
63
|
verbose : bool, default False
|
64
64
|
If True, outputs print statements.
|
65
65
|
|
@@ -81,6 +81,10 @@ class MNIST(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
81
81
|
The transforms to be applied to the data.
|
82
82
|
size : int
|
83
83
|
The size of the dataset.
|
84
|
+
|
85
|
+
Note
|
86
|
+
----
|
87
|
+
Data License: `CC BY 4.0 <https://creativecommons.org/licenses/by/4.0/>`_ for corruption dataset
|
84
88
|
"""
|
85
89
|
|
86
90
|
_resources = [
|
@@ -114,10 +118,10 @@ class MNIST(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
114
118
|
def __init__(
|
115
119
|
self,
|
116
120
|
root: str | Path,
|
117
|
-
download: bool = False,
|
118
121
|
image_set: Literal["train", "test", "base"] = "train",
|
119
122
|
corruption: CorruptionStringMap | None = None,
|
120
123
|
transforms: Transform[NDArray[Any]] | Sequence[Transform[NDArray[Any]]] | None = None,
|
124
|
+
download: bool = False,
|
121
125
|
verbose: bool = False,
|
122
126
|
) -> None:
|
123
127
|
self.corruption = corruption
|
@@ -127,9 +131,9 @@ class MNIST(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
127
131
|
|
128
132
|
super().__init__(
|
129
133
|
root,
|
130
|
-
download,
|
131
134
|
image_set,
|
132
135
|
transforms,
|
136
|
+
download,
|
133
137
|
verbose,
|
134
138
|
)
|
135
139
|
|
@@ -8,8 +8,8 @@ from typing import TYPE_CHECKING, Any, Sequence
|
|
8
8
|
import numpy as np
|
9
9
|
from numpy.typing import NDArray
|
10
10
|
|
11
|
-
from dataeval.utils.
|
12
|
-
from dataeval.utils.
|
11
|
+
from dataeval.utils.datasets._base import BaseICDataset, DataLocation
|
12
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin
|
13
13
|
|
14
14
|
if TYPE_CHECKING:
|
15
15
|
from dataeval.typing import Transform
|
@@ -31,11 +31,11 @@ class Ships(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
31
31
|
----------
|
32
32
|
root : str or pathlib.Path
|
33
33
|
Root directory of dataset where the ``shipdataset`` folder exists.
|
34
|
+
transforms : Transform, Sequence[Transform] or None, default None
|
35
|
+
Transform(s) to apply to the data.
|
34
36
|
download : bool, default False
|
35
37
|
If True, downloads the dataset from the internet and puts it in root directory.
|
36
38
|
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
37
|
-
transforms : Transform, Sequence[Transform] or None, default None
|
38
|
-
Transform(s) to apply to the data.
|
39
39
|
verbose : bool, default False
|
40
40
|
If True, outputs print statements.
|
41
41
|
|
@@ -55,6 +55,10 @@ class Ships(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
55
55
|
The transforms to be applied to the data.
|
56
56
|
size : int
|
57
57
|
The size of the dataset.
|
58
|
+
|
59
|
+
Note
|
60
|
+
----
|
61
|
+
Data License: `CC BY-SA 4.0 <https://creativecommons.org/licenses/by-sa/4.0/>`_
|
58
62
|
"""
|
59
63
|
|
60
64
|
_resources = [
|
@@ -74,15 +78,15 @@ class Ships(BaseICDataset[NDArray[Any]], BaseDatasetNumpyMixin):
|
|
74
78
|
def __init__(
|
75
79
|
self,
|
76
80
|
root: str | Path,
|
77
|
-
download: bool = False,
|
78
81
|
transforms: Transform[NDArray[Any]] | Sequence[Transform[NDArray[Any]]] | None = None,
|
82
|
+
download: bool = False,
|
79
83
|
verbose: bool = False,
|
80
84
|
) -> None:
|
81
85
|
super().__init__(
|
82
86
|
root,
|
83
|
-
download,
|
84
87
|
"base",
|
85
88
|
transforms,
|
89
|
+
download,
|
86
90
|
verbose,
|
87
91
|
)
|
88
92
|
self._scenes: list[str] = self._load_scenes()
|
@@ -9,21 +9,20 @@ import torch
|
|
9
9
|
from defusedxml.ElementTree import parse
|
10
10
|
from numpy.typing import NDArray
|
11
11
|
|
12
|
-
from dataeval.utils.
|
12
|
+
from dataeval.utils.datasets._base import (
|
13
13
|
BaseDataset,
|
14
14
|
BaseODDataset,
|
15
15
|
BaseSegDataset,
|
16
16
|
DataLocation,
|
17
|
+
_TArray,
|
18
|
+
_TTarget,
|
17
19
|
)
|
18
|
-
from dataeval.utils.
|
19
|
-
from dataeval.utils.
|
20
|
+
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin, BaseDatasetTorchMixin
|
21
|
+
from dataeval.utils.datasets._types import ObjectDetectionTarget, SegmentationTarget
|
20
22
|
|
21
23
|
if TYPE_CHECKING:
|
22
24
|
from dataeval.typing import Transform
|
23
25
|
|
24
|
-
_TArray = TypeVar("_TArray")
|
25
|
-
_TTarget = TypeVar("_TTarget")
|
26
|
-
|
27
26
|
VOCClassStringMap = Literal[
|
28
27
|
"aeroplane",
|
29
28
|
"bicycle",
|
@@ -121,19 +120,19 @@ class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str]]):
|
|
121
120
|
def __init__(
|
122
121
|
self,
|
123
122
|
root: str | Path,
|
124
|
-
year: Literal["2007", "2008", "2009", "2010", "2011", "2012"] = "2012",
|
125
123
|
image_set: Literal["train", "val", "test", "base"] = "train",
|
126
|
-
|
124
|
+
year: Literal["2007", "2008", "2009", "2010", "2011", "2012"] = "2012",
|
127
125
|
transforms: Transform[_TArray] | Sequence[Transform[_TArray]] | None = None,
|
126
|
+
download: bool = False,
|
128
127
|
verbose: bool = False,
|
129
128
|
) -> None:
|
130
129
|
self.year = year
|
131
130
|
self._resource_index = self._get_year_image_set_index(year, image_set)
|
132
131
|
super().__init__(
|
133
132
|
root,
|
134
|
-
download,
|
135
133
|
image_set,
|
136
134
|
transforms,
|
135
|
+
download,
|
137
136
|
verbose,
|
138
137
|
)
|
139
138
|
|
@@ -191,10 +190,14 @@ class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str]]):
|
|
191
190
|
for entry in data:
|
192
191
|
file_name = Path(entry).name
|
193
192
|
file_stem = Path(entry).stem
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
193
|
+
if self.year != "2007":
|
194
|
+
# Remove file extension and split by "_"
|
195
|
+
parts = file_stem.split("_")
|
196
|
+
file_meta["year"].append(parts[0])
|
197
|
+
file_meta["image_id"].append(parts[1])
|
198
|
+
else:
|
199
|
+
file_meta["year"].append(self.year)
|
200
|
+
file_meta["image_id"].append(file_stem)
|
198
201
|
file_meta["mask_path"].append(str(seg_folder / file_name))
|
199
202
|
annotations.append(str(ann_folder / file_stem) + ".xml")
|
200
203
|
|
@@ -250,9 +253,6 @@ class VOCDetection(
|
|
250
253
|
----------
|
251
254
|
root : str or pathlib.Path
|
252
255
|
Root directory of dataset where the ``vocdataset`` folder exists.
|
253
|
-
download : bool, default False
|
254
|
-
If True, downloads the dataset from the internet and puts it in root directory.
|
255
|
-
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
256
256
|
image_set : "train", "val", "test", or "base", default "train"
|
257
257
|
If "test", then dataset year must be "2007".
|
258
258
|
If "base", then the combined dataset of "train" and "val" is returned.
|
@@ -260,6 +260,9 @@ class VOCDetection(
|
|
260
260
|
The dataset year.
|
261
261
|
transforms : Transform, Sequence[Transform] or None, default None
|
262
262
|
Transform(s) to apply to the data.
|
263
|
+
download : bool, default False
|
264
|
+
If True, downloads the dataset from the internet and puts it in root directory.
|
265
|
+
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
263
266
|
verbose : bool, default False
|
264
267
|
If True, outputs print statements.
|
265
268
|
|
@@ -267,6 +270,8 @@ class VOCDetection(
|
|
267
270
|
----------
|
268
271
|
path : pathlib.Path
|
269
272
|
Location of the folder containing the data.
|
273
|
+
year : "2007", "2008", "2009", "2010", "2011" or "2012"
|
274
|
+
The selected dataset year.
|
270
275
|
image_set : "train", "val", "test" or "base"
|
271
276
|
The selected image set from the dataset.
|
272
277
|
index2label : dict[int, str]
|
@@ -279,6 +284,10 @@ class VOCDetection(
|
|
279
284
|
The transforms to be applied to the data.
|
280
285
|
size : int
|
281
286
|
The size of the dataset.
|
287
|
+
|
288
|
+
Note
|
289
|
+
----
|
290
|
+
Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
|
282
291
|
"""
|
283
292
|
|
284
293
|
|
@@ -294,9 +303,6 @@ class VOCDetectionTorch(
|
|
294
303
|
----------
|
295
304
|
root : str or pathlib.Path
|
296
305
|
Root directory of dataset where the ``vocdataset`` folder exists.
|
297
|
-
download : bool, default False
|
298
|
-
If True, downloads the dataset from the internet and puts it in root directory.
|
299
|
-
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
300
306
|
image_set : "train", "val", "test", or "base", default "train"
|
301
307
|
If "test", then dataset year must be "2007".
|
302
308
|
If "base", then the combined dataset of "train" and "val" is returned.
|
@@ -304,6 +310,9 @@ class VOCDetectionTorch(
|
|
304
310
|
The dataset year.
|
305
311
|
transforms : Transform, Sequence[Transform] or None, default None
|
306
312
|
Transform(s) to apply to the data.
|
313
|
+
download : bool, default False
|
314
|
+
If True, downloads the dataset from the internet and puts it in root directory.
|
315
|
+
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
307
316
|
verbose : bool, default False
|
308
317
|
If True, outputs print statements.
|
309
318
|
|
@@ -311,6 +320,8 @@ class VOCDetectionTorch(
|
|
311
320
|
----------
|
312
321
|
path : pathlib.Path
|
313
322
|
Location of the folder containing the data.
|
323
|
+
year : "2007", "2008", "2009", "2010", "2011" or "2012"
|
324
|
+
The selected dataset year.
|
314
325
|
image_set : "train", "val", "test" or "base"
|
315
326
|
The selected image set from the dataset.
|
316
327
|
index2label : dict[int, str]
|
@@ -323,6 +334,10 @@ class VOCDetectionTorch(
|
|
323
334
|
The transforms to be applied to the data.
|
324
335
|
size : int
|
325
336
|
The size of the dataset.
|
337
|
+
|
338
|
+
Note
|
339
|
+
----
|
340
|
+
Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
|
326
341
|
"""
|
327
342
|
|
328
343
|
|
@@ -338,9 +353,6 @@ class VOCSegmentation(
|
|
338
353
|
----------
|
339
354
|
root : str or pathlib.Path
|
340
355
|
Root directory of dataset where the ``vocdataset`` folder exists.
|
341
|
-
download : bool, default False
|
342
|
-
If True, downloads the dataset from the internet and puts it in root directory.
|
343
|
-
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
344
356
|
image_set : "train", "val", "test", or "base", default "train"
|
345
357
|
If "test", then dataset year must be "2007".
|
346
358
|
If "base", then the combined dataset of "train" and "val" is returned.
|
@@ -348,6 +360,9 @@ class VOCSegmentation(
|
|
348
360
|
The dataset year.
|
349
361
|
transforms : Transform, Sequence[Transform] or None, default None
|
350
362
|
Transform(s) to apply to the data.
|
363
|
+
download : bool, default False
|
364
|
+
If True, downloads the dataset from the internet and puts it in root directory.
|
365
|
+
Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
|
351
366
|
verbose : bool, default False
|
352
367
|
If True, outputs print statements.
|
353
368
|
|
@@ -355,6 +370,8 @@ class VOCSegmentation(
|
|
355
370
|
----------
|
356
371
|
path : pathlib.Path
|
357
372
|
Location of the folder containing the data.
|
373
|
+
year : "2007", "2008", "2009", "2010", "2011" or "2012"
|
374
|
+
The selected dataset year.
|
358
375
|
image_set : "train", "val", "test" or "base"
|
359
376
|
The selected image set from the dataset.
|
360
377
|
index2label : dict[int, str]
|
@@ -367,6 +384,10 @@ class VOCSegmentation(
|
|
367
384
|
The transforms to be applied to the data.
|
368
385
|
size : int
|
369
386
|
The size of the dataset.
|
387
|
+
|
388
|
+
Note
|
389
|
+
----
|
390
|
+
Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
|
370
391
|
"""
|
371
392
|
|
372
393
|
def _load_data(self) -> tuple[list[str], list[str], dict[str, list[Any]]]:
|