dataeval 0.86.8__py3-none-any.whl → 0.87.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_version.py +2 -2
- dataeval/config.py +4 -19
- dataeval/data/_metadata.py +56 -27
- dataeval/data/_split.py +1 -1
- dataeval/data/selections/_classbalance.py +4 -3
- dataeval/data/selections/_classfilter.py +5 -5
- dataeval/data/selections/_indices.py +2 -2
- dataeval/data/selections/_prioritize.py +249 -29
- dataeval/data/selections/_reverse.py +1 -1
- dataeval/data/selections/_shuffle.py +2 -2
- dataeval/detectors/ood/__init__.py +2 -1
- dataeval/detectors/ood/base.py +38 -1
- dataeval/detectors/ood/knn.py +95 -0
- dataeval/metrics/bias/_balance.py +28 -21
- dataeval/metrics/bias/_diversity.py +4 -4
- dataeval/metrics/bias/_parity.py +2 -2
- dataeval/metrics/stats/_hashstats.py +19 -2
- dataeval/outputs/_workflows.py +20 -7
- dataeval/typing.py +14 -2
- dataeval/utils/__init__.py +2 -2
- dataeval/utils/_bin.py +7 -6
- dataeval/utils/data/__init__.py +2 -0
- dataeval/utils/data/_dataset.py +13 -6
- dataeval/utils/data/_validate.py +169 -0
- dataeval/workflows/sufficiency.py +53 -10
- {dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/METADATA +5 -17
- {dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/RECORD +30 -39
- dataeval/utils/datasets/__init__.py +0 -19
- dataeval/utils/datasets/_antiuav.py +0 -189
- dataeval/utils/datasets/_base.py +0 -262
- dataeval/utils/datasets/_cifar10.py +0 -201
- dataeval/utils/datasets/_fileio.py +0 -142
- dataeval/utils/datasets/_milco.py +0 -197
- dataeval/utils/datasets/_mixin.py +0 -54
- dataeval/utils/datasets/_mnist.py +0 -202
- dataeval/utils/datasets/_ships.py +0 -144
- dataeval/utils/datasets/_types.py +0 -48
- dataeval/utils/datasets/_voc.py +0 -583
- {dataeval-0.86.8.dist-info → dataeval-0.87.0.dist-info}/WHEEL +0 -0
- /dataeval-0.86.8.dist-info/licenses/LICENSE.txt → /dataeval-0.87.0.dist-info/licenses/LICENSE +0 -0
dataeval/utils/datasets/_voc.py
DELETED
@@ -1,583 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
__all__ = []
|
4
|
-
|
5
|
-
import os
|
6
|
-
import shutil
|
7
|
-
from pathlib import Path
|
8
|
-
from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar
|
9
|
-
|
10
|
-
import torch
|
11
|
-
from defusedxml.ElementTree import parse
|
12
|
-
from numpy.typing import NDArray
|
13
|
-
|
14
|
-
from dataeval.utils.datasets._base import (
|
15
|
-
BaseDataset,
|
16
|
-
BaseODDataset,
|
17
|
-
BaseSegDataset,
|
18
|
-
DataLocation,
|
19
|
-
_ensure_exists,
|
20
|
-
_TArray,
|
21
|
-
_TTarget,
|
22
|
-
)
|
23
|
-
from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin, BaseDatasetTorchMixin
|
24
|
-
from dataeval.utils.datasets._types import ObjectDetectionTarget, SegmentationTarget
|
25
|
-
|
26
|
-
if TYPE_CHECKING:
|
27
|
-
from dataeval.typing import Transform
|
28
|
-
|
29
|
-
# The twenty Pascal VOC object-category names, in class-index order.
VOCClassStringMap = Literal[
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]  # fmt: skip
# Type variable constrained to one class name, one integer, or a list of either.
TVOCClassMap = TypeVar("TVOCClassMap", VOCClassStringMap, int, list[VOCClassStringMap], list[int])
|
52
|
-
|
53
|
-
|
54
|
-
class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str]]):
    # Downloadable archives. Order is significant: `_get_year_image_set_index`
    # and the `_base2007` / `_base2012` pairs address this list by position.
    # Entries 0-5 are the trainval archives for 2012 down to 2007; the last two
    # entries are the 2012 and 2007 test archives.
    # NOTE(review): md5=False with a 64-hex-digit checksum presumably selects a
    # SHA-256 digest - confirm against DataLocation / _ensure_exists.
    _resources = [
        DataLocation(url=url, filename=filename, md5=False, checksum=checksum)
        for url, filename, checksum in (
            (
                "https://data.brainchip.com/dataset-mirror/voc/VOCtrainval_11-May-2012.tar",
                "VOCtrainval_11-May-2012.tar",
                "e14f763270cf193d0b5f74b169f44157a4b0c6efa708f4dd0ff78ee691763bcb",
            ),
            (
                "http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar",
                "VOCtrainval_25-May-2011.tar",
                "0a7f5f5d154f7290ec65ec3f78b72ef72c6d93ff6d79acd40dc222a9ee5248ba",
            ),
            (
                "http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar",
                "VOCtrainval_03-May-2010.tar",
                "1af4189cbe44323ab212bff7afbc7d0f55a267cc191eb3aac911037887e5c7d4",
            ),
            (
                "http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar",
                "VOCtrainval_11-May-2009.tar",
                "11cbe1741fb5bdadbbca3c08e9ec62cd95c14884845527d50847bc2cf57e7fd6",
            ),
            (
                "http://host.robots.ox.ac.uk/pascal/VOC/voc2008/VOCtrainval_14-Jul-2008.tar",
                "VOCtrainval_14-Jul-2008.tar",
                "7f0ca53c1b5a838fbe946965fc106c6e86832183240af5c88e3f6c306318d42e",
            ),
            (
                "https://data.brainchip.com/dataset-mirror/voc/VOCtrainval_06-Nov-2007.tar",
                "VOCtrainval_06-Nov-2007.tar",
                "7d8cd951101b0957ddfd7a530bdc8a94f06121cfc1e511bb5937e973020c7508",
            ),
            (
                "https://data.brainchip.com/dataset-mirror/voc/VOC2012test.tar",
                "VOC2012test.tar",
                "f08582b1935816c5eab3bbb1eb6d06201a789eaa173cdf1cf400c26f0cac2fb3",
            ),
            (
                "https://data.brainchip.com/dataset-mirror/voc/VOCtest_06-Nov-2007.tar",
                "VOCtest_06-Nov-2007.tar",
                "6836888e2e01dca84577a849d339fa4f73e1e4f135d312430c4856b5609b4892",
            ),
        )
    ]
    # (trainval, test) positions inside `_resources` for the two years that have a test archive.
    _base2007: tuple[int, int] = (5, 7)
    _base2012: tuple[int, int] = (0, 6)

    # Class index -> class name; the position in the list is the class index.
    index2label: dict[int, str] = dict(
        enumerate(
            [
                "aeroplane",
                "bicycle",
                "bird",
                "boat",
                "bottle",
                "bus",
                "car",
                "cat",
                "chair",
                "cow",
                "diningtable",
                "dog",
                "horse",
                "motorbike",
                "person",
                "pottedplant",
                "sheep",
                "sofa",
                "train",
                "tvmonitor",
            ]
        )
    )
|
130
|
-
|
131
|
-
def __init__(
    self,
    root: str | Path,
    image_set: Literal["train", "val", "test", "base"] = "train",
    year: Literal["2007", "2008", "2009", "2010", "2011", "2012"] = "2012",
    transforms: Transform[_TArray] | Sequence[Transform[_TArray]] | None = None,
    download: bool = False,
    verbose: bool = False,
) -> None:
    """
    Record the dataset year and its archive index, then run the base initializer.

    Raises
    ------
    ValueError
        If ``image_set`` is "test" for a year that has no test archive
        (raised by ``_get_year_image_set_index``).
    """
    # Both attributes are assigned before delegating; presumably the base
    # initializer triggers loading, which reads them - confirm against BaseDataset.
    self.year = year
    self._resource_index = self._get_year_image_set_index(year, image_set)
    super().__init__(root, image_set, transforms, download, verbose)
|
149
|
-
|
150
|
-
def _get_dataset_dir(self) -> Path:
    """Resolve the VOC dataset directory from ``self._root`` (overrides the base implementation)."""
    root_dir = self._root
    return self._find_main_VOC_dir(root_dir)
|
153
|
-
|
154
|
-
def _find_main_VOC_dir(self, base: Path) -> Path:
    """
    Locate the ``VOC<year>`` directory for ``self.year`` underneath (or at) ``base``.

    The known layouts are probed in a fixed order and the first one that
    exists on disk is returned. When none exists, the ``vocdataset``
    directory (``base`` itself if it already carries that name, otherwise
    ``base / "vocdataset"``) is created if necessary and returned instead.
    """
    year_dir = f"VOC{self.year}"
    is_2011 = self.year == "2011"

    if base.stem.lower() == "vocdataset":
        dataset_dir = base
    else:
        dataset_dir = base / "vocdataset"

    # Candidate locations, most specific first.
    candidates: list[Path] = []
    if base.stem == year_dir:
        # base already is the year directory
        candidates.append(base)
    if base.stem == "VOCdevkit":
        # base is the devkit directory
        candidates.append(base / year_dir)
    # standard layout below base
    candidates.append(base / "VOCdevkit" / year_dir)
    if is_2011:
        # the 2011 archive nests everything under an extra "TrainVal" folder
        candidates.append(base / "TrainVal" / "VOCdevkit" / year_dir)
    # same two layouts, but inside the vocdataset directory
    candidates.append(dataset_dir / "VOCdevkit" / year_dir)
    if is_2011:
        candidates.append(dataset_dir / "TrainVal" / "VOCdevkit" / year_dir)

    existing = next((candidate for candidate in candidates if candidate.exists()), None)
    if existing is not None:
        return existing

    # Nothing found: fall back to the (possibly freshly created) dataset directory.
    if not dataset_dir.exists():
        dataset_dir.mkdir(parents=True, exist_ok=True)
    return dataset_dir
|
189
|
-
|
190
|
-
def _get_year_image_set_index(self, year: str, image_set: str) -> int:
    """
    Return the position in ``_resources`` of the archive for ``year`` / ``image_set``.

    The trainval archives are stored newest first (2012 at index 0 down to
    2007 at index 5). The two test archives sit at the end of the list and
    are addressed with negative indices (-1 for 2007, -2 for 2012).

    Parameters
    ----------
    year : str
        Dataset year, "2007" through "2012".
    image_set : str
        Requested image set; only "test" is treated specially.

    Returns
    -------
    int
        Index into ``_resources``.

    Raises
    ------
    ValueError
        If ``image_set`` is "test" for a year other than 2007 or 2012, or if
        ``year`` lies outside 2007-2012 (it would otherwise silently select
        an unrelated archive).
    """
    if image_set == "test":
        if year == "2007":
            return -1
        if year == "2012":
            return -2
        raise ValueError(
            f"The only test sets available are for the years 2007 and 2012, not {year}. "
            "Either select the year 2007 or 2012, or use a different image_set."
        )

    offset = 2012 - int(year)
    # Only offsets 0..5 are trainval archives; anything else would index a
    # test archive (or wrap around through a negative index).
    if not 0 <= offset <= 5:
        raise ValueError(f"Unsupported VOC year {year!r}; expected a year from 2007 to 2012.")
    return offset
|
202
|
-
|
203
|
-
def _update_path(self) -> None:
    """Re-point ``self.path`` at the ``VOC<year>`` folder while it still names the ``vocdataset`` root."""
    if self.path.stem.lower() != "vocdataset":
        # Already pointing somewhere more specific; nothing to do.
        return
    # The 2011 archive unpacks into an additional "TrainVal" level.
    container = self.path / "TrainVal" if self.year == "2011" else self.path
    self.path: Path = container / "VOCdevkit" / f"VOC{self.year}"
|
209
|
-
|
210
|
-
def _load_data_exception(self) -> tuple[list[str], list[str], dict[str, Any]]:
    """
    Load the "test" or "base" image set for the years 2007 and 2012.

    These two years have a separate test archive in ``_resources``, so the
    trainval and test archives may both have to be fetched and merged into
    one directory tree before the file lists can be read.

    Returns
    -------
    tuple[list[str], list[str], dict[str, Any]]
        Image file paths, annotation file paths and per-image metadata.
        For "base" the entries of the "test" set come first, followed by the
        "base" (trainval) entries.

    Notes
    -----
    While loading "base", ``self.image_set`` is reassigned to "test" and then
    back to "base". The scratch download directory is removed again when it
    ends up empty, so no stray folder is left in the root.
    """
    filepaths: list[str] = []
    targets: list[str] = []
    datum_metadata: dict[str, list[Any]] = {}
    tmp_path: Path = self._root / "tmp_directory_for_download"
    tmp_path.mkdir(exist_ok=True)
    # (trainval, test) positions in ``_resources`` for the selected year.
    trainval_idx, test_idx = self._base2007 if self.year == "2007" else self._base2012

    # Determine which halves of the merged tree are already on disk.
    main_dir = self.path / "ImageSets" / "Main"
    train_exists = (main_dir / "trainval.txt").exists()
    test_exists = (main_dir / "test.txt").exists()

    try:
        if self.image_set == "base":
            if not train_exists and not test_exists:
                # Nothing present: trainval goes to the dataset dir, test is merged in.
                _ensure_exists(*self._resources[trainval_idx], self.path, self._root, self._download, self._verbose)
                self._update_path()
                _ensure_exists(*self._resources[test_idx], tmp_path, self._root, self._download, self._verbose)
                self._merge_voc_directories(tmp_path)

            elif train_exists and not test_exists:
                _ensure_exists(*self._resources[test_idx], tmp_path, self._root, self._download, self._verbose)
                self._merge_voc_directories(tmp_path)

            elif not train_exists and test_exists:
                _ensure_exists(*self._resources[trainval_idx], tmp_path, self._root, self._download, self._verbose)
                self._merge_voc_directories(tmp_path)

            # Read both sets and concatenate them, "test" first.
            for img_set in ["test", "base"]:
                self.image_set = img_set
                resource_filepaths, resource_targets, resource_metadata = self._load_data_inner()
                filepaths.extend(resource_filepaths)
                targets.extend(resource_targets)
                for key, val in resource_metadata.items():
                    # Keys are normalised to strings before merging.
                    datum_metadata.setdefault(str(key), []).extend(val)

        else:
            self._resource = self._resources[test_idx]

            if train_exists and not test_exists:
                _ensure_exists(*self._resource, tmp_path, self._root, self._download, self._verbose)
                self._merge_voc_directories(tmp_path)

            resource_filepaths, resource_targets, resource_metadata = self._load_try_and_update()
            filepaths.extend(resource_filepaths)
            targets.extend(resource_targets)
            datum_metadata.update(resource_metadata)
    finally:
        # Best-effort cleanup: rmdir only succeeds on an empty directory, so a
        # directory already removed by the merge, or one still holding a
        # partial download, is deliberately left untouched.
        try:
            tmp_path.rmdir()
        except OSError:
            pass

    return filepaths, targets, datum_metadata
|
271
|
-
|
272
|
-
def _merge_voc_directories(self, source_dir: Path) -> None:
    """
    Move the VOC tree found under ``source_dir`` into ``self.path`` and delete ``source_dir``.

    The directory structure is recreated in the target. A file is moved only
    when the target has no file of that name; otherwise the existing target
    file is kept and the source copy is discarded with the rest of ``source_dir``.
    """
    src_root: Path = self._find_main_VOC_dir(source_dir)
    for current, _subdirs, names in os.walk(src_root):
        src_dir = Path(current)
        # Mirror the source sub-directory inside the target tree.
        dst_dir = self.path / src_dir.relative_to(src_root)
        dst_dir.mkdir(parents=True, exist_ok=True)

        for name in names:
            destination = dst_dir / name
            if destination.exists():
                # Present in both trees: assumed identical, keep the target copy.
                continue
            shutil.move(src_dir / name, destination)

    shutil.rmtree(source_dir)
|
300
|
-
|
301
|
-
def _load_try_and_update(self) -> tuple[list[str], list[str], dict[str, Any]]:
    """
    Load the file lists, fetching the archive first when the data is not on disk.

    A ``FileNotFoundError`` from the first load attempt is taken to mean the
    data is missing: the current resource is ensured via ``_ensure_exists``,
    ``self.path`` is refreshed and the load is retried once.
    """
    if self._verbose:
        print(f"Determining if {self._resource.filename} needs to be downloaded.")

    try:
        loaded = self._load_data_inner()
    except FileNotFoundError:
        _ensure_exists(*self._resource, self.path, self._root, self._download, self._verbose)
        self._update_path()
        return self._load_data_inner()

    if self._verbose:
        print("No download needed, loaded data successfully.")
    return loaded
|
315
|
-
|
316
|
-
def _load_data(self) -> tuple[list[str], list[str], dict[str, Any]]:
    """
    Load file paths, annotations and metadata, downloading/extracting when required.

    The "test" and "base" image sets of the years 2007 and 2012 go through
    ``_load_data_exception``; every other combination uses ``_load_try_and_update``.
    """
    needs_special_handling = self.image_set in ("test", "base") and self.year in ("2012", "2007")
    loader = self._load_data_exception if needs_special_handling else self._load_try_and_update
    return loader()
|
328
|
-
|
329
|
-
def _get_image_sets(self) -> dict[str, list[str]]:
    """
    Read the ``ImageSets/Main`` split files and return the image paths of each split.

    Returns
    -------
    dict[str, list[str]]
        ``{"test": [...]}`` when ``self.image_set`` is "test", otherwise the
        keys "train", "val" and "base" (the latter read from ``trainval.txt``).
        Every value lists ``JPEGImages/<id>.jpg`` paths in file order.

    Raises
    ------
    FileNotFoundError
        If a required split file is missing.
    """
    image_folder = self.path / "JPEGImages"
    split_dir = self.path / "ImageSets" / "Main"
    split_names = ["test"] if self.image_set == "test" else ["train", "val", "trainval"]

    image_sets: dict[str, list[str]] = {}
    for split in split_names:
        # Explicit encoding keeps the read independent of the platform default.
        with open(split_dir / f"{split}.txt", encoding="utf-8") as f:
            image_ids = (line.strip() for line in f)
            # Blank lines are skipped: they would otherwise yield a bogus
            # "JPEGImages/.jpg" entry and break the metadata parsing later on.
            selected_images = [str(image_folder / f"{image_id}.jpg") for image_id in image_ids if image_id]

        # The combined train+val split is exposed under the name "base".
        key = "base" if split == "trainval" else split
        image_sets[key] = selected_images
    return image_sets
|
345
|
-
|
346
|
-
def _load_data_inner(self) -> tuple[list[str], list[str], dict[str, Any]]:
    """
    Build the image paths, annotation paths and per-image metadata for ``self.image_set``.

    Returns
    -------
    tuple[list[str], list[str], dict[str, Any]]
        The image paths of the selected set, the matching ``Annotations/<id>.xml``
        paths, and a metadata dict with the keys "year", "image_id" and "mask_path".
    """
    ann_folder = self.path / "Annotations"
    seg_folder = self.path / "SegmentationClass"

    # Image paths of the requested split.
    data = self._get_image_sets()[self.image_set]

    years: list[Any] = []
    image_ids: list[str] = []
    mask_paths: list[str] = []
    annotations: list[str] = []
    # Outside 2007 the file stem is split on "_" into a year and an image id.
    stem_has_year = self.year != "2007"

    for entry in data:
        image = Path(entry)
        stem = image.stem
        if stem_has_year:
            parts = stem.split("_")
            years.append(parts[0])
            image_ids.append(parts[1])
        else:
            years.append(self.year)
            image_ids.append(stem)
        # NOTE(review): the mask path reuses the image's ".jpg" file name; Pascal VOC
        # ships SegmentationClass masks as ".png" - confirm against the mask reader.
        mask_paths.append(str(seg_folder / image.name))
        annotations.append(str(ann_folder / stem) + ".xml")

    file_meta = {"year": years, "image_id": image_ids, "mask_path": mask_paths}
    return data, annotations, file_meta
|
373
|
-
|
374
|
-
def _read_annotations(self, annotation: str) -> tuple[list[list[float]], list[int], dict[str, Any]]:
    """
    Parse one VOC annotation XML file.

    Parameters
    ----------
    annotation : str
        Path of the annotation file.

    Returns
    -------
    tuple[list[list[float]], list[int], dict[str, Any]]
        Bounding boxes as ``[xmin, ymin, xmax, ymax]``, the class index of each
        object, and a metadata dict holding image-level fields plus per-object
        "pose", "truncated" and "difficult" lists. When the file does not exist,
        two empty lists and an empty dict are returned.

    Raises
    ------
    ValueError
        If the parsed document has no root element.
    """
    if not Path(annotation).exists():
        return [], [], {}

    root = parse(annotation).getroot()
    if root is None:
        raise ValueError(f"Unable to parse {annotation}")

    def text_of(node: Any, tag: str) -> str:
        # Missing text fields fall back to the empty string.
        return node.findtext(tag, default="")

    def int_of(node: Any, tag: str) -> int:
        # Missing numeric fields fall back to -1.
        return int(node.findtext(tag, default="-1"))

    boxes: list[list[float]] = []
    names: list[str] = []
    poses: list[str] = []
    truncated: list[int] = []
    difficult: list[int] = []
    additional_meta: dict[str, Any] = {
        "folder": text_of(root, "folder"),
        "filename": text_of(root, "filename"),
        "database": text_of(root, "source/database"),
        "annotation_source": text_of(root, "source/annotation"),
        "image_source": text_of(root, "source/image"),
        "image_width": int_of(root, "size/width"),
        "image_height": int_of(root, "size/height"),
        "image_depth": int_of(root, "size/depth"),
        "segmented": int_of(root, "segmented"),
        "pose": poses,
        "truncated": truncated,
        "difficult": difficult,
    }

    for obj in root.findall("object"):
        names.append(text_of(obj, "name"))
        poses.append(text_of(obj, "pose"))
        truncated.append(int_of(obj, "truncated"))
        difficult.append(int_of(obj, "difficult"))
        # Missing coordinates fall back to 0.
        boxes.append(
            [float(obj.findtext(f"bndbox/{edge}", default="0")) for edge in ("xmin", "ymin", "xmax", "ymax")]
        )

    # Translate the class names into their integer indices.
    labels = [self.label2index[name] for name in names]
    return boxes, labels, additional_meta
|
411
|
-
|
412
|
-
|
413
|
-
class VOCDetection(
    BaseVOCDataset[NDArray[Any], ObjectDetectionTarget[NDArray[Any]]],
    BaseODDataset[NDArray[Any]],
    BaseDatasetNumpyMixin,
):
    """
    `Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Detection Dataset.

    Parameters
    ----------
    root : str or pathlib.Path
        Because of the structure of the PASCAL VOC datasets, the root needs to be one of 4 folders.
        1) The year folder of the **already downloaded** dataset (i.e. .../VOCdevkit/VOC2012 <-)
        2) The VOCdevkit folder of the **already downloaded** dataset (i.e. .../VOCdevkit <- /VOC2012)
        3) The folder one level up from the VOCdevkit folder,
        data **may** or **may not** be already downloaded (i.e. ... <- /VOCdevkit/VOC2012)
        4) The folder where you would like the dataset to be downloaded
        (a ``vocdataset`` subfolder is created there when no existing layout is found)
    image_set : "train", "val", "test", or "base", default "train"
        If "test", then dataset year must be "2007" or "2012"; any other year raises a ValueError.
        Note that the 2012 test set does not contain annotations.
        If "base", then the combined dataset of "train" and "val" is returned. For the years
        "2007" and "2012" the "test" set is loaded as well and placed ahead of the "train"/"val" data.
    year : "2007", "2008", "2009", "2010", "2011" or "2012", default "2012"
        The dataset year.
    transforms : Transform, Sequence[Transform] or None, default None
        Transform(s) to apply to the data.
    download : bool, default False
        If True, downloads the dataset from the internet and puts it in root directory.
        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
    verbose : bool, default False
        If True, outputs print statements.

    Attributes
    ----------
    path : pathlib.Path
        Location of the folder containing the data.
    year : "2007", "2008", "2009", "2010", "2011" or "2012"
        The selected dataset year.
    image_set : "train", "val", "test" or "base"
        The selected image set from the dataset.
    index2label : dict[int, str]
        Dictionary which translates from class integers to the associated class strings.
    label2index : dict[str, int]
        Dictionary which translates from class strings to the associated class integers.
    metadata : DatasetMetadata
        Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
    transforms : Sequence[Transform]
        The transforms to be applied to the data.
    size : int
        The size of the dataset.

    Note
    ----
    Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
    """
|
466
|
-
|
467
|
-
|
468
|
-
class VOCDetectionTorch(
    BaseVOCDataset[torch.Tensor, ObjectDetectionTarget[torch.Tensor]],
    BaseODDataset[torch.Tensor],
    BaseDatasetTorchMixin,
):
    """
    `Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Detection Dataset as PyTorch tensors.

    Parameters
    ----------
    root : str or pathlib.Path
        Because of the structure of the PASCAL VOC datasets, the root needs to be one of 4 folders.
        1) The year folder of the **already downloaded** dataset (i.e. .../VOCdevkit/VOC2012 <-)
        2) The VOCdevkit folder of the **already downloaded** dataset (i.e. .../VOCdevkit <- /VOC2012)
        3) The folder one level up from the VOCdevkit folder,
        data **may** or **may not** be already downloaded (i.e. ... <- /VOCdevkit/VOC2012)
        4) The folder where you would like the dataset to be downloaded
        (a ``vocdataset`` subfolder is created there when no existing layout is found)
    image_set : "train", "val", "test", or "base", default "train"
        If "test", then dataset year must be "2007" or "2012"; any other year raises a ValueError.
        Note that the 2012 test set does not contain annotations.
        If "base", then the combined dataset of "train" and "val" is returned. For the years
        "2007" and "2012" the "test" set is loaded as well and placed ahead of the "train"/"val" data.
    year : "2007", "2008", "2009", "2010", "2011" or "2012", default "2012"
        The dataset year.
    transforms : Transform, Sequence[Transform] or None, default None
        Transform(s) to apply to the data.
    download : bool, default False
        If True, downloads the dataset from the internet and puts it in root directory.
        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
    verbose : bool, default False
        If True, outputs print statements.

    Attributes
    ----------
    path : pathlib.Path
        Location of the folder containing the data.
    year : "2007", "2008", "2009", "2010", "2011" or "2012"
        The selected dataset year.
    image_set : "train", "val", "test" or "base"
        The selected image set from the dataset.
    index2label : dict[int, str]
        Dictionary which translates from class integers to the associated class strings.
    label2index : dict[str, int]
        Dictionary which translates from class strings to the associated class integers.
    metadata : DatasetMetadata
        Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
    transforms : Sequence[Transform]
        The transforms to be applied to the data.
    size : int
        The size of the dataset.

    Note
    ----
    Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
    """
|
521
|
-
|
522
|
-
|
523
|
-
class VOCSegmentation(
    BaseVOCDataset[NDArray[Any], SegmentationTarget[NDArray[Any]]],
    BaseSegDataset[NDArray[Any]],
    BaseDatasetNumpyMixin,
):
    """
    `Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.

    Parameters
    ----------
    root : str or pathlib.Path
        Because of the structure of the PASCAL VOC datasets, the root needs to be one of 4 folders.
        1) The year folder of the **already downloaded** dataset (i.e. .../VOCdevkit/VOC2012 <-)
        2) The VOCdevkit folder of the **already downloaded** dataset (i.e. .../VOCdevkit <- /VOC2012)
        3) The folder one level up from the VOCdevkit folder,
        data **may** or **may not** be already downloaded (i.e. ... <- /VOCdevkit/VOC2012)
        4) The folder where you would like the dataset to be downloaded
        (a ``vocdataset`` subfolder is created there when no existing layout is found)
    image_set : "train", "val", "test", or "base", default "train"
        If "test", then dataset year must be "2007" or "2012"; any other year raises a ValueError.
        If "base", then the combined dataset of "train" and "val" is returned. For the years
        "2007" and "2012" the "test" set is loaded as well and placed ahead of the "train"/"val" data.
    year : "2007", "2008", "2009", "2010", "2011" or "2012", default "2012"
        The dataset year.
    transforms : Transform, Sequence[Transform] or None, default None
        Transform(s) to apply to the data.
    download : bool, default False
        If True, downloads the dataset from the internet and puts it in root directory.
        Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
    verbose : bool, default False
        If True, outputs print statements.

    Attributes
    ----------
    path : pathlib.Path
        Location of the folder containing the data.
    year : "2007", "2008", "2009", "2010", "2011" or "2012"
        The selected dataset year.
    image_set : "train", "val", "test" or "base"
        The selected image set from the dataset.
    index2label : dict[int, str]
        Dictionary which translates from class integers to the associated class strings.
    label2index : dict[str, int]
        Dictionary which translates from class strings to the associated class integers.
    metadata : DatasetMetadata
        Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
    transforms : Sequence[Transform]
        The transforms to be applied to the data.
    size : int
        The size of the dataset.

    Note
    ----
    Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
    """

    def _load_data(self) -> tuple[list[str], list[str], dict[str, list[Any]]]:
        """Load the data as the parent VOC loader does, then split the mask paths out of the metadata."""
        # Delegating keeps the test/base special-casing in a single place
        # (BaseVOCDataset._load_data) instead of repeating the branch here.
        filepaths, targets, datum_metadata = super()._load_data()
        # The masks are kept on the instance and removed from the per-image metadata.
        self._masks = datum_metadata.pop("mask_path")
        return filepaths, targets, datum_metadata
|
File without changes
|
/dataeval-0.86.8.dist-info/licenses/LICENSE.txt → /dataeval-0.87.0.dist-info/licenses/LICENSE
RENAMED
File without changes
|