dataeval-0.86.9-py3-none-any.whl → dataeval-0.87.0-py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (41)
  1. dataeval/__init__.py +1 -1
  2. dataeval/_version.py +2 -2
  3. dataeval/config.py +4 -19
  4. dataeval/data/_metadata.py +56 -27
  5. dataeval/data/_split.py +1 -1
  6. dataeval/data/selections/_classbalance.py +4 -3
  7. dataeval/data/selections/_classfilter.py +5 -5
  8. dataeval/data/selections/_indices.py +2 -2
  9. dataeval/data/selections/_prioritize.py +249 -29
  10. dataeval/data/selections/_reverse.py +1 -1
  11. dataeval/data/selections/_shuffle.py +2 -2
  12. dataeval/detectors/ood/__init__.py +2 -1
  13. dataeval/detectors/ood/base.py +38 -1
  14. dataeval/detectors/ood/knn.py +95 -0
  15. dataeval/metrics/bias/_balance.py +28 -21
  16. dataeval/metrics/bias/_diversity.py +4 -4
  17. dataeval/metrics/bias/_parity.py +2 -2
  18. dataeval/metrics/stats/_hashstats.py +19 -2
  19. dataeval/outputs/_workflows.py +20 -7
  20. dataeval/typing.py +14 -2
  21. dataeval/utils/__init__.py +2 -2
  22. dataeval/utils/_bin.py +7 -6
  23. dataeval/utils/data/__init__.py +2 -0
  24. dataeval/utils/data/_dataset.py +13 -6
  25. dataeval/utils/data/_validate.py +169 -0
  26. {dataeval-0.86.9.dist-info → dataeval-0.87.0.dist-info}/METADATA +5 -17
  27. {dataeval-0.86.9.dist-info → dataeval-0.87.0.dist-info}/RECORD +29 -39
  28. dataeval/utils/datasets/__init__.py +0 -21
  29. dataeval/utils/datasets/_antiuav.py +0 -189
  30. dataeval/utils/datasets/_base.py +0 -266
  31. dataeval/utils/datasets/_cifar10.py +0 -201
  32. dataeval/utils/datasets/_fileio.py +0 -142
  33. dataeval/utils/datasets/_milco.py +0 -197
  34. dataeval/utils/datasets/_mixin.py +0 -54
  35. dataeval/utils/datasets/_mnist.py +0 -202
  36. dataeval/utils/datasets/_seadrone.py +0 -512
  37. dataeval/utils/datasets/_ships.py +0 -144
  38. dataeval/utils/datasets/_types.py +0 -48
  39. dataeval/utils/datasets/_voc.py +0 -583
  40. {dataeval-0.86.9.dist-info → dataeval-0.87.0.dist-info}/WHEEL +0 -0
  41. /dataeval-0.86.9.dist-info/licenses/LICENSE.txt → /dataeval-0.87.0.dist-info/licenses/LICENSE +0 -0
@@ -1,583 +0,0 @@
- from __future__ import annotations
-
- __all__ = []
-
- import os
- import shutil
- from pathlib import Path
- from typing import TYPE_CHECKING, Any, Literal, Sequence, TypeVar
-
- import torch
- from defusedxml.ElementTree import parse
- from numpy.typing import NDArray
-
- from dataeval.utils.datasets._base import (
-     BaseDataset,
-     BaseODDataset,
-     BaseSegDataset,
-     DataLocation,
-     _ensure_exists,
-     _TArray,
-     _TTarget,
- )
- from dataeval.utils.datasets._mixin import BaseDatasetNumpyMixin, BaseDatasetTorchMixin
- from dataeval.utils.datasets._types import ObjectDetectionTarget, SegmentationTarget
-
- if TYPE_CHECKING:
-     from dataeval.typing import Transform
-
- VOCClassStringMap = Literal[
-     "aeroplane",
-     "bicycle",
-     "bird",
-     "boat",
-     "bottle",
-     "bus",
-     "car",
-     "cat",
-     "chair",
-     "cow",
-     "diningtable",
-     "dog",
-     "horse",
-     "motorbike",
-     "person",
-     "pottedplant",
-     "sheep",
-     "sofa",
-     "train",
-     "tvmonitor",
- ]
- TVOCClassMap = TypeVar("TVOCClassMap", VOCClassStringMap, int, list[VOCClassStringMap], list[int])
-
-
- class BaseVOCDataset(BaseDataset[_TArray, _TTarget, list[str], str]):
-     _resources = [
-         DataLocation(
-             url="https://data.brainchip.com/dataset-mirror/voc/VOCtrainval_11-May-2012.tar",
-             filename="VOCtrainval_11-May-2012.tar",
-             md5=False,
-             checksum="e14f763270cf193d0b5f74b169f44157a4b0c6efa708f4dd0ff78ee691763bcb",
-         ),
-         DataLocation(
-             url="http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar",
-             filename="VOCtrainval_25-May-2011.tar",
-             md5=False,
-             checksum="0a7f5f5d154f7290ec65ec3f78b72ef72c6d93ff6d79acd40dc222a9ee5248ba",
-         ),
-         DataLocation(
-             url="http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar",
-             filename="VOCtrainval_03-May-2010.tar",
-             md5=False,
-             checksum="1af4189cbe44323ab212bff7afbc7d0f55a267cc191eb3aac911037887e5c7d4",
-         ),
-         DataLocation(
-             url="http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar",
-             filename="VOCtrainval_11-May-2009.tar",
-             md5=False,
-             checksum="11cbe1741fb5bdadbbca3c08e9ec62cd95c14884845527d50847bc2cf57e7fd6",
-         ),
-         DataLocation(
-             url="http://host.robots.ox.ac.uk/pascal/VOC/voc2008/VOCtrainval_14-Jul-2008.tar",
-             filename="VOCtrainval_14-Jul-2008.tar",
-             md5=False,
-             checksum="7f0ca53c1b5a838fbe946965fc106c6e86832183240af5c88e3f6c306318d42e",
-         ),
-         DataLocation(
-             url="https://data.brainchip.com/dataset-mirror/voc/VOCtrainval_06-Nov-2007.tar",
-             filename="VOCtrainval_06-Nov-2007.tar",
-             md5=False,
-             checksum="7d8cd951101b0957ddfd7a530bdc8a94f06121cfc1e511bb5937e973020c7508",
-         ),
-         DataLocation(
-             url="https://data.brainchip.com/dataset-mirror/voc/VOC2012test.tar",
-             filename="VOC2012test.tar",
-             md5=False,
-             checksum="f08582b1935816c5eab3bbb1eb6d06201a789eaa173cdf1cf400c26f0cac2fb3",
-         ),
-         DataLocation(
-             url="https://data.brainchip.com/dataset-mirror/voc/VOCtest_06-Nov-2007.tar",
-             filename="VOCtest_06-Nov-2007.tar",
-             md5=False,
-             checksum="6836888e2e01dca84577a849d339fa4f73e1e4f135d312430c4856b5609b4892",
-         ),
-     ]
-     _base2007: tuple[int, int] = (5, 7)
-     _base2012: tuple[int, int] = (0, 6)
-
-     index2label: dict[int, str] = {
-         0: "aeroplane",
-         1: "bicycle",
-         2: "bird",
-         3: "boat",
-         4: "bottle",
-         5: "bus",
-         6: "car",
-         7: "cat",
-         8: "chair",
-         9: "cow",
-         10: "diningtable",
-         11: "dog",
-         12: "horse",
-         13: "motorbike",
-         14: "person",
-         15: "pottedplant",
-         16: "sheep",
-         17: "sofa",
-         18: "train",
-         19: "tvmonitor",
-     }
-
-     def __init__(
-         self,
-         root: str | Path,
-         image_set: Literal["train", "val", "test", "base"] = "train",
-         year: Literal["2007", "2008", "2009", "2010", "2011", "2012"] = "2012",
-         transforms: Transform[_TArray] | Sequence[Transform[_TArray]] | None = None,
-         download: bool = False,
-         verbose: bool = False,
-     ) -> None:
-         self.year = year
-         self._resource_index = self._get_year_image_set_index(year, image_set)
-         super().__init__(
-             root,
-             image_set,
-             transforms,
-             download,
-             verbose,
-         )
-
-     def _get_dataset_dir(self) -> Path:
-         """Overrides the base function to determine correct dataset directory for VOC class"""
-         return self._find_main_VOC_dir(self._root)
-
-     def _find_main_VOC_dir(self, base: Path) -> Path:
-         """
-         Determine the correct dataset directory for VOC detection and segmentation classes.
-         Handles various directory structure possibilities and validates existence.
-         """
-
-         # VOCdataset directory possibilities
-         dataset_dir = base if base.stem.lower() == "vocdataset" else base / "vocdataset"
-
-         # Define possible directory structures based on patterns
-         # 1. Root is already the specific VOC year directory
-         # 2. Root is the VOCdevkit directory
-         # 3. Standard structure
-         # 4. Special case for year 2011
-         # 5. Within VOCdataset directory
-         # 6. Special case for year 2011 within VOCdataset
-         possible_paths = [
-             base if base.stem == f"VOC{self.year}" else None,
-             base / f"VOC{self.year}" if base.stem == "VOCdevkit" else None,
-             base / "VOCdevkit" / f"VOC{self.year}",
-             base / "TrainVal" / "VOCdevkit" / f"VOC{self.year}" if self.year == "2011" else None,
-             dataset_dir / "VOCdevkit" / f"VOC{self.year}",
-             dataset_dir / "TrainVal" / "VOCdevkit" / f"VOC{self.year}" if self.year == "2011" else None,
-         ]
-
-         # Filter out None values and check each path
-         for path in filter(None, possible_paths):
-             if path.exists():
-                 return path
-
-         # If no existing path is found, create and return the dataset directory
-         if not dataset_dir.exists():
-             dataset_dir.mkdir(parents=True, exist_ok=True)
-
-         return dataset_dir
-
-     def _get_year_image_set_index(self, year: str, image_set: str) -> int:
-         """Function to ensure that the correct resource file is accessed"""
-         if year == "2007" and image_set == "test":
-             return -1
-         if year == "2012" and image_set == "test":
-             return -2
-         if year != "2007" and image_set == "test":
-             raise ValueError(
-                 f"The only test sets available are for the years 2007 and 2012, not {year}. "
-                 "Either select the year 2007 or 2012, or use a different image_set."
-             )
-         return 2012 - int(year)
-
-     def _update_path(self) -> None:
-         """Update the path to the new folder structure"""
-         if self.year == "2011" and self.path.stem.lower() == "vocdataset":
-             self.path: Path = self.path / "TrainVal" / "VOCdevkit" / f"VOC{self.year}"
-         elif self.path.stem.lower() == "vocdataset":
-             self.path: Path = self.path / "VOCdevkit" / f"VOC{self.year}"
-
-     def _load_data_exception(self) -> tuple[list[str], list[str], dict[str, Any]]:
-         """Adjust how the directory is created for the 2007 and 2012 test set"""
-         filepaths: list[str] = []
-         targets: list[str] = []
-         datum_metadata: dict[str, list[Any]] = {}
-         tmp_path: Path = self._root / "tmp_directory_for_download"
-         tmp_path.mkdir(exist_ok=True)
-         resource_idx = self._base2007 if self.year == "2007" else self._base2012
-
-         # Determine if text files exist
-         train_file = self.path / "ImageSets" / "Main" / "trainval.txt"
-         test_file = self.path / "ImageSets" / "Main" / "test.txt"
-         train_exists = train_file.exists()
-         test_exists = test_file.exists()
-
-         if self.image_set == "base":
-             if not train_exists and not test_exists:
-                 _ensure_exists(*self._resources[resource_idx[0]], self.path, self._root, self._download, self._verbose)
-                 self._update_path()
-                 _ensure_exists(*self._resources[resource_idx[1]], tmp_path, self._root, self._download, self._verbose)
-                 self._merge_voc_directories(tmp_path)
-
-             elif train_exists and not test_exists:
-                 _ensure_exists(*self._resources[resource_idx[1]], tmp_path, self._root, self._download, self._verbose)
-                 self._merge_voc_directories(tmp_path)
-
-             elif not train_exists and test_exists:
-                 _ensure_exists(*self._resources[resource_idx[0]], tmp_path, self._root, self._download, self._verbose)
-                 self._merge_voc_directories(tmp_path)
-
-             # Code to determine what is needed in each category
-             metadata_list: list[dict[str, Any]] = []
-
-             for img_set in ["test", "base"]:
-                 self.image_set = img_set
-                 resource_filepaths, resource_targets, resource_metadata = self._load_data_inner()
-                 filepaths.extend(resource_filepaths)
-                 targets.extend(resource_targets)
-                 metadata_list.append(resource_metadata)
-
-             # Combine metadata from all resources
-             for data_dict in metadata_list:
-                 for key, val in data_dict.items():
-                     str_key = str(key)  # Ensure key is string
-                     if str_key not in datum_metadata:
-                         datum_metadata[str_key] = []
-                     datum_metadata[str_key].extend(val)
-
-         else:
-             self._resource = self._resources[resource_idx[1]]
-
-             if train_exists and not test_exists:
-                 _ensure_exists(*self._resource, tmp_path, self._root, self._download, self._verbose)
-                 self._merge_voc_directories(tmp_path)
-
-             resource_filepaths, resource_targets, resource_metadata = self._load_try_and_update()
-             filepaths.extend(resource_filepaths)
-             targets.extend(resource_targets)
-             datum_metadata.update(resource_metadata)
-
-         return filepaths, targets, datum_metadata
-
-     def _merge_voc_directories(self, source_dir: Path) -> None:
-         """Merge two VOC directories, handling file conflicts intelligently."""
-         base: Path = self._find_main_VOC_dir(source_dir)
-         # Create all subdirectories in target if they don't exist
-         for dirpath, dirnames, filenames in os.walk(base):
-             # Convert to Path objects
-             source_path = Path(dirpath)
-
-             # Get the relative path from source_dir
-             rel_path = source_path.relative_to(base)
-
-             # Create the corresponding target path
-             target_path = self.path / rel_path
-             target_path.mkdir(parents=True, exist_ok=True)
-
-             # Copy all files
-             for filename in filenames:
-                 source_file = source_path / filename
-                 target_file = target_path / filename
-
-                 # File doesn't exist in target, just move it
-                 if not target_file.exists():
-                     shutil.move(source_file, target_file)
-                 else:
-                     # File exists in both, assume they're identical and skip
-                     pass
-
-         shutil.rmtree(source_dir)
-
-     def _load_try_and_update(self) -> tuple[list[str], list[str], dict[str, Any]]:
-         """Test if data needs to be downloaded and update path if it does"""
-         if self._verbose:
-             print(f"Determining if {self._resource.filename} needs to be downloaded.")
-
-         try:
-             result = self._load_data_inner()
-             if self._verbose:
-                 print("No download needed, loaded data successfully.")
-         except FileNotFoundError:
-             _ensure_exists(*self._resource, self.path, self._root, self._download, self._verbose)
-             self._update_path()
-             result = self._load_data_inner()
-         return result
-
-     def _load_data(self) -> tuple[list[str], list[str], dict[str, Any]]:
-         """
-         Function to determine if data can be accessed or if it needs to be downloaded and/or extracted.
-         """
-         # Exception - test sets
-         year_set_bool = (self.image_set == "test" or self.image_set == "base") and (
-             self.year == "2012" or self.year == "2007"
-         )
-         if year_set_bool:
-             return self._load_data_exception()
-
-         return self._load_try_and_update()
-
-     def _get_image_sets(self) -> dict[str, list[str]]:
-         """Function to create the list of images in each image set"""
-         image_folder = self.path / "JPEGImages"
-         image_set_list = ["train", "val", "trainval"] if self.image_set != "test" else ["test"]
-         image_sets = {}
-         for image_set in image_set_list:
-             text_file = self.path / "ImageSets" / "Main" / (image_set + ".txt")
-             selected_images: list[str] = []
-             with open(text_file) as f:
-                 for line in f.readlines():
-                     out = line.strip()
-                     selected_images.append(str(image_folder / (out + ".jpg")))
-
-             name = "base" if image_set == "trainval" else image_set
-             image_sets[name] = selected_images
-         return image_sets
-
-     def _load_data_inner(self) -> tuple[list[str], list[str], dict[str, Any]]:
-         """Function to load in the file paths for the data, annotations and segmentation masks"""
-         file_meta = {"year": [], "image_id": [], "mask_path": []}
-         ann_folder = self.path / "Annotations"
-         seg_folder = self.path / "SegmentationClass"
-
-         # Load in the image sets
-         image_sets = self._get_image_sets()
-
-         # Get the data, annotations and metadata
-         annotations = []
-         data = image_sets[self.image_set]
-         for entry in data:
-             file_name = Path(entry).name
-             file_stem = Path(entry).stem
-             if self.year != "2007":
-                 # Remove file extension and split by "_"
-                 parts = file_stem.split("_")
-                 file_meta["year"].append(parts[0])
-                 file_meta["image_id"].append(parts[1])
-             else:
-                 file_meta["year"].append(self.year)
-                 file_meta["image_id"].append(file_stem)
-             file_meta["mask_path"].append(str(seg_folder / file_name))
-             annotations.append(str(ann_folder / file_stem) + ".xml")
-
-         return data, annotations, file_meta
-
-     def _read_annotations(self, annotation: str) -> tuple[list[list[float]], list[int], dict[str, Any]]:
-         boxes: list[list[float]] = []
-         label_str = []
-         if not Path(annotation).exists():
-             return boxes, label_str, {}
-         root = parse(annotation).getroot()
-         if root is None:
-             raise ValueError(f"Unable to parse {annotation}")
-         additional_meta: dict[str, Any] = {
-             "folder": root.findtext("folder", default=""),
-             "filename": root.findtext("filename", default=""),
-             "database": root.findtext("source/database", default=""),
-             "annotation_source": root.findtext("source/annotation", default=""),
-             "image_source": root.findtext("source/image", default=""),
-             "image_width": int(root.findtext("size/width", default="-1")),
-             "image_height": int(root.findtext("size/height", default="-1")),
-             "image_depth": int(root.findtext("size/depth", default="-1")),
-             "segmented": int(root.findtext("segmented", default="-1")),
-             "pose": [],
-             "truncated": [],
-             "difficult": [],
-         }
-         for obj in root.findall("object"):
-             label_str.append(obj.findtext("name", default=""))
-             additional_meta["pose"].append(obj.findtext("pose", default=""))
-             additional_meta["truncated"].append(int(obj.findtext("truncated", default="-1")))
-             additional_meta["difficult"].append(int(obj.findtext("difficult", default="-1")))
-             boxes.append(
-                 [
-                     float(obj.findtext("bndbox/xmin", default="0")),
-                     float(obj.findtext("bndbox/ymin", default="0")),
-                     float(obj.findtext("bndbox/xmax", default="0")),
-                     float(obj.findtext("bndbox/ymax", default="0")),
-                 ]
-             )
-         labels = [self.label2index[label] for label in label_str]
-         return boxes, labels, additional_meta
-
-
- class VOCDetection(
-     BaseVOCDataset[NDArray[Any], ObjectDetectionTarget[NDArray[Any]]],
-     BaseODDataset[NDArray[Any], list[str], str],
-     BaseDatasetNumpyMixin,
- ):
-     """
-     `Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Detection Dataset.
-
-     Parameters
-     ----------
-     root : str or pathlib.Path
-         Because of the structure of the PASCAL VOC datasets, the root needs to be one of 4 folders.
-         1) Directory containing the year of the **already downloaded** dataset (i.e. .../VOCdevkit/VOC2012 <-)
-         2) Directory to the VOCdevkit folder of the **already downloaded** dataset (i.e. .../VOCdevkit <- /VOC2012)
-         3) Directory to the folder one level up from the VOCdevkit folder,
-            data **may** or **may not** be already downloaded (i.e. ... <- /VOCdevkit/VOC2012)
-         4) Directory to where you would like the dataset to be downloaded
-     image_set : "train", "val", "test", or "base", default "train"
-         If "test", then dataset year must be "2007" or "2012". Note that the 2012 test set does not contain annotations.
-         If "base", then the combined dataset of "train" and "val" is returned.
-     year : "2007", "2008", "2009", "2010", "2011" or "2012", default "2012"
-         The dataset year.
-     transforms : Transform, Sequence[Transform] or None, default None
-         Transform(s) to apply to the data.
-     download : bool, default False
-         If True, downloads the dataset from the internet and puts it in root directory.
-         Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
-     verbose : bool, default False
-         If True, outputs print statements.
-
-     Attributes
-     ----------
-     path : pathlib.Path
-         Location of the folder containing the data.
-     year : "2007", "2008", "2009", "2010", "2011" or "2012"
-         The selected dataset year.
-     image_set : "train", "val", "test" or "base"
-         The selected image set from the dataset.
-     index2label : dict[int, str]
-         Dictionary which translates from class integers to the associated class strings.
-     label2index : dict[str, int]
-         Dictionary which translates from class strings to the associated class integers.
-     metadata : DatasetMetadata
-         Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
-     transforms : Sequence[Transform]
-         The transforms to be applied to the data.
-     size : int
-         The size of the dataset.
-
-     Note
-     ----
-     Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
-     """
-
-
- class VOCDetectionTorch(
-     BaseVOCDataset[torch.Tensor, ObjectDetectionTarget[torch.Tensor]],
-     BaseODDataset[torch.Tensor, list[str], str],
-     BaseDatasetTorchMixin,
- ):
-     """
-     `Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Detection Dataset as PyTorch tensors.
-
-     Parameters
-     ----------
-     root : str or pathlib.Path
-         Because of the structure of the PASCAL VOC datasets, the root needs to be one of 4 folders.
-         1) Directory containing the year of the **already downloaded** dataset (i.e. .../VOCdevkit/VOC2012 <-)
-         2) Directory to the VOCdevkit folder of the **already downloaded** dataset (i.e. .../VOCdevkit <- /VOC2012)
-         3) Directory to the folder one level up from the VOCdevkit folder,
-            data **may** or **may not** be already downloaded (i.e. ... <- /VOCdevkit/VOC2012)
-         4) Directory to where you would like the dataset to be downloaded
-     image_set : "train", "val", "test", or "base", default "train"
-         If "test", then dataset year must be "2007" or "2012". Note that the 2012 test set does not contain annotations.
-         If "base", then the combined dataset of "train" and "val" is returned.
-     year : "2007", "2008", "2009", "2010", "2011" or "2012", default "2012"
-         The dataset year.
-     transforms : Transform, Sequence[Transform] or None, default None
-         Transform(s) to apply to the data.
-     download : bool, default False
-         If True, downloads the dataset from the internet and puts it in root directory.
-         Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
-     verbose : bool, default False
-         If True, outputs print statements.
-
-     Attributes
-     ----------
-     path : pathlib.Path
-         Location of the folder containing the data.
-     year : "2007", "2008", "2009", "2010", "2011" or "2012"
-         The selected dataset year.
-     image_set : "train", "val", "test" or "base"
-         The selected image set from the dataset.
-     index2label : dict[int, str]
-         Dictionary which translates from class integers to the associated class strings.
-     label2index : dict[str, int]
-         Dictionary which translates from class strings to the associated class integers.
-     metadata : DatasetMetadata
-         Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
-     transforms : Sequence[Transform]
-         The transforms to be applied to the data.
-     size : int
-         The size of the dataset.
-
-     Note
-     ----
-     Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
-     """
-
-
- class VOCSegmentation(
-     BaseVOCDataset[NDArray[Any], SegmentationTarget[NDArray[Any]]],
-     BaseSegDataset[NDArray[Any]],
-     BaseDatasetNumpyMixin,
- ):
-     """
-     `Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
-
-     Parameters
-     ----------
-     root : str or pathlib.Path
-         Root directory of dataset where the ``vocdataset`` folder exists.
-     image_set : "train", "val", "test", or "base", default "train"
-         If "test", then dataset year must be "2007".
-         If "base", then the combined dataset of "train" and "val" is returned.
-     year : "2007", "2008", "2009", "2010", "2011" or "2012", default "2012"
-         The dataset year.
-     transforms : Transform, Sequence[Transform] or None, default None
-         Transform(s) to apply to the data.
-     download : bool, default False
-         If True, downloads the dataset from the internet and puts it in root directory.
-         Class checks to see if data is already downloaded to ensure it does not create a duplicate download.
-     verbose : bool, default False
-         If True, outputs print statements.
-
-     Attributes
-     ----------
-     path : pathlib.Path
-         Location of the folder containing the data.
-     year : "2007", "2008", "2009", "2010", "2011" or "2012"
-         The selected dataset year.
-     image_set : "train", "val", "test" or "base"
-         The selected image set from the dataset.
-     index2label : dict[int, str]
-         Dictionary which translates from class integers to the associated class strings.
-     label2index : dict[str, int]
-         Dictionary which translates from class strings to the associated class integers.
-     metadata : DatasetMetadata
-         Typed dictionary containing dataset metadata, such as `id` which returns the dataset class name.
-     transforms : Sequence[Transform]
-         The transforms to be applied to the data.
-     size : int
-         The size of the dataset.
-
-     Note
-     ----
-     Data License: `Flickr Terms of Use <http://www.flickr.com/terms.gne?legacy=1>`_
-     """
-
-     def _load_data(self) -> tuple[list[str], list[str], dict[str, list[Any]]]:
-         """Overload base load data to split out masks for segmentation."""
-         # Exception - test sets
-         year_set_bool = (self.image_set == "test" or self.image_set == "base") and (
-             self.year == "2012" or self.year == "2007"
-         )
-         if year_set_bool:
-             filepaths, targets, datum_metadata = self._load_data_exception()
-         else:
-             filepaths, targets, datum_metadata = self._load_try_and_update()
-         self._masks = datum_metadata.pop("mask_path")
-         return filepaths, targets, datum_metadata
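
For reference, below is a minimal usage sketch of the removed VOCDetection class, reconstructed from its 0.86.9 docstring above. The root path is illustrative, and the (image, target, metadata) indexing assumes the MAITE-style dataset protocol these classes followed; under 0.87.0 this import no longer exists, so the snippet only applies to code pinned to dataeval<0.87.

    # Requires dataeval 0.86.9; dataeval.utils.datasets was removed in 0.87.0.
    from dataeval.utils.datasets import VOCDetection

    ds = VOCDetection(
        root="./data",      # illustrative; any of the four root layouts in the docstring works
        image_set="train",  # "train", "val", "test", or "base"
        year="2012",
        download=True,      # per the docstring, skips downloading when the data is already present
        verbose=False,
    )

    print(ds.size)                   # dataset length, via the documented `size` attribute
    image, target, metadata = ds[0]  # target is an ObjectDetectionTarget (boxes and labels)
    print(ds.index2label[int(target.labels[0])])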