hafnia 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hafnia/dataset/{dataset_upload_helper.py → dataset_details_uploader.py} +114 -191
- hafnia/dataset/dataset_names.py +26 -0
- hafnia/dataset/format_conversions/format_coco.py +490 -0
- hafnia/dataset/format_conversions/format_helpers.py +33 -0
- hafnia/dataset/format_conversions/format_image_classification_folder.py +95 -14
- hafnia/dataset/format_conversions/format_yolo.py +115 -25
- hafnia/dataset/format_conversions/torchvision_datasets.py +10 -8
- hafnia/dataset/hafnia_dataset.py +20 -466
- hafnia/dataset/hafnia_dataset_types.py +477 -0
- hafnia/dataset/license_types.py +4 -4
- hafnia/dataset/operations/dataset_stats.py +3 -3
- hafnia/dataset/operations/dataset_transformations.py +14 -17
- hafnia/dataset/operations/table_transformations.py +20 -13
- hafnia/dataset/primitives/bbox.py +6 -2
- hafnia/dataset/primitives/bitmask.py +21 -46
- hafnia/dataset/primitives/classification.py +1 -1
- hafnia/dataset/primitives/polygon.py +43 -2
- hafnia/dataset/primitives/primitive.py +1 -1
- hafnia/dataset/primitives/segmentation.py +1 -1
- hafnia/experiment/hafnia_logger.py +13 -4
- hafnia/platform/datasets.py +2 -3
- hafnia/torch_helpers.py +48 -4
- hafnia/utils.py +34 -0
- hafnia/visualizations/image_visualizations.py +3 -1
- {hafnia-0.4.2.dist-info → hafnia-0.4.3.dist-info}/METADATA +2 -2
- {hafnia-0.4.2.dist-info → hafnia-0.4.3.dist-info}/RECORD +29 -26
- {hafnia-0.4.2.dist-info → hafnia-0.4.3.dist-info}/WHEEL +0 -0
- {hafnia-0.4.2.dist-info → hafnia-0.4.3.dist-info}/entry_points.txt +0 -0
- {hafnia-0.4.2.dist-info → hafnia-0.4.3.dist-info}/licenses/LICENSE +0 -0
hafnia/dataset/format_conversions/format_yolo.py

@@ -1,14 +1,17 @@
 import shutil
+from dataclasses import dataclass
 from pathlib import Path
 from typing import TYPE_CHECKING, List, Optional
 
 from PIL import Image
-from rich.progress import track
 
 from hafnia.dataset import primitives
-from hafnia.dataset.dataset_names import SplitName
+from hafnia.dataset.dataset_names import SampleField, SplitName
+from hafnia.dataset.format_conversions import format_helpers
+from hafnia.dataset.hafnia_dataset_types import DatasetInfo, Sample, TaskInfo
+from hafnia.utils import progress_bar
 
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # Using 'TYPE_CHECKING' to avoid circular imports during type checking
     from hafnia.dataset.hafnia_dataset import HafniaDataset
 
 FILENAME_YOLO_CLASS_NAMES = "obj.names"
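Note: rich.progress.track is dropped in favor of the new progress_bar helper in hafnia/utils.py (part of the +34 lines added there). A minimal sketch of the calling convention as used at the call sites in this diff; the iterable and loop body below are placeholders:

    from hafnia.utils import progress_bar

    # 'description' labels the bar; 'total' is passed explicitly when the
    # iterable has no usable len(), e.g. a streaming torchvision dataset.
    for image_path in progress_bar(image_paths, description="Import YOLO 'train' split"):
        process(image_path)  # placeholder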
@@ -20,23 +23,81 @@ def get_image_size(path: Path) -> tuple[int, int]:
     return img.size  # (width, height)
 
 
+@dataclass
+class YoloSplitPaths:
+    split: str
+    path_root: Path
+    path_images_txt: Path
+    path_class_names: Path
+
+    def check_paths(self):
+        if not self.path_root.exists():
+            raise FileNotFoundError(f"YOLO dataset root path not found at '{self.path_root.resolve()}'")
+        if not self.path_images_txt.exists():
+            raise FileNotFoundError(f"File with images not found at '{self.path_images_txt.resolve()}'")
+        if not self.path_class_names.exists():
+            raise FileNotFoundError(f"File with class names not found at '{self.path_class_names.resolve()}'")
+
+
 def from_yolo_format(
-
-    split_name: str = SplitName.UNDEFINED,
+    path_dataset: Path,
     dataset_name: str = "yolo-dataset",
     filename_class_names: str = FILENAME_YOLO_CLASS_NAMES,
     filename_images_txt: str = FILENAME_YOLO_IMAGES_TXT,
+) -> "HafniaDataset":
+    per_split_paths: List[YoloSplitPaths] = get_split_definitions_for_coco_dataset_formats(
+        path_dataset=path_dataset,
+        filename_class_names=filename_class_names,
+        filename_images_txt=filename_images_txt,
+    )
+
+    hafnia_dataset = from_yolo_format_by_split_paths(splits=per_split_paths, dataset_name=dataset_name)
+    return hafnia_dataset
+
+
+def from_yolo_format_by_split_paths(splits: List[YoloSplitPaths], dataset_name: str) -> "HafniaDataset":
+    from hafnia.dataset.hafnia_dataset import HafniaDataset
+
+    dataset_splits = []
+    for split_paths in splits:
+        dataset_split = dataset_split_from_yolo_format(split_paths=split_paths, dataset_name=dataset_name)
+        dataset_splits.append(dataset_split)
+
+    hafnia_dataset = HafniaDataset.from_merger(dataset_splits)
+    return hafnia_dataset
+
+
+def get_split_definitions_for_coco_dataset_formats(
+    path_dataset: Path,
+    filename_class_names: str = FILENAME_YOLO_CLASS_NAMES,
+    filename_images_txt: str = FILENAME_YOLO_IMAGES_TXT,
+) -> List[YoloSplitPaths]:
+    splits = []
+
+    for split_def in format_helpers.get_splits_from_folder(path_dataset):
+        split_path = YoloSplitPaths(
+            split=split_def.name,
+            path_root=split_def.path,
+            path_images_txt=split_def.path / filename_images_txt,
+            path_class_names=path_dataset / filename_class_names,
+        )
+        splits.append(split_path)
+
+    return splits
+
+
+def dataset_split_from_yolo_format(
+    split_paths: YoloSplitPaths,
+    dataset_name: str,
 ) -> "HafniaDataset":
     """
     Imports a YOLO (Darknet) formatted dataset as a HafniaDataset.
     """
-    from hafnia.dataset.hafnia_dataset import
-
-    path_class_names = path_yolo_dataset / filename_class_names
-
-    if split_name not in SplitName.all_split_names():
-        raise ValueError(f"Invalid split name: {split_name}. Must be one of {SplitName.all_split_names()}")
+    from hafnia.dataset.hafnia_dataset import HafniaDataset
 
+    path_class_names = split_paths.path_class_names
+    if split_paths.split not in SplitName.all_split_names():
+        raise ValueError(f"Invalid split name: {split_paths.split}. Must be one of {SplitName.all_split_names()}")
     if not path_class_names.exists():
         raise FileNotFoundError(f"File with class names not found at '{path_class_names.resolve()}'.")
 
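With this change, split handling moves from a single split_name argument to folder-based discovery: from_yolo_format() scans path_dataset via format_helpers.get_splits_from_folder(), builds one YoloSplitPaths per split (class names at the dataset root, an images file per split folder), imports each split, and merges the results with HafniaDataset.from_merger(). A hedged usage sketch; the folder path is illustrative:

    from pathlib import Path
    from hafnia.dataset.format_conversions.format_yolo import from_yolo_format

    # One call now imports every split found under the dataset root.
    dataset = from_yolo_format(path_dataset=Path("./my-yolo-dataset"))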
@@ -49,8 +110,7 @@ def from_yolo_format(
     if len(class_names) == 0:
         raise ValueError(f"File with class names not found at '{path_class_names.resolve()}' has no class names")
 
-    path_images_txt =
-
+    path_images_txt = split_paths.path_images_txt
     if not path_images_txt.exists():
         raise FileNotFoundError(f"File with images not found at '{path_images_txt.resolve()}'")
 
@@ -61,8 +121,8 @@ def from_yolo_format(
     image_paths_raw = [line.strip() for line in images_txt_text.splitlines()]
 
     samples: List[Sample] = []
-    for image_path_raw in
-        path_image =
+    for image_path_raw in progress_bar(image_paths_raw, description=f"Import YOLO '{split_paths.split}' split"):
+        path_image = split_paths.path_root / image_path_raw
         if not path_image.exists():
             raise FileNotFoundError(f"File with image not found at '{path_image.resolve()}'")
         width, height = get_image_size(path_image)
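Image paths in the per-split images file are now resolved against that split's root folder (split_paths.path_root) instead of a single dataset root. Illustrative file content, assuming the data/ layout the exporter below writes:

    data/0001.jpg
    data/0002.jpg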
@@ -98,7 +158,7 @@ def from_yolo_format(
             file_path=path_image.absolute().as_posix(),
             height=height,
             width=width,
-            split=
+            split=split_paths.split,
             bboxes=boxes,
         )
         samples.append(sample)
@@ -111,11 +171,41 @@ def from_yolo_format(
 
 def to_yolo_format(
     dataset: "HafniaDataset",
-
+    path_output: Path,
     task_name: Optional[str] = None,
+    filename_images_txt: str = FILENAME_YOLO_IMAGES_TXT,
+    filename_class_names: str = FILENAME_YOLO_CLASS_NAMES,
+) -> List[YoloSplitPaths]:
+    """Exports a HafniaDataset as YOLO (Darknet) format."""
+
+    split_names = dataset.samples[SampleField.SPLIT].unique().to_list()
+
+    per_split_paths: List[YoloSplitPaths] = []
+    for split_name in split_names:
+        dataset_split = dataset.create_split_dataset(split_name)
+
+        yolo_split_paths = YoloSplitPaths(
+            split=split_name,
+            path_root=path_output / split_name,
+            path_images_txt=path_output / split_name / filename_images_txt,
+            path_class_names=path_output / filename_class_names,
+        )
+
+        to_yolo_split_format(
+            dataset=dataset_split,
+            split_paths=yolo_split_paths,
+            task_name=task_name,
+        )
+        per_split_paths.append(yolo_split_paths)
+    return per_split_paths
+
+
+def to_yolo_split_format(
+    dataset: "HafniaDataset",
+    split_paths: YoloSplitPaths,
+    task_name: Optional[str],
 ):
     """Exports a HafniaDataset as YOLO (Darknet) format."""
-    from hafnia.dataset.hafnia_dataset import Sample
 
     bbox_task = dataset.info.get_task_by_task_name_and_primitive(task_name=task_name, primitive=primitives.Bbox)
 
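The exporter is split in two: to_yolo_format() takes a path_output, derives split names from the samples table (SampleField.SPLIT), writes one YOLO folder per split via to_yolo_split_format(), and returns the YoloSplitPaths it created. A hedged sketch of the new call; the dataset and output path are placeholders:

    from pathlib import Path
    from hafnia.dataset.format_conversions.format_yolo import to_yolo_format

    # 'dataset' is an existing HafniaDataset with a Bbox task.
    per_split_paths = to_yolo_format(dataset=dataset, path_output=Path("./export-yolo"))
    for split_paths in per_split_paths:
        split_paths.check_paths()  # verify the expected files were written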
@@ -124,11 +214,11 @@ def to_yolo_format(
         raise ValueError(
             f"Hafnia dataset task '{bbox_task.name}' has no class names defined. This is required for YOLO export."
         )
-
-    path_class_names =
-    path_class_names.write_text("\n".join(class_names))
+    split_paths.path_root.mkdir(parents=True, exist_ok=True)
+    split_paths.path_class_names.parent.mkdir(parents=True, exist_ok=True)
+    split_paths.path_class_names.write_text("\n".join(class_names))
 
-    path_data_folder =
+    path_data_folder = split_paths.path_root / "data"
     path_data_folder.mkdir(parents=True, exist_ok=True)
     image_paths: List[str] = []
     for sample_dict in dataset:
@@ -138,14 +228,14 @@ def to_yolo_format(
         path_image_src = Path(sample.file_path)
         path_image_dst = path_data_folder / path_image_src.name
         shutil.copy2(path_image_src, path_image_dst)
-        image_paths.append(path_image_dst.relative_to(
+        image_paths.append(path_image_dst.relative_to(split_paths.path_root).as_posix())
         path_label = path_image_dst.with_suffix(".txt")
         bboxes = sample.bboxes or []
         bbox_strings = [bbox_to_yolo_format(bbox) for bbox in bboxes]
         path_label.write_text("\n".join(bbox_strings))
 
-    path_images_txt =
-    path_images_txt.write_text("\n".join(image_paths))
+    split_paths.path_images_txt.parent.mkdir(parents=True, exist_ok=True)
+    split_paths.path_images_txt.write_text("\n".join(image_paths))
 
 
 def bbox_to_yolo_format(bbox: primitives.Bbox) -> str:
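Each copied image gets a sibling .txt label file with one line per bounding box, produced by bbox_to_yolo_format() (defined right after this hunk). Darknet label lines hold <class_id> <x_center> <y_center> <width> <height> with coordinates normalized to image size; an illustrative line, not taken from the package:

    0 0.512500 0.433333 0.218750 0.191667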
hafnia/dataset/format_conversions/torchvision_datasets.py

@@ -6,7 +6,6 @@ import textwrap
 from pathlib import Path
 from typing import Callable, Dict, List, Optional, Tuple
 
-from rich.progress import track
 from torchvision import datasets as tv_datasets
 from torchvision.datasets import VisionDataset
 from torchvision.datasets.utils import download_and_extract_archive, extract_archive
@@ -15,9 +14,10 @@ from hafnia import utils
 from hafnia.dataset.dataset_helpers import save_pil_image_with_hash_name
 from hafnia.dataset.dataset_names import SplitName
 from hafnia.dataset.format_conversions.format_image_classification_folder import (
-
+    from_image_classification_split_folder,
 )
-from hafnia.dataset.hafnia_dataset import
+from hafnia.dataset.hafnia_dataset import HafniaDataset
+from hafnia.dataset.hafnia_dataset_types import DatasetInfo, Sample, TaskInfo
 from hafnia.dataset.primitives import Classification
 
 
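This matches the release-wide refactor: hafnia_dataset.py shrinks (+20 -466) while the new hafnia_dataset_types.py (+477) takes over the type definitions, so imports split accordingly:

    from hafnia.dataset.hafnia_dataset import HafniaDataset
    from hafnia.dataset.hafnia_dataset_types import DatasetInfo, Sample, TaskInfo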
@@ -72,12 +72,12 @@ def caltech_101_as_hafnia_dataset(
     path_image_classification_folder = _download_and_extract_caltech_dataset(
         dataset_name, force_redownload=force_redownload
     )
-    hafnia_dataset =
+    hafnia_dataset = from_image_classification_split_folder(
         path_image_classification_folder,
         split=SplitName.TRAIN,
         n_samples=n_samples,
+        dataset_name=dataset_name,
     )
-    hafnia_dataset.info.dataset_name = dataset_name
     hafnia_dataset.info.version = "1.1.0"
     hafnia_dataset.info.reference_bibtex = textwrap.dedent("""\
         @article{FeiFei2004LearningGV,
@@ -102,12 +102,12 @@ def caltech_256_as_hafnia_dataset(
     path_image_classification_folder = _download_and_extract_caltech_dataset(
         dataset_name, force_redownload=force_redownload
     )
-    hafnia_dataset =
+    hafnia_dataset = from_image_classification_split_folder(
         path_image_classification_folder,
         split=SplitName.TRAIN,
         n_samples=n_samples,
+        dataset_name=dataset_name,
     )
-    hafnia_dataset.info.dataset_name = dataset_name
     hafnia_dataset.info.version = "1.1.0"
     hafnia_dataset.info.reference_bibtex = textwrap.dedent("""\
         @misc{griffin_2023_5sv1j-ytw97,
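In both Caltech converters, from_image_classification_split_folder() now receives dataset_name directly instead of the caller patching hafnia_dataset.info.dataset_name afterwards. A hedged sketch of the updated pattern; the name is a placeholder and the full signature lives in format_image_classification_folder.py:

    hafnia_dataset = from_image_classification_split_folder(
        path_image_classification_folder,
        split=SplitName.TRAIN,
        n_samples=n_samples,
        dataset_name="caltech-101",
    )
    hafnia_dataset.info.version = "1.1.0"  # other metadata is still set afterwards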
@@ -216,7 +216,9 @@ def torchvision_basic_image_classification_dataset_as_hafnia_dataset(
     class_index_to_name = {v: k for k, v in class_name_to_index.items()}
     description = f"Convert '{torchvision_dataset_name}' ({split_name} split) to Hafnia Dataset "
     samples_in_split = []
-    for image, class_idx in
+    for image, class_idx in utils.progress_bar(
+        torchvision_dataset, total=n_samples_per_split, description=description
+    ):
         (width, height) = image.size
         path_image = save_pil_image_with_hash_name(image, path_hafnia_conversions)
         sample = Sample(