hafnia 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (43)
  1. hafnia/dataset/{dataset_upload_helper.py → dataset_details_uploader.py} +115 -192
  2. hafnia/dataset/dataset_names.py +26 -0
  3. hafnia/dataset/dataset_recipe/dataset_recipe.py +3 -3
  4. hafnia/dataset/format_conversions/format_coco.py +490 -0
  5. hafnia/dataset/format_conversions/format_helpers.py +33 -0
  6. hafnia/dataset/format_conversions/format_image_classification_folder.py +95 -14
  7. hafnia/dataset/format_conversions/format_yolo.py +115 -25
  8. hafnia/dataset/format_conversions/torchvision_datasets.py +10 -8
  9. hafnia/dataset/hafnia_dataset.py +20 -466
  10. hafnia/dataset/hafnia_dataset_types.py +477 -0
  11. hafnia/dataset/license_types.py +4 -4
  12. hafnia/dataset/operations/dataset_stats.py +3 -3
  13. hafnia/dataset/operations/dataset_transformations.py +14 -17
  14. hafnia/dataset/operations/table_transformations.py +20 -13
  15. hafnia/dataset/primitives/bbox.py +6 -2
  16. hafnia/dataset/primitives/bitmask.py +21 -46
  17. hafnia/dataset/primitives/classification.py +1 -1
  18. hafnia/dataset/primitives/polygon.py +43 -2
  19. hafnia/dataset/primitives/primitive.py +1 -1
  20. hafnia/dataset/primitives/segmentation.py +1 -1
  21. hafnia/experiment/hafnia_logger.py +13 -4
  22. hafnia/platform/datasets.py +3 -4
  23. hafnia/torch_helpers.py +48 -4
  24. hafnia/utils.py +35 -1
  25. hafnia/visualizations/image_visualizations.py +3 -1
  26. {hafnia-0.4.1.dist-info → hafnia-0.4.3.dist-info}/METADATA +2 -2
  27. hafnia-0.4.3.dist-info/RECORD +60 -0
  28. hafnia-0.4.3.dist-info/entry_points.txt +2 -0
  29. {cli → hafnia_cli}/__main__.py +2 -2
  30. {cli → hafnia_cli}/config.py +2 -2
  31. {cli → hafnia_cli}/dataset_cmds.py +2 -2
  32. {cli → hafnia_cli}/dataset_recipe_cmds.py +1 -1
  33. {cli → hafnia_cli}/experiment_cmds.py +1 -1
  34. {cli → hafnia_cli}/profile_cmds.py +2 -2
  35. {cli → hafnia_cli}/runc_cmds.py +1 -1
  36. {cli → hafnia_cli}/trainer_package_cmds.py +2 -2
  37. hafnia-0.4.1.dist-info/RECORD +0 -57
  38. hafnia-0.4.1.dist-info/entry_points.txt +0 -2
  39. {hafnia-0.4.1.dist-info → hafnia-0.4.3.dist-info}/WHEEL +0 -0
  40. {hafnia-0.4.1.dist-info → hafnia-0.4.3.dist-info}/licenses/LICENSE +0 -0
  41. {cli → hafnia_cli}/__init__.py +0 -0
  42. {cli → hafnia_cli}/consts.py +0 -0
  43. {cli → hafnia_cli}/keychain.py +0 -0
hafnia/dataset/format_conversions/format_yolo.py
@@ -1,14 +1,17 @@
  import shutil
+ from dataclasses import dataclass
  from pathlib import Path
  from typing import TYPE_CHECKING, List, Optional

  from PIL import Image
- from rich.progress import track

  from hafnia.dataset import primitives
- from hafnia.dataset.dataset_names import SplitName
+ from hafnia.dataset.dataset_names import SampleField, SplitName
+ from hafnia.dataset.format_conversions import format_helpers
+ from hafnia.dataset.hafnia_dataset_types import DatasetInfo, Sample, TaskInfo
+ from hafnia.utils import progress_bar

- if TYPE_CHECKING:
+ if TYPE_CHECKING:  # Using 'TYPE_CHECKING' to avoid circular imports during type checking
      from hafnia.dataset.hafnia_dataset import HafniaDataset

  FILENAME_YOLO_CLASS_NAMES = "obj.names"
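The hunks below replace rich.progress.track with a new hafnia.utils.progress_bar helper (hafnia/utils.py grows by 35 lines in this release, per the file list). Its implementation is not part of this diff; a minimal sketch of such a wrapper, assuming it simply delegates to rich:

from typing import Iterable, Optional, Sequence, TypeVar

from rich.progress import track

T = TypeVar("T")


def progress_bar(sequence: Sequence[T], description: str = "Working...", total: Optional[int] = None) -> Iterable[T]:
    # Hypothetical sketch: one central wrapper around rich's track(), so
    # progress-bar styling and behavior can be adjusted in a single place.
    yield from track(sequence, description=description, total=total)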
@@ -20,23 +23,81 @@ def get_image_size(path: Path) -> tuple[int, int]:
      return img.size  # (width, height)


+ @dataclass
+ class YoloSplitPaths:
+     split: str
+     path_root: Path
+     path_images_txt: Path
+     path_class_names: Path
+
+     def check_paths(self):
+         if not self.path_root.exists():
+             raise FileNotFoundError(f"YOLO dataset root path not found at '{self.path_root.resolve()}'")
+         if not self.path_images_txt.exists():
+             raise FileNotFoundError(f"File with images not found at '{self.path_images_txt.resolve()}'")
+         if not self.path_class_names.exists():
+             raise FileNotFoundError(f"File with class names not found at '{self.path_class_names.resolve()}'")
+
+
  def from_yolo_format(
-     path_yolo_dataset: Path,
-     split_name: str = SplitName.UNDEFINED,
+     path_dataset: Path,
      dataset_name: str = "yolo-dataset",
      filename_class_names: str = FILENAME_YOLO_CLASS_NAMES,
      filename_images_txt: str = FILENAME_YOLO_IMAGES_TXT,
+ ) -> "HafniaDataset":
+     per_split_paths: List[YoloSplitPaths] = get_split_definitions_for_coco_dataset_formats(
+         path_dataset=path_dataset,
+         filename_class_names=filename_class_names,
+         filename_images_txt=filename_images_txt,
+     )
+
+     hafnia_dataset = from_yolo_format_by_split_paths(splits=per_split_paths, dataset_name=dataset_name)
+     return hafnia_dataset
+
+
+ def from_yolo_format_by_split_paths(splits: List[YoloSplitPaths], dataset_name: str) -> "HafniaDataset":
+     from hafnia.dataset.hafnia_dataset import HafniaDataset
+
+     dataset_splits = []
+     for split_paths in splits:
+         dataset_split = dataset_split_from_yolo_format(split_paths=split_paths, dataset_name=dataset_name)
+         dataset_splits.append(dataset_split)
+
+     hafnia_dataset = HafniaDataset.from_merger(dataset_splits)
+     return hafnia_dataset
+
+
+ def get_split_definitions_for_coco_dataset_formats(
+     path_dataset: Path,
+     filename_class_names: str = FILENAME_YOLO_CLASS_NAMES,
+     filename_images_txt: str = FILENAME_YOLO_IMAGES_TXT,
+ ) -> List[YoloSplitPaths]:
+     splits = []
+
+     for split_def in format_helpers.get_splits_from_folder(path_dataset):
+         split_path = YoloSplitPaths(
+             split=split_def.name,
+             path_root=split_def.path,
+             path_images_txt=split_def.path / filename_images_txt,
+             path_class_names=path_dataset / filename_class_names,
+         )
+         splits.append(split_path)
+
+     return splits
+
+
+ def dataset_split_from_yolo_format(
+     split_paths: YoloSplitPaths,
+     dataset_name: str,
  ) -> "HafniaDataset":
      """
      Imports a YOLO (Darknet) formatted dataset as a HafniaDataset.
      """
-     from hafnia.dataset.hafnia_dataset import DatasetInfo, HafniaDataset, Sample, TaskInfo
-
-     path_class_names = path_yolo_dataset / filename_class_names
-
-     if split_name not in SplitName.all_split_names():
-         raise ValueError(f"Invalid split name: {split_name}. Must be one of {SplitName.all_split_names()}")
+     from hafnia.dataset.hafnia_dataset import HafniaDataset

+     path_class_names = split_paths.path_class_names
+     if split_paths.split not in SplitName.all_split_names():
+         raise ValueError(f"Invalid split name: {split_paths.split}. Must be one of {SplitName.all_split_names()}")
      if not path_class_names.exists():
          raise FileNotFoundError(f"File with class names not found at '{path_class_names.resolve()}'.")

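The hunk above splits the old single-split from_yolo_format into a per-split pipeline: discover split folders, build a YoloSplitPaths per split, import each split, and merge the results. A minimal usage sketch (the on-disk layout, one folder per split plus obj.names at the root, is an assumption based on how get_split_definitions_for_coco_dataset_formats builds the paths):

from pathlib import Path

from hafnia.dataset.format_conversions.format_yolo import from_yolo_format

# Assumed layout: datasets/my-yolo/obj.names plus train/, val/, ... folders,
# each containing an images.txt that lists the split's image files.
dataset = from_yolo_format(path_dataset=Path("datasets/my-yolo"), dataset_name="my-yolo")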
@@ -49,8 +110,7 @@ def from_yolo_format(
      if len(class_names) == 0:
          raise ValueError(f"File with class names not found at '{path_class_names.resolve()}' has no class names")

-     path_images_txt = path_yolo_dataset / filename_images_txt
-
+     path_images_txt = split_paths.path_images_txt
      if not path_images_txt.exists():
          raise FileNotFoundError(f"File with images not found at '{path_images_txt.resolve()}'")

@@ -61,8 +121,8 @@
      image_paths_raw = [line.strip() for line in images_txt_text.splitlines()]

      samples: List[Sample] = []
-     for image_path_raw in track(image_paths_raw):
-         path_image = path_yolo_dataset / image_path_raw
+     for image_path_raw in progress_bar(image_paths_raw, description=f"Import YOLO '{split_paths.split}' split"):
+         path_image = split_paths.path_root / image_path_raw
          if not path_image.exists():
              raise FileNotFoundError(f"File with image not found at '{path_image.resolve()}'")
          width, height = get_image_size(path_image)
@@ -98,7 +158,7 @@ def from_yolo_format(
              file_path=path_image.absolute().as_posix(),
              height=height,
              width=width,
-             split=split_name,
+             split=split_paths.split,
              bboxes=boxes,
          )
          samples.append(sample)
@@ -111,11 +171,41 @@

  def to_yolo_format(
      dataset: "HafniaDataset",
-     path_export_yolo_dataset: Path,
+     path_output: Path,
      task_name: Optional[str] = None,
+     filename_images_txt: str = FILENAME_YOLO_IMAGES_TXT,
+     filename_class_names: str = FILENAME_YOLO_CLASS_NAMES,
+ ) -> List[YoloSplitPaths]:
+     """Exports a HafniaDataset as YOLO (Darknet) format."""
+
+     split_names = dataset.samples[SampleField.SPLIT].unique().to_list()
+
+     per_split_paths: List[YoloSplitPaths] = []
+     for split_name in split_names:
+         dataset_split = dataset.create_split_dataset(split_name)
+
+         yolo_split_paths = YoloSplitPaths(
+             split=split_name,
+             path_root=path_output / split_name,
+             path_images_txt=path_output / split_name / filename_images_txt,
+             path_class_names=path_output / filename_class_names,
+         )
+
+         to_yolo_split_format(
+             dataset=dataset_split,
+             split_paths=yolo_split_paths,
+             task_name=task_name,
+         )
+         per_split_paths.append(yolo_split_paths)
+     return per_split_paths
+
+
+ def to_yolo_split_format(
+     dataset: "HafniaDataset",
+     split_paths: YoloSplitPaths,
+     task_name: Optional[str],
  ):
      """Exports a HafniaDataset as YOLO (Darknet) format."""
-     from hafnia.dataset.hafnia_dataset import Sample

      bbox_task = dataset.info.get_task_by_task_name_and_primitive(task_name=task_name, primitive=primitives.Bbox)

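to_yolo_format now writes one YOLO folder per split and returns the YoloSplitPaths it wrote, so callers can verify or post-process each split. A short usage sketch:

from pathlib import Path

per_split = to_yolo_format(dataset=dataset, path_output=Path("export/yolo"))
for split_paths in per_split:
    split_paths.check_paths()  # raises FileNotFoundError if any expected file is missing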
@@ -124,11 +214,11 @@ def to_yolo_format(
          raise ValueError(
              f"Hafnia dataset task '{bbox_task.name}' has no class names defined. This is required for YOLO export."
          )
-     path_export_yolo_dataset.mkdir(parents=True, exist_ok=True)
-     path_class_names = path_export_yolo_dataset / FILENAME_YOLO_CLASS_NAMES
-     path_class_names.write_text("\n".join(class_names))
+     split_paths.path_root.mkdir(parents=True, exist_ok=True)
+     split_paths.path_class_names.parent.mkdir(parents=True, exist_ok=True)
+     split_paths.path_class_names.write_text("\n".join(class_names))

-     path_data_folder = path_export_yolo_dataset / "data"
+     path_data_folder = split_paths.path_root / "data"
      path_data_folder.mkdir(parents=True, exist_ok=True)
      image_paths: List[str] = []
      for sample_dict in dataset:
@@ -138,14 +228,14 @@ def to_yolo_format(
          path_image_src = Path(sample.file_path)
          path_image_dst = path_data_folder / path_image_src.name
          shutil.copy2(path_image_src, path_image_dst)
-         image_paths.append(path_image_dst.relative_to(path_export_yolo_dataset).as_posix())
+         image_paths.append(path_image_dst.relative_to(split_paths.path_root).as_posix())
          path_label = path_image_dst.with_suffix(".txt")
          bboxes = sample.bboxes or []
          bbox_strings = [bbox_to_yolo_format(bbox) for bbox in bboxes]
          path_label.write_text("\n".join(bbox_strings))

-     path_images_txt = path_export_yolo_dataset / FILENAME_YOLO_IMAGES_TXT
-     path_images_txt.write_text("\n".join(image_paths))
+     split_paths.path_images_txt.parent.mkdir(parents=True, exist_ok=True)
+     split_paths.path_images_txt.write_text("\n".join(image_paths))


  def bbox_to_yolo_format(bbox: primitives.Bbox) -> str:
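The hunk cuts off at bbox_to_yolo_format. For reference, a YOLO (Darknet) label line is "<class_id> <x_center> <y_center> <width> <height>" with coordinates normalized to [0, 1]; a hypothetical sketch, assuming primitives.Bbox stores a normalized top-left corner, size, and class index (the field names are guesses, not the package's actual API):

def bbox_to_yolo_format_sketch(bbox) -> str:
    # Convert a normalized top-left (x, y, w, h) box into YOLO's
    # center-based "class x_center y_center width height" line.
    x_center = bbox.top_left_x + bbox.width / 2.0
    y_center = bbox.top_left_y + bbox.height / 2.0
    return f"{bbox.class_idx} {x_center:.6f} {y_center:.6f} {bbox.width:.6f} {bbox.height:.6f}"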
hafnia/dataset/format_conversions/torchvision_datasets.py
@@ -6,7 +6,6 @@ import textwrap
  from pathlib import Path
  from typing import Callable, Dict, List, Optional, Tuple

- from rich.progress import track
  from torchvision import datasets as tv_datasets
  from torchvision.datasets import VisionDataset
  from torchvision.datasets.utils import download_and_extract_archive, extract_archive
@@ -15,9 +14,10 @@ from hafnia import utils
  from hafnia.dataset.dataset_helpers import save_pil_image_with_hash_name
  from hafnia.dataset.dataset_names import SplitName
  from hafnia.dataset.format_conversions.format_image_classification_folder import (
-     from_image_classification_folder,
+     from_image_classification_split_folder,
  )
- from hafnia.dataset.hafnia_dataset import DatasetInfo, HafniaDataset, Sample, TaskInfo
+ from hafnia.dataset.hafnia_dataset import HafniaDataset
+ from hafnia.dataset.hafnia_dataset_types import DatasetInfo, Sample, TaskInfo
  from hafnia.dataset.primitives import Classification


@@ -72,12 +72,12 @@ def caltech_101_as_hafnia_dataset(
      path_image_classification_folder = _download_and_extract_caltech_dataset(
          dataset_name, force_redownload=force_redownload
      )
-     hafnia_dataset = from_image_classification_folder(
+     hafnia_dataset = from_image_classification_split_folder(
          path_image_classification_folder,
          split=SplitName.TRAIN,
          n_samples=n_samples,
+         dataset_name=dataset_name,
      )
-     hafnia_dataset.info.dataset_name = dataset_name
      hafnia_dataset.info.version = "1.1.0"
      hafnia_dataset.info.reference_bibtex = textwrap.dedent("""\
          @article{FeiFei2004LearningGV,
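From the caller's side nothing changes; dataset_name now flows into the converter instead of being patched onto hafnia_dataset.info afterwards. A usage sketch (the argument values are placeholders):

dataset = caltech_101_as_hafnia_dataset(n_samples=500, force_redownload=False)
print(dataset.info.dataset_name)  # set inside from_image_classification_split_folder
print(dataset.info.version)       # "1.1.0", per the hunk above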
@@ -102,12 +102,12 @@ def caltech_256_as_hafnia_dataset(
      path_image_classification_folder = _download_and_extract_caltech_dataset(
          dataset_name, force_redownload=force_redownload
      )
-     hafnia_dataset = from_image_classification_folder(
+     hafnia_dataset = from_image_classification_split_folder(
          path_image_classification_folder,
          split=SplitName.TRAIN,
          n_samples=n_samples,
+         dataset_name=dataset_name,
      )
-     hafnia_dataset.info.dataset_name = dataset_name
      hafnia_dataset.info.version = "1.1.0"
      hafnia_dataset.info.reference_bibtex = textwrap.dedent("""\
          @misc{griffin_2023_5sv1j-ytw97,
@@ -216,7 +216,9 @@ def torchvision_basic_image_classification_dataset_as_hafnia_dataset(
      class_index_to_name = {v: k for k, v in class_name_to_index.items()}
      description = f"Convert '{torchvision_dataset_name}' ({split_name} split) to Hafnia Dataset "
      samples_in_split = []
-     for image, class_idx in track(torchvision_dataset, total=n_samples_per_split, description=description):
+     for image, class_idx in utils.progress_bar(
+         torchvision_dataset, total=n_samples_per_split, description=description
+     ):
          (width, height) = image.size
          path_image = save_pil_image_with_hash_name(image, path_hafnia_conversions)
          sample = Sample(
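The loop then turns each (image, class index) pair into a Sample. The Sample fields visible elsewhere in this diff are file_path, height, width, and split; a sketch of the construction this hunk truncates, where the classifications keyword and the Classification constructor arguments are assumptions:

sample = Sample(
    file_path=path_image.absolute().as_posix(),
    height=height,
    width=width,
    split=split_name,
    classifications=[Classification(class_name=class_index_to_name[class_idx])],
)
samples_in_split.append(sample)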