hafnia 0.4.3__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178)
  1. {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/tests.yaml +4 -0
  2. {hafnia-0.4.3 → hafnia-0.5.0}/.vscode/settings.json +1 -1
  3. {hafnia-0.4.3 → hafnia-0.5.0}/PKG-INFO +3 -3
  4. {hafnia-0.4.3 → hafnia-0.5.0}/README.md +1 -1
  5. {hafnia-0.4.3 → hafnia-0.5.0}/examples/example_hafnia_dataset.py +39 -24
  6. {hafnia-0.4.3 → hafnia-0.5.0}/pyproject.toml +2 -2
  7. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_details_uploader.py +41 -54
  8. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_helpers.py +1 -15
  9. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_names.py +17 -3
  10. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/torchvision_datasets.py +6 -3
  11. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/hafnia_dataset.py +99 -24
  12. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/hafnia_dataset_types.py +3 -1
  13. hafnia-0.5.0/src/hafnia/dataset/operations/dataset_s3_storage.py +211 -0
  14. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/operations/table_transformations.py +2 -1
  15. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/http.py +2 -1
  16. hafnia-0.5.0/src/hafnia/platform/datasets.py +334 -0
  17. hafnia-0.5.0/src/hafnia/platform/s5cmd_utils.py +147 -0
  18. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/utils.py +4 -0
  19. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/dataset_cmds.py +18 -0
  20. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/profile_cmds.py +0 -1
  21. {hafnia-0.4.3 → hafnia-0.5.0}/tests/helper_testing.py +5 -0
  22. hafnia-0.5.0/tests/integration/test_bring_your_own_data.py +93 -0
  23. {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_torchvision_datasets.py +1 -5
  24. hafnia-0.5.0/tests/unit/dataset/test_dataset_details_uploader.py +55 -0
  25. {hafnia-0.4.3 → hafnia-0.5.0}/uv.lock +1290 -1176
  26. hafnia-0.4.3/src/hafnia/platform/datasets.py +0 -243
  27. hafnia-0.4.3/tests/unit/dataset/test_dataset_details_uploader.py +0 -29
  28. {hafnia-0.4.3 → hafnia-0.5.0}/.devcontainer/devcontainer.json +0 -0
  29. {hafnia-0.4.3 → hafnia-0.5.0}/.devcontainer/hooks/post_create +0 -0
  30. {hafnia-0.4.3 → hafnia-0.5.0}/.github/dependabot.yaml +0 -0
  31. {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/Dockerfile +0 -0
  32. {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/build.yaml +0 -0
  33. {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/check_release.yaml +0 -0
  34. {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/ci_cd.yaml +0 -0
  35. {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/lint.yaml +0 -0
  36. {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/publish_docker.yaml +0 -0
  37. {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/publish_pypi.yaml +0 -0
  38. {hafnia-0.4.3 → hafnia-0.5.0}/.gitignore +0 -0
  39. {hafnia-0.4.3 → hafnia-0.5.0}/.pre-commit-config.yaml +0 -0
  40. {hafnia-0.4.3 → hafnia-0.5.0}/.python-version +0 -0
  41. {hafnia-0.4.3 → hafnia-0.5.0}/.trivyignore +0 -0
  42. {hafnia-0.4.3 → hafnia-0.5.0}/.vscode/extensions.json +0 -0
  43. {hafnia-0.4.3 → hafnia-0.5.0}/.vscode/launch.json +0 -0
  44. {hafnia-0.4.3 → hafnia-0.5.0}/LICENSE +0 -0
  45. {hafnia-0.4.3 → hafnia-0.5.0}/docs/cli.md +0 -0
  46. {hafnia-0.4.3 → hafnia-0.5.0}/docs/release.md +0 -0
  47. {hafnia-0.4.3 → hafnia-0.5.0}/examples/example_dataset_recipe.py +0 -0
  48. {hafnia-0.4.3 → hafnia-0.5.0}/examples/example_logger.py +0 -0
  49. {hafnia-0.4.3 → hafnia-0.5.0}/examples/example_torchvision_dataloader.py +0 -0
  50. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/__init__.py +0 -0
  51. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/data/__init__.py +0 -0
  52. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/data/factory.py +0 -0
  53. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +0 -0
  54. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
  55. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_recipe/recipe_types.py +0 -0
  56. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/format_coco.py +0 -0
  57. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/format_helpers.py +0 -0
  58. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/format_image_classification_folder.py +0 -0
  59. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/format_yolo.py +0 -0
  60. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/license_types.py +0 -0
  61. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/operations/dataset_stats.py +0 -0
  62. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/operations/dataset_transformations.py +0 -0
  63. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/__init__.py +0 -0
  64. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/bbox.py +0 -0
  65. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/bitmask.py +0 -0
  66. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/classification.py +0 -0
  67. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/point.py +0 -0
  68. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/polygon.py +0 -0
  69. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/primitive.py +0 -0
  70. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/segmentation.py +0 -0
  71. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/utils.py +0 -0
  72. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/experiment/__init__.py +0 -0
  73. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/experiment/hafnia_logger.py +0 -0
  74. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/log.py +0 -0
  75. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/__init__.py +0 -0
  76. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/builder.py +0 -0
  77. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/dataset_recipe.py +0 -0
  78. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/download.py +0 -0
  79. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/experiment.py +0 -0
  80. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/trainer_package.py +0 -0
  81. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/torch_helpers.py +0 -0
  82. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/visualizations/colors.py +0 -0
  83. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/visualizations/image_visualizations.py +0 -0
  84. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/__init__.py +0 -0
  85. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/__main__.py +0 -0
  86. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/config.py +0 -0
  87. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/consts.py +0 -0
  88. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/dataset_recipe_cmds.py +0 -0
  89. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/experiment_cmds.py +0 -0
  90. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/keychain.py +0 -0
  91. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/runc_cmds.py +0 -0
  92. {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/trainer_package_cmds.py +0 -0
  93. {hafnia-0.4.3 → hafnia-0.5.0}/tests/__init__.py +0 -0
  94. {hafnia-0.4.3 → hafnia-0.5.0}/tests/conftest.py +0 -0
  95. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/train/000000000632.jpg +0 -0
  96. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/train/000000000724.jpg +0 -0
  97. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/train/_annotations.coco.json +0 -0
  98. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000139.jpg +0 -0
  99. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000285.jpg +0 -0
  100. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/valid/_annotations.coco.json +0 -0
  101. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/obj.names +0 -0
  102. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/data/000000000139.jpg +0 -0
  103. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/data/000000000139.txt +0 -0
  104. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/data/000000000285.jpg +0 -0
  105. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/data/000000000285.txt +0 -0
  106. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/images.txt +0 -0
  107. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.jpg +0 -0
  108. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.txt +0 -0
  109. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/validation/images.txt +0 -0
  110. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_image_metadata_schema.yaml +0 -0
  111. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_dataset_transformations/test_video_storage_format_read_image.png +0 -0
  112. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[polygon].png +0 -0
  113. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_as_ints].png +0 -0
  114. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_bytes].png +0 -0
  115. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_str].png +0 -0
  116. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_from_coco_format_visualized.png +0 -0
  117. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_to_coco_format_visualized.png +0 -0
  118. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
  119. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_yolo/test_import_yolo_format_visualized.png +0 -0
  120. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  121. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  122. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  123. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  124. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  125. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  126. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  127. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  128. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  129. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  130. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  131. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  132. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  133. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  134. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  135. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  136. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
  137. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
  138. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
  139. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
  140. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
  141. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
  142. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_polygon_to_bitmask_conversion.png +0 -0
  143. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +0 -0
  144. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
  145. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/data/253/253925d334c002ce6662d8133535dd4c.jpg +0 -0
  146. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/data/b1a/b1a09f4d922f8f6904bab0c1caf172ab.jpg +0 -0
  147. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/data/f67/f675c8a1e862b5e00203ab888ac7fff4.jpg +0 -0
  148. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +0 -0
  149. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +0 -0
  150. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
  151. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/data/25c/25c3a206e7b60ab50245ee3d52d97f11.png +0 -0
  152. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/data/962/962fd865fdd45f169d5ca8c8f284d68d.png +0 -0
  153. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/data/ec6/ec60f2f4fb854b59c97e16b45c713de0.png +0 -0
  154. {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/dataset_info.json +0 -0
  155. {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_check_example_scripts.py +0 -0
  156. {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_cli_integration.py +0 -0
  157. {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_dataset_merges.py +0 -0
  158. {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_dataset_recipes_with_platform.py +0 -0
  159. {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_samples.py +0 -0
  160. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
  161. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/dataset_recipe/test_dataset_recipes.py +0 -0
  162. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
  163. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/format_conversions/test_format_coco.py +0 -0
  164. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/format_conversions/test_format_image_classification_folder.py +0 -0
  165. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/format_conversions/test_format_yolo.py +0 -0
  166. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/operations/test_dataset_stats.py +0 -0
  167. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/operations/test_dataset_transformations.py +0 -0
  168. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/operations/test_table_transformations.py +0 -0
  169. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_colors.py +0 -0
  170. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_dataset_helpers.py +0 -0
  171. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_dataset_names.py +0 -0
  172. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_hafnia_dataset.py +0 -0
  173. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_shape_primitives.py +0 -0
  174. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_builder.py +0 -0
  175. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_cli.py +0 -0
  176. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_hafnia_logger.py +0 -0
  177. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_utils.py +0 -0
  178. {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_visualizations.py +0 -0
@@ -32,6 +32,10 @@ jobs:
  run: |
  mkdir -p ~/.hafnia
  echo "$HAFNIA_CONFIG" | jq . > ~/.hafnia/config.json
+ - name: Check hafnia configured
+ run: uv run hafnia profile active
+ - name: Check hafnia by download
+ run: uv run hafnia dataset download mnist --force
  - name: Run tests
  run: uv run pytest tests

@@ -23,7 +23,7 @@
  "python.testing.pytestArgs": [
  "tests",
  "-vv",
- "--durations=20",
+ "--durations=20"
  ],
  "python.testing.unittestEnabled": false,
  "python.testing.pytestEnabled": true,
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.4.3
+ Version: 0.5.0
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
@@ -10,7 +10,7 @@ Requires-Dist: click>=8.1.8
  Requires-Dist: emoji>=2.14.1
  Requires-Dist: flatten-dict>=0.4.2
  Requires-Dist: keyring>=25.6.0
- Requires-Dist: mcp==1.16.0
+ Requires-Dist: mcp>=1.16.0
  Requires-Dist: mlflow>=3.4.0
  Requires-Dist: more-itertools>=10.7.0
  Requires-Dist: opencv-python-headless>=4.11.0.86
@@ -209,7 +209,7 @@ DatasetInfo(
  ```

  You can iterate and access samples in the dataset using the `HafniaDataset` object.
- Each sample contain image and annotations information.
+ Each sample contain image and annotations information.

  ```python
  from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
@@ -180,7 +180,7 @@ DatasetInfo(
  ```

  You can iterate and access samples in the dataset using the `HafniaDataset` object.
- Each sample contain image and annotations information.
+ Each sample contain image and annotations information.

  ```python
  from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
@@ -138,43 +138,58 @@ path_tmp.mkdir(parents=True, exist_ok=True)
  Image.fromarray(image_with_annotations).save(path_tmp / "sample_with_annotations.png")


- ## Bring-your-own-data: Create a new dataset from samples
+ ## Create a hafnia dataset from scratch ##
+ path_yolo_dataset = Path("tests/data/dataset_formats/format_yolo/train")
+ path_class_names = path_yolo_dataset.parent / "obj.names"
+ class_names = [line.strip() for line in path_class_names.read_text().splitlines() if line.strip()]
+ path_images_file = path_yolo_dataset / "images.txt"
+ image_files = [line.strip() for line in path_images_file.read_text().splitlines() if line.strip()]
+
  fake_samples = []
- for i_fake_sample in range(5):
- bboxes = [Bbox(top_left_x=0.1, top_left_y=0.20, width=0.1, height=0.2, class_name="car")]
- classifications = [Classification(class_name="vehicle", class_idx=0)]
- sample = Sample(
- file_path=f"path/to/image_{i_fake_sample:05}.jpg",
- height=480,
- width=640,
- split="train",
- tags=["sample"],
- bboxes=bboxes,
- classifications=classifications,
- )
+ for image_file in image_files:
+ path_image = path_yolo_dataset / image_file
+ path_bboxes = path_yolo_dataset / image_file.replace(".jpg", ".txt")
+ bboxes: List[Bbox] = []
+ for bboxes_line in path_bboxes.read_text().splitlines():
+ str_parts = bboxes_line.strip().split()
+ class_idx = int(str_parts[0])
+ x_center, y_center, bbox_width, bbox_height = (float(value) for value in str_parts[1:5])
+ bbox = Bbox(
+ top_left_x=x_center - bbox_width / 2,
+ top_left_y=y_center - bbox_height / 2,
+ width=bbox_width,
+ height=bbox_height,
+ class_idx=class_idx,
+ class_name=class_names[class_idx],
+ )
+ bboxes.append(bbox)
+ image = Image.open(path_image)
+ height, width = image.size[1], image.size[0]
+ sample = Sample(file_path=str(path_image), height=height, width=width, split="train", bboxes=bboxes)
  fake_samples.append(sample)


  fake_dataset_info = DatasetInfo(
- dataset_name="fake-dataset",
+ dataset_name="custom-dataset",
  version="0.0.1",
- tasks=[
- TaskInfo(primitive=Bbox, class_names=["car", "truck", "bus"]),
- TaskInfo(primitive=Classification, class_names=["vehicle", "pedestrian", "cyclist"]),
- ],
+ tasks=[TaskInfo(primitive=Bbox, class_names=class_names)],
  )
- fake_dataset = HafniaDataset.from_samples_list(samples_list=fake_samples, info=fake_dataset_info)
+ custom_dataset = HafniaDataset.from_samples_list(samples_list=fake_samples, info=fake_dataset_info)
+
+ sample = Sample(**custom_dataset[0])
+
+ # To visualize and verify dataset is formatted correctly store image with annotations
+ image_with_annotations = sample.draw_annotations()
+ Image.fromarray(image_with_annotations).save(path_tmp / "custom_dataset_sample.png") # Save visualization to TM

- # Coming soon! Upload your dataset to the Hafnia Platform
- # fake_dataset.upload_to_hafnia()
+ # To upload the dataset to Hafnia platform
+ # custom_dataset.upload_to_platform(interactive=True, allow_version_overwrite=False)

- # Coming soon! Create your own dataset details page in Hafnia
- # fake_dataset.upload_dataset_details()

  ## Storing predictions: A hafnia dataset can also be used for storing predictions per sample
  # set 'ground_truth=False' and add 'confidence'.
  bboxes_predictions = [
- Bbox(top_left_x=10, top_left_y=20, width=100, height=200, class_name="car", ground_truth=False, confidence=0.9)
+ Bbox(top_left_x=0.1, top_left_y=0.2, width=0.3, height=0.4, class_name="car", ground_truth=False, confidence=0.9)
  ]

  classifications_predictions = [Classification(class_name="vehicle", class_idx=0, ground_truth=False, confidence=0.95)]
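Note: the diff view above drops Python indentation, so here is a hedged, re-indented sketch of the new "create a hafnia dataset from scratch" example; the import paths are assumptions based on the README snippets elsewhere in this diff.

```python
# Hedged, re-indented sketch of the new example above; import paths are assumptions.
from pathlib import Path
from typing import List

from PIL import Image

from hafnia.dataset.hafnia_dataset import DatasetInfo, HafniaDataset, Sample, TaskInfo  # assumed module
from hafnia.dataset.primitives import Bbox  # assumed module

path_yolo_dataset = Path("tests/data/dataset_formats/format_yolo/train")
class_names = [line.strip() for line in (path_yolo_dataset.parent / "obj.names").read_text().splitlines() if line.strip()]
image_files = [line.strip() for line in (path_yolo_dataset / "images.txt").read_text().splitlines() if line.strip()]

samples = []
for image_file in image_files:
    path_image = path_yolo_dataset / image_file
    path_bboxes = path_yolo_dataset / image_file.replace(".jpg", ".txt")
    bboxes: List[Bbox] = []
    for line in path_bboxes.read_text().splitlines():
        parts = line.strip().split()
        class_idx = int(parts[0])
        x_center, y_center, bbox_width, bbox_height = (float(value) for value in parts[1:5])
        # YOLO stores normalized center coordinates; Bbox takes the normalized top-left corner
        bboxes.append(
            Bbox(
                top_left_x=x_center - bbox_width / 2,
                top_left_y=y_center - bbox_height / 2,
                width=bbox_width,
                height=bbox_height,
                class_idx=class_idx,
                class_name=class_names[class_idx],
            )
        )
    width, height = Image.open(path_image).size
    samples.append(Sample(file_path=str(path_image), height=height, width=width, split="train", bboxes=bboxes))

info = DatasetInfo(dataset_name="custom-dataset", version="0.0.1", tasks=[TaskInfo(primitive=Bbox, class_names=class_names)])
custom_dataset = HafniaDataset.from_samples_list(samples_list=samples, info=info)
```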
@@ -1,6 +1,6 @@
  [project]
  name = "hafnia"
- version = "0.4.3"
+ version = "0.5.0"
  description = "Python SDK for communication with Hafnia platform."
  readme = "README.md"
  authors = [
@@ -28,7 +28,7 @@ dependencies = [
  "xxhash>=3.5.0",
  "mlflow>=3.4.0",
  "sagemaker-mlflow>=0.1.0",
- "mcp==1.16.0",
+ "mcp>=1.16.0",
  ]

  [dependency-groups]
@@ -4,7 +4,7 @@ import base64
  from datetime import datetime
  from enum import Enum
  from pathlib import Path
- from typing import Any, Dict, List, Optional, Tuple, Type, Union
+ from typing import Any, Dict, List, Optional, Type, Union

  import boto3
  import polars as pl
@@ -13,7 +13,6 @@ from pydantic import BaseModel, ConfigDict, field_validator

  from hafnia.dataset.dataset_names import (
  DatasetVariant,
- DeploymentStage,
  PrimitiveField,
  SampleField,
  SplitName,
@@ -29,26 +28,21 @@ from hafnia.dataset.primitives import (
  Segmentation,
  )
  from hafnia.dataset.primitives.primitive import Primitive
- from hafnia.http import post
- from hafnia.log import user_logger
- from hafnia.platform.datasets import get_dataset_id
+ from hafnia.platform.datasets import upload_dataset_details
+ from hafnia.utils import get_path_dataset_gallery_images
  from hafnia_cli.config import Config


- def generate_bucket_name(dataset_name: str, deployment_stage: DeploymentStage) -> str:
- # TODO: When moving to versioning we do NOT need 'staging' and 'production' specific buckets
- # and the new name convention should be: f"hafnia-dataset-{dataset_name}"
- return f"mdi-{deployment_stage.value}-{dataset_name}"
-
-
  class DatasetDetails(BaseModel, validate_assignment=True): # type: ignore[call-arg]
  model_config = ConfigDict(use_enum_values=True) # To parse Enum values as strings
  name: str
+ title: Optional[str] = None
+ overview: Optional[str] = None
  data_captured_start: Optional[datetime] = None
  data_captured_end: Optional[datetime] = None
  data_received_start: Optional[datetime] = None
  data_received_end: Optional[datetime] = None
- latest_update: Optional[datetime] = None
+ dataset_updated_at: Optional[datetime] = None
  license_citation: Optional[str] = None
  version: Optional[str] = None
  s3_bucket_name: Optional[str] = None
@@ -281,26 +275,32 @@ def get_folder_size(path: Path) -> int:
  return sum([path.stat().st_size for path in path.rglob("*")])


- def upload_to_hafnia_dataset_detail_page(dataset_update: DatasetDetails, upload_gallery_images: bool) -> dict:
- if not upload_gallery_images:
- dataset_update.imgs = None
-
- cfg = Config()
- dataset_details = dataset_update.model_dump_json()
- data = upload_dataset_details(cfg=cfg, data=dataset_details, dataset_name=dataset_update.name)
- return data
-
-
- def upload_dataset_details(cfg: Config, data: str, dataset_name: str) -> dict:
- dataset_endpoint = cfg.get_platform_endpoint("datasets")
- dataset_id = get_dataset_id(dataset_name, dataset_endpoint, cfg.api_key)
+ def upload_dataset_details_to_platform(
+ dataset: HafniaDataset,
+ path_gallery_images: Optional[Path] = None,
+ gallery_image_names: Optional[List[str]] = None,
+ distribution_task_names: Optional[List[str]] = None,
+ update_platform: bool = True,
+ cfg: Optional[Config] = None,
+ ) -> dict:
+ cfg = cfg or Config()
+ dataset_details = dataset_details_from_hafnia_dataset(
+ dataset=dataset,
+ path_gallery_images=path_gallery_images,
+ gallery_image_names=gallery_image_names,
+ distribution_task_names=distribution_task_names,
+ )

- import_endpoint = f"{dataset_endpoint}/{dataset_id}/import"
- headers = {"Authorization": cfg.api_key}
+ if update_platform:
+ dataset_details_exclude_none = dataset_details.model_dump(exclude_none=True, mode="json")
+ upload_dataset_details(
+ cfg=cfg,
+ data=dataset_details_exclude_none,
+ dataset_name=dataset_details.name,
+ )

- user_logger.info("Exporting dataset details to platform. This may take up to 30 seconds...")
- response = post(endpoint=import_endpoint, headers=headers, data=data) # type: ignore[assignment]
- return response # type: ignore[return-value]
+ dataset_details_dict = dataset_details.model_dump(exclude_none=False, mode="json")
+ return dataset_details_dict


  def get_resolutions(dataset: HafniaDataset, max_resolutions_selected: int = 8) -> List[DbResolution]:
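Note: a minimal usage sketch of the new `upload_dataset_details_to_platform` function, based only on the signature in the hunk above; `dataset` is assumed to be an existing `HafniaDataset` instance loaded elsewhere.

```python
# Hedged sketch: build the details payload locally without touching the platform.
from hafnia.dataset.dataset_details_uploader import upload_dataset_details_to_platform

# 'dataset' is assumed to be a HafniaDataset instance; with update_platform=False the
# function only returns the details dict, which is handy for inspecting the payload.
details = upload_dataset_details_to_platform(dataset=dataset, update_platform=False)
print(details["name"], details.get("version"))  # field names follow DatasetDetails above
```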
@@ -360,9 +360,6 @@ def s3_based_fields(bucket_name: str, variant_type: DatasetVariant, session: bot

  def dataset_details_from_hafnia_dataset(
  dataset: HafniaDataset,
- deployment_stage: DeploymentStage,
- path_sample: Optional[Path],
- path_hidden: Optional[Path],
  path_gallery_images: Optional[Path] = None,
  gallery_image_names: Optional[List[str]] = None,
  distribution_task_names: Optional[List[str]] = None,
@@ -371,33 +368,24 @@
  dataset_reports = []
  dataset_meta_info = dataset.info.meta or {}

- path_and_variant: List[Tuple[Path, DatasetVariant]] = []
- if path_sample is not None:
- path_and_variant.append((path_sample, DatasetVariant.SAMPLE))
-
- if path_hidden is not None:
- path_and_variant.append((path_hidden, DatasetVariant.HIDDEN))
-
- if len(path_and_variant) == 0:
- raise ValueError("At least one path must be provided for sample or hidden dataset.")
-
+ path_and_variant = [DatasetVariant.SAMPLE, DatasetVariant.HIDDEN]
  gallery_images = create_gallery_images(
  dataset=dataset,
  path_gallery_images=path_gallery_images,
  gallery_image_names=gallery_image_names,
  )

- for path_dataset, variant_type in path_and_variant:
+ for variant_type in path_and_variant:
  if variant_type == DatasetVariant.SAMPLE:
  dataset_variant = dataset.create_sample_dataset()
  else:
  dataset_variant = dataset

- size_bytes = get_folder_size(path_dataset)
+ files_paths = dataset_variant.samples[SampleField.FILE_PATH].to_list()
+ size_bytes = sum([Path(file_path).stat().st_size for file_path in files_paths])
  dataset_variants.append(
  DbDatasetVariant(
  variant_type=VARIANT_TYPE_MAPPING[variant_type], # type: ignore[index]
- # upload_date: Optional[datetime] = None
  size_bytes=size_bytes,
  data_type=DataTypeChoices.images,
  number_of_data_items=len(dataset_variant),
@@ -405,7 +393,6 @@
  duration=dataset_meta_info.get("duration", None),
  duration_average=dataset_meta_info.get("duration_average", None),
  frame_rate=dataset_meta_info.get("frame_rate", None),
- # bit_rate: Optional[float] = None
  n_cameras=dataset_meta_info.get("n_cameras", None),
  )
  )
@@ -435,19 +422,19 @@
  object_reports = sorted(object_reports, key=lambda x: x.obj.name) # Sort object reports by name
  report.annotated_object_reports = object_reports

- if report.distribution_values is None:
- report.distribution_values = []
+ if report.distribution_values is None:
+ report.distribution_values = []

- dataset_reports.append(report)
+ dataset_reports.append(report)
  dataset_name = dataset.info.dataset_name
- bucket_sample = generate_bucket_name(dataset_name, deployment_stage=deployment_stage)
  dataset_info = DatasetDetails(
  name=dataset_name,
+ title=dataset.info.dataset_title,
+ overview=dataset.info.description,
  version=dataset.info.version,
- s3_bucket_name=bucket_sample,
  dataset_variants=dataset_variants,
  split_annotations_reports=dataset_reports,
- latest_update=dataset.info.updated_at,
+ dataset_updated_at=dataset.info.updated_at,
  dataset_format_version=dataset.info.format_version,
  license_citation=dataset.info.reference_bibtex,
  data_captured_start=dataset_meta_info.get("data_captured_start", None),
@@ -565,7 +552,7 @@ def create_gallery_images(
  gallery_images = None
  if (gallery_image_names is not None) and (len(gallery_image_names) > 0):
  if path_gallery_images is None:
- raise ValueError("Path to gallery images must be provided.")
+ path_gallery_images = get_path_dataset_gallery_images(dataset.info.dataset_name)
  path_gallery_images.mkdir(parents=True, exist_ok=True)
  COL_IMAGE_NAME = "image_name"
  samples = dataset.samples.with_columns(
@@ -57,20 +57,6 @@ def save_pil_image_with_hash_name(image: Image.Image, path_folder: Path, allow_s
  def copy_and_rename_file_to_hash_value(path_source: Path, path_dataset_root: Path) -> Path:
  """
  Copies a file to a dataset root directory with a hash-based name and sub-directory structure.
-
- E.g. for an "image.png" with hash "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", the image will be copied to
- 'path_dataset_root / "data" / "dfe" / "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png"'
- Notice that the hash is used for both the filename and the subfolder name.
-
- Placing image/video files into multiple sub-folders (instead of one large folder) is seemingly
- unnecessary, but it is actually a requirement when the dataset is later downloaded from S3.
-
- The reason is that AWS has a rate limit of 3500 ops/sec per prefix (sub-folder) in S3 - meaning we can "only"
- download 3500 files per second from a single folder (prefix) in S3.
-
- For even a single user, we found that this limit was being reached when files are stored in single folder (prefix)
- in S3. To support multiple users and concurrent experiments, we are required to separate files into
- multiple sub-folders (prefixes) in S3 to not hit the rate limit.
  """

  if not path_source.exists():
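Note: the next hunk changes `relative_path_from_hash` so files are no longer nested in a `hash[:3]` sub-folder. A small sketch of the resulting layouts (the example hash comes from the docstring removed above):

```python
from pathlib import Path

# 0.4.3 layout: Path("data") / hash[:3] / f"{hash}{suffix}"
#   -> data/dfe/dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png
# 0.5.0 layout: Path("data") / f"{hash}{suffix}"
#   -> data/dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png
def relative_path_from_hash_0_5_0(hash: str, suffix: str) -> Path:
    return Path("data") / f"{hash}{suffix}"

assert relative_path_from_hash_0_5_0("dfe8f3b1", ".png") == Path("data/dfe8f3b1.png")
```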
@@ -86,7 +72,7 @@ def copy_and_rename_file_to_hash_value(path_source: Path, path_dataset_root: Pat


  def relative_path_from_hash(hash: str, suffix: str) -> Path:
- path_file = Path("data") / hash[:3] / f"{hash}{suffix}"
+ path_file = Path("data") / f"{hash}{suffix}"
  return path_file


@@ -2,6 +2,7 @@ from enum import Enum
  from typing import Dict, List, Optional

  import boto3
+ from botocore.exceptions import UnauthorizedSSOTokenError
  from pydantic import BaseModel, field_validator

  FILENAME_RECIPE_JSON = "recipe.json"
@@ -21,6 +22,7 @@ class DeploymentStage(Enum):
  PRODUCTION = "production"


+ ARN_PREFIX = "arn:aws:s3:::"
  TAG_IS_SAMPLE = "sample"

  OPS_REMOVE_CLASS = "__REMOVE__"
@@ -151,7 +153,14 @@ class AwsCredentials(BaseModel):
  """
  Creates AwsCredentials from a Boto3 session.
  """
- frozen_credentials = session.get_credentials().get_frozen_credentials()
+ try:
+ frozen_credentials = session.get_credentials().get_frozen_credentials()
+ except UnauthorizedSSOTokenError as e:
+ raise RuntimeError(
+ f"Failed to get AWS credentials from the session for profile '{session.profile_name}'.\n"
+ f"Ensure the profile exists in your AWS config in '~/.aws/config' and that you are logged in via AWS SSO.\n"
+ f"\tUse 'aws sso login --profile {session.profile_name}' to log in."
+ ) from e
  return AwsCredentials(
  access_key=frozen_credentials.access_key,
  secret_key=frozen_credentials.secret_key,
@@ -159,8 +168,13 @@ class AwsCredentials(BaseModel):
  region=session.region_name,
  )

-
- ARN_PREFIX = "arn:aws:s3:::"
+ def to_resource_credentials(self, bucket_name: str) -> "ResourceCredentials":
+ """
+ Converts AwsCredentials to ResourceCredentials by adding the S3 ARN.
+ """
+ payload = self.model_dump()
+ payload["s3_arn"] = f"{ARN_PREFIX}{bucket_name}"
+ return ResourceCredentials(**payload)


  class ResourceCredentials(AwsCredentials):
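Note: a hedged sketch of the new SSO error handling and `to_resource_credentials` helper shown above; the constructor name `from_session` and the profile/bucket names are assumptions, not confirmed by this diff.

```python
import boto3

from hafnia.dataset.dataset_names import ARN_PREFIX, AwsCredentials  # module path taken from the hunks above

session = boto3.Session(profile_name="my-sso-profile")  # placeholder profile name
# Constructor name is an assumption; it wraps session.get_credentials() and, per the hunk above,
# now raises a RuntimeError with an 'aws sso login' hint when the SSO token is missing or expired.
aws_credentials = AwsCredentials.from_session(session)

resource_credentials = aws_credentials.to_resource_credentials(bucket_name="my-dataset-bucket")
assert resource_credentials.s3_arn == f"{ARN_PREFIX}my-dataset-bucket"  # "arn:aws:s3:::my-dataset-bucket"
```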
@@ -40,7 +40,7 @@ def mnist_as_hafnia_dataset(force_redownload=False, n_samples: Optional[int] = N

  dataset_info = DatasetInfo(
  dataset_name="mnist",
- version="1.1.0",
+ version="1.0.0",
  tasks=tasks,
  reference_bibtex=textwrap.dedent("""\
  @article{lecun2010mnist,
@@ -150,7 +150,7 @@ def cifar_as_hafnia_dataset(

  dataset_info = DatasetInfo(
  dataset_name=dataset_name,
- version="1.1.0",
+ version="1.0.0",
  tasks=tasks,
  reference_bibtex=textwrap.dedent("""\
  @@TECHREPORT{Krizhevsky09learningmultiple,
@@ -268,7 +268,10 @@ def _download_and_extract_caltech_dataset(dataset_name: str, force_redownload: b
  path_output_extracted = path_tmp_output / "caltech-101"
  for gzip_file in os.listdir(path_output_extracted):
  if gzip_file.endswith(".gz"):
- extract_archive(os.path.join(path_output_extracted, gzip_file), path_output_extracted)
+ extract_archive(
+ from_path=os.path.join(path_output_extracted, gzip_file),
+ to_path=path_output_extracted,
+ )
  path_org = path_output_extracted / "101_ObjectCategories"

  elif dataset_name == "caltech-256":
@@ -12,7 +12,6 @@ from packaging.version import Version

  from hafnia.dataset import dataset_helpers
  from hafnia.dataset.dataset_names import (
- DATASET_FILENAMES_REQUIRED,
  FILENAME_ANNOTATIONS_JSONL,
  FILENAME_ANNOTATIONS_PARQUET,
  FILENAME_DATASET_INFO,
@@ -38,6 +37,7 @@ from hafnia.dataset.operations import (
  from hafnia.dataset.primitives.primitive import Primitive
  from hafnia.log import user_logger
  from hafnia.utils import progress_bar
+ from hafnia_cli.config import Config


  @dataclass
@@ -434,7 +434,7 @@ class HafniaDataset:
  aws_credentials: AwsCredentials,
  force_redownload: bool = False,
  ) -> HafniaDataset:
- from hafnia.platform.datasets import fast_copy_files_s3
+ from hafnia.platform.s5cmd_utils import fast_copy_files
 
  remote_src_paths = dataset.samples[SampleField.REMOTE_PATH].unique().to_list()
  update_rows = []
@@ -470,7 +470,7 @@
  return dataset

  environment_vars = aws_credentials.aws_credentials()
- fast_copy_files_s3(
+ fast_copy_files(
  src_paths=remote_src_paths,
  dst_paths=local_dst_paths,
  append_envs=environment_vars,
@@ -563,7 +563,7 @@
  keep_sample_data=keep_sample_data,
  )

- def write(self, path_folder: Path, add_version: bool = False, drop_null_cols: bool = True) -> None:
+ def write(self, path_folder: Path, drop_null_cols: bool = True) -> None:
  user_logger.info(f"Writing dataset to {path_folder}...")
  path_folder = path_folder.absolute()
  if not path_folder.exists():
@@ -578,18 +578,9 @@
  )
  new_paths.append(str(new_path))
  hafnia_dataset.samples = hafnia_dataset.samples.with_columns(pl.Series(new_paths).alias(SampleField.FILE_PATH))
- hafnia_dataset.write_annotations(
- path_folder=path_folder,
- drop_null_cols=drop_null_cols,
- add_version=add_version,
- )
+ hafnia_dataset.write_annotations(path_folder=path_folder, drop_null_cols=drop_null_cols)

- def write_annotations(
- dataset: HafniaDataset,
- path_folder: Path,
- drop_null_cols: bool = True,
- add_version: bool = False,
- ) -> None:
+ def write_annotations(dataset: HafniaDataset, path_folder: Path, drop_null_cols: bool = True) -> None:
  """
  Writes only the annotations files (JSONL and Parquet) to the specified folder.
  """
@@ -604,18 +595,102 @@ class HafniaDataset:
  samples = samples.drop(pl.selectors.by_dtype(pl.Null))

  # Store only relative paths in the annotations files
- absolute_paths = samples[SampleField.FILE_PATH].to_list()
- relative_paths = [str(Path(path).relative_to(path_folder)) for path in absolute_paths]
- samples = samples.with_columns(pl.Series(relative_paths).alias(SampleField.FILE_PATH))
-
+ if SampleField.FILE_PATH in samples.columns: # We drop column for remote datasets
+ absolute_paths = samples[SampleField.FILE_PATH].to_list()
+ relative_paths = [str(Path(path).relative_to(path_folder)) for path in absolute_paths]
+ samples = samples.with_columns(pl.Series(relative_paths).alias(SampleField.FILE_PATH))
+ else:
+ samples = samples.with_columns(pl.lit("").alias(SampleField.FILE_PATH))
  samples.write_ndjson(path_folder / FILENAME_ANNOTATIONS_JSONL) # Json for readability
  samples.write_parquet(path_folder / FILENAME_ANNOTATIONS_PARQUET) # Parquet for speed

- if add_version:
- path_version = path_folder / "versions" / f"{dataset.info.version}"
- path_version.mkdir(parents=True, exist_ok=True)
- for filename in DATASET_FILENAMES_REQUIRED:
- shutil.copy2(path_folder / filename, path_version / filename)
+ def delete_on_platform(dataset: HafniaDataset, interactive: bool = True) -> None:
+ """
+ Delete this dataset from the Hafnia platform.
+ This is a thin wrapper around `hafnia.platform.datasets.delete_dataset_completely_by_name`.
+
+ Args:
+ dataset (HafniaDataset): The :class:`HafniaDataset` instance to delete from the platform. The
+ dataset name is taken from `dataset.info.dataset_name`.
+ interactive (bool): If ``True``, perform the deletion in interactive mode (for example,
+ prompting the user for confirmation where supported). If ``False``,
+ run non-interactively, suitable for automated scripts or CI usage. Defaults to True.
+ """
+ from hafnia.platform.datasets import delete_dataset_completely_by_name
+
+ delete_dataset_completely_by_name(dataset_name=dataset.info.dataset_name, interactive=interactive)
+
+ def upload_to_platform(
+ dataset: HafniaDataset,
+ dataset_sample: Optional[HafniaDataset] = None,
+ allow_version_overwrite: bool = False,
+ interactive: bool = True,
+ gallery_images: Optional[Any] = None,
+ distribution_task_names: Optional[List[str]] = None,
+ cfg: Optional[Config] = None,
+ ) -> dict:
+ """
+ Upload the dataset and dataset details to the Hafnia platform.
+ This method ensures the dataset exists on the platform, synchronizes the
+ dataset files to remote storage, and uploads dataset details and optional gallery images
+ distributions.
+ Args:
+ dataset: The full :class:`HafniaDataset` instance that should be uploaded
+ to the platform.
+ dataset_sample: Optional sample :class:`HafniaDataset` used as a smaller
+ preview or subset of the main dataset on the platform. If provided,
+ it is uploaded alongside the full dataset for demonstration or
+ inspection purposes. Use only this if the sample dataset uses different
+ image files than the main dataset. Otherwise it is sufficient to just provide
+ the main dataset and the platform will create a sample automatically.
+ allow_version_overwrite: If ``True``, allows an existing dataset version
+ with the same name to be overwritten on the platform. If ``False``,
+ an error or confirmation may be required when a version conflict is
+ detected.
+ interactive: If ``True``, the upload process may prompt the user for
+ confirmation or additional input (for example when overwriting
+ existing versions). If ``False``, the upload is performed without
+ interactive prompts.
+ gallery_images: Optional collection of image identifiers or file names
+ that should be marked or displayed as gallery images for the dataset
+ on the platform. These are forwarded as ``gallery_image_names`` to
+ the platform API.
+ distribution_task_names: Optional list of task names associated with the
+ dataset that should be considered when configuring how the dataset is
+ distributed or exposed on the platform.
+ cfg: Optional :class:`hafnia_cli.config.Config` instance providing
+ configuration for platform access and storage. If not supplied, a
+ default configuration is created.
+ Returns:
+ dict: The response returned by the platform after uploading the dataset
+ details. The exact contents depend on the platform API but typically
+ include information about the created or updated dataset (such as
+ identifiers and status).
+ """
+
+ from hafnia.dataset.dataset_details_uploader import upload_dataset_details_to_platform
+ from hafnia.dataset.operations.dataset_s3_storage import sync_dataset_files_to_platform
+ from hafnia.platform.datasets import get_or_create_dataset
+
+ cfg = cfg or Config()
+ get_or_create_dataset(dataset.info.dataset_name, cfg=cfg)
+
+ sync_dataset_files_to_platform(
+ dataset=dataset,
+ sample_dataset=dataset_sample,
+ interactive=interactive,
+ allow_version_overwrite=allow_version_overwrite,
+ cfg=cfg,
+ )
+
+ response = upload_dataset_details_to_platform(
+ dataset=dataset,
+ distribution_task_names=distribution_task_names,
+ gallery_image_names=gallery_images,
+ cfg=cfg,
+ )
+
+ return response

  def __eq__(self, value) -> bool:
  if not isinstance(value, HafniaDataset):
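Note: a hedged usage sketch of the new `upload_to_platform` and `delete_on_platform` methods added above, mirroring the commented call in examples/example_hafnia_dataset.py; `custom_dataset` is assumed to be the dataset built in that example.

```python
# Upload the dataset files and the details page in one call (sketch based on the docstrings above).
response = custom_dataset.upload_to_platform(
    interactive=True,               # prompt before overwriting an existing version
    allow_version_overwrite=False,  # fail instead of silently replacing the same version
)
print(response)

# Non-interactive removal, e.g. for cleanup in automated scripts:
# custom_dataset.delete_on_platform(interactive=False)
```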
@@ -51,7 +51,7 @@ class TaskInfo(BaseModel):
  return self.class_names.index(class_name)

  # The 'primitive'-field of type 'Type[Primitive]' is not supported by pydantic out-of-the-box as
- # the 'Primitive' class is an abstract base class and for the actual primtives such as Bbox, Bitmask, Classification.
+ # the 'Primitive' class is an abstract base class and for the actual primitives such as Bbox, Bitmask, Classification.
  # Below magic functions ('ensure_primitive' and 'serialize_primitive') ensures that the 'primitive' field can
  # correctly validate and serialize sub-classes (Bbox, Classification, ...).
  @field_validator("primitive", mode="plain")
@@ -103,6 +103,8 @@ class TaskInfo(BaseModel):
  class DatasetInfo(BaseModel):
  dataset_name: str = Field(description="Name of the dataset, e.g. 'coco'")
  version: Optional[str] = Field(default=None, description="Version of the dataset")
+ dataset_title: Optional[str] = Field(default=None, description="Optional, human-readable title of the dataset")
+ description: Optional[str] = Field(default=None, description="Optional, description of the dataset")
  tasks: List[TaskInfo] = Field(default=None, description="List of tasks in the dataset")
  reference_bibtex: Optional[str] = Field(
  default=None,
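Note: a hedged sketch of the two optional `DatasetInfo` fields introduced above; the import path is an assumption based on the example snippets elsewhere in this diff.

```python
from hafnia.dataset.hafnia_dataset import DatasetInfo, TaskInfo  # assumed re-export
from hafnia.dataset.primitives import Bbox

info = DatasetInfo(
    dataset_name="custom-dataset",
    dataset_title="Custom Vehicle Dataset",                  # new in 0.5.0
    description="Vehicles annotated with bounding boxes.",   # new in 0.5.0
    version="0.0.1",
    tasks=[TaskInfo(primitive=Bbox, class_names=["car", "truck"])],
)
```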