hafnia 0.5.0.tar.gz → 0.5.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/build.yaml +3 -3
  2. {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/check_release.yaml +1 -1
  3. {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/ci_cd.yaml +1 -1
  4. {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/lint.yaml +2 -2
  5. {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/publish_docker.yaml +4 -4
  6. {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/publish_pypi.yaml +1 -1
  7. {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/tests.yaml +3 -5
  8. {hafnia-0.5.0 → hafnia-0.5.1}/PKG-INFO +2 -2
  9. {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_dataset_recipe.py +15 -87
  10. {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_hafnia_dataset.py +15 -3
  11. {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_torchvision_dataloader.py +2 -1
  12. {hafnia-0.5.0 → hafnia-0.5.1}/pyproject.toml +2 -2
  13. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_helpers.py +59 -1
  14. hafnia-0.5.1/src/hafnia/dataset/dataset_names.py +123 -0
  15. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +48 -4
  16. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/torchvision_datasets.py +2 -2
  17. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/hafnia_dataset.py +163 -69
  18. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/hafnia_dataset_types.py +142 -18
  19. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_s3_storage.py +7 -2
  20. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/table_transformations.py +0 -18
  21. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/datasets.py +32 -132
  22. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/download.py +1 -1
  23. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/s5cmd_utils.py +122 -3
  24. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/dataset_cmds.py +19 -13
  25. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/runc_cmds.py +7 -2
  26. hafnia-0.5.1/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  27. hafnia-0.5.1/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  28. hafnia-0.5.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  29. hafnia-0.5.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  30. {hafnia-0.5.0 → hafnia-0.5.1}/tests/helper_testing.py +25 -14
  31. hafnia-0.5.1/tests/helper_testing_datasets.py +73 -0
  32. {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_bring_your_own_data.py +9 -8
  33. {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_cli_integration.py +6 -3
  34. {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_dataset_merges.py +10 -7
  35. {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_dataset_recipes_with_platform.py +9 -6
  36. {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_samples.py +57 -22
  37. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_dataset_recipe_helpers.py +37 -28
  38. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_dataset_recipes.py +16 -7
  39. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_helpers.py +44 -0
  40. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_hafnia_dataset.py +8 -8
  41. hafnia-0.5.1/tests/unit/dataset/test_hafnia_dataset_types.py +61 -0
  42. {hafnia-0.5.0 → hafnia-0.5.1}/uv.lock +5 -5
  43. hafnia-0.5.0/src/hafnia/dataset/dataset_names.py +0 -230
  44. hafnia-0.5.0/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  45. hafnia-0.5.0/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  46. hafnia-0.5.0/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  47. hafnia-0.5.0/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  48. {hafnia-0.5.0 → hafnia-0.5.1}/.devcontainer/devcontainer.json +0 -0
  49. {hafnia-0.5.0 → hafnia-0.5.1}/.devcontainer/hooks/post_create +0 -0
  50. {hafnia-0.5.0 → hafnia-0.5.1}/.github/dependabot.yaml +0 -0
  51. {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/Dockerfile +0 -0
  52. {hafnia-0.5.0 → hafnia-0.5.1}/.gitignore +0 -0
  53. {hafnia-0.5.0 → hafnia-0.5.1}/.pre-commit-config.yaml +0 -0
  54. {hafnia-0.5.0 → hafnia-0.5.1}/.python-version +0 -0
  55. {hafnia-0.5.0 → hafnia-0.5.1}/.trivyignore +0 -0
  56. {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/extensions.json +0 -0
  57. {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/launch.json +0 -0
  58. {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/settings.json +0 -0
  59. {hafnia-0.5.0 → hafnia-0.5.1}/LICENSE +0 -0
  60. {hafnia-0.5.0 → hafnia-0.5.1}/README.md +0 -0
  61. {hafnia-0.5.0 → hafnia-0.5.1}/docs/cli.md +0 -0
  62. {hafnia-0.5.0 → hafnia-0.5.1}/docs/release.md +0 -0
  63. {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_logger.py +0 -0
  64. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/__init__.py +0 -0
  65. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/data/__init__.py +0 -0
  66. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/data/factory.py +0 -0
  67. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_details_uploader.py +0 -0
  68. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
  69. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/recipe_types.py +0 -0
  70. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_coco.py +0 -0
  71. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_helpers.py +0 -0
  72. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_image_classification_folder.py +0 -0
  73. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_yolo.py +0 -0
  74. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/license_types.py +0 -0
  75. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_stats.py +0 -0
  76. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_transformations.py +0 -0
  77. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/__init__.py +0 -0
  78. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/bbox.py +0 -0
  79. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/bitmask.py +0 -0
  80. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/classification.py +0 -0
  81. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/point.py +0 -0
  82. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/polygon.py +0 -0
  83. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/primitive.py +0 -0
  84. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/segmentation.py +0 -0
  85. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/utils.py +0 -0
  86. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/experiment/__init__.py +0 -0
  87. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/experiment/hafnia_logger.py +0 -0
  88. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/http.py +0 -0
  89. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/log.py +0 -0
  90. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/__init__.py +0 -0
  91. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/builder.py +0 -0
  92. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/dataset_recipe.py +0 -0
  93. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/experiment.py +0 -0
  94. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/trainer_package.py +0 -0
  95. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/torch_helpers.py +0 -0
  96. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/utils.py +0 -0
  97. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/visualizations/colors.py +0 -0
  98. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/visualizations/image_visualizations.py +0 -0
  99. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/__init__.py +0 -0
  100. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/__main__.py +0 -0
  101. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/config.py +0 -0
  102. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/consts.py +0 -0
  103. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/dataset_recipe_cmds.py +0 -0
  104. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/experiment_cmds.py +0 -0
  105. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/keychain.py +0 -0
  106. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/profile_cmds.py +0 -0
  107. {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/trainer_package_cmds.py +0 -0
  108. {hafnia-0.5.0 → hafnia-0.5.1}/tests/__init__.py +0 -0
  109. {hafnia-0.5.0 → hafnia-0.5.1}/tests/conftest.py +0 -0
  110. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/000000000632.jpg +0 -0
  111. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/000000000724.jpg +0 -0
  112. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/_annotations.coco.json +0 -0
  113. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000139.jpg +0 -0
  114. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000285.jpg +0 -0
  115. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/_annotations.coco.json +0 -0
  116. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/obj.names +0 -0
  117. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000139.jpg +0 -0
  118. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000139.txt +0 -0
  119. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000285.jpg +0 -0
  120. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000285.txt +0 -0
  121. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/images.txt +0 -0
  122. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.jpg +0 -0
  123. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.txt +0 -0
  124. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/images.txt +0 -0
  125. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_image_metadata_schema.yaml +0 -0
  126. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_dataset_transformations/test_video_storage_format_read_image.png +0 -0
  127. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[polygon].png +0 -0
  128. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_as_ints].png +0 -0
  129. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_bytes].png +0 -0
  130. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_str].png +0 -0
  131. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_from_coco_format_visualized.png +0 -0
  132. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_to_coco_format_visualized.png +0 -0
  133. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
  134. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_yolo/test_import_yolo_format_visualized.png +0 -0
  135. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  136. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  137. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  138. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  139. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  140. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  141. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  142. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  143. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  144. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  145. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  146. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  147. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
  148. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
  149. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
  150. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
  151. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
  152. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
  153. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_polygon_to_bitmask_conversion.png +0 -0
  154. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +0 -0
  155. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
  156. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/253/253925d334c002ce6662d8133535dd4c.jpg +0 -0
  157. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/b1a/b1a09f4d922f8f6904bab0c1caf172ab.jpg +0 -0
  158. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/f67/f675c8a1e862b5e00203ab888ac7fff4.jpg +0 -0
  159. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +0 -0
  160. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +0 -0
  161. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
  162. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/25c/25c3a206e7b60ab50245ee3d52d97f11.png +0 -0
  163. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/962/962fd865fdd45f169d5ca8c8f284d68d.png +0 -0
  164. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/ec6/ec60f2f4fb854b59c97e16b45c713de0.png +0 -0
  165. {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/dataset_info.json +0 -0
  166. {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_check_example_scripts.py +0 -0
  167. {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_torchvision_datasets.py +0 -0
  168. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
  169. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_coco.py +0 -0
  170. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_image_classification_folder.py +0 -0
  171. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_yolo.py +0 -0
  172. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_dataset_stats.py +0 -0
  173. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_dataset_transformations.py +0 -0
  174. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_table_transformations.py +0 -0
  175. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_colors.py +0 -0
  176. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_details_uploader.py +0 -0
  177. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_names.py +0 -0
  178. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_shape_primitives.py +0 -0
  179. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_builder.py +0 -0
  180. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_cli.py +0 -0
  181. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_hafnia_logger.py +0 -0
  182. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_utils.py +0 -0
  183. {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_visualizations.py +0 -0
@@ -21,8 +21,8 @@ jobs:
     outputs:
       package-version: ${{ steps.extract-version.outputs.package_version }}
     steps:
-      - uses: actions/checkout@v5.0.0
-      - uses: actions/setup-python@v6.0.0
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
        with:
          python-version-file: ${{ inputs.python-version-file }}
 
@@ -49,7 +49,7 @@ jobs:
        run: uv build
 
      - name: Upload package artifact
-        uses: actions/upload-artifact@v5.0.0
+        uses: actions/upload-artifact@v6.0.0
        with:
          name: python-package
          path: dist/
@@ -20,7 +20,7 @@ jobs:
      make_release: ${{ steps.check_release.outputs.make_release }}
    steps:
      - name: Download package artifact
-        uses: actions/download-artifact@v6.0.0
+        uses: actions/download-artifact@v7.0.0
        with:
          name: python-package
          path: dist/
@@ -19,7 +19,7 @@ jobs:
    runs-on: ubuntu-latest
    needs: lint
    steps:
-      - uses: actions/checkout@v5.0.0
+      - uses: actions/checkout@v6.0.1
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@0.33.1
        with:
@@ -10,8 +10,8 @@ jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v5.0.0
-      - uses: actions/setup-python@v6.0.0
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
        with:
          python-version-file: ${{ inputs.python-version-file }}
      - uses: pre-commit/action@v3.0.1
@@ -24,14 +24,14 @@ jobs:
  build:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v5.0.0
-      - uses: actions/setup-python@v6.0.0
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
        id: python
        with:
          python-version-file: ${{ inputs.python-version-file }}
 
      - name: Download package artifact
-        uses: actions/download-artifact@v6.0.0
+        uses: actions/download-artifact@v7.0.0
        with:
          name: python-package
          path: dist/
@@ -47,7 +47,7 @@ jobs:
            echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
          fi
      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v5.1.0
+        uses: aws-actions/configure-aws-credentials@v5.1.1
        with:
          role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
          aws-region: ${{ steps.env-vars.outputs.aws_region }}
@@ -17,7 +17,7 @@ jobs:
      contents: read
    steps:
      - name: Download package artifact
-        uses: actions/download-artifact@v6.0.0
+        uses: actions/download-artifact@v7.0.0
        with:
          name: python-package
          path: dist/
@@ -15,8 +15,8 @@ jobs:
      matrix:
        os: [ubuntu-latest, windows-latest]
    steps:
-      - uses: actions/checkout@v5.0.0
-      - uses: actions/setup-python@v6.0.0
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
        with:
          python-version-file: ${{ inputs.python-version-file }}
      - name: Install uv
@@ -32,9 +32,7 @@ jobs:
        run: |
          mkdir -p ~/.hafnia
          echo "$HAFNIA_CONFIG" | jq . > ~/.hafnia/config.json
-      - name: Check hafnia configured
-        run: uv run hafnia profile active
-      - name: Check hafnia by download
+      - name: Check hafnia configuration by download
        run: uv run hafnia dataset download mnist --force
      - name: Run tests
        run: uv run pytest tests
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.5.0
+Version: 0.5.1
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -10,7 +10,7 @@ Requires-Dist: click>=8.1.8
 Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
 Requires-Dist: keyring>=25.6.0
-Requires-Dist: mcp>=1.16.0
+Requires-Dist: mcp>=1.23.0
 Requires-Dist: mlflow>=3.4.0
 Requires-Dist: more-itertools>=10.7.0
 Requires-Dist: opencv-python-headless>=4.11.0.86
@@ -5,13 +5,12 @@ from rich import print as rprint
 from hafnia import utils
 from hafnia.dataset.dataset_names import OPS_REMOVE_CLASS
 from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe
-from hafnia.dataset.dataset_recipe.recipe_transforms import (
-    SelectSamples,
-    Shuffle,
-    SplitsByRatios,
-)
 from hafnia.dataset.hafnia_dataset import HafniaDataset
 
+COCO_VERSION = "1.0.0"
+MIDWEST_VERSION = "1.0.0"
+MNIST_VERSION = "1.0.0"
+
 ### Introducing DatasetRecipe ###
 # A DatasetRecipe is a recipe for the dataset you want to create.
 # The recipe itself is not executed - this is just a specification of the dataset you want!
@@ -19,10 +18,10 @@ from hafnia.dataset.hafnia_dataset import HafniaDataset
 # The 'DatasetRecipe' interface is similar to the 'HafniaDataset' interface.
 # To demonstrate, we will first create a dataset with the regular 'HafniaDataset' interface.
 # This line will get the "mnist" dataset, shuffle it, and select 20 samples.
-dataset = HafniaDataset.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+dataset = HafniaDataset.from_name(name="mnist", version=MNIST_VERSION).shuffle().select_samples(n_samples=20)
 
 # Now the same dataset is created using the 'DatasetRecipe' interface.
-dataset_recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+dataset_recipe = DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION).shuffle().select_samples(n_samples=20)
 dataset = dataset_recipe.build()
 # Note that the interface is similar, but to actually create the dataset you need to call `build()` on the recipe.
 
@@ -65,8 +64,8 @@ if utils.is_hafnia_configured():  # First ensure you are connected to the hafnia
 # Example: 'DatasetRecipe' by merging multiple dataset recipes
 dataset_recipe = DatasetRecipe.from_merger(
     recipes=[
-        DatasetRecipe.from_name(name="mnist"),
-        DatasetRecipe.from_name(name="mnist"),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
     ]
 )
 
@@ -75,14 +74,14 @@ dataset_recipe = DatasetRecipe.from_merger(
     recipes=[
         DatasetRecipe.from_merger(
             recipes=[
-                DatasetRecipe.from_name(name="mnist"),
-                DatasetRecipe.from_name(name="mnist"),
+                DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
+                DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
             ]
         ),
         DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
         .select_samples(n_samples=30)
         .splits_by_ratios(split_ratios={"train": 0.8, "val": 0.1, "test": 0.1}),
-        DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION).select_samples(n_samples=20).shuffle(),
     ]
 )
 
@@ -99,14 +98,14 @@ print(dataset_recipe.as_json_str())  # as a JSON string
 # 1) The first step is to use the regular 'HafniaDataset' interface to investigate and understand the datasets
 
 # 1a) Explore 'coco-2017'
-coco = HafniaDataset.from_name("coco-2017")
+coco = HafniaDataset.from_name("coco-2017", version=COCO_VERSION)
 coco.print_stats()  # Print dataset statistics
 coco_class_names = coco.info.get_task_by_primitive("Bbox").class_names  # Get the class names for the bbox task
 # You will notice coco has 80 classes including 'person' and various vehicle classes such as 'car', 'bus', 'truck', etc.
 # but also many unrelated classes such as 'toaster', 'hair drier', etc.
 
 # 1b) Explore 'midwest-vehicle-detection'
-midwest = HafniaDataset.from_name("midwest-vehicle-detection")
+midwest = HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
 midwest.print_stats()  # Print dataset statistics
 midwest_class_names = midwest.info.get_task_by_primitive("Bbox").class_names
 # You will also notice midwest has similar classes, but they are named differently, e.g. 'Persons',
@@ -144,10 +143,10 @@ merged_dataset.print_stats()
 # 3) Once you have verified operations using the 'HafniaDataset' interface, you can convert
 # the operations to a single 'DatasetRecipe'
 merged_recipe = DatasetRecipe.from_merge(
-    recipe0=DatasetRecipe.from_name("coco-2017").class_mapper(
+    recipe0=DatasetRecipe.from_name("coco-2017", version=COCO_VERSION).class_mapper(
         class_mapping=mappings_coco, method="remove_undefined", task_name="object_detection"
     ),
-    recipe1=DatasetRecipe.from_name("midwest-vehicle-detection").class_mapper(
+    recipe1=DatasetRecipe.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION).class_mapper(
         class_mapping=mapping_midwest, task_name="object_detection"
     ),
 ).select_samples_by_class_name(name=["Person", "Vehicle"], task_name="object_detection")
@@ -176,74 +175,3 @@ if utils.is_hafnia_configured():
     # 6) Monitor and manage your experiments
     # 6a) View experiments using the web platform https://staging02.mdi.milestonesys.com/training-aas/experiments
     # 6b) Or use the CLI: 'hafnia experiment ls'
-### DatasetRecipe Implicit Form ###
-# Below we demonstrate the difference between implicit and explicit forms of dataset recipes.
-# Example: Get dataset by name with implicit and explicit forms
-recipe_implicit_form = "mnist"
-recipe_explicit_form = DatasetRecipe.from_name(name="mnist")
-
-# The implicit form can now be loaded and built as a dataset
-dataset_implicit = DatasetRecipe.from_implicit_form(recipe_implicit_form).build()
-# Or directly as a dataset
-dataset_implicit = HafniaDataset.from_recipe(recipe_implicit_form)
-
-
-# Example: Get dataset from path with implicit and explicit forms:
-recipe_implicit_form = Path(".data/datasets/mnist")
-recipe_explicit_form = DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
-
-# Example: Merge datasets with implicit and explicit forms
-recipe_implicit_form = ("mnist", "mnist")
-recipe_explicit_form = DatasetRecipe.from_merger(
-    recipes=[
-        DatasetRecipe.from_name(name="mnist"),
-        DatasetRecipe.from_name(name="mnist"),
-    ]
-)
-
-# Example: Define a dataset with transformations using implicit and explicit forms
-recipe_implicit_form = ["mnist", SelectSamples(n_samples=20), Shuffle()]
-recipe_explicit_form = DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle()
-
-
-# Example: Complex nested example with implicit vs explicit forms
-# Implicit form of a complex dataset recipe
-split_ratio = {"train": 0.8, "val": 0.1, "test": 0.1}
-recipe_implicit_complex = (
-    ("mnist", "mnist"),
-    [Path(".data/datasets/mnist"), SelectSamples(n_samples=30), SplitsByRatios(split_ratios=split_ratio)],
-    ["mnist", SelectSamples(n_samples=20), Shuffle()],
-)
-
-# Explicit form of the same complex dataset recipe
-recipe_explicit_complex = DatasetRecipe.from_merger(
-    recipes=[
-        DatasetRecipe.from_merger(
-            recipes=[
-                DatasetRecipe.from_name(name="mnist"),
-                DatasetRecipe.from_name(name="mnist"),
-            ]
-        ),
-        DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
-        .select_samples(n_samples=30)
-        .splits_by_ratios(split_ratios=split_ratio),
-        DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
-    ]
-)
-
-# The implicit form uses the following rules:
-# str: Will get a dataset by name -> In explicit form it becomes 'DatasetRecipe.from_name'
-# Path: Will get a dataset from path -> In explicit form it becomes 'DatasetRecipe.from_path'
-# tuple: Will merge datasets specified in the tuple -> In explicit form it becomes 'DatasetRecipe.from_merger'
-# list: Will define a dataset followed by a list of transformations -> In explicit form it becomes chained method calls
-# Generally, we recommend using the explicit form over the implicit form when multiple datasets and transformations are involved.
-
-
-# To convert from implicit to explicit recipe form, you can use the `from_implicit_form` method.
-explicit_recipe_from_implicit = DatasetRecipe.from_implicit_form(recipe_implicit_complex)
-rprint("Converted explicit recipe:")
-rprint(explicit_recipe_from_implicit)
-
-# Verify that the conversion produces the same result
-assert explicit_recipe_from_implicit == recipe_explicit_complex
-rprint("Conversion successful - recipes are equivalent!")
@@ -13,6 +13,10 @@ from hafnia.dataset.primitives.bitmask import Bitmask
 from hafnia.dataset.primitives.classification import Classification
 from hafnia.dataset.primitives.polygon import Polygon
 
+MNIST_VERSION = "1.0.0"
+MIDWEST_VERSION = "1.0.0"
+COCO_VERSION = "1.0.0"
+
 # First ensure that you have the Hafnia CLI installed and configured.
 # You can install it via pip:
 # pip install hafnia
@@ -20,7 +24,11 @@ from hafnia.dataset.primitives.polygon import Polygon
 # hafnia configure
 
 # Load sample dataset
-dataset = HafniaDataset.from_name("mnist")
+dataset = HafniaDataset.from_name("mnist", version=MNIST_VERSION)
+
+# Use 'from_name' without version-argument to get available versions:
+# dataset = HafniaDataset.from_name("mnist")
+# >>> ValueError: Version must be specified. Available versions: ['1.0.0', '0.0.1']
 
 # Dataset information is stored in 'dataset.info'
 rprint(dataset.info)
@@ -76,7 +84,11 @@ dataset_mapped = dataset.class_mapper(class_mapping=class_mapping_strict)
 dataset_mapped.print_class_distribution()
 
 # Support Chaining Operations (load, shuffle, select samples)
-dataset = HafniaDataset.from_name("midwest-vehicle-detection").shuffle(seed=42).select_samples(n_samples=10)
+dataset = (
+    HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
+    .shuffle(seed=42)
+    .select_samples(n_samples=10)
+)
 
 
 # Write dataset to disk
@@ -88,7 +100,7 @@ dataset.write(path_dataset)
 dataset_again = HafniaDataset.from_path(path_dataset)
 
 ## Dataset importers and exporters ##
-dataset_od = HafniaDataset.from_name("coco-2017").select_samples(n_samples=5, seed=42)
+dataset_od = HafniaDataset.from_name("coco-2017", version=COCO_VERSION).select_samples(n_samples=5, seed=42)
 
 # Export/import dataset to YOLO format
 path_yolo_format = Path(".data/tmp/yolo_dataset")
@@ -12,7 +12,8 @@ from hafnia.dataset.hafnia_dataset import HafniaDataset
 if __name__ == "__main__":
     torch.manual_seed(1)
     # Load Hugging Face dataset
-    dataset = HafniaDataset.from_name("midwest-vehicle-detection")
+    MIDWEST_VERSION = "1.0.0"
+    dataset = HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
 
     # Define transforms
     train_transforms = v2.Compose(
@@ -1,6 +1,6 @@
 [project]
 name = "hafnia"
-version = "0.5.0"
+version = "0.5.1"
 description = "Python SDK for communication with Hafnia platform."
 readme = "README.md"
 authors = [
@@ -28,7 +28,7 @@ dependencies = [
     "xxhash>=3.5.0",
     "mlflow>=3.4.0",
     "sagemaker-mlflow>=0.1.0",
-    "mcp>=1.16.0",
+    "mcp>=1.23.0",
 ]
 
 [dependency-groups]
@@ -3,12 +3,70 @@ import math
 import random
 import shutil
 from pathlib import Path
-from typing import Dict, List
+from typing import Dict, List, Optional, Tuple
 
 import numpy as np
 import xxhash
+from packaging.version import InvalidVersion, Version
 from PIL import Image
 
+from hafnia.log import user_logger
+
+
+def is_valid_version_string(version: Optional[str], allow_none: bool = False, allow_latest: bool = False) -> bool:
+    if allow_none and version is None:
+        return True
+    if allow_latest and version == "latest":
+        return True
+    return version_from_string(version, raise_error=False) is not None
+
+
+def version_from_string(version: Optional[str], raise_error: bool = True) -> Optional[Version]:
+    if version is None:
+        if raise_error:
+            raise ValueError("Version is 'None'. A valid version string is required e.g '1.0.0'")
+        return None
+
+    try:
+        version_casted = Version(version)
+    except (InvalidVersion, TypeError) as e:
+        if raise_error:
+            raise ValueError(f"Invalid version string/type: {version}") from e
+        return None
+
+    # Check if version is semantic versioning (MAJOR.MINOR.PATCH)
+    if len(version_casted.release) < 3:
+        if raise_error:
+            raise ValueError(f"Version string '{version}' is not semantic versioning (MAJOR.MINOR.PATCH)")
+        return None
+    return version_casted
+
+
+def dataset_name_and_version_from_string(
+    string: str,
+    resolve_missing_version: bool = True,
+) -> Tuple[str, Optional[str]]:
+    if not isinstance(string, str):
+        raise TypeError(f"'{type(string)}' for '{string}' is an unsupported type. Expected 'str' e.g 'mnist:1.0.0'")
+
+    parts = string.split(":")
+    if len(parts) == 1:
+        dataset_name = parts[0]
+        if resolve_missing_version:
+            version = "latest"  # Default to 'latest' if version is missing. This will be resolved to a specific version later.
+            user_logger.info(f"Version is missing in dataset name: {string}. Defaulting to version='latest'.")
+        else:
+            raise ValueError(f"Version is missing in dataset name: {string}. Use 'name:version'")
+    elif len(parts) == 2:
+        dataset_name, version = parts
+    else:
+        raise ValueError(f"Invalid dataset name format: {string}. Use 'name' or 'name:version' ")
+
+    if not is_valid_version_string(version, allow_none=True, allow_latest=True):
+        raise ValueError(f"Invalid version string: {version}. Use semantic versioning e.g. '1.0.0' or 'latest'")
+
+    return dataset_name, version
+
 
 def create_split_name_list_from_ratios(split_ratios: Dict[str, float], n_items: int, seed: int = 42) -> List[str]:
     samples_per_split = split_sizes_from_ratios(split_ratios=split_ratios, n_items=n_items)
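Note (reviewer sketch, not part of the diff): a minimal illustration of how the version helpers added above behave, assuming they are imported from hafnia.dataset.dataset_helpers as defined in this hunk.

from hafnia.dataset.dataset_helpers import (
    dataset_name_and_version_from_string,
    is_valid_version_string,
)

# "name:version" strings are split into their two parts
assert dataset_name_and_version_from_string("mnist:1.0.0") == ("mnist", "1.0.0")
# A bare name defaults to "latest" (and logs an info message) when resolve_missing_version=True
assert dataset_name_and_version_from_string("mnist") == ("mnist", "latest")
# Versions must be full MAJOR.MINOR.PATCH; "latest" is only accepted when allow_latest=True
assert is_valid_version_string("1.0.0")
assert not is_valid_version_string("1.0")
assert is_valid_version_string("latest", allow_latest=True)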
@@ -0,0 +1,123 @@
+from enum import Enum
+from typing import List
+
+FILENAME_RECIPE_JSON = "recipe.json"
+FILENAME_DATASET_INFO = "dataset_info.json"
+FILENAME_ANNOTATIONS_JSONL = "annotations.jsonl"
+FILENAME_ANNOTATIONS_PARQUET = "annotations.parquet"
+
+DATASET_FILENAMES_REQUIRED = [
+    FILENAME_DATASET_INFO,
+    FILENAME_ANNOTATIONS_JSONL,
+    FILENAME_ANNOTATIONS_PARQUET,
+]
+
+
+class DeploymentStage(Enum):
+    STAGING = "staging"
+    PRODUCTION = "production"
+
+
+TAG_IS_SAMPLE = "sample"
+
+OPS_REMOVE_CLASS = "__REMOVE__"
+
+
+class PrimitiveField:
+    CLASS_NAME: str = "class_name"  # Name of the class this primitive is associated with, e.g. "car" for Bbox
+    CLASS_IDX: str = "class_idx"  # Index of the class this primitive is associated with, e.g. 0 for "car" if it is the first class  # noqa: E501
+    OBJECT_ID: str = "object_id"  # Unique identifier for the object, e.g. "12345123"
+    CONFIDENCE: str = "confidence"  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
+
+    META: str = "meta"  # Contains metadata about each primitive, e.g. attributes color, occluded, iscrowd, etc.
+    TASK_NAME: str = "task_name"  # Name of the task this primitive is associated with, e.g. "bboxes" for Bbox
+
+    @staticmethod
+    def fields() -> List[str]:
+        """
+        Returns a list of expected field names for primitives.
+        """
+        return [
+            PrimitiveField.CLASS_NAME,
+            PrimitiveField.CLASS_IDX,
+            PrimitiveField.OBJECT_ID,
+            PrimitiveField.CONFIDENCE,
+            PrimitiveField.META,
+            PrimitiveField.TASK_NAME,
+        ]
+
+
+class SampleField:
+    FILE_PATH: str = "file_path"
+    HEIGHT: str = "height"
+    WIDTH: str = "width"
+    SPLIT: str = "split"
+    TAGS: str = "tags"
+
+    CLASSIFICATIONS: str = "classifications"
+    BBOXES: str = "bboxes"
+    BITMASKS: str = "bitmasks"
+    POLYGONS: str = "polygons"
+
+    STORAGE_FORMAT: str = "storage_format"  # E.g. "image", "video", "zip"
+    COLLECTION_INDEX: str = "collection_index"
+    COLLECTION_ID: str = "collection_id"
+    REMOTE_PATH: str = "remote_path"  # Path to the file in remote storage, e.g. S3
+    SAMPLE_INDEX: str = "sample_index"
+
+    ATTRIBUTION: str = "attribution"  # Attribution for the sample (image/video), e.g. creator, license, source, etc.
+    META: str = "meta"
+    DATASET_NAME: str = "dataset_name"
+
+
+class StorageFormat:
+    IMAGE: str = "image"
+    VIDEO: str = "video"
+    ZIP: str = "zip"
+
+
+class SplitName:
+    TRAIN: str = "train"
+    VAL: str = "validation"
+    TEST: str = "test"
+    UNDEFINED: str = "UNDEFINED"
+
+    @staticmethod
+    def valid_splits() -> List[str]:
+        return [SplitName.TRAIN, SplitName.VAL, SplitName.TEST]
+
+    @staticmethod
+    def all_split_names() -> List[str]:
+        return [*SplitName.valid_splits(), SplitName.UNDEFINED]
+
+    @staticmethod
+    def map_split_name(potential_split_name: str, strict: bool = True) -> str:
+        normalized = potential_split_name.strip().lower()
+
+        if normalized in SPLIT_NAME_MAPPINGS:
+            return SPLIT_NAME_MAPPINGS[normalized]
+
+        if strict:
+            raise ValueError(f"Unrecognized split name: {potential_split_name}")
+        else:
+            return SplitName.UNDEFINED
+
+
+SPLIT_NAME_MAPPINGS = {
+    # Train variations
+    "train": SplitName.TRAIN,
+    "training": SplitName.TRAIN,
+    # Validation variations
+    "validation": SplitName.VAL,
+    "val": SplitName.VAL,
+    "valid": SplitName.VAL,
+    # Test variations
+    "test": SplitName.TEST,
+    "testing": SplitName.TEST,
+}
+
+
+class DatasetVariant(Enum):
+    DUMP = "dump"
+    SAMPLE = "sample"
+    HIDDEN = "hidden"
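Note (reviewer sketch, not part of the diff): a short illustration of the split-name normalization in the new dataset_names.py, based only on the SplitName class and SPLIT_NAME_MAPPINGS table above.

from hafnia.dataset.dataset_names import SplitName

# Common aliases are normalized (case-insensitive, whitespace-stripped)
assert SplitName.map_split_name("Valid") == SplitName.VAL          # -> "validation"
assert SplitName.map_split_name(" TRAINING ") == SplitName.TRAIN   # -> "train"
# Unknown names raise ValueError by default, or map to UNDEFINED with strict=False
assert SplitName.map_split_name("dev", strict=False) == SplitName.UNDEFINED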
@@ -11,14 +11,19 @@ from pydantic import (
 )
 
 from hafnia import utils
+from hafnia.dataset.dataset_helpers import dataset_name_and_version_from_string
 from hafnia.dataset.dataset_recipe import recipe_transforms
 from hafnia.dataset.dataset_recipe.recipe_types import (
     RecipeCreation,
     RecipeTransform,
     Serializable,
 )
-from hafnia.dataset.hafnia_dataset import HafniaDataset
+from hafnia.dataset.hafnia_dataset import (
+    HafniaDataset,
+    available_dataset_versions_from_name,
+)
 from hafnia.dataset.primitives.primitive import Primitive
+from hafnia.log import user_logger
 
 
 class DatasetRecipe(Serializable):
@@ -41,8 +46,31 @@ class DatasetRecipe(Serializable):
 
     ### Creation Methods (using the 'from_X' )###
     @staticmethod
-    def from_name(name: str, force_redownload: bool = False, download_files: bool = True) -> DatasetRecipe:
-        creation = FromName(name=name, force_redownload=force_redownload, download_files=download_files)
+    def from_name(
+        name: str,
+        version: Optional[str] = None,
+        force_redownload: bool = False,
+        download_files: bool = True,
+    ) -> DatasetRecipe:
+        if version == "latest":
+            user_logger.info(
+                f"The dataset '{name}' in a dataset recipe uses 'latest' as version. For dataset recipes the "
+                "version is pinned to a specific version. Consider specifying a specific version to ensure "
+                "reproducibility of your experiments. "
+            )
+            available_versions = available_dataset_versions_from_name(name)
+            version = str(max(available_versions))
+        if version is None:
+            available_versions = available_dataset_versions_from_name(name)
+            str_versions = ", ".join([str(v) for v in available_versions])
+            raise ValueError(
+                f"Version must be specified when creating a DatasetRecipe from name. "
+                f"Available versions are: {str_versions}"
+            )
+
+        creation = FromName(
+            name=name, version=version, force_redownload=force_redownload, download_files=download_files
+        )
         return DatasetRecipe(creation=creation)
 
@@ -125,6 +153,21 @@ class DatasetRecipe(Serializable):
         recipe_id = recipe["id"]
         return DatasetRecipe.from_recipe_id(recipe_id)
 
+    @staticmethod
+    def from_name_and_version_string(string: str, resolve_missing_version: bool = False) -> "DatasetRecipe":
+        """
+        Validates and converts a dataset name and version string (name:version) to a DatasetRecipe.from_name recipe.
+        If version is missing and 'resolve_missing_version' is True, it will default to 'latest'.
+        If resolve_missing_version is False, it will raise an error if version is missing.
+        """
+
+        dataset_name, version = dataset_name_and_version_from_string(
+            string=string,
+            resolve_missing_version=resolve_missing_version,
+        )
+
+        return DatasetRecipe.from_name(name=dataset_name, version=version)
+
     @staticmethod
     def from_implicit_form(recipe: Any) -> DatasetRecipe:
         """
@@ -180,7 +223,7 @@ class DatasetRecipe(Serializable):
             return recipe
 
         if isinstance(recipe, str):  # str-type is convert to DatasetFromName
-            return DatasetRecipe.from_name(name=recipe)
+            return DatasetRecipe.from_name_and_version_string(string=recipe, resolve_missing_version=True)
 
         if isinstance(recipe, Path):  # Path-type is convert to DatasetFromPath
@@ -409,6 +452,7 @@ class FromPath(RecipeCreation):
 
 class FromName(RecipeCreation):
     name: str
+    version: Optional[str] = None
     force_redownload: bool = False
     download_files: bool = True
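Note (reviewer sketch, not part of the diff): a minimal illustration of the version-aware recipe creation added above, using only names defined in these hunks.

from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

# Pin the dataset version explicitly in the recipe (recommended for reproducibility)
recipe = DatasetRecipe.from_name(name="mnist", version="1.0.0")

# Equivalent, parsed from a "name:version" string
recipe = DatasetRecipe.from_name_and_version_string("mnist:1.0.0")

# DatasetRecipe.from_name(name="mnist") without a version raises a ValueError that lists the
# available versions, while the implicit string form ("mnist") goes through
# from_name_and_version_string(resolve_missing_version=True) and resolves to the latest version.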