hafnia 0.4.1__tar.gz → 0.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/Dockerfile +4 -1
  2. {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/build.yaml +5 -1
  3. {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/ci_cd.yaml +7 -6
  4. {hafnia-0.4.1 → hafnia-0.4.3}/.vscode/launch.json +6 -6
  5. {hafnia-0.4.1 → hafnia-0.4.3}/PKG-INFO +2 -2
  6. {hafnia-0.4.1 → hafnia-0.4.3}/README.md +1 -1
  7. {hafnia-0.4.1 → hafnia-0.4.3}/examples/example_hafnia_dataset.py +11 -6
  8. {hafnia-0.4.1 → hafnia-0.4.3}/examples/example_logger.py +1 -1
  9. {hafnia-0.4.1 → hafnia-0.4.3}/pyproject.toml +3 -3
  10. hafnia-0.4.1/src/hafnia/dataset/dataset_upload_helper.py → hafnia-0.4.3/src/hafnia/dataset/dataset_details_uploader.py +115 -192
  11. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_names.py +26 -0
  12. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +3 -3
  13. hafnia-0.4.3/src/hafnia/dataset/format_conversions/format_coco.py +490 -0
  14. hafnia-0.4.3/src/hafnia/dataset/format_conversions/format_helpers.py +33 -0
  15. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/format_conversions/format_image_classification_folder.py +95 -14
  16. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/format_conversions/format_yolo.py +115 -25
  17. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/format_conversions/torchvision_datasets.py +10 -8
  18. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/hafnia_dataset.py +20 -466
  19. hafnia-0.4.3/src/hafnia/dataset/hafnia_dataset_types.py +477 -0
  20. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/license_types.py +4 -4
  21. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/operations/dataset_stats.py +3 -3
  22. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/operations/dataset_transformations.py +14 -17
  23. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/operations/table_transformations.py +20 -13
  24. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/bbox.py +6 -2
  25. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/bitmask.py +21 -46
  26. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/classification.py +1 -1
  27. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/polygon.py +43 -2
  28. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/primitive.py +1 -1
  29. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/segmentation.py +1 -1
  30. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/experiment/hafnia_logger.py +13 -4
  31. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/datasets.py +3 -4
  32. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/torch_helpers.py +48 -4
  33. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/utils.py +35 -1
  34. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/visualizations/image_visualizations.py +3 -1
  35. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/__main__.py +2 -2
  36. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/config.py +2 -2
  37. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/dataset_cmds.py +2 -2
  38. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/dataset_recipe_cmds.py +1 -1
  39. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/experiment_cmds.py +1 -1
  40. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/profile_cmds.py +2 -2
  41. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/runc_cmds.py +1 -1
  42. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/trainer_package_cmds.py +2 -2
  43. {hafnia-0.4.1 → hafnia-0.4.3}/tests/conftest.py +7 -1
  44. hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/train/000000000724.jpg +0 -0
  45. hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/train/_annotations.coco.json +2967 -0
  46. hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/valid/_annotations.coco.json +1423 -0
  47. hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train/data/000000000139.jpg +0 -0
  48. hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train/data/000000000285.jpg +0 -0
  49. {hafnia-0.4.1/tests/data/dataset_formats/format_yolo → hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train}/images.txt +0 -1
  50. hafnia-0.4.3/tests/data/dataset_formats/format_yolo/validation/data/000000000632.jpg +0 -0
  51. hafnia-0.4.3/tests/data/dataset_formats/format_yolo/validation/images.txt +1 -0
  52. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/dataset_image_metadata_schema.yaml +19 -4
  53. hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[polygon].png +0 -0
  54. hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_as_ints].png +0 -0
  55. hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_bytes].png +0 -0
  56. hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_str].png +0 -0
  57. hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_from_coco_format_visualized.png +0 -0
  58. hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_to_coco_format_visualized.png +0 -0
  59. hafnia-0.4.3/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
  60. hafnia-0.4.3/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  61. hafnia-0.4.3/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  62. hafnia-0.4.3/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  63. hafnia-0.4.3/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  64. hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
  65. hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
  66. hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
  67. hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
  68. hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
  69. hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
  70. hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_polygon_to_bitmask_conversion.png +0 -0
  71. hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +3 -0
  72. hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
  73. hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/data/253/253925d334c002ce6662d8133535dd4c.jpg +0 -0
  74. hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/data/b1a/b1a09f4d922f8f6904bab0c1caf172ab.jpg +0 -0
  75. hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/data/f67/f675c8a1e862b5e00203ab888ac7fff4.jpg +0 -0
  76. hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +184 -0
  77. hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +3 -0
  78. hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
  79. hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/data/25c/25c3a206e7b60ab50245ee3d52d97f11.png +0 -0
  80. hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/data/962/962fd865fdd45f169d5ca8c8f284d68d.png +0 -0
  81. hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/data/ec6/ec60f2f4fb854b59c97e16b45c713de0.png +0 -0
  82. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/micro_test_datasets/micro-tiny-dataset/dataset_info.json +17 -1
  83. {hafnia-0.4.1 → hafnia-0.4.3}/tests/helper_testing.py +18 -6
  84. {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_cli_integration.py +1 -1
  85. {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_dataset_recipes_with_platform.py +1 -1
  86. {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_samples.py +9 -3
  87. hafnia-0.4.3/tests/unit/dataset/format_conversions/test_format_coco.py +153 -0
  88. hafnia-0.4.1/tests/unit/dataset/format_conversions/test_image_classification_directory.py → hafnia-0.4.3/tests/unit/dataset/format_conversions/test_format_image_classification_folder.py +6 -9
  89. hafnia-0.4.3/tests/unit/dataset/format_conversions/test_format_yolo.py +102 -0
  90. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/operations/test_dataset_transformations.py +2 -1
  91. hafnia-0.4.3/tests/unit/dataset/test_dataset_details_uploader.py +29 -0
  92. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_dataset_names.py +1 -1
  93. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_hafnia_dataset.py +3 -19
  94. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_shape_primitives.py +2 -2
  95. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_cli.py +9 -9
  96. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_hafnia_logger.py +2 -2
  97. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_visualizations.py +15 -14
  98. {hafnia-0.4.1 → hafnia-0.4.3}/uv.lock +1 -1
  99. hafnia-0.4.1/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
  100. hafnia-0.4.1/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  101. hafnia-0.4.1/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  102. hafnia-0.4.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  103. hafnia-0.4.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  104. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
  105. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
  106. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
  107. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
  108. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
  109. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
  110. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +0 -3
  111. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
  112. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/657/657dff54d5175e2ae9f4b9629cf57646.jpg +0 -0
  113. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/825/825fa2d2d9416694b8e81a47ca38f580.jpg +0 -0
  114. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/aa3/aa3cc40b5cde88e5bd189c0b3e6c223c.jpg +0 -0
  115. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +0 -325
  116. hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +0 -3
  117. hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
  118. hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/data/2da/2da1d8dbf2b60bdab8dff1d7f5c2dfb5.png +0 -0
  119. hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/data/3dd/3ddec2275a02e79e3251d85443622e4c.png +0 -0
  120. hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/data/4d8/4d8450b045e60e8f3657ababa44af9b6.png +0 -0
  121. hafnia-0.4.1/tests/unit/dataset/format_conversions/test_format_yolo.py +0 -85
  122. {hafnia-0.4.1 → hafnia-0.4.3}/.devcontainer/devcontainer.json +0 -0
  123. {hafnia-0.4.1 → hafnia-0.4.3}/.devcontainer/hooks/post_create +0 -0
  124. {hafnia-0.4.1 → hafnia-0.4.3}/.github/dependabot.yaml +0 -0
  125. {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/check_release.yaml +0 -0
  126. {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/lint.yaml +0 -0
  127. {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/publish_docker.yaml +0 -0
  128. {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/publish_pypi.yaml +0 -0
  129. {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/tests.yaml +0 -0
  130. {hafnia-0.4.1 → hafnia-0.4.3}/.gitignore +0 -0
  131. {hafnia-0.4.1 → hafnia-0.4.3}/.pre-commit-config.yaml +0 -0
  132. {hafnia-0.4.1 → hafnia-0.4.3}/.python-version +0 -0
  133. {hafnia-0.4.1 → hafnia-0.4.3}/.trivyignore +0 -0
  134. {hafnia-0.4.1 → hafnia-0.4.3}/.vscode/extensions.json +0 -0
  135. {hafnia-0.4.1 → hafnia-0.4.3}/.vscode/settings.json +0 -0
  136. {hafnia-0.4.1 → hafnia-0.4.3}/LICENSE +0 -0
  137. {hafnia-0.4.1 → hafnia-0.4.3}/docs/cli.md +0 -0
  138. {hafnia-0.4.1 → hafnia-0.4.3}/docs/release.md +0 -0
  139. {hafnia-0.4.1 → hafnia-0.4.3}/examples/example_dataset_recipe.py +0 -0
  140. {hafnia-0.4.1 → hafnia-0.4.3}/examples/example_torchvision_dataloader.py +0 -0
  141. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/__init__.py +0 -0
  142. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/data/__init__.py +0 -0
  143. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/data/factory.py +0 -0
  144. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_helpers.py +0 -0
  145. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
  146. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_recipe/recipe_types.py +0 -0
  147. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/__init__.py +0 -0
  148. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/point.py +0 -0
  149. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/utils.py +0 -0
  150. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/experiment/__init__.py +0 -0
  151. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/http.py +0 -0
  152. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/log.py +0 -0
  153. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/__init__.py +0 -0
  154. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/builder.py +0 -0
  155. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/dataset_recipe.py +0 -0
  156. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/download.py +0 -0
  157. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/experiment.py +0 -0
  158. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/trainer_package.py +0 -0
  159. {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/visualizations/colors.py +0 -0
  160. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/__init__.py +0 -0
  161. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/consts.py +0 -0
  162. {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/keychain.py +0 -0
  163. {hafnia-0.4.1 → hafnia-0.4.3}/tests/__init__.py +0 -0
  164. {hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data → hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/train}/000000000632.jpg +0 -0
  165. {hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data → hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/valid}/000000000139.jpg +0 -0
  166. {hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data → hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/valid}/000000000285.jpg +0 -0
  167. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/dataset_formats/format_yolo/obj.names +0 -0
  168. {hafnia-0.4.1/tests/data/dataset_formats/format_yolo → hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train}/data/000000000139.txt +0 -0
  169. {hafnia-0.4.1/tests/data/dataset_formats/format_yolo → hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train}/data/000000000285.txt +0 -0
  170. {hafnia-0.4.1/tests/data/dataset_formats/format_yolo → hafnia-0.4.3/tests/data/dataset_formats/format_yolo/validation}/data/000000000632.txt +0 -0
  171. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_dataset_transformations/test_video_storage_format_read_image.png +0 -0
  172. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_format_yolo/test_import_yolo_format_visualized.png +0 -0
  173. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  174. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  175. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  176. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  177. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  178. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  179. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  180. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  181. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  182. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  183. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  184. {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  185. {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_check_example_scripts.py +0 -0
  186. {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_dataset_merges.py +0 -0
  187. {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_torchvision_datasets.py +0 -0
  188. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
  189. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/dataset_recipe/test_dataset_recipes.py +0 -0
  190. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
  191. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/operations/test_dataset_stats.py +0 -0
  192. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/operations/test_table_transformations.py +0 -0
  193. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_colors.py +0 -0
  194. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_dataset_helpers.py +0 -0
  195. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_builder.py +0 -0
  196. {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_utils.py +0 -0
{hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/Dockerfile

@@ -1,5 +1,8 @@
  ARG PYTHON_VERSION
  FROM python:${PYTHON_VERSION}-slim
+
+ RUN apt-get update && apt-get install -y pigz && rm -rf /var/lib/apt/lists/*
+
  WORKDIR /opt/ml/processing
 
  COPY dist/*.whl .
@@ -8,4 +11,4 @@ RUN pip install --no-cache-dir *.whl && \
 
  RUN mkdir -p /opt/ml/processing/input \
  /opt/ml/processing/output \
- /opt/ml/processing/tmp
+ /opt/ml/processing/tmp
{hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/build.yaml

@@ -6,6 +6,10 @@ on:
  python-version-file:
  required: true
  type: string
+ pyproject-toml-file:
+ required: false
+ type: string
+ default: "pyproject.toml"
  outputs:
  package-version:
  description: "The extracted package version"
@@ -25,7 +29,7 @@ jobs:
  - name: Extract package version
  id: extract-version
  run: |
- VERSION=$(grep -m 1 'version = ' ${{ inputs.python-version-file }} | sed -e 's/version = "\(.*\)"/\1/')
+ VERSION=$(grep -m 1 'version = ' ${{ inputs.pyproject-toml-file }} | sed -e 's/version = "\(.*\)"/\1/')
  echo "package_version=$VERSION" >> $GITHUB_OUTPUT
 
  - name: Install uv
{hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/ci_cd.yaml

@@ -5,14 +5,14 @@ on:
  branches: [main]
  pull_request:
  branches: [main]
- paths: ["src/**", "tests/**", "pyproject.toml"]
+ paths: ["src/**", "tests/**", "pyproject.toml", ".python-version", "uv.lock", ".github/workflows/**"]
 
  jobs:
  lint:
  name: Lint Code
  uses: ./.github/workflows/lint.yaml
  with:
- python-version-file: "pyproject.toml"
+ python-version-file: ".python-version"
 
  security-scan:
  name: Security Scan
@@ -36,14 +36,15 @@ jobs:
  secrets: inherit
  uses: ./.github/workflows/tests.yaml
  with:
- python-version-file: "pyproject.toml"
+ python-version-file: ".python-version"
 
  build:
  name: Build Package
  needs: [test, security-scan]
  uses: ./.github/workflows/build.yaml
  with:
- python-version-file: "pyproject.toml"
+ python-version-file: ".python-version"
+ pyproject-toml-file: "pyproject.toml"
 
  publish-docker-staging:
  name: Publish Docker Image to Staging
@@ -52,7 +53,7 @@
  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
  uses: ./.github/workflows/publish_docker.yaml
  with:
- python-version-file: "pyproject.toml"
+ python-version-file: ".python-version"
  package-version: ${{ needs.build.outputs.package-version }}
  environment: "staging"
 
@@ -80,7 +81,7 @@
  if: github.event_name == 'push' && github.ref == 'refs/heads/main'
  uses: ./.github/workflows/publish_docker.yaml
  with:
- python-version-file: "pyproject.toml"
+ python-version-file: ".python-version"
  package-version: ${{ needs.build.outputs.package-version }}
  environment: "production"
 
{hafnia-0.4.1 → hafnia-0.4.3}/.vscode/launch.json

@@ -15,7 +15,7 @@
  "name": "debug (list profiles)",
  "type": "debugpy",
  "request": "launch",
- "program": "${workspaceFolder}/src/cli/__main__.py",
+ "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
  "args": [
  "profile",
  "ls"
@@ -25,7 +25,7 @@
  "name": "cmd: hafnia runc launch-local",
  "type": "debugpy",
  "request": "launch",
- "program": "${workspaceFolder}/src/cli/__main__.py",
+ "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
  "args": [
  "runc",
  "launch-local",
@@ -38,7 +38,7 @@
  "name": "cmd: hafnia runc build-local",
  "type": "debugpy",
  "request": "launch",
- "program": "${workspaceFolder}/src/cli/__main__.py",
+ "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
  "args": [
  "runc",
  "build-local",
@@ -51,7 +51,7 @@
  "name": "cmd: 'hafnia dataset [X]'",
  "type": "debugpy",
  "request": "launch",
- "program": "${workspaceFolder}/src/cli/__main__.py",
+ "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
  "args": [
  "dataset",
  //"ls",
@@ -64,7 +64,7 @@
  "name": "cmd: 'hafnia experiment [X]'",
  "type": "debugpy",
  "request": "launch",
- "program": "${workspaceFolder}/src/cli/__main__.py",
+ "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
  "args": [
  "experiment",
  "create",
@@ -82,7 +82,7 @@
  "name": "cmd: 'hafnia train-recipe [X]'",
  "type": "debugpy",
  "request": "launch",
- "program": "${workspaceFolder}/src/cli/__main__.py",
+ "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
  "args": [
  "trainer",
  "ls"
{hafnia-0.4.1 → hafnia-0.4.3}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.4.1
+ Version: 0.4.3
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
@@ -343,7 +343,7 @@ batch_size = 128
  learning_rate = 0.001
 
  # Initialize Hafnia logger
- logger = HafniaLogger()
+ logger = HafniaLogger(project_name="my_classification_project")
 
  # Log experiment parameters
  logger.log_configuration({"batch_size": 128, "learning_rate": 0.001})
{hafnia-0.4.1 → hafnia-0.4.3}/README.md

@@ -314,7 +314,7 @@ batch_size = 128
  learning_rate = 0.001
 
  # Initialize Hafnia logger
- logger = HafniaLogger()
+ logger = HafniaLogger(project_name="my_classification_project")
 
  # Log experiment parameters
  logger.log_configuration({"batch_size": 128, "learning_rate": 0.001})
{hafnia-0.4.1 → hafnia-0.4.3}/examples/example_hafnia_dataset.py

@@ -6,7 +6,8 @@ from PIL import Image
  from rich import print as rprint
 
  from hafnia.dataset.dataset_names import SplitName
- from hafnia.dataset.hafnia_dataset import DatasetInfo, HafniaDataset, Sample, TaskInfo
+ from hafnia.dataset.hafnia_dataset import HafniaDataset
+ from hafnia.dataset.hafnia_dataset_types import DatasetInfo, Sample, TaskInfo
  from hafnia.dataset.primitives.bbox import Bbox
  from hafnia.dataset.primitives.bitmask import Bitmask
  from hafnia.dataset.primitives.classification import Classification
@@ -87,14 +88,18 @@ dataset.write(path_dataset)
  dataset_again = HafniaDataset.from_path(path_dataset)
 
  ## Dataset importers and exporters ##
- dataset_coco = HafniaDataset.from_name("coco-2017").select_samples(n_samples=5, seed=42)
+ dataset_od = HafniaDataset.from_name("coco-2017").select_samples(n_samples=5, seed=42)
+
+ # Export/import dataset to YOLO format
  path_yolo_format = Path(".data/tmp/yolo_dataset")
+ dataset_od.to_yolo_format(path_output=path_yolo_format) # Export to YOLO format
+ dataset_od_imported = HafniaDataset.from_yolo_format(path_yolo_format) # Import dataset from YOLO format
 
- # Export dataset to YOLO format
- dataset_coco.to_yolo_format(path_export_yolo_dataset=path_yolo_format)
 
- # Import dataset from YOLO format
- dataset_coco_imported = HafniaDataset.from_yolo_format(path_yolo_format)
+ # Export/import dataset to COCO format
+ path_coco_format = Path(".data/tmp/coco_dataset")
+ dataset_od.to_coco_format(path_output=path_coco_format) # Export to COCO format
+ dataset_od_imported = HafniaDataset.from_coco_format(path_coco_format) # Import dataset from COCO format
 
  ## Custom dataset operations and statistics ##
  # Want custom dataset transformations or statistics? Use the polars table (dataset.samples) directly
{hafnia-0.4.1 → hafnia-0.4.3}/examples/example_logger.py

@@ -4,7 +4,7 @@ batch_size = 128
  learning_rate = 0.001
 
  # Initialize Hafnia logger
- logger = HafniaLogger()
+ logger = HafniaLogger(project_name="example_classification_project")
 
  # Log experiment parameters
  logger.log_configuration({"batch_size": 128, "learning_rate": 0.001})
{hafnia-0.4.1 → hafnia-0.4.3}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "hafnia"
- version = "0.4.1"
+ version = "0.4.3"
  description = "Python SDK for communication with Hafnia platform."
  readme = "README.md"
  authors = [
@@ -44,14 +44,14 @@ dev = [
  test = ["pytest>=8.3.4", "pre-commit>=4.2.0", "ruff>=0.9.1"]
 
  [project.scripts]
- hafnia = 'cli.__main__:main'
+ hafnia = 'hafnia_cli.__main__:main'
 
  [build-system]
  requires = ["hatchling"]
  build-backend = "hatchling.build"
 
  [tool.hatch.build.targets.wheel]
- packages = ["src/cli", "src/hafnia"]
+ packages = ["src/hafnia_cli", "src/hafnia"]
 
  [tool.uv]
  default-groups = ["test"]
hafnia-0.4.1/src/hafnia/dataset/dataset_upload_helper.py → hafnia-0.4.3/src/hafnia/dataset/dataset_details_uploader.py

@@ -11,8 +11,6 @@ import polars as pl
  from PIL import Image
  from pydantic import BaseModel, ConfigDict, field_validator
 
- from cli.config import Config
- from hafnia.dataset import primitives
  from hafnia.dataset.dataset_names import (
  DatasetVariant,
  DeploymentStage,
@@ -20,7 +18,8 @@ from hafnia.dataset.dataset_names import (
  SampleField,
  SplitName,
  )
- from hafnia.dataset.hafnia_dataset import Attribution, HafniaDataset, Sample, TaskInfo
+ from hafnia.dataset.hafnia_dataset import HafniaDataset
+ from hafnia.dataset.hafnia_dataset_types import Attribution, Sample, TaskInfo
  from hafnia.dataset.operations import table_transformations
  from hafnia.dataset.primitives import (
  Bbox,
@@ -33,6 +32,7 @@ from hafnia.dataset.primitives.primitive import Primitive
  from hafnia.http import post
  from hafnia.log import user_logger
  from hafnia.platform.datasets import get_dataset_id
+ from hafnia_cli.config import Config
 
 
  def generate_bucket_name(dataset_name: str, deployment_stage: DeploymentStage) -> str:
@@ -41,7 +41,7 @@ def generate_bucket_name(dataset_name: str, deployment_stage: DeploymentStage) -
  return f"mdi-{deployment_stage.value}-{dataset_name}"
 
 
- class DbDataset(BaseModel, validate_assignment=True): # type: ignore[call-arg]
+ class DatasetDetails(BaseModel, validate_assignment=True): # type: ignore[call-arg]
  model_config = ConfigDict(use_enum_values=True) # To parse Enum values as strings
  name: str
  data_captured_start: Optional[datetime] = None
@@ -150,14 +150,6 @@ class DbAnnotationType(BaseModel, validate_assignment=True): # type: ignore[cal
  name: str
 
 
- class AnnotationType(Enum):
- ImageClassification = "Image Classification"
- ObjectDetection = "Object Detection"
- SegmentationMask = "Segmentation Mask"
- ImageCaptioning = "Image Captioning"
- InstanceSegmentation = "Instance Segmentation"
-
-
  class DbResolution(BaseModel, validate_assignment=True): # type: ignore[call-arg]
  height: int
  width: int
@@ -289,7 +281,7 @@ def get_folder_size(path: Path) -> int:
  return sum([path.stat().st_size for path in path.rglob("*")])
 
 
- def upload_to_hafnia_dataset_detail_page(dataset_update: DbDataset, upload_gallery_images: bool) -> dict:
+ def upload_to_hafnia_dataset_detail_page(dataset_update: DatasetDetails, upload_gallery_images: bool) -> dict:
  if not upload_gallery_images:
  dataset_update.imgs = None
 
@@ -322,18 +314,6 @@ def get_resolutions(dataset: HafniaDataset, max_resolutions_selected: int = 8) -
  return resolutions
 
 
- def has_primitive(dataset: Union[HafniaDataset, pl.DataFrame], PrimitiveType: Type[Primitive]) -> bool:
- col_name = PrimitiveType.column_name()
- table = dataset.samples if isinstance(dataset, HafniaDataset) else dataset
- if col_name not in table.columns:
- return False
-
- if table[col_name].dtype == pl.Null:
- return False
-
- return True
-
-
  def calculate_distribution_values(
  dataset_split: pl.DataFrame, distribution_tasks: Optional[List[TaskInfo]]
  ) -> List[DbDistributionValue]:
@@ -378,15 +358,15 @@ def s3_based_fields(bucket_name: str, variant_type: DatasetVariant, session: bot
  return last_modified, size
 
 
- def dataset_info_from_dataset(
+ def dataset_details_from_hafnia_dataset(
  dataset: HafniaDataset,
  deployment_stage: DeploymentStage,
  path_sample: Optional[Path],
  path_hidden: Optional[Path],
  path_gallery_images: Optional[Path] = None,
  gallery_image_names: Optional[List[str]] = None,
- distribution_task_names: Optional[List[TaskInfo]] = None,
- ) -> DbDataset:
+ distribution_task_names: Optional[List[str]] = None,
+ ) -> DatasetDetails:
  dataset_variants = []
  dataset_reports = []
  dataset_meta_info = dataset.info.meta or {}
448
428
  )
449
429
 
450
430
  object_reports: List[DbAnnotatedObjectReport] = []
451
- primitive_columns = [primitive.column_name() for primitive in primitives.PRIMITIVE_TYPES]
452
- if has_primitive(dataset_split, PrimitiveType=Bbox):
453
- df_per_instance = table_transformations.create_primitive_table(
454
- dataset_split, PrimitiveType=Bbox, keep_sample_data=True
455
- )
456
- if df_per_instance is None:
457
- raise ValueError(f"Expected {Bbox.__name__} primitive column to be present in the dataset split.")
458
- # Calculate area of bounding boxes
459
- df_per_instance = df_per_instance.with_columns(
460
- (pl.col("height") * pl.col("width")).alias("area"),
461
- ).with_columns(
462
- (pl.col("height") * pl.col("image.height")).alias("height_px"),
463
- (pl.col("width") * pl.col("image.width")).alias("width_px"),
464
- (pl.col("area") * (pl.col("image.height") * pl.col("image.width"))).alias("area_px"),
465
- )
466
-
467
- annotation_type = DbAnnotationType(name=AnnotationType.ObjectDetection.value)
468
- for (class_name, task_name), class_group in df_per_instance.group_by(
469
- PrimitiveField.CLASS_NAME, PrimitiveField.TASK_NAME
470
- ):
471
- if class_name is None:
472
- continue
473
- object_reports.append(
474
- DbAnnotatedObjectReport(
475
- obj=DbAnnotatedObject(
476
- name=class_name,
477
- entity_type=EntityTypeChoices.OBJECT.value,
478
- annotation_type=annotation_type,
479
- task_name=task_name,
480
- ),
481
- unique_obj_ids=class_group[PrimitiveField.OBJECT_ID].n_unique(),
482
- obj_instances=len(class_group),
483
- annotation_type=[annotation_type],
484
- images_with_obj=class_group[SampleField.SAMPLE_INDEX].n_unique(),
485
- area_avg_ratio=class_group["area"].mean(),
486
- area_min_ratio=class_group["area"].min(),
487
- area_max_ratio=class_group["area"].max(),
488
- height_avg_ratio=class_group["height"].mean(),
489
- height_min_ratio=class_group["height"].min(),
490
- height_max_ratio=class_group["height"].max(),
491
- width_avg_ratio=class_group["width"].mean(),
492
- width_min_ratio=class_group["width"].min(),
493
- width_max_ratio=class_group["width"].max(),
494
- area_avg_px=class_group["area_px"].mean(),
495
- area_min_px=int(class_group["area_px"].min()),
496
- area_max_px=int(class_group["area_px"].max()),
497
- height_avg_px=class_group["height_px"].mean(),
498
- height_min_px=int(class_group["height_px"].min()),
499
- height_max_px=int(class_group["height_px"].max()),
500
- width_avg_px=class_group["width_px"].mean(),
501
- width_min_px=int(class_group["width_px"].min()),
502
- width_max_px=int(class_group["width_px"].max()),
503
- average_count_per_image=len(class_group) / class_group[SampleField.SAMPLE_INDEX].n_unique(),
504
- )
505
- )
506
-
507
- if has_primitive(dataset_split, PrimitiveType=Classification):
508
- annotation_type = DbAnnotationType(name=AnnotationType.ImageClassification.value)
509
- col_name = Classification.column_name()
510
- classification_tasks = [task.name for task in dataset.info.tasks if task.primitive == Classification]
511
- has_classification_data = dataset_split[col_name].dtype != pl.List(pl.Null)
512
- if has_classification_data:
513
- classification_df = dataset_split.select(col_name).explode(col_name).unnest(col_name)
514
-
515
- # Include only classification tasks that are defined in the dataset info
516
- classification_df = classification_df.filter(
517
- pl.col(PrimitiveField.TASK_NAME).is_in(classification_tasks)
518
- )
519
-
520
- for (
521
- task_name,
522
- class_name,
523
- ), class_group in classification_df.group_by(PrimitiveField.TASK_NAME, PrimitiveField.CLASS_NAME):
524
- if class_name is None:
525
- continue
526
- if task_name == Classification.default_task_name():
527
- display_name = class_name # Prefix class name with task name
528
- else:
529
- display_name = f"{task_name}.{class_name}"
530
- object_reports.append(
531
- DbAnnotatedObjectReport(
532
- obj=DbAnnotatedObject(
533
- name=display_name,
534
- entity_type=EntityTypeChoices.EVENT.value,
535
- annotation_type=annotation_type,
536
- task_name=task_name,
537
- ),
538
- unique_obj_ids=len(
539
- class_group
540
- ), # Unique object IDs are not applicable for classification
541
- obj_instances=len(class_group),
542
- annotation_type=[annotation_type],
543
- )
544
- )
545
-
546
- if has_primitive(dataset_split, PrimitiveType=Segmentation):
547
- raise NotImplementedError("Not Implemented yet")
548
-
549
- if has_primitive(dataset_split, PrimitiveType=Bitmask):
550
- col_name = Bitmask.column_name()
551
- drop_columns = [col for col in primitive_columns if col != col_name]
552
- drop_columns.append(PrimitiveField.META)
553
-
554
- df_per_instance = table_transformations.create_primitive_table(
555
- dataset_split, PrimitiveType=Bitmask, keep_sample_data=True
556
- )
557
- if df_per_instance is None:
558
- raise ValueError(
559
- f"Expected {Bitmask.__name__} primitive column to be present in the dataset split."
560
- )
561
- df_per_instance = df_per_instance.rename({"height": "height_px", "width": "width_px"})
562
- df_per_instance = df_per_instance.with_columns(
563
- (pl.col("image.height") * pl.col("image.width") * pl.col("area")).alias("area_px"),
564
- (pl.col("height_px") / pl.col("image.height")).alias("height"),
565
- (pl.col("width_px") / pl.col("image.width")).alias("width"),
566
- )
567
-
568
- annotation_type = DbAnnotationType(name=AnnotationType.InstanceSegmentation)
569
- for (class_name, task_name), class_group in df_per_instance.group_by(
570
- PrimitiveField.CLASS_NAME, PrimitiveField.TASK_NAME
571
- ):
572
- if class_name is None:
573
- continue
574
- object_reports.append(
575
- DbAnnotatedObjectReport(
576
- obj=DbAnnotatedObject(
577
- name=class_name,
578
- entity_type=EntityTypeChoices.OBJECT.value,
579
- annotation_type=annotation_type,
580
- task_name=task_name,
581
- ),
582
- unique_obj_ids=class_group[PrimitiveField.OBJECT_ID].n_unique(),
583
- obj_instances=len(class_group),
584
- annotation_type=[annotation_type],
585
- average_count_per_image=len(class_group) / class_group[SampleField.SAMPLE_INDEX].n_unique(),
586
- images_with_obj=class_group[SampleField.SAMPLE_INDEX].n_unique(),
587
- area_avg_ratio=class_group["area"].mean(),
588
- area_min_ratio=class_group["area"].min(),
589
- area_max_ratio=class_group["area"].max(),
590
- height_avg_ratio=class_group["height"].mean(),
591
- height_min_ratio=class_group["height"].min(),
592
- height_max_ratio=class_group["height"].max(),
593
- width_avg_ratio=class_group["width"].mean(),
594
- width_min_ratio=class_group["width"].min(),
595
- width_max_ratio=class_group["width"].max(),
596
- area_avg_px=class_group["area_px"].mean(),
597
- area_min_px=int(class_group["area_px"].min()),
598
- area_max_px=int(class_group["area_px"].max()),
599
- height_avg_px=class_group["height_px"].mean(),
600
- height_min_px=int(class_group["height_px"].min()),
601
- height_max_px=int(class_group["height_px"].max()),
602
- width_avg_px=class_group["width_px"].mean(),
603
- width_min_px=int(class_group["width_px"].min()),
604
- width_max_px=int(class_group["width_px"].max()),
605
- )
606
- )
607
-
608
- if has_primitive(dataset_split, PrimitiveType=Polygon):
609
- raise NotImplementedError("Not Implemented yet")
431
+ for PrimitiveType in [Classification, Bbox, Bitmask, Polygon, Segmentation]:
432
+ object_reports.extend(create_reports_from_primitive(dataset_split, PrimitiveType=PrimitiveType)) # type: ignore[type-abstract]
610
433
 
611
434
  # Sort object reports by name to more easily compare between versions
612
435
  object_reports = sorted(object_reports, key=lambda x: x.obj.name) # Sort object reports by name
613
436
  report.annotated_object_reports = object_reports
614
437
 
615
- if report.distribution_values is None:
616
- report.distribution_values = []
438
+ if report.distribution_values is None:
439
+ report.distribution_values = []
617
440
 
618
- dataset_reports.append(report)
441
+ dataset_reports.append(report)
619
442
  dataset_name = dataset.info.dataset_name
620
443
  bucket_sample = generate_bucket_name(dataset_name, deployment_stage=deployment_stage)
621
- dataset_info = DbDataset(
444
+ dataset_info = DatasetDetails(
622
445
  name=dataset_name,
623
446
  version=dataset.info.version,
624
447
  s3_bucket_name=bucket_sample,
@@ -639,6 +462,101 @@ def dataset_info_from_dataset(
639
462
  return dataset_info
640
463
 
641
464
 
465
+ def create_reports_from_primitive(
466
+ dataset_split: pl.DataFrame, PrimitiveType: Type[Primitive]
467
+ ) -> List[DbAnnotatedObjectReport]:
468
+ if not table_transformations.has_primitive(dataset_split, PrimitiveType=PrimitiveType):
469
+ return []
470
+
471
+ if PrimitiveType == Segmentation:
472
+ raise NotImplementedError("Not Implemented yet")
473
+
474
+ df_per_instance = table_transformations.create_primitive_table(
475
+ dataset_split, PrimitiveType=PrimitiveType, keep_sample_data=True
476
+ )
477
+ if df_per_instance is None:
478
+ raise ValueError(f"Expected {PrimitiveType.__name__} primitive column to be present in the dataset split.")
479
+
480
+ entity_type = EntityTypeChoices.OBJECT.value
481
+ if PrimitiveType == Classification:
482
+ entity_type = EntityTypeChoices.EVENT.value
483
+
484
+ if PrimitiveType == Bbox:
485
+ df_per_instance = df_per_instance.with_columns(area=pl.col("height") * pl.col("width"))
486
+
487
+ if PrimitiveType == Bitmask:
488
+ # width and height are in pixel format for Bitmask convert to ratio
489
+ df_per_instance = df_per_instance.with_columns(
490
+ width=pl.col("width") / pl.col("image.width"),
491
+ height=pl.col("height") / pl.col("image.height"),
492
+ )
493
+
494
+ has_height_field = "height" in df_per_instance.columns and df_per_instance["height"].dtype != pl.Null
495
+ if has_height_field:
496
+ df_per_instance = df_per_instance.with_columns(
497
+ height_px=pl.col("height") * pl.col("image.height"),
498
+ )
499
+
500
+ has_width_field = "width" in df_per_instance.columns and df_per_instance["width"].dtype != pl.Null
501
+ if has_width_field:
502
+ df_per_instance = df_per_instance.with_columns(
503
+ width_px=pl.col("width") * pl.col("image.width"),
504
+ )
505
+
506
+ has_area_field = "area" in df_per_instance.columns and df_per_instance["area"].dtype != pl.Null
507
+ if has_area_field:
508
+ df_per_instance = df_per_instance.with_columns(
509
+ area_px=pl.col("image.height") * pl.col("image.width") * pl.col("area")
510
+ )
511
+ object_reports: List[DbAnnotatedObjectReport] = []
512
+ annotation_type = DbAnnotationType(name=PrimitiveType.__name__)
513
+ for (class_name, task_name), class_group in df_per_instance.group_by(
514
+ PrimitiveField.CLASS_NAME, PrimitiveField.TASK_NAME
515
+ ):
516
+ if class_name is None:
517
+ continue
518
+
519
+ object_report = DbAnnotatedObjectReport(
520
+ obj=DbAnnotatedObject(
521
+ name=class_name,
522
+ entity_type=entity_type,
523
+ annotation_type=annotation_type,
524
+ task_name=task_name,
525
+ ),
526
+ unique_obj_ids=class_group[PrimitiveField.OBJECT_ID].n_unique(),
527
+ obj_instances=len(class_group),
528
+ annotation_type=[annotation_type],
529
+ average_count_per_image=len(class_group) / class_group[SampleField.SAMPLE_INDEX].n_unique(),
530
+ images_with_obj=class_group[SampleField.SAMPLE_INDEX].n_unique(),
531
+ )
532
+ if has_height_field:
533
+ object_report.height_avg_ratio = class_group["height"].mean()
534
+ object_report.height_min_ratio = class_group["height"].min()
535
+ object_report.height_max_ratio = class_group["height"].max()
536
+ object_report.height_avg_px = class_group["height_px"].mean()
537
+ object_report.height_min_px = int(class_group["height_px"].min())
538
+ object_report.height_max_px = int(class_group["height_px"].max())
539
+
540
+ if has_width_field:
541
+ object_report.width_avg_ratio = class_group["width"].mean()
542
+ object_report.width_min_ratio = class_group["width"].min()
543
+ object_report.width_max_ratio = class_group["width"].max()
544
+ object_report.width_avg_px = class_group["width_px"].mean()
545
+ object_report.width_min_px = int(class_group["width_px"].min())
546
+ object_report.width_max_px = int(class_group["width_px"].max())
547
+
548
+ if has_area_field:
549
+ object_report.area_avg_ratio = class_group["area"].mean()
550
+ object_report.area_min_ratio = class_group["area"].min()
551
+ object_report.area_max_ratio = class_group["area"].max()
552
+ object_report.area_avg_px = class_group["area_px"].mean()
553
+ object_report.area_min_px = int(class_group["area_px"].min())
554
+ object_report.area_max_px = int(class_group["area_px"].max())
555
+
556
+ object_reports.append(object_report)
557
+ return object_reports
558
+
559
+
642
560
  def create_gallery_images(
643
561
  dataset: HafniaDataset,
644
562
  path_gallery_images: Optional[Path],
@@ -657,7 +575,12 @@ def create_gallery_images(
657
575
 
658
576
  missing_gallery_samples = set(gallery_image_names) - set(gallery_samples[COL_IMAGE_NAME])
659
577
  if len(missing_gallery_samples):
660
- raise ValueError(f"Gallery images not found in dataset: {missing_gallery_samples}")
578
+ potential_samples = samples[COL_IMAGE_NAME].sort().to_list()
579
+ formatted_samples = ", ".join([f'"{s}"' for s in potential_samples[:9]])
580
+ raise ValueError(
581
+ f"Gallery images not found in dataset: {missing_gallery_samples}. "
582
+ f"Consider adding this to dataset definition: \ngallery_image_names=[{formatted_samples}]"
583
+ )
661
584
  gallery_images = []
662
585
  for gallery_sample in gallery_samples.iter_rows(named=True):
663
586
  sample = Sample(**gallery_sample)
{hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_names.py

@@ -93,6 +93,32 @@ class SplitName:
  def all_split_names() -> List[str]:
  return [*SplitName.valid_splits(), SplitName.UNDEFINED]
 
+ @staticmethod
+ def map_split_name(potential_split_name: str, strict: bool = True) -> str:
+ normalized = potential_split_name.strip().lower()
+
+ if normalized in SPLIT_NAME_MAPPINGS:
+ return SPLIT_NAME_MAPPINGS[normalized]
+
+ if strict:
+ raise ValueError(f"Unrecognized split name: {potential_split_name}")
+ else:
+ return SplitName.UNDEFINED
+
+
+ SPLIT_NAME_MAPPINGS = {
+ # Train variations
+ "train": SplitName.TRAIN,
+ "training": SplitName.TRAIN,
+ # Validation variations
+ "validation": SplitName.VAL,
+ "val": SplitName.VAL,
+ "valid": SplitName.VAL,
+ # Test variations
+ "test": SplitName.TEST,
+ "testing": SplitName.TEST,
+ }
+
 
  class DatasetVariant(Enum):
  DUMP = "dump"
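Note on the new split-name mapping above: SplitName.map_split_name normalizes split names coming from external dataset layouts (for example the "valid" folder in the new Roboflow COCO test data), presumably for use by the format importers. A minimal usage sketch, assuming only what this diff shows (SplitName imported from hafnia.dataset.dataset_names, with TRAIN/VAL/TEST/UNDEFINED string constants):

from hafnia.dataset.dataset_names import SplitName

# Known aliases are normalized to the canonical split names
assert SplitName.map_split_name("valid") == SplitName.VAL
assert SplitName.map_split_name(" Training ") == SplitName.TRAIN  # stripped and lower-cased first

# Unknown names raise ValueError in strict mode (the default) ...
try:
    SplitName.map_split_name("holdout")
except ValueError:
    pass

# ... or fall back to SplitName.UNDEFINED when strict=False
assert SplitName.map_split_name("holdout", strict=False) == SplitName.UNDEFINED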