hafnia 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. {hafnia-0.4.0 → hafnia-0.4.1}/.github/workflows/build.yaml +2 -2
  2. {hafnia-0.4.0 → hafnia-0.4.1}/.github/workflows/check_release.yaml +1 -1
  3. {hafnia-0.4.0 → hafnia-0.4.1}/.github/workflows/publish_docker.yaml +1 -1
  4. {hafnia-0.4.0 → hafnia-0.4.1}/.github/workflows/publish_pypi.yaml +1 -1
  5. {hafnia-0.4.0 → hafnia-0.4.1}/.github/workflows/tests.yaml +1 -1
  6. {hafnia-0.4.0 → hafnia-0.4.1}/PKG-INFO +4 -4
  7. {hafnia-0.4.0 → hafnia-0.4.1}/README.md +3 -3
  8. {hafnia-0.4.0 → hafnia-0.4.1}/examples/example_dataset_recipe.py +8 -6
  9. {hafnia-0.4.0 → hafnia-0.4.1}/examples/example_hafnia_dataset.py +15 -5
  10. {hafnia-0.4.0 → hafnia-0.4.1}/pyproject.toml +1 -1
  11. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/__init__.py +1 -1
  12. hafnia-0.4.1/src/hafnia/dataset/dataset_names.py +190 -0
  13. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/dataset_upload_helper.py +30 -25
  14. hafnia-0.4.0/src/hafnia/dataset/format_conversions/image_classification_from_directory.py → hafnia-0.4.1/src/hafnia/dataset/format_conversions/format_image_classification_folder.py +14 -10
  15. hafnia-0.4.1/src/hafnia/dataset/format_conversions/format_yolo.py +164 -0
  16. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/format_conversions/torchvision_datasets.py +10 -4
  17. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/hafnia_dataset.py +246 -72
  18. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/operations/dataset_stats.py +82 -70
  19. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/operations/dataset_transformations.py +102 -37
  20. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/operations/table_transformations.py +132 -15
  21. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/bbox.py +3 -5
  22. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/bitmask.py +2 -7
  23. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/classification.py +3 -3
  24. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/polygon.py +2 -4
  25. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/primitive.py +1 -1
  26. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/segmentation.py +2 -2
  27. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/platform/datasets.py +3 -7
  28. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/platform/download.py +1 -72
  29. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/torch_helpers.py +12 -12
  30. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/visualizations/image_visualizations.py +2 -0
  31. {hafnia-0.4.0 → hafnia-0.4.1}/tests/conftest.py +3 -7
  32. hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data/000000000139.jpg +0 -0
  33. hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data/000000000139.txt +20 -0
  34. hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data/000000000285.jpg +0 -0
  35. hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data/000000000285.txt +1 -0
  36. hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data/000000000632.jpg +0 -0
  37. hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data/000000000632.txt +18 -0
  38. hafnia-0.4.1/tests/data/dataset_formats/format_yolo/images.txt +3 -0
  39. hafnia-0.4.1/tests/data/dataset_formats/format_yolo/obj.names +80 -0
  40. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/dataset_image_metadata_schema.yaml +16 -24
  41. hafnia-0.4.1/tests/data/expected_images/test_dataset_transformations/test_video_storage_format_read_image.png +0 -0
  42. hafnia-0.4.1/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
  43. hafnia-0.4.1/tests/data/expected_images/test_format_yolo/test_import_yolo_format_visualized.png +0 -0
  44. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  45. hafnia-0.4.1/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  46. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  47. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  48. hafnia-0.4.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  49. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  50. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
  51. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
  52. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
  53. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
  54. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
  55. hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
  56. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +3 -0
  57. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
  58. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/657/657dff54d5175e2ae9f4b9629cf57646.jpg +0 -0
  59. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/825/825fa2d2d9416694b8e81a47ca38f580.jpg +0 -0
  60. hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/aa3/aa3cc40b5cde88e5bd189c0b3e6c223c.jpg +0 -0
  61. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +6 -8
  62. hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +3 -0
  63. hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
  64. hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/data/2da/2da1d8dbf2b60bdab8dff1d7f5c2dfb5.png +0 -0
  65. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/micro_test_datasets/micro-tiny-dataset/dataset_info.json +5 -42
  66. {hafnia-0.4.0 → hafnia-0.4.1}/tests/helper_testing.py +101 -4
  67. {hafnia-0.4.0 → hafnia-0.4.1}/tests/integration/test_dataset_merges.py +11 -8
  68. {hafnia-0.4.0 → hafnia-0.4.1}/tests/integration/test_samples.py +2 -9
  69. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/dataset_recipe/test_recipe_transformations.py +7 -7
  70. hafnia-0.4.1/tests/unit/dataset/format_conversions/test_format_yolo.py +85 -0
  71. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/format_conversions/test_image_classification_directory.py +7 -7
  72. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/operations/test_dataset_stats.py +8 -8
  73. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/operations/test_dataset_transformations.py +89 -9
  74. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/operations/test_table_transformations.py +9 -3
  75. hafnia-0.4.1/tests/unit/dataset/test_dataset_names.py +13 -0
  76. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/test_hafnia_dataset.py +3 -3
  77. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/test_shape_primitives.py +5 -5
  78. {hafnia-0.4.0 → hafnia-0.4.1}/uv.lock +51 -295
  79. hafnia-0.4.0/src/hafnia/dataset/dataset_names.py +0 -77
  80. hafnia-0.4.0/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  81. hafnia-0.4.0/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  82. hafnia-0.4.0/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
  83. hafnia-0.4.0/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
  84. hafnia-0.4.0/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
  85. hafnia-0.4.0/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
  86. hafnia-0.4.0/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
  87. hafnia-0.4.0/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
  88. hafnia-0.4.0/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +0 -3
  89. hafnia-0.4.0/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
  90. hafnia-0.4.0/tests/data/micro_test_datasets/micro-coco-2017/data/3b4/3b4165c8c4f830be4e95c6eb6209880a.jpg +0 -0
  91. hafnia-0.4.0/tests/data/micro_test_datasets/micro-coco-2017/data/837/837b642d8a7b3b8dcf86c7a23edb55ce.jpg +0 -0
  92. hafnia-0.4.0/tests/data/micro_test_datasets/micro-coco-2017/data/dc8/dc8efc98ce6304fe182a2c0a3ce312cf.jpg +0 -0
  93. hafnia-0.4.0/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +0 -3
  94. hafnia-0.4.0/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
  95. hafnia-0.4.0/tests/data/micro_test_datasets/micro-tiny-dataset/data/907/907f182da7bcedb8222bbd5721a8a86e.png +0 -0
  96. {hafnia-0.4.0 → hafnia-0.4.1}/.devcontainer/devcontainer.json +0 -0
  97. {hafnia-0.4.0 → hafnia-0.4.1}/.devcontainer/hooks/post_create +0 -0
  98. {hafnia-0.4.0 → hafnia-0.4.1}/.github/dependabot.yaml +0 -0
  99. {hafnia-0.4.0 → hafnia-0.4.1}/.github/workflows/Dockerfile +0 -0
  100. {hafnia-0.4.0 → hafnia-0.4.1}/.github/workflows/ci_cd.yaml +0 -0
  101. {hafnia-0.4.0 → hafnia-0.4.1}/.github/workflows/lint.yaml +0 -0
  102. {hafnia-0.4.0 → hafnia-0.4.1}/.gitignore +0 -0
  103. {hafnia-0.4.0 → hafnia-0.4.1}/.pre-commit-config.yaml +0 -0
  104. {hafnia-0.4.0 → hafnia-0.4.1}/.python-version +0 -0
  105. {hafnia-0.4.0 → hafnia-0.4.1}/.trivyignore +0 -0
  106. {hafnia-0.4.0 → hafnia-0.4.1}/.vscode/extensions.json +0 -0
  107. {hafnia-0.4.0 → hafnia-0.4.1}/.vscode/launch.json +0 -0
  108. {hafnia-0.4.0 → hafnia-0.4.1}/.vscode/settings.json +0 -0
  109. {hafnia-0.4.0 → hafnia-0.4.1}/LICENSE +0 -0
  110. {hafnia-0.4.0 → hafnia-0.4.1}/docs/cli.md +0 -0
  111. {hafnia-0.4.0 → hafnia-0.4.1}/docs/release.md +0 -0
  112. {hafnia-0.4.0 → hafnia-0.4.1}/examples/example_logger.py +0 -0
  113. {hafnia-0.4.0 → hafnia-0.4.1}/examples/example_torchvision_dataloader.py +0 -0
  114. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/__init__.py +0 -0
  115. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/__main__.py +0 -0
  116. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/config.py +0 -0
  117. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/consts.py +0 -0
  118. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/dataset_cmds.py +0 -0
  119. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/dataset_recipe_cmds.py +0 -0
  120. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/experiment_cmds.py +0 -0
  121. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/keychain.py +0 -0
  122. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/profile_cmds.py +0 -0
  123. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/runc_cmds.py +0 -0
  124. {hafnia-0.4.0 → hafnia-0.4.1}/src/cli/trainer_package_cmds.py +0 -0
  125. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/data/__init__.py +0 -0
  126. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/data/factory.py +0 -0
  127. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/dataset_helpers.py +0 -0
  128. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +0 -0
  129. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
  130. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/dataset_recipe/recipe_types.py +0 -0
  131. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/license_types.py +0 -0
  132. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/__init__.py +0 -0
  133. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/point.py +0 -0
  134. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/dataset/primitives/utils.py +0 -0
  135. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/experiment/__init__.py +0 -0
  136. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/experiment/hafnia_logger.py +0 -0
  137. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/http.py +0 -0
  138. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/log.py +0 -0
  139. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/platform/__init__.py +0 -0
  140. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/platform/builder.py +0 -0
  141. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/platform/dataset_recipe.py +0 -0
  142. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/platform/experiment.py +0 -0
  143. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/platform/trainer_package.py +0 -0
  144. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/utils.py +0 -0
  145. {hafnia-0.4.0 → hafnia-0.4.1}/src/hafnia/visualizations/colors.py +0 -0
  146. {hafnia-0.4.0 → hafnia-0.4.1}/tests/__init__.py +0 -0
  147. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  148. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  149. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  150. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  151. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  152. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  153. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  154. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  155. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  156. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  157. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/3dd/3ddec2275a02e79e3251d85443622e4c.png +0 -0
  158. {hafnia-0.4.0 → hafnia-0.4.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/4d8/4d8450b045e60e8f3657ababa44af9b6.png +0 -0
  159. {hafnia-0.4.0 → hafnia-0.4.1}/tests/integration/test_check_example_scripts.py +0 -0
  160. {hafnia-0.4.0 → hafnia-0.4.1}/tests/integration/test_cli_integration.py +0 -0
  161. {hafnia-0.4.0 → hafnia-0.4.1}/tests/integration/test_dataset_recipes_with_platform.py +0 -0
  162. {hafnia-0.4.0 → hafnia-0.4.1}/tests/integration/test_torchvision_datasets.py +0 -0
  163. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
  164. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/dataset_recipe/test_dataset_recipes.py +0 -0
  165. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/test_colors.py +0 -0
  166. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/dataset/test_dataset_helpers.py +0 -0
  167. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/test_builder.py +0 -0
  168. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/test_cli.py +0 -0
  169. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/test_hafnia_logger.py +0 -0
  170. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/test_utils.py +0 -0
  171. {hafnia-0.4.0 → hafnia-0.4.1}/tests/unit/test_visualizations.py +0 -0
@@ -29,7 +29,7 @@ jobs:
29
29
  echo "package_version=$VERSION" >> $GITHUB_OUTPUT
30
30
 
31
31
  - name: Install uv
32
- uses: astral-sh/setup-uv@v6
32
+ uses: astral-sh/setup-uv@v7
33
33
  with:
34
34
  version: 0.6.8
35
35
 
@@ -45,7 +45,7 @@ jobs:
45
45
  run: uv build
46
46
 
47
47
  - name: Upload package artifact
48
- uses: actions/upload-artifact@v4.6.2
48
+ uses: actions/upload-artifact@v5.0.0
49
49
  with:
50
50
  name: python-package
51
51
  path: dist/
@@ -20,7 +20,7 @@ jobs:
20
20
  make_release: ${{ steps.check_release.outputs.make_release }}
21
21
  steps:
22
22
  - name: Download package artifact
23
- uses: actions/download-artifact@v5.0.0
23
+ uses: actions/download-artifact@v6.0.0
24
24
  with:
25
25
  name: python-package
26
26
  path: dist/
@@ -31,7 +31,7 @@ jobs:
31
31
  python-version-file: ${{ inputs.python-version-file }}
32
32
 
33
33
  - name: Download package artifact
34
- uses: actions/download-artifact@v5.0.0
34
+ uses: actions/download-artifact@v6.0.0
35
35
  with:
36
36
  name: python-package
37
37
  path: dist/
@@ -17,7 +17,7 @@ jobs:
17
17
  contents: read
18
18
  steps:
19
19
  - name: Download package artifact
20
- uses: actions/download-artifact@v5.0.0
20
+ uses: actions/download-artifact@v6.0.0
21
21
  with:
22
22
  name: python-package
23
23
  path: dist/
@@ -20,7 +20,7 @@ jobs:
20
20
  with:
21
21
  python-version-file: ${{ inputs.python-version-file }}
22
22
  - name: Install uv
23
- uses: astral-sh/setup-uv@v6
23
+ uses: astral-sh/setup-uv@v7
24
24
  with:
25
25
  version: 0.6.8
26
26
  - name: Install the project
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hafnia
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Python SDK for communication with Hafnia platform.
5
5
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
6
6
  License-File: LICENSE
@@ -158,7 +158,7 @@ and `dataset.samples` with annotations as a polars DataFrame
158
158
  print(dataset.samples.head(2))
159
159
  shape: (2, 14)
160
160
  ┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
161
- │ sample_index ┆ file_name ┆ height ┆ width ┆ … ┆ objects ┆ bitmasks ┆ polygons ┆ meta │
161
+ │ sample_index ┆ file_name ┆ height ┆ width ┆ … ┆ bboxes ┆ bitmasks ┆ polygons ┆ meta │
162
162
  │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
163
163
  │ u32 ┆ str ┆ i64 ┆ i64 ┆ ┆ list[struct[11]] ┆ null ┆ null ┆ struct[5] │
164
164
  ╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
@@ -218,7 +218,7 @@ sample_dict = dataset[0]
218
218
 
219
219
  for sample_dict in dataset:
220
220
  sample = Sample(**sample_dict)
221
- print(sample.sample_id, sample.objects)
221
+ print(sample.sample_id, sample.bboxes)
222
222
  break
223
223
  ```
224
224
  Note that it is possible to create a `Sample` object from the sample dictionary.
@@ -421,7 +421,7 @@ pil_image.save("visualized_labels.png")
421
421
 
422
422
  # Create DataLoaders - using TorchVisionCollateFn
423
423
  collate_fn = torch_helpers.TorchVisionCollateFn(
424
- skip_stacking=["objects.bbox", "objects.class_idx"]
424
+ skip_stacking=["bboxes.bbox", "bboxes.class_idx"]
425
425
  )
426
426
  train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
427
427
  ```
@@ -129,7 +129,7 @@ and `dataset.samples` with annotations as a polars DataFrame
129
129
  print(dataset.samples.head(2))
130
130
  shape: (2, 14)
131
131
  ┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
132
- │ sample_index ┆ file_name ┆ height ┆ width ┆ … ┆ objects ┆ bitmasks ┆ polygons ┆ meta │
132
+ │ sample_index ┆ file_name ┆ height ┆ width ┆ … ┆ bboxes ┆ bitmasks ┆ polygons ┆ meta │
133
133
  │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
134
134
  │ u32 ┆ str ┆ i64 ┆ i64 ┆ ┆ list[struct[11]] ┆ null ┆ null ┆ struct[5] │
135
135
  ╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
@@ -189,7 +189,7 @@ sample_dict = dataset[0]
189
189
 
190
190
  for sample_dict in dataset:
191
191
  sample = Sample(**sample_dict)
192
- print(sample.sample_id, sample.objects)
192
+ print(sample.sample_id, sample.bboxes)
193
193
  break
194
194
  ```
195
195
  Note that it is possible to create a `Sample` object from the sample dictionary.
@@ -392,7 +392,7 @@ pil_image.save("visualized_labels.png")
392
392
 
393
393
  # Create DataLoaders - using TorchVisionCollateFn
394
394
  collate_fn = torch_helpers.TorchVisionCollateFn(
395
- skip_stacking=["objects.bbox", "objects.class_idx"]
395
+ skip_stacking=["bboxes.bbox", "bboxes.class_idx"]
396
396
  )
397
397
  train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
398
398
  ```
@@ -129,26 +129,28 @@ mapping_midwest = {
129
129
  "Vehicle*": "Vehicle", # Wildcard mapping. Selects class names starting with 'Vehicle.' e.g. 'Vehicle.Bicycle', 'Vehicle.Car', etc.
130
130
  "Vehicle.Trailer": OPS_REMOVE_CLASS, # Use this to remove a class
131
131
  }
132
- coco_remapped = coco.class_mapper(class_mapping=mappings_coco, method="remove_undefined", task_name="bboxes")
133
- midwest_remapped = midwest.class_mapper(class_mapping=mapping_midwest, task_name="bboxes")
132
+ coco_remapped = coco.class_mapper(class_mapping=mappings_coco, method="remove_undefined", task_name="object_detection")
133
+ midwest_remapped = midwest.class_mapper(class_mapping=mapping_midwest, task_name="object_detection")
134
134
 
135
135
  # 2b) Merge datasets
136
136
  merged_dataset_all_images = HafniaDataset.from_merge(dataset0=coco_remapped, dataset1=midwest_remapped)
137
137
 
138
138
  # 2c) Remove images without 'Person' or 'Vehicle' annotations
139
- merged_dataset = merged_dataset_all_images.select_samples_by_class_name(name=["Person", "Vehicle"], task_name="bboxes")
139
+ merged_dataset = merged_dataset_all_images.select_samples_by_class_name(
140
+ name=["Person", "Vehicle"], task_name="object_detection"
141
+ )
140
142
  merged_dataset.print_stats()
141
143
 
142
144
  # 3) Once you have verified operations using the 'HafniaDataset' interface, you can convert
143
145
  # the operations to a single 'DatasetRecipe'
144
146
  merged_recipe = DatasetRecipe.from_merge(
145
147
  recipe0=DatasetRecipe.from_name("coco-2017").class_mapper(
146
- class_mapping=mappings_coco, method="remove_undefined", task_name="bboxes"
148
+ class_mapping=mappings_coco, method="remove_undefined", task_name="object_detection"
147
149
  ),
148
150
  recipe1=DatasetRecipe.from_name("midwest-vehicle-detection").class_mapper(
149
- class_mapping=mapping_midwest, task_name="bboxes"
151
+ class_mapping=mapping_midwest, task_name="object_detection"
150
152
  ),
151
- ).select_samples_by_class_name(name=["Person", "Vehicle"], task_name="bboxes")
153
+ ).select_samples_by_class_name(name=["Person", "Vehicle"], task_name="object_detection")
152
154
 
153
155
  # 3a) Verify again on the sample datasets, that the recipe works and can build as a dataset
154
156
  merged_dataset = merged_recipe.build()
@@ -33,8 +33,8 @@ dataset.print_class_distribution()
33
33
  dataset.print_stats() # Print verbose dataset statistics
34
34
 
35
35
  # Get dataset stats
36
- dataset.class_counts_all() # Get class counts for all tasks
37
- dataset.class_counts_for_task(primitive=Classification) # Get class counts for a specific task
36
+ dataset.calculate_class_counts() # Get class counts for all tasks
37
+ dataset.calculate_task_class_counts(primitive=Classification) # Get class counts for a specific task
38
38
 
39
39
  # Create a dataset split for training
40
40
  dataset_train = dataset.create_split_dataset("train")
@@ -86,9 +86,19 @@ dataset.write(path_dataset)
86
86
  # Load dataset from disk
87
87
  dataset_again = HafniaDataset.from_path(path_dataset)
88
88
 
89
+ ## Dataset importers and exporters ##
90
+ dataset_coco = HafniaDataset.from_name("coco-2017").select_samples(n_samples=5, seed=42)
91
+ path_yolo_format = Path(".data/tmp/yolo_dataset")
89
92
 
93
+ # Export dataset to YOLO format
94
+ dataset_coco.to_yolo_format(path_export_yolo_dataset=path_yolo_format)
95
+
96
+ # Import dataset from YOLO format
97
+ dataset_coco_imported = HafniaDataset.from_yolo_format(path_yolo_format)
98
+
99
+ ## Custom dataset operations and statistics ##
90
100
  # Want custom dataset transformations or statistics? Use the polars table (dataset.samples) directly
91
- n_objects = dataset.samples["objects"].list.len().sum()
101
+ n_objects = dataset.samples["bboxes"].list.len().sum()
92
102
  n_objects = dataset.samples[Bbox.column_name()].list.len().sum() # Use Bbox.column_name() to avoid magic variables
93
103
  n_classifications = dataset.samples[Classification.column_name()].list.len().sum()
94
104
 
@@ -106,7 +116,7 @@ for sample_dict in dataset_train:
106
116
  # Unpack dict into a Sample-object! Important for data validation, usability, IDE completion and mypy hints
107
117
  sample: Sample = Sample(**sample_dict)
108
118
 
109
- objects: List[Bbox] = sample.objects # Use 'sample.objects' access bounding boxes as a list of Bbox objects
119
+ bboxes: List[Bbox] = sample.bboxes # Use 'sample.bboxes' to access bounding boxes as a list of Bbox objects
110
120
  bitmasks: List[Bitmask] = sample.bitmasks # Use 'sample.bitmasks' to access bitmasks as a list of Bitmask objects
111
121
  polygons: List[Polygon] = sample.polygons # Use 'sample.polygons' to access polygons as a list of Polygon objects
112
122
  classifications: List[Classification] = sample.classifications # As a list of Classification objects
@@ -134,7 +144,7 @@ for i_fake_sample in range(5):
134
144
  width=640,
135
145
  split="train",
136
146
  tags=["sample"],
137
- objects=bboxes,
147
+ bboxes=bboxes,
138
148
  classifications=classifications,
139
149
  )
140
150
  fake_samples.append(sample)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hafnia"
3
- version = "0.4.0"
3
+ version = "0.4.1"
4
4
  description = "Python SDK for communication with Hafnia platform."
5
5
  readme = "README.md"
6
6
  authors = [
@@ -3,4 +3,4 @@ from importlib.metadata import version
3
3
  __package_name__ = "hafnia"
4
4
  __version__ = version(__package_name__)
5
5
 
6
- __dataset_format_version__ = "0.1.0" # Hafnia dataset format version
6
+ __dataset_format_version__ = "0.2.0" # Hafnia dataset format version
@@ -0,0 +1,190 @@
1
+ from enum import Enum
2
+ from typing import Dict, List, Optional
3
+
4
+ import boto3
5
+ from pydantic import BaseModel, field_validator
6
+
7
+ FILENAME_RECIPE_JSON = "recipe.json"
8
+ FILENAME_DATASET_INFO = "dataset_info.json"
9
+ FILENAME_ANNOTATIONS_JSONL = "annotations.jsonl"
10
+ FILENAME_ANNOTATIONS_PARQUET = "annotations.parquet"
11
+
12
+ DATASET_FILENAMES_REQUIRED = [
13
+ FILENAME_DATASET_INFO,
14
+ FILENAME_ANNOTATIONS_JSONL,
15
+ FILENAME_ANNOTATIONS_PARQUET,
16
+ ]
17
+
18
+
19
+ class DeploymentStage(Enum):
20
+ STAGING = "staging"
21
+ PRODUCTION = "production"
22
+
23
+
24
+ TAG_IS_SAMPLE = "sample"
25
+
26
+ OPS_REMOVE_CLASS = "__REMOVE__"
27
+
28
+
29
+ class PrimitiveField:
30
+ CLASS_NAME: str = "class_name" # Name of the class this primitive is associated with, e.g. "car" for Bbox
31
+ CLASS_IDX: str = "class_idx" # Index of the class this primitive is associated with, e.g. 0 for "car" if it is the first class # noqa: E501
32
+ OBJECT_ID: str = "object_id" # Unique identifier for the object, e.g. "12345123"
33
+ CONFIDENCE: str = "confidence" # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
34
+
35
+ META: str = "meta" # Contains metadata about each primitive, e.g. attributes color, occluded, iscrowd, etc.
36
+ TASK_NAME: str = "task_name" # Name of the task this primitive is associated with, e.g. "bboxes" for Bbox
37
+
38
+ @staticmethod
39
+ def fields() -> List[str]:
40
+ """
41
+ Returns a list of expected field names for primitives.
42
+ """
43
+ return [
44
+ PrimitiveField.CLASS_NAME,
45
+ PrimitiveField.CLASS_IDX,
46
+ PrimitiveField.OBJECT_ID,
47
+ PrimitiveField.CONFIDENCE,
48
+ PrimitiveField.META,
49
+ PrimitiveField.TASK_NAME,
50
+ ]
51
+
52
+
53
+ class SampleField:
54
+ FILE_PATH: str = "file_path"
55
+ HEIGHT: str = "height"
56
+ WIDTH: str = "width"
57
+ SPLIT: str = "split"
58
+ TAGS: str = "tags"
59
+
60
+ CLASSIFICATIONS: str = "classifications"
61
+ BBOXES: str = "bboxes"
62
+ BITMASKS: str = "bitmasks"
63
+ POLYGONS: str = "polygons"
64
+
65
+ STORAGE_FORMAT: str = "storage_format" # E.g. "image", "video", "zip"
66
+ COLLECTION_INDEX: str = "collection_index"
67
+ COLLECTION_ID: str = "collection_id"
68
+ REMOTE_PATH: str = "remote_path" # Path to the file in remote storage, e.g. S3
69
+ SAMPLE_INDEX: str = "sample_index"
70
+
71
+ ATTRIBUTION: str = "attribution" # Attribution for the sample (image/video), e.g. creator, license, source, etc.
72
+ META: str = "meta"
73
+ DATASET_NAME: str = "dataset_name"
74
+
75
+
76
+ class StorageFormat:
77
+ IMAGE: str = "image"
78
+ VIDEO: str = "video"
79
+ ZIP: str = "zip"
80
+
81
+
82
+ class SplitName:
83
+ TRAIN: str = "train"
84
+ VAL: str = "validation"
85
+ TEST: str = "test"
86
+ UNDEFINED: str = "UNDEFINED"
87
+
88
+ @staticmethod
89
+ def valid_splits() -> List[str]:
90
+ return [SplitName.TRAIN, SplitName.VAL, SplitName.TEST]
91
+
92
+ @staticmethod
93
+ def all_split_names() -> List[str]:
94
+ return [*SplitName.valid_splits(), SplitName.UNDEFINED]
95
+
96
+
97
+ class DatasetVariant(Enum):
98
+ DUMP = "dump"
99
+ SAMPLE = "sample"
100
+ HIDDEN = "hidden"
101
+
102
+
103
+ class AwsCredentials(BaseModel):
104
+ access_key: str
105
+ secret_key: str
106
+ session_token: str
107
+ region: Optional[str]
108
+
109
+ def aws_credentials(self) -> Dict[str, str]:
110
+ """
111
+ Returns the AWS credentials as a dictionary.
112
+ """
113
+ environment_vars = {
114
+ "AWS_ACCESS_KEY_ID": self.access_key,
115
+ "AWS_SECRET_ACCESS_KEY": self.secret_key,
116
+ "AWS_SESSION_TOKEN": self.session_token,
117
+ }
118
+ if self.region:
119
+ environment_vars["AWS_REGION"] = self.region
120
+
121
+ return environment_vars
122
+
123
+ @staticmethod
124
+ def from_session(session: boto3.Session) -> "AwsCredentials":
125
+ """
126
+ Creates AwsCredentials from a Boto3 session.
127
+ """
128
+ frozen_credentials = session.get_credentials().get_frozen_credentials()
129
+ return AwsCredentials(
130
+ access_key=frozen_credentials.access_key,
131
+ secret_key=frozen_credentials.secret_key,
132
+ session_token=frozen_credentials.token,
133
+ region=session.region_name,
134
+ )
135
+
136
+
137
+ ARN_PREFIX = "arn:aws:s3:::"
138
+
139
+
140
+ class ResourceCredentials(AwsCredentials):
141
+ s3_arn: str
142
+
143
+ @staticmethod
144
+ def fix_naming(payload: Dict[str, str]) -> "ResourceCredentials":
145
+ """
146
+ The endpoint returns a payload with a key called 's3_path', but it
147
+ is actually an ARN path (starts with arn:aws:s3:::). This method renames it to 's3_arn' for consistency.
148
+ """
149
+ if "s3_path" in payload and payload["s3_path"].startswith(ARN_PREFIX):
150
+ payload["s3_arn"] = payload.pop("s3_path")
151
+
152
+ if "region" not in payload:
153
+ payload["region"] = "eu-west-1"
154
+ return ResourceCredentials(**payload)
155
+
156
+ @field_validator("s3_arn")
157
+ @classmethod
158
+ def validate_s3_arn(cls, value: str) -> str:
159
+ """Validate s3_arn to ensure it starts with 'arn:aws:s3:::'"""
160
+ if not value.startswith("arn:aws:s3:::"):
161
+ raise ValueError(f"Invalid S3 ARN: {value}. It should start with 'arn:aws:s3:::'")
162
+ return value
163
+
164
+ def s3_path(self) -> str:
165
+ """
166
+ Extracts the S3 path from the ARN.
167
+ Example: arn:aws:s3:::my-bucket/my-prefix -> my-bucket/my-prefix
168
+ """
169
+ return self.s3_arn[len(ARN_PREFIX) :]
170
+
171
+ def s3_uri(self) -> str:
172
+ """
173
+ Converts the S3 ARN to a URI format.
174
+ Example: arn:aws:s3:::my-bucket/my-prefix -> s3://my-bucket/my-prefix
175
+ """
176
+ return f"s3://{self.s3_path()}"
177
+
178
+ def bucket_name(self) -> str:
179
+ """
180
+ Extracts the bucket name from the S3 ARN.
181
+ Example: arn:aws:s3:::my-bucket/my-prefix -> my-bucket
182
+ """
183
+ return self.s3_path().split("/")[0]
184
+
185
+ def object_key(self) -> str:
186
+ """
187
+ Extracts the object key from the S3 ARN.
188
+ Example: arn:aws:s3:::my-bucket/my-prefix -> my-prefix
189
+ """
190
+ return "/".join(self.s3_path().split("/")[1:])
@@ -14,10 +14,10 @@ from pydantic import BaseModel, ConfigDict, field_validator
14
14
  from cli.config import Config
15
15
  from hafnia.dataset import primitives
16
16
  from hafnia.dataset.dataset_names import (
17
- ColumnName,
18
17
  DatasetVariant,
19
18
  DeploymentStage,
20
- FieldName,
19
+ PrimitiveField,
20
+ SampleField,
21
21
  SplitName,
22
22
  )
23
23
  from hafnia.dataset.hafnia_dataset import Attribution, HafniaDataset, Sample, TaskInfo
@@ -193,7 +193,7 @@ class Annotations(BaseModel):
193
193
  in gallery images on the dataset detail page.
194
194
  """
195
195
 
196
- objects: Optional[List[Bbox]] = None
196
+ bboxes: Optional[List[Bbox]] = None
197
197
  classifications: Optional[List[Classification]] = None
198
198
  polygons: Optional[List[Polygon]] = None
199
199
  bitmasks: Optional[List[Bitmask]] = None
@@ -210,13 +210,15 @@ class DatasetImageMetadata(BaseModel):
210
210
  @classmethod
211
211
  def from_sample(cls, sample: Sample) -> "DatasetImageMetadata":
212
212
  sample = sample.model_copy(deep=True)
213
+ if sample.file_path is None:
214
+ raise ValueError("Sample has no file_path defined.")
213
215
  sample.file_path = "/".join(Path(sample.file_path).parts[-3:])
214
216
  metadata = {}
215
217
  metadata_field_names = [
216
- ColumnName.FILE_PATH,
217
- ColumnName.HEIGHT,
218
- ColumnName.WIDTH,
219
- ColumnName.SPLIT,
218
+ SampleField.FILE_PATH,
219
+ SampleField.HEIGHT,
220
+ SampleField.WIDTH,
221
+ SampleField.SPLIT,
220
222
  ]
221
223
  for field_name in metadata_field_names:
222
224
  if hasattr(sample, field_name) and getattr(sample, field_name) is not None:
@@ -224,7 +226,7 @@ class DatasetImageMetadata(BaseModel):
224
226
 
225
227
  obj = DatasetImageMetadata(
226
228
  annotations=Annotations(
227
- objects=sample.objects,
229
+ bboxes=sample.bboxes,
228
230
  classifications=sample.classifications,
229
231
  polygons=sample.polygons,
230
232
  bitmasks=sample.bitmasks,
@@ -343,13 +345,13 @@ def calculate_distribution_values(
343
345
  classifications = dataset_split.select(pl.col(classification_column).explode())
344
346
  classifications = classifications.filter(pl.col(classification_column).is_not_null()).unnest(classification_column)
345
347
  classifications = classifications.filter(
346
- pl.col(FieldName.TASK_NAME).is_in([task.name for task in distribution_tasks])
348
+ pl.col(PrimitiveField.TASK_NAME).is_in([task.name for task in distribution_tasks])
347
349
  )
348
350
  dist_values = []
349
- for (task_name,), task_group in classifications.group_by(FieldName.TASK_NAME):
351
+ for (task_name,), task_group in classifications.group_by(PrimitiveField.TASK_NAME):
350
352
  distribution_type = DbDistributionType(name=task_name)
351
353
  n_annotated_total = len(task_group)
352
- for (class_name,), class_group in task_group.group_by(FieldName.CLASS_NAME):
354
+ for (class_name,), class_group in task_group.group_by(PrimitiveField.CLASS_NAME):
353
355
  class_count = len(class_group)
354
356
 
355
357
  dist_values.append(
@@ -383,6 +385,7 @@ def dataset_info_from_dataset(
383
385
  path_hidden: Optional[Path],
384
386
  path_gallery_images: Optional[Path] = None,
385
387
  gallery_image_names: Optional[List[str]] = None,
388
+ distribution_task_names: Optional[List[TaskInfo]] = None,
386
389
  ) -> DbDataset:
387
390
  dataset_variants = []
388
391
  dataset_reports = []
@@ -427,13 +430,15 @@ def dataset_info_from_dataset(
427
430
  )
428
431
  )
429
432
 
433
+ distribution_task_names = distribution_task_names or []
434
+ distribution_tasks = [t for t in dataset.info.tasks if t.name in distribution_task_names]
430
435
  for split_name in SplitChoices:
431
436
  split_names = SPLIT_CHOICE_MAPPING[split_name]
432
- dataset_split = dataset_variant.samples.filter(pl.col(ColumnName.SPLIT).is_in(split_names))
437
+ dataset_split = dataset_variant.samples.filter(pl.col(SampleField.SPLIT).is_in(split_names))
433
438
 
434
439
  distribution_values = calculate_distribution_values(
435
440
  dataset_split=dataset_split,
436
- distribution_tasks=dataset.info.distributions,
441
+ distribution_tasks=distribution_tasks,
437
442
  )
438
443
  report = DbSplitAnnotationsReport(
439
444
  variant_type=VARIANT_TYPE_MAPPING[variant_type], # type: ignore[index]
@@ -461,7 +466,7 @@ def dataset_info_from_dataset(
461
466
 
462
467
  annotation_type = DbAnnotationType(name=AnnotationType.ObjectDetection.value)
463
468
  for (class_name, task_name), class_group in df_per_instance.group_by(
464
- FieldName.CLASS_NAME, FieldName.TASK_NAME
469
+ PrimitiveField.CLASS_NAME, PrimitiveField.TASK_NAME
465
470
  ):
466
471
  if class_name is None:
467
472
  continue
@@ -473,10 +478,10 @@ def dataset_info_from_dataset(
473
478
  annotation_type=annotation_type,
474
479
  task_name=task_name,
475
480
  ),
476
- unique_obj_ids=class_group[FieldName.OBJECT_ID].n_unique(),
481
+ unique_obj_ids=class_group[PrimitiveField.OBJECT_ID].n_unique(),
477
482
  obj_instances=len(class_group),
478
483
  annotation_type=[annotation_type],
479
- images_with_obj=class_group[ColumnName.SAMPLE_INDEX].n_unique(),
484
+ images_with_obj=class_group[SampleField.SAMPLE_INDEX].n_unique(),
480
485
  area_avg_ratio=class_group["area"].mean(),
481
486
  area_min_ratio=class_group["area"].min(),
482
487
  area_max_ratio=class_group["area"].max(),
@@ -495,7 +500,7 @@ def dataset_info_from_dataset(
495
500
  width_avg_px=class_group["width_px"].mean(),
496
501
  width_min_px=int(class_group["width_px"].min()),
497
502
  width_max_px=int(class_group["width_px"].max()),
498
- average_count_per_image=len(class_group) / class_group[ColumnName.SAMPLE_INDEX].n_unique(),
503
+ average_count_per_image=len(class_group) / class_group[SampleField.SAMPLE_INDEX].n_unique(),
499
504
  )
500
505
  )
501
506
 
@@ -509,13 +514,13 @@ def dataset_info_from_dataset(
509
514
 
510
515
  # Include only classification tasks that are defined in the dataset info
511
516
  classification_df = classification_df.filter(
512
- pl.col(FieldName.TASK_NAME).is_in(classification_tasks)
517
+ pl.col(PrimitiveField.TASK_NAME).is_in(classification_tasks)
513
518
  )
514
519
 
515
520
  for (
516
521
  task_name,
517
522
  class_name,
518
- ), class_group in classification_df.group_by(FieldName.TASK_NAME, FieldName.CLASS_NAME):
523
+ ), class_group in classification_df.group_by(PrimitiveField.TASK_NAME, PrimitiveField.CLASS_NAME):
519
524
  if class_name is None:
520
525
  continue
521
526
  if task_name == Classification.default_task_name():
@@ -544,7 +549,7 @@ def dataset_info_from_dataset(
544
549
  if has_primitive(dataset_split, PrimitiveType=Bitmask):
545
550
  col_name = Bitmask.column_name()
546
551
  drop_columns = [col for col in primitive_columns if col != col_name]
547
- drop_columns.append(FieldName.META)
552
+ drop_columns.append(PrimitiveField.META)
548
553
 
549
554
  df_per_instance = table_transformations.create_primitive_table(
550
555
  dataset_split, PrimitiveType=Bitmask, keep_sample_data=True
@@ -562,7 +567,7 @@ def dataset_info_from_dataset(
562
567
 
563
568
  annotation_type = DbAnnotationType(name=AnnotationType.InstanceSegmentation)
564
569
  for (class_name, task_name), class_group in df_per_instance.group_by(
565
- FieldName.CLASS_NAME, FieldName.TASK_NAME
570
+ PrimitiveField.CLASS_NAME, PrimitiveField.TASK_NAME
566
571
  ):
567
572
  if class_name is None:
568
573
  continue
@@ -574,11 +579,11 @@ def dataset_info_from_dataset(
574
579
  annotation_type=annotation_type,
575
580
  task_name=task_name,
576
581
  ),
577
- unique_obj_ids=class_group[FieldName.OBJECT_ID].n_unique(),
582
+ unique_obj_ids=class_group[PrimitiveField.OBJECT_ID].n_unique(),
578
583
  obj_instances=len(class_group),
579
584
  annotation_type=[annotation_type],
580
- average_count_per_image=len(class_group) / class_group[ColumnName.SAMPLE_INDEX].n_unique(),
581
- images_with_obj=class_group[ColumnName.SAMPLE_INDEX].n_unique(),
585
+ average_count_per_image=len(class_group) / class_group[SampleField.SAMPLE_INDEX].n_unique(),
586
+ images_with_obj=class_group[SampleField.SAMPLE_INDEX].n_unique(),
582
587
  area_avg_ratio=class_group["area"].mean(),
583
588
  area_min_ratio=class_group["area"].min(),
584
589
  area_max_ratio=class_group["area"].max(),
@@ -646,7 +651,7 @@ def create_gallery_images(
646
651
  path_gallery_images.mkdir(parents=True, exist_ok=True)
647
652
  COL_IMAGE_NAME = "image_name"
648
653
  samples = dataset.samples.with_columns(
649
- dataset.samples[ColumnName.FILE_PATH].str.split("/").list.last().alias(COL_IMAGE_NAME)
654
+ dataset.samples[SampleField.FILE_PATH].str.split("/").list.last().alias(COL_IMAGE_NAME)
650
655
  )
651
656
  gallery_samples = samples.filter(pl.col(COL_IMAGE_NAME).is_in(gallery_image_names))
652
657
 
@@ -1,23 +1,27 @@
1
1
  import shutil
2
2
  from pathlib import Path
3
- from typing import List, Optional
3
+ from typing import TYPE_CHECKING, List, Optional
4
4
 
5
5
  import more_itertools
6
6
  import polars as pl
7
7
  from PIL import Image
8
8
  from rich.progress import track
9
9
 
10
- from hafnia.dataset.dataset_names import ColumnName, FieldName
11
- from hafnia.dataset.hafnia_dataset import DatasetInfo, HafniaDataset, Sample, TaskInfo
10
+ from hafnia.dataset.dataset_names import PrimitiveField, SampleField
12
11
  from hafnia.dataset.primitives import Classification
13
12
  from hafnia.utils import is_image_file
14
13
 
14
+ if TYPE_CHECKING:
15
+ from hafnia.dataset.hafnia_dataset import HafniaDataset
15
16
 
16
- def import_image_classification_directory_tree(
17
+
18
+ def from_image_classification_folder(
17
19
  path_folder: Path,
18
20
  split: str,
19
21
  n_samples: Optional[int] = None,
20
- ) -> HafniaDataset:
22
+ ) -> "HafniaDataset":
23
+ from hafnia.dataset.hafnia_dataset import DatasetInfo, HafniaDataset, Sample, TaskInfo
24
+
21
25
  class_folder_paths = [path for path in path_folder.iterdir() if path.is_dir()]
22
26
  class_names = sorted([folder.name for folder in class_folder_paths]) # Sort for determinism
23
27
 
@@ -62,8 +66,8 @@ def import_image_classification_directory_tree(
62
66
  return hafnia_dataset
63
67
 
64
68
 
65
- def export_image_classification_directory_tree(
66
- dataset: HafniaDataset,
69
+ def to_image_classification_folder(
70
+ dataset: "HafniaDataset",
67
71
  path_output: Path,
68
72
  task_name: Optional[str] = None,
69
73
  clean_folder: bool = False,
@@ -72,7 +76,7 @@ def export_image_classification_directory_tree(
72
76
 
73
77
  samples = dataset.samples.with_columns(
74
78
  pl.col(task.primitive.column_name())
75
- .list.filter(pl.element().struct.field(FieldName.TASK_NAME) == task.name)
79
+ .list.filter(pl.element().struct.field(PrimitiveField.TASK_NAME) == task.name)
76
80
  .alias(task.primitive.column_name())
77
81
  )
78
82
 
@@ -95,11 +99,11 @@ def export_image_classification_directory_tree(
95
99
  if len(classifications) != 1:
96
100
  raise ValueError("Each sample should have exactly one classification.")
97
101
  classification = classifications[0]
98
- class_name = classification[FieldName.CLASS_NAME].replace("/", "_") # Avoid issues with subfolders
102
+ class_name = classification[PrimitiveField.CLASS_NAME].replace("/", "_") # Avoid issues with subfolders
99
103
  path_class_folder = path_output / class_name
100
104
  path_class_folder.mkdir(parents=True, exist_ok=True)
101
105
 
102
- path_image_org = Path(sample_dict[ColumnName.FILE_PATH])
106
+ path_image_org = Path(sample_dict[SampleField.FILE_PATH])
103
107
  path_image_new = path_class_folder / path_image_org.name
104
108
  shutil.copy2(path_image_org, path_image_new)
105
109