hafnia 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. {hafnia-0.2.0 → hafnia-0.2.2}/.github/workflows/ci_cd.yaml +1 -1
  2. {hafnia-0.2.0 → hafnia-0.2.2}/PKG-INFO +17 -20
  3. {hafnia-0.2.0 → hafnia-0.2.2}/README.md +16 -19
  4. hafnia-0.2.2/examples/example_dataset_recipe.py +165 -0
  5. {hafnia-0.2.0 → hafnia-0.2.2}/examples/example_hafnia_dataset.py +6 -5
  6. {hafnia-0.2.0 → hafnia-0.2.2}/examples/example_torchvision_dataloader.py +7 -2
  7. {hafnia-0.2.0 → hafnia-0.2.2}/pyproject.toml +1 -1
  8. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/config.py +17 -4
  9. hafnia-0.2.2/src/hafnia/data/factory.py +23 -0
  10. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/dataset_names.py +2 -1
  11. hafnia-0.2.2/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +327 -0
  12. hafnia-0.2.2/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +53 -0
  13. hafnia-0.2.2/src/hafnia/dataset/dataset_recipe/recipe_types.py +140 -0
  14. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/hafnia_dataset.py +201 -32
  15. hafnia-0.2.2/src/hafnia/dataset/operations/dataset_stats.py +15 -0
  16. hafnia-0.2.2/src/hafnia/dataset/operations/dataset_transformations.py +82 -0
  17. {hafnia-0.2.0/src/hafnia/dataset → hafnia-0.2.2/src/hafnia/dataset/operations}/table_transformations.py +1 -1
  18. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/experiment/hafnia_logger.py +5 -5
  19. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/platform/datasets.py +30 -15
  20. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/utils.py +20 -1
  21. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/visualizations/image_visualizations.py +1 -1
  22. {hafnia-0.2.0 → hafnia-0.2.2}/tests/conftest.py +1 -1
  23. hafnia-0.2.2/tests/dataset/dataset_recipe/test_dataset_recipe_helpers.py +120 -0
  24. hafnia-0.2.2/tests/dataset/dataset_recipe/test_dataset_recipes.py +260 -0
  25. hafnia-0.2.2/tests/dataset/dataset_recipe/test_recipe_transformations.py +224 -0
  26. hafnia-0.2.2/tests/dataset/operations/test_dataset_transformations.py +0 -0
  27. {hafnia-0.2.0/tests/dataset → hafnia-0.2.2/tests/dataset/operations}/test_table_transformations.py +3 -3
  28. {hafnia-0.2.0 → hafnia-0.2.2}/tests/dataset/test_hafnia_dataset.py +35 -4
  29. {hafnia-0.2.0/src/hafnia → hafnia-0.2.2/tests}/helper_testing.py +49 -4
  30. {hafnia-0.2.0 → hafnia-0.2.2}/tests/test_check_example_scripts.py +3 -2
  31. {hafnia-0.2.0 → hafnia-0.2.2}/tests/test_cli.py +49 -1
  32. {hafnia-0.2.0 → hafnia-0.2.2}/tests/test_samples.py +2 -2
  33. {hafnia-0.2.0 → hafnia-0.2.2}/tests/test_visualizations.py +2 -1
  34. {hafnia-0.2.0 → hafnia-0.2.2}/uv.lock +1 -1
  35. hafnia-0.2.0/src/hafnia/data/factory.py +0 -20
  36. hafnia-0.2.0/src/hafnia/dataset/dataset_transformation.py +0 -187
  37. {hafnia-0.2.0 → hafnia-0.2.2}/.devcontainer/devcontainer.json +0 -0
  38. {hafnia-0.2.0 → hafnia-0.2.2}/.devcontainer/hooks/post_create +0 -0
  39. {hafnia-0.2.0 → hafnia-0.2.2}/.github/dependabot.yaml +0 -0
  40. {hafnia-0.2.0 → hafnia-0.2.2}/.github/workflows/Dockerfile +0 -0
  41. {hafnia-0.2.0 → hafnia-0.2.2}/.github/workflows/build.yaml +0 -0
  42. {hafnia-0.2.0 → hafnia-0.2.2}/.github/workflows/check_release.yaml +0 -0
  43. {hafnia-0.2.0 → hafnia-0.2.2}/.github/workflows/lint.yaml +0 -0
  44. {hafnia-0.2.0 → hafnia-0.2.2}/.github/workflows/publish_docker.yaml +0 -0
  45. {hafnia-0.2.0 → hafnia-0.2.2}/.github/workflows/publish_pypi.yaml +0 -0
  46. {hafnia-0.2.0 → hafnia-0.2.2}/.github/workflows/tests.yaml +0 -0
  47. {hafnia-0.2.0 → hafnia-0.2.2}/.gitignore +0 -0
  48. {hafnia-0.2.0 → hafnia-0.2.2}/.pre-commit-config.yaml +0 -0
  49. {hafnia-0.2.0 → hafnia-0.2.2}/.python-version +0 -0
  50. {hafnia-0.2.0 → hafnia-0.2.2}/.vscode/extensions.json +0 -0
  51. {hafnia-0.2.0 → hafnia-0.2.2}/.vscode/launch.json +0 -0
  52. {hafnia-0.2.0 → hafnia-0.2.2}/.vscode/settings.json +0 -0
  53. {hafnia-0.2.0 → hafnia-0.2.2}/LICENSE +0 -0
  54. {hafnia-0.2.0 → hafnia-0.2.2}/docs/cli.md +0 -0
  55. {hafnia-0.2.0 → hafnia-0.2.2}/docs/release.md +0 -0
  56. {hafnia-0.2.0 → hafnia-0.2.2}/examples/example_logger.py +0 -0
  57. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/__init__.py +0 -0
  58. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/__main__.py +0 -0
  59. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/consts.py +0 -0
  60. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/dataset_cmds.py +0 -0
  61. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/experiment_cmds.py +0 -0
  62. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/profile_cmds.py +0 -0
  63. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/recipe_cmds.py +0 -0
  64. {hafnia-0.2.0 → hafnia-0.2.2}/src/cli/runc_cmds.py +0 -0
  65. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/__init__.py +0 -0
  66. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/data/__init__.py +0 -0
  67. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/dataset_helpers.py +0 -0
  68. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/dataset_upload_helper.py +0 -0
  69. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/__init__.py +0 -0
  70. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/bbox.py +0 -0
  71. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/bitmask.py +0 -0
  72. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/classification.py +0 -0
  73. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/point.py +0 -0
  74. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/polygon.py +0 -0
  75. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/primitive.py +0 -0
  76. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/segmentation.py +0 -0
  77. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/dataset/primitives/utils.py +0 -0
  78. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/experiment/__init__.py +0 -0
  79. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/http.py +0 -0
  80. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/log.py +0 -0
  81. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/platform/__init__.py +0 -0
  82. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/platform/builder.py +0 -0
  83. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/platform/download.py +0 -0
  84. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/platform/experiment.py +0 -0
  85. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/torch_helpers.py +0 -0
  86. {hafnia-0.2.0 → hafnia-0.2.2}/src/hafnia/visualizations/colors.py +0 -0
  87. /hafnia-0.2.0/tests/dataset/test_dataset_transformations.py → /hafnia-0.2.2/tests/__init__.py +0 -0
  88. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  89. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  90. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  91. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  92. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  93. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  94. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  95. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  96. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  97. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  98. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  99. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  100. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  101. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  102. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  103. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  104. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_visualizations/test_blur_anonymization[coco-2017].png +0 -0
  105. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_visualizations/test_blur_anonymization[tiny-dataset].png +0 -0
  106. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_visualizations/test_draw_annotations[coco-2017].png +0 -0
  107. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_visualizations/test_draw_annotations[tiny-dataset].png +0 -0
  108. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_visualizations/test_mask_region[coco-2017].png +0 -0
  109. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/expected_images/test_visualizations/test_mask_region[tiny-dataset].png +0 -0
  110. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/coco-2017/annotations.jsonl +0 -0
  111. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/coco-2017/annotations.parquet +0 -0
  112. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/coco-2017/data/182a2c0a3ce312cf.jpg +0 -0
  113. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/coco-2017/data/4e95c6eb6209880a.jpg +0 -0
  114. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/coco-2017/data/cf86c7a23edb55ce.jpg +0 -0
  115. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/coco-2017/dataset_info.json +0 -0
  116. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/tiny-dataset/annotations.jsonl +0 -0
  117. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/tiny-dataset/annotations.parquet +0 -0
  118. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/tiny-dataset/data/222bbd5721a8a86e.png +0 -0
  119. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/tiny-dataset/data/3251d85443622e4c.png +0 -0
  120. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/tiny-dataset/data/3657ababa44af9b6.png +0 -0
  121. {hafnia-0.2.0 → hafnia-0.2.2}/tests/data/micro_test_datasets/tiny-dataset/dataset_info.json +0 -0
  122. {hafnia-0.2.0 → hafnia-0.2.2}/tests/dataset/test_colors.py +0 -0
  123. {hafnia-0.2.0 → hafnia-0.2.2}/tests/dataset/test_dataset_helpers.py +0 -0
  124. {hafnia-0.2.0 → hafnia-0.2.2}/tests/dataset/test_shape_primitives.py +0 -0
  125. {hafnia-0.2.0 → hafnia-0.2.2}/tests/test_builder.py +0 -0
  126. {hafnia-0.2.0 → hafnia-0.2.2}/tests/test_hafnia_logger.py +0 -0
  127. {hafnia-0.2.0 → hafnia-0.2.2}/tests/test_utils.py +0 -0
@@ -21,7 +21,7 @@ jobs:
  steps:
  - uses: actions/checkout@v4.2.2
  - name: Run Trivy vulnerability scanner
- uses: aquasecurity/trivy-action@0.31.0
+ uses: aquasecurity/trivy-action@0.32.0
  with:
  scan-type: 'fs'
  scan-ref: '.'
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.2.0
+ Version: 0.2.2
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
@@ -147,22 +147,20 @@ The `HafniaDataset` object provides a convenient way to interact with the datase
  creating splits, accessing samples, printing statistics, saving to and loading from disk.
 
  In essence, the `HafniaDataset` class contains `dataset.info` with dataset information
- and `dataset.table` with annotations as a polars DataFrame
+ and `dataset.samples` with annotations as a polars DataFrame
 
  ```python
  # Annotations are stored in a polars DataFrame
- print(dataset.table.head(2))
+ print(dataset.samples.head(2))
  shape: (2, 14)
- ┌──────────┬────────────────────────────────┬────────┬───────┬───┬───────────────────────────────┬──────────┬──────────┬───────────────────────────────┐
- │ image_id ┆ file_name                      ┆ height ┆ width ┆ … ┆ objects                       ┆ bitmasks ┆ polygons ┆ meta                          │
- │ ---      ┆ ---                            ┆ ---    ┆ ---   ┆   ┆ ---                           ┆ ---      ┆ ---      ┆ ---                           │
- │ str      ┆ str                            ┆ i64    ┆ i64   ┆   ┆ list[struct[12]]              ┆ null     ┆ null     ┆ struct[5]                     │
- ╞══════════╪════════════════════════════════╪════════╪═══════╪═══╪═══════════════════════════════╪══════════╪══════════╪═══════════════════════════════╡
- │ 7800     ┆ /home/ubuntu/code/hafnia/.data ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23," ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30: │
- │          ┆ …                              ┆        ┆       ┆   ┆ V…                            ┆          ┆          ┆ 0…                            │
- │ 7900     ┆ /home/ubuntu/code/hafnia/.data ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0 ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30: │
- │          ┆ …                              ┆        ┆       ┆   ┆ .…                            ┆          ┆          ┆ 0…                            │
- └──────────┴────────────────────────────────┴────────┴───────┴───┴───────────────────────────────┴──────────┴──────────┴───────────────────────────────┘
+ ┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
+ │ sample_index ┆ file_name                       ┆ height ┆ width ┆ … ┆ objects                         ┆ bitmasks ┆ polygons ┆ meta                            │
+ │ ---          ┆ ---                             ┆ ---    ┆ ---   ┆   ┆ ---                             ┆ ---      ┆ ---      ┆ ---                             │
+ │ u32          ┆ str                             ┆ i64    ┆ i64   ┆   ┆ list[struct[11]]                ┆ null     ┆ null     ┆ struct[5]                       │
+ ╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
+ │ 0            ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23,"V… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+ │ 100          ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0.… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+ └──────────────┴─────────────────────────────────┴────────┴───────┴───┴─────────────────────────────────┴──────────┴──────────┴─────────────────────────────────┘
  ```
 
  ```python
@@ -235,16 +233,15 @@ It also contain annotations as primitive types such as `Bbox`, `Classification`.
  ```python
  rich.print(sample)
  Sample(
-     image_id='7800',
-     file_name='data/video_0026a86b-2f43-49f2-a17c-59244d10a585_1fps_mp4_frame_0
- 0000.png',
+     sample_index=120,
+     file_name='/home/ubuntu/code/hafnia/.data/datasets/midwest-vehicle-detection/data/343403325f27e390.png',
      height=1080,
      width=1920,
-     split='test',
+     split='train',
      is_sample=True,
-     frame_number=None,
-     video_name=None,
-     remote_path=None,
+     collection_index=None,
+     collection_id=None,
+     remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
      classifications=[
          Classification(
              class_name='Clear',
@@ -121,22 +121,20 @@ The `HafniaDataset` object provides a convenient way to interact with the datase
  creating splits, accessing samples, printing statistics, saving to and loading from disk.
 
  In essence, the `HafniaDataset` class contains `dataset.info` with dataset information
- and `dataset.table` with annotations as a polars DataFrame
+ and `dataset.samples` with annotations as a polars DataFrame
 
  ```python
  # Annotations are stored in a polars DataFrame
- print(dataset.table.head(2))
+ print(dataset.samples.head(2))
  shape: (2, 14)
- ┌──────────┬────────────────────────────────┬────────┬───────┬───┬───────────────────────────────┬──────────┬──────────┬───────────────────────────────┐
- │ image_id ┆ file_name                      ┆ height ┆ width ┆ … ┆ objects                       ┆ bitmasks ┆ polygons ┆ meta                          │
- │ ---      ┆ ---                            ┆ ---    ┆ ---   ┆   ┆ ---                           ┆ ---      ┆ ---      ┆ ---                           │
- │ str      ┆ str                            ┆ i64    ┆ i64   ┆   ┆ list[struct[12]]              ┆ null     ┆ null     ┆ struct[5]                     │
- ╞══════════╪════════════════════════════════╪════════╪═══════╪═══╪═══════════════════════════════╪══════════╪══════════╪═══════════════════════════════╡
- │ 7800     ┆ /home/ubuntu/code/hafnia/.data ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23," ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30: │
- │          ┆ …                              ┆        ┆       ┆   ┆ V…                            ┆          ┆          ┆ 0…                            │
- │ 7900     ┆ /home/ubuntu/code/hafnia/.data ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0 ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30: │
- │          ┆ …                              ┆        ┆       ┆   ┆ .…                            ┆          ┆          ┆ 0…                            │
- └──────────┴────────────────────────────────┴────────┴───────┴───┴───────────────────────────────┴──────────┴──────────┴───────────────────────────────┘
+ ┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
+ │ sample_index ┆ file_name                       ┆ height ┆ width ┆ … ┆ objects                         ┆ bitmasks ┆ polygons ┆ meta                            │
+ │ ---          ┆ ---                             ┆ ---    ┆ ---   ┆   ┆ ---                             ┆ ---      ┆ ---      ┆ ---                             │
+ │ u32          ┆ str                             ┆ i64    ┆ i64   ┆   ┆ list[struct[11]]                ┆ null     ┆ null     ┆ struct[5]                       │
+ ╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
+ │ 0            ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23,"V… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+ │ 100          ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0.… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+ └──────────────┴─────────────────────────────────┴────────┴───────┴───┴─────────────────────────────────┴──────────┴──────────┴─────────────────────────────────┘
  ```
 
  ```python
@@ -209,16 +207,15 @@ It also contain annotations as primitive types such as `Bbox`, `Classification`.
  ```python
  rich.print(sample)
  Sample(
-     image_id='7800',
-     file_name='data/video_0026a86b-2f43-49f2-a17c-59244d10a585_1fps_mp4_frame_0
- 0000.png',
+     sample_index=120,
+     file_name='/home/ubuntu/code/hafnia/.data/datasets/midwest-vehicle-detection/data/343403325f27e390.png',
      height=1080,
      width=1920,
-     split='test',
+     split='train',
      is_sample=True,
-     frame_number=None,
-     video_name=None,
-     remote_path=None,
+     collection_index=None,
+     collection_id=None,
+     remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
      classifications=[
          Classification(
              class_name='Clear',
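
The two hunks above rename `dataset.table` to `dataset.samples` and restructure the `Sample` fields. For orientation, a short editor's sketch (not part of the diff) of the renamed attribute in use; it assumes hafnia 0.2.2 with the `midwest-vehicle-detection` dataset available locally or via the platform:

```python
# Editor's sketch: annotations now live in `dataset.samples` (previously `dataset.table`).
import rich

from hafnia.data.factory import load_dataset

dataset = load_dataset("midwest-vehicle-detection")
print(dataset.samples.head(2))  # polars DataFrame of per-sample annotations
rich.print(dataset[0])          # indexing a dataset returns a sample dictionary
```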
@@ -0,0 +1,165 @@
+ from pathlib import Path
+
+ from rich import print as rprint
+
+ from hafnia.data.factory import load_dataset
+ from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe
+ from hafnia.dataset.dataset_recipe.recipe_transforms import (
+     SelectSamples,
+     Shuffle,
+     SplitsByRatios,
+ )
+ from hafnia.dataset.hafnia_dataset import HafniaDataset
+
+ ### Introducing DatasetRecipe ###
+ # A DatasetRecipe is a recipe for the dataset you want to create.
+ # The recipe itself is not executed - this is just a specification of the dataset you want!
+
+ # A DatasetRecipe is an important concept in Hafnia as it allows you to merge multiple datasets
+ # and transformations in a single recipe. This is especially useful for Training as a Service (TaaS)
+ # where you need to define the dataset you want as a configuration and load it in the TaaS platform.
+
+ # The 'DatasetRecipe' interface is similar to the 'HafniaDataset' interface.
+ # To demonstrate, we will first create a dataset with the regular 'HafniaDataset' interface.
+ # This line will get the "mnist" dataset, shuffle it, and select 20 samples.
+ dataset = HafniaDataset.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+
+ # Now the same dataset is created using the 'DatasetRecipe' interface.
+ dataset_recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+ dataset = dataset_recipe.build()
+ # Note that the interface is similar, but to actually create the dataset you need to call `build()` on the recipe.
+
+ # An important feature of a 'DatasetRecipe' is that the recipe itself - and not the dataset - can be saved as a file
+ # and loaded from file. Meaning you can easily save, share, load and build the dataset later or in a different
+ # environment.
+ # In programming language, the recipe can be serialized to JSON and deserialized back to the original python object
+ # recipe.
+ # For TaaS, this is the only way to include multiple datasets during training.
+
+ # This is how it looks like in practice:
+ # 1) Save the dataset recipe to a file
+ path_json = Path(".data/tmp/dataset_recipe.json")
+ dataset_recipe.as_json_file(path_json)
+
+ # 2) The recipe can be loaded from the file
+ dataset_recipe_again = DatasetRecipe.from_json_file(path_json)
+
+ # We can verify that the loaded recipe is the same as the original recipe.
+ assert dataset_recipe_again == dataset_recipe
+
+ # Additionally, you can get the python code for creating the same recipe.
+ dataset_recipe.as_python_code()
+
+ # Example: DatasetRecipe from Path
+ dataset_recipe = DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+
+ # Example: DatasetRecipe by merging multiple dataset recipes
+ dataset_recipe = DatasetRecipe.from_merger(
+     recipes=[
+         DatasetRecipe.from_name(name="mnist"),
+         DatasetRecipe.from_name(name="mnist"),
+     ]
+ )
+
+ # Example: Recipes can be infinitely nested and combined.
+ dataset_recipe = DatasetRecipe.from_merger(
+     recipes=[
+         DatasetRecipe.from_merger(
+             recipes=[
+                 DatasetRecipe.from_name(name="mnist"),
+                 DatasetRecipe.from_name(name="mnist"),
+             ]
+         ),
+         DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+         .select_samples(n_samples=30)
+         .splits_by_ratios(split_ratios={"train": 0.8, "val": 0.1, "test": 0.1}),
+         DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
+     ]
+ )
+
+ # Now you can build the dataset from the recipe.
+ dataset: HafniaDataset = dataset_recipe.build()
+ assert len(dataset) == 450  # 2x200 + 30 + 20
+
+ # Finally, you can print the dataset recipe to see what it contains.
+ rprint(dataset_recipe)  # as a python object
+ print(dataset_recipe.as_json_str())  # as a JSON string
+
+
+ # Example: Using the 'load_dataset' function
+ merged_dataset: HafniaDataset = load_dataset(dataset_recipe)
+ # You get a few extra things when using `load_dataset`.
+ # 1) You get the dataset directly - you don't have to call `build()` on the recipe.
+ # 2) The dataset is cached if it already exists, so you don't have to
+ #    download or rebuild the dataset on the second run.
+ # 3) You can use an implicit form of the recipe. One example of this is that you just specify
+ #    the dataset name `load_dataset("mnist")` or path `load_dataset(Path(".data/datasets/mnist"))`
+
+
+ ### DatasetRecipe Implicit Form ###
+ # Below we demonstrate the difference between implicit and explicit forms of dataset recipes.
+ # Example: Get dataset by name with implicit and explicit forms
+ dataset = load_dataset("mnist")  # Implicit form
+ dataset = load_dataset(DatasetRecipe.from_name(name="mnist"))  # Explicit form
+
+ # Example: Get dataset from path with implicit and explicit forms:
+ dataset = load_dataset(Path(".data/datasets/mnist"))  # Implicit form
+ dataset = load_dataset(DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist")))  # Explicit form
+
+ # Example: Merge datasets with implicit and explicit forms
+ dataset = load_dataset(("mnist", "mnist"))  # Implicit form
+ dataset = load_dataset(  # Explicit form
+     DatasetRecipe.from_merger(
+         recipes=[
+             DatasetRecipe.from_name(name="mnist"),
+             DatasetRecipe.from_name(name="mnist"),
+         ]
+     )
+ )
+
+ # Example: Define a dataset with transformations using implicit and explicit forms
+ dataset = load_dataset(["mnist", SelectSamples(n_samples=20), Shuffle()])  # Implicit form
+ dataset = load_dataset(DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle())  # Explicit form
+
+
+ # Example: Complex nested example with implicit vs explicit forms
+ # Implicit form of a complex dataset recipe
+ split_ratio = {"train": 0.8, "val": 0.1, "test": 0.1}
+ implicit_recipe = (
+     ("mnist", "mnist"),
+     [Path(".data/datasets/mnist"), SelectSamples(n_samples=30), SplitsByRatios(split_ratios=split_ratio)],
+     ["mnist", SelectSamples(n_samples=20), Shuffle()],
+ )
+
+ # Explicit form of the same complex dataset recipe
+ explicit_recipe = DatasetRecipe.from_merger(
+     recipes=[
+         DatasetRecipe.from_merger(
+             recipes=[
+                 DatasetRecipe.from_name(name="mnist"),
+                 DatasetRecipe.from_name(name="mnist"),
+             ]
+         ),
+         DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+         .select_samples(n_samples=30)
+         .splits_by_ratios(split_ratios=split_ratio),
+         DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
+     ]
+ )
+
+ # The implicit form uses the following rules:
+ # str: Will get a dataset by name -> In explicit form it becomes 'DatasetRecipe.from_name'
+ # Path: Will get a dataset from path -> In explicit form it becomes 'DatasetRecipe.from_path'
+ # tuple: Will merge datasets specified in the tuple -> In explicit form it becomes 'DatasetRecipe.from_merger'
+ # list: Will define a dataset followed by a list of transformations -> In explicit form it becomes chained method calls
+ # Generally, we recommend using the explicit form over the implicit form when multiple datasets and transformations are involved.
+
+
+ # To convert from implicit to explicit recipe form, you can use the `from_implicit_form` method.
+ explicit_recipe_from_implicit = DatasetRecipe.from_implicit_form(implicit_recipe)
+ rprint("Converted explicit recipe:")
+ rprint(explicit_recipe_from_implicit)
+
+ # Verify that the conversion produces the same result
+ assert explicit_recipe_from_implicit == explicit_recipe
+ rprint("✓ Conversion successful - recipes are equivalent!")
@@ -21,7 +21,7 @@ from hafnia.dataset.primitives.polygon import Polygon
 
  # Load dataset
  path_dataset = get_dataset_path("midwest-vehicle-detection")
- dataset = HafniaDataset.read_from_path(path_dataset)
+ dataset = HafniaDataset.from_path(path_dataset)
 
  # Alternatively, you can use the 'load_dataset' function
  dataset = load_dataset("midwest-vehicle-detection")
@@ -39,14 +39,15 @@ dataset.print_stats()
  # Create a dataset split for training
  dataset_train = dataset.create_split_dataset("train")
 
- # Checkout built-in transformations in 'dataset_transformation' or 'HafniaDataset'
+ # Checkout built-in transformations in 'operations/dataset_transformations' or 'HafniaDataset'
  dataset_val = dataset.create_split_dataset(SplitName.VAL)  # Use 'SplitName' to avoid magic strings
 
- small_dataset = dataset.sample(n_samples=10, seed=42)  # Sample 10 samples from the dataset
+
+ small_dataset = dataset.select_samples(n_samples=10, seed=42)  # Selects 10 samples from the dataset
  shuffled_dataset = dataset.shuffle(seed=42)  # Shuffle the dataset
 
  split_ratios = {SplitName.TRAIN: 0.8, SplitName.VAL: 0.1, SplitName.TEST: 0.1}
- new_dataset_splits = dataset.split_by_ratios(split_ratios)
+ new_dataset_splits = dataset.splits_by_ratios(split_ratios)
 
  # Write dataset to disk
  path_tmp = Path(".data/tmp")
@@ -54,7 +55,7 @@ path_dataset = path_tmp / "hafnia_dataset"
  dataset.write(path_dataset)  # --> Check that data is human readable
 
  # Load dataset from disk
- dataset_again = HafniaDataset.read_from_path(path_dataset)
+ dataset_again = HafniaDataset.from_path(path_dataset)
 
  # Access the first sample in the training split - data is stored in a dictionary
  sample_dict = dataset_train[0]
@@ -1,3 +1,5 @@
+ from pathlib import Path
+
  import torch
  import torchvision
  import torchvision.transforms.functional
@@ -43,11 +45,14 @@ if __name__ == "__main__":
      image, targets = train_dataset[0]
      visualize_image = torch_helpers.draw_image_and_targets(image=image, targets=targets)
      pil_image = torchvision.transforms.functional.to_pil_image(visualize_image)
-     pil_image.save("visualized_labels.png")
+
+     path_tmp = Path(".data/tmp")
+     path_tmp.mkdir(parents=True, exist_ok=True)
+     pil_image.save(path_tmp / "visualized_labels.png")
 
      # Create DataLoaders - using TorchVisionCollateFn
      collate_fn = torch_helpers.TorchVisionCollateFn()
-     train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
+     train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True, collate_fn=collate_fn)
 
      for images, targets in train_loader:
          print(f"Batch of images: {len(images)}")
@@ -1,6 +1,6 @@
  [project]
  name = "hafnia"
- version = "0.2.0"
+ version = "0.2.2"
  description = "Python SDK for communication with Hafnia platform."
  readme = "README.md"
  authors = [
@@ -80,7 +80,7 @@ class Config:
      def __init__(self, config_path: Optional[Path] = None) -> None:
          self.config_path = self.resolve_config_path(config_path)
          self.config_path.parent.mkdir(parents=True, exist_ok=True)
-         self.config_data = self.load_config()
+         self.config_data = Config.load_config(self.config_path)
 
      def resolve_config_path(self, path: Optional[Path] = None) -> Path:
          if path:
@@ -111,12 +111,25 @@ class Config:
          endpoint = self.config.platform_url + PLATFORM_API_MAPPING[method]
          return endpoint
 
-     def load_config(self) -> ConfigFileSchema:
+     @staticmethod
+     def load_config(config_path: Path) -> ConfigFileSchema:
          """Load configuration from file."""
-         if not self.config_path.exists():
+
+         # Environment variables has higher priority than config file
+         HAFNIA_API_KEY = os.getenv("HAFNIA_API_KEY")
+         HAFNIA_PLATFORM_URL = os.getenv("HAFNIA_PLATFORM_URL")
+         if HAFNIA_API_KEY and HAFNIA_PLATFORM_URL:
+             HAFNIA_PROFILE_NAME = os.getenv("HAFNIA_PROFILE_NAME", "default").strip()
+             cfg = ConfigFileSchema(
+                 active_profile=HAFNIA_PROFILE_NAME,
+                 profiles={HAFNIA_PROFILE_NAME: ConfigSchema(platform_url=HAFNIA_PLATFORM_URL, api_key=HAFNIA_API_KEY)},
+             )
+             return cfg
+
+         if not config_path.exists():
              return ConfigFileSchema()
          try:
-             with open(self.config_path.as_posix(), "r") as f:
+             with open(config_path.as_posix(), "r") as f:
                  data = json.load(f)
              return ConfigFileSchema(**data)
          except json.JSONDecodeError:
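
The hunk above makes `load_config` a static method and adds an environment-variable override. A hedged sketch of the resulting behaviour (editor's example, not part of the diff; credential values and the config path are placeholders, and the `cli.config` import path is assumed from `src/cli/config.py`):

```python
# Hedged sketch: with both variables set, load_config() builds a single profile from
# the environment and does not read the config file on disk.
import os
from pathlib import Path

os.environ["HAFNIA_API_KEY"] = "<api-key>"                    # placeholder
os.environ["HAFNIA_PLATFORM_URL"] = "https://<platform-url>"  # placeholder
os.environ["HAFNIA_PROFILE_NAME"] = "ci"                      # optional, defaults to "default"

from cli.config import Config  # import path assumed from src/cli/config.py

cfg = Config.load_config(Path("~/.hafnia/config.json").expanduser())  # file need not exist
print(cfg.active_profile)  # -> "ci"
```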
@@ -0,0 +1,23 @@
+ import os
+ from pathlib import Path
+ from typing import Any
+
+ from hafnia import utils
+ from hafnia.dataset.hafnia_dataset import HafniaDataset, get_or_create_dataset_path_from_recipe
+
+
+ def load_dataset(recipe: Any, force_redownload: bool = False) -> HafniaDataset:
+     """Load a dataset either from a local path or from the Hafnia platform."""
+
+     path_dataset = get_dataset_path(recipe, force_redownload=force_redownload)
+     dataset = HafniaDataset.from_path(path_dataset)
+     return dataset
+
+
+ def get_dataset_path(recipe: Any, force_redownload: bool = False) -> Path:
+     if utils.is_hafnia_cloud_job():
+         return Path(os.getenv("MDI_DATASET_DIR", "/opt/ml/input/data/training"))
+
+     path_dataset = get_or_create_dataset_path_from_recipe(recipe, force_redownload=force_redownload)
+
+     return path_dataset
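
The new `factory.load_dataset` above resolves a recipe to a local dataset folder (or to the mounted dataset directory on a Hafnia cloud job) and reads it as a `HafniaDataset`. A minimal usage sketch (editor's example, assuming hafnia 0.2.2 and access to the `mnist` dataset):

```python
# Minimal sketch: load_dataset accepts an implicit recipe (name, Path, or DatasetRecipe)
# and returns a HafniaDataset read from the resolved, cached dataset folder.
from hafnia.data.factory import get_dataset_path, load_dataset

dataset = load_dataset("mnist")              # by name; downloaded and cached on first use
print(len(dataset))                          # number of samples in the dataset

path_dataset = get_dataset_path("mnist")     # only resolve the local folder
dataset_again = load_dataset(path_dataset)   # a Path is also a valid (implicit) recipe
```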
@@ -1,11 +1,12 @@
  from enum import Enum
  from typing import List
 
+ FILENAME_RECIPE_JSON = "recipe.json"
  FILENAME_DATASET_INFO = "dataset_info.json"
  FILENAME_ANNOTATIONS_JSONL = "annotations.jsonl"
  FILENAME_ANNOTATIONS_PARQUET = "annotations.parquet"
 
- DATASET_FILENAMES = [
+ DATASET_FILENAMES_REQUIRED = [
      FILENAME_DATASET_INFO,
      FILENAME_ANNOTATIONS_JSONL,
      FILENAME_ANNOTATIONS_PARQUET,
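
The `dataset_names.py` hunk above renames `DATASET_FILENAMES` to `DATASET_FILENAMES_REQUIRED` and adds `FILENAME_RECIPE_JSON`. A small editor's sketch of how the required-files list can be used; only the constants come from the hunk, and the `has_required_files` helper is hypothetical:

```python
from pathlib import Path

# Constants as listed in the hunk above (src/hafnia/dataset/dataset_names.py, 0.2.2).
DATASET_FILENAMES_REQUIRED = [
    "dataset_info.json",
    "annotations.jsonl",
    "annotations.parquet",
]


def has_required_files(path_dataset: Path) -> bool:
    """Hypothetical helper: True if every required dataset file exists in the folder."""
    return all((path_dataset / filename).exists() for filename in DATASET_FILENAMES_REQUIRED)


print(has_required_files(Path(".data/datasets/mnist")))
```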