hafnia 0.2.4.tar.gz → 0.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/build.yaml +1 -1
  2. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/ci_cd.yaml +2 -1
  3. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/lint.yaml +1 -1
  4. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/publish_docker.yaml +4 -4
  5. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/tests.yaml +14 -3
  6. {hafnia-0.2.4 → hafnia-0.3.0}/.gitignore +1 -1
  7. {hafnia-0.2.4 → hafnia-0.3.0}/.vscode/extensions.json +2 -1
  8. {hafnia-0.2.4 → hafnia-0.3.0}/.vscode/launch.json +30 -2
  9. {hafnia-0.2.4 → hafnia-0.3.0}/PKG-INFO +34 -30
  10. {hafnia-0.2.4 → hafnia-0.3.0}/README.md +31 -29
  11. {hafnia-0.2.4 → hafnia-0.3.0}/examples/example_dataset_recipe.py +27 -22
  12. {hafnia-0.2.4 → hafnia-0.3.0}/examples/example_hafnia_dataset.py +31 -10
  13. {hafnia-0.2.4 → hafnia-0.3.0}/pyproject.toml +4 -1
  14. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/__main__.py +13 -2
  15. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/config.py +2 -1
  16. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/consts.py +1 -1
  17. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/dataset_cmds.py +6 -14
  18. hafnia-0.3.0/src/cli/dataset_recipe_cmds.py +78 -0
  19. hafnia-0.3.0/src/cli/experiment_cmds.py +243 -0
  20. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/profile_cmds.py +6 -5
  21. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/runc_cmds.py +5 -5
  22. hafnia-0.3.0/src/cli/trainer_package_cmds.py +65 -0
  23. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/__init__.py +2 -0
  24. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/data/factory.py +1 -2
  25. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_helpers.py +0 -12
  26. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_names.py +8 -4
  27. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +119 -33
  28. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +32 -4
  29. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
  30. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_upload_helper.py +206 -53
  31. hafnia-0.3.0/src/hafnia/dataset/hafnia_dataset.py +848 -0
  32. hafnia-0.3.0/src/hafnia/dataset/license_types.py +63 -0
  33. hafnia-0.3.0/src/hafnia/dataset/operations/dataset_stats.py +272 -0
  34. hafnia-0.3.0/src/hafnia/dataset/operations/dataset_transformations.py +403 -0
  35. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/operations/table_transformations.py +39 -2
  36. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/__init__.py +8 -0
  37. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/classification.py +1 -1
  38. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/experiment/hafnia_logger.py +112 -0
  39. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/http.py +16 -2
  40. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/platform/__init__.py +9 -3
  41. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/platform/builder.py +12 -10
  42. hafnia-0.3.0/src/hafnia/platform/dataset_recipe.py +99 -0
  43. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/platform/datasets.py +44 -6
  44. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/platform/download.py +2 -1
  45. hafnia-0.3.0/src/hafnia/platform/experiment.py +68 -0
  46. hafnia-0.3.0/src/hafnia/platform/trainer_package.py +57 -0
  47. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/utils.py +64 -13
  48. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/visualizations/image_visualizations.py +3 -3
  49. {hafnia-0.2.4 → hafnia-0.3.0}/tests/conftest.py +2 -0
  50. hafnia-0.3.0/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  51. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  52. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  53. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  54. hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_draw_annotations[tiny-dataset].png → hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
  55. hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +3 -0
  56. hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
  57. {hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017 → hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017}/dataset_info.json +94 -2
  58. hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +3 -0
  59. hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
  60. {hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset → hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset}/dataset_info.json +3 -1
  61. hafnia-0.3.0/tests/helper_testing.py +188 -0
  62. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/integration}/test_check_example_scripts.py +1 -1
  63. hafnia-0.3.0/tests/integration/test_cli_integration.py +99 -0
  64. hafnia-0.3.0/tests/integration/test_dataset_merges.py +51 -0
  65. hafnia-0.3.0/tests/integration/test_dataset_recipes_with_platform.py +48 -0
  66. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/integration}/test_samples.py +22 -4
  67. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/dataset_recipe/test_dataset_recipes.py +20 -19
  68. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/dataset_recipe/test_recipe_transformations.py +55 -25
  69. hafnia-0.3.0/tests/unit/dataset/operations/test_dataset_stats.py +56 -0
  70. hafnia-0.3.0/tests/unit/dataset/operations/test_dataset_transformations.py +312 -0
  71. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/operations/test_table_transformations.py +3 -3
  72. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/test_hafnia_dataset.py +66 -7
  73. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/test_shape_primitives.py +1 -1
  74. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_builder.py +19 -24
  75. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_utils.py +17 -17
  76. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_visualizations.py +3 -4
  77. hafnia-0.3.0/uv.lock +3308 -0
  78. hafnia-0.2.4/src/cli/experiment_cmds.py +0 -60
  79. hafnia-0.2.4/src/cli/recipe_cmds.py +0 -45
  80. hafnia-0.2.4/src/hafnia/dataset/hafnia_dataset.py +0 -610
  81. hafnia-0.2.4/src/hafnia/dataset/operations/dataset_stats.py +0 -15
  82. hafnia-0.2.4/src/hafnia/dataset/operations/dataset_transformations.py +0 -82
  83. hafnia-0.2.4/src/hafnia/platform/experiment.py +0 -73
  84. hafnia-0.2.4/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  85. hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/annotations.jsonl +0 -3
  86. hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/annotations.parquet +0 -0
  87. hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/annotations.jsonl +0 -3
  88. hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/annotations.parquet +0 -0
  89. hafnia-0.2.4/tests/dataset/operations/test_dataset_transformations.py +0 -0
  90. hafnia-0.2.4/tests/helper_testing.py +0 -108
  91. hafnia-0.2.4/uv.lock +0 -1861
  92. {hafnia-0.2.4 → hafnia-0.3.0}/.devcontainer/devcontainer.json +0 -0
  93. {hafnia-0.2.4 → hafnia-0.3.0}/.devcontainer/hooks/post_create +0 -0
  94. {hafnia-0.2.4 → hafnia-0.3.0}/.github/dependabot.yaml +0 -0
  95. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/Dockerfile +0 -0
  96. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/check_release.yaml +0 -0
  97. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/publish_pypi.yaml +0 -0
  98. {hafnia-0.2.4 → hafnia-0.3.0}/.pre-commit-config.yaml +0 -0
  99. {hafnia-0.2.4 → hafnia-0.3.0}/.python-version +0 -0
  100. {hafnia-0.2.4 → hafnia-0.3.0}/.vscode/settings.json +0 -0
  101. {hafnia-0.2.4 → hafnia-0.3.0}/LICENSE +0 -0
  102. {hafnia-0.2.4 → hafnia-0.3.0}/docs/cli.md +0 -0
  103. {hafnia-0.2.4 → hafnia-0.3.0}/docs/release.md +0 -0
  104. {hafnia-0.2.4 → hafnia-0.3.0}/examples/example_logger.py +0 -0
  105. {hafnia-0.2.4 → hafnia-0.3.0}/examples/example_torchvision_dataloader.py +0 -0
  106. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/__init__.py +0 -0
  107. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/data/__init__.py +0 -0
  108. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/bbox.py +0 -0
  109. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/bitmask.py +0 -0
  110. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/point.py +0 -0
  111. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/polygon.py +0 -0
  112. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/primitive.py +0 -0
  113. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/segmentation.py +0 -0
  114. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/utils.py +0 -0
  115. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/experiment/__init__.py +0 -0
  116. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/log.py +0 -0
  117. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/torch_helpers.py +0 -0
  118. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/visualizations/colors.py +0 -0
  119. {hafnia-0.2.4 → hafnia-0.3.0}/tests/__init__.py +0 -0
  120. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  121. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  122. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  123. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  124. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  125. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  126. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  127. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  128. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  129. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  130. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  131. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  132. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_blur_anonymization[coco-2017].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
  133. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_blur_anonymization[tiny-dataset].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
  134. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_draw_annotations[coco-2017].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
  135. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_mask_region[coco-2017].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
  136. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_mask_region[tiny-dataset].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
  137. /hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/data/4e95c6eb6209880a.jpg → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/data/3b4/3b4165c8c4f830be4e95c6eb6209880a.jpg +0 -0
  138. /hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/data/cf86c7a23edb55ce.jpg → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/data/837/837b642d8a7b3b8dcf86c7a23edb55ce.jpg +0 -0
  139. /hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/data/182a2c0a3ce312cf.jpg → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/data/dc8/dc8efc98ce6304fe182a2c0a3ce312cf.jpg +0 -0
  140. /hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/data/3251d85443622e4c.png → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/data/3dd/3ddec2275a02e79e3251d85443622e4c.png +0 -0
  141. /hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/data/3657ababa44af9b6.png → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/data/4d8/4d8450b045e60e8f3657ababa44af9b6.png +0 -0
  142. /hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/data/222bbd5721a8a86e.png → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/data/907/907f182da7bcedb8222bbd5721a8a86e.png +0 -0
  143. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
  144. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/test_colors.py +0 -0
  145. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/test_dataset_helpers.py +0 -0
  146. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_cli.py +0 -0
  147. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_hafnia_logger.py +0 -0
.github/workflows/build.yaml
@@ -18,7 +18,7 @@ jobs:
   package-version: ${{ steps.extract-version.outputs.package_version }}
   steps:
   - uses: actions/checkout@v5.0.0
- - uses: actions/setup-python@v5.6.0
+ - uses: actions/setup-python@v6.0.0
   with:
   python-version-file: ${{ inputs.python-version-file }}

.github/workflows/ci_cd.yaml
@@ -21,7 +21,7 @@ jobs:
   steps:
   - uses: actions/checkout@v5.0.0
   - name: Run Trivy vulnerability scanner
- uses: aquasecurity/trivy-action@0.32.0
+ uses: aquasecurity/trivy-action@0.33.1
   with:
   scan-type: 'fs'
   scan-ref: '.'
@@ -33,6 +33,7 @@ jobs:
   test:
   name: Run Tests
   needs: lint
+ secrets: inherit
   uses: ./.github/workflows/tests.yaml
   with:
   python-version-file: "pyproject.toml"

.github/workflows/lint.yaml
@@ -11,7 +11,7 @@ jobs:
   runs-on: ubuntu-latest
   steps:
   - uses: actions/checkout@v5.0.0
- - uses: actions/setup-python@v5.6.0
+ - uses: actions/setup-python@v6.0.0
   with:
   python-version-file: ${{ inputs.python-version-file }}
   - uses: pre-commit/action@v3.0.1

.github/workflows/publish_docker.yaml
@@ -25,7 +25,7 @@ jobs:
   runs-on: ubuntu-latest
   steps:
   - uses: actions/checkout@v5.0.0
- - uses: actions/setup-python@v5.6.0
+ - uses: actions/setup-python@v6.0.0
   id: python
   with:
   python-version-file: ${{ inputs.python-version-file }}
@@ -47,7 +47,7 @@ jobs:
   echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
   fi
   - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v4.3.1
+ uses: aws-actions/configure-aws-credentials@v5.0.0
   with:
   role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
   aws-region: ${{ steps.env-vars.outputs.aws_region }}
@@ -63,7 +63,7 @@ jobs:
   uses: docker/build-push-action@v6.18.0
   env:
   ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
- ECR_REPOSITORY: mdi-runtime
+ ECR_REPOSITORY: platform_sdk_runtime
   with:
   context: .
   file: .github/workflows/Dockerfile
@@ -77,4 +77,4 @@ jobs:
   cache-from: type=gha
   cache-to: type=gha,mode=max
   build-args: |
- PYTHON_VERSION=${{ steps.python.outputs.python-version }}
+ PYTHON_VERSION=${{ steps.python.outputs.python-version }}
.github/workflows/tests.yaml
@@ -9,10 +9,13 @@ on:
   type: string
   jobs:
   test:
- runs-on: ubuntu-latest
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [ubuntu-latest, windows-latest]
   steps:
   - uses: actions/checkout@v5.0.0
- - uses: actions/setup-python@v5.6.0
+ - uses: actions/setup-python@v6.0.0
   with:
   python-version-file: ${{ inputs.python-version-file }}
   - name: Install uv
@@ -21,5 +24,13 @@ jobs:
   version: 0.6.8
   - name: Install the project
   run: uv sync --group dev
+ - name: Mount secrets config
+ shell: bash
+ env:
+ HAFNIA_CONFIG: ${{ secrets.HAFNIA_CONFIG }}
+ run: |
+ mkdir -p ~/.hafnia
+ echo "$HAFNIA_CONFIG" | jq . > ~/.hafnia/config.json
   - name: Run tests
- run: uv run pytest tests
+ run: uv run pytest tests
+

.gitignore
@@ -163,4 +163,4 @@ cython_debug/
   /pypiprivate/
   /packaging/
   /.data/
- /recipe.zip
+ /trainer.zip
.vscode/extensions.json
@@ -5,7 +5,8 @@
   "ms-python.mypy-type-checker",
   "charliermarsh.ruff",
   "tamasfe.even-better-toml",
- "streetsidesoftware.code-spell-checker"
+ "streetsidesoftware.code-spell-checker",
+ "ryanluker.vscode-coverage-gutters"
   ]
   }
.vscode/launch.json
@@ -48,17 +48,45 @@
   ],
   },
   {
- "name": "debug (hafnia dataset X)",
+ "name": "cmd: 'hafnia dataset [X]'",
   "type": "debugpy",
   "request": "launch",
   "program": "${workspaceFolder}/src/cli/__main__.py",
   "args": [
   "dataset",
+ //"ls",
   "download",
   "mnist",
- //"./.data",
   "--force"
   ]
+ },
+ {
+ "name": "cmd: 'hafnia experiment [X]'",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "${workspaceFolder}/src/cli/__main__.py",
+ "args": [
+ "experiment",
+ "create",
+ // "--trainer-path",
+ // "${workspaceFolder}/../trainer-classification",
+ //"--trainer-id",
+ //"e47d701d-c5ed-4014-9480-434f04e9459b",
+ "--trainer-path",
+ "${workspaceFolder}/../trainer-classification",
+ "--dataset",
+ "mnist",
+ ]
+ },
+ {
+ "name": "cmd: 'hafnia train-recipe [X]'",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "${workspaceFolder}/src/cli/__main__.py",
+ "args": [
+ "trainer",
+ "ls"
+ ]
   }
   ]
   }
PKG-INFO
@@ -1,6 +1,6 @@
   Metadata-Version: 2.4
   Name: hafnia
- Version: 0.2.4
+ Version: 0.3.0
   Summary: Python SDK for communication with Hafnia platform.
   Author-email: Milestone Systems <hafniaplatform@milestone.dk>
   License-File: LICENSE
@@ -9,6 +9,7 @@ Requires-Dist: boto3>=1.35.91
   Requires-Dist: click>=8.1.8
   Requires-Dist: emoji>=2.14.1
   Requires-Dist: flatten-dict>=0.4.2
+ Requires-Dist: mlflow>=3.2.0
   Requires-Dist: more-itertools>=10.7.0
   Requires-Dist: opencv-python-headless>=4.11.0.86
   Requires-Dist: pathspec>=0.12.1
@@ -19,6 +20,7 @@ Requires-Dist: pycocotools>=2.0.10
   Requires-Dist: pydantic>=2.10.4
   Requires-Dist: rich>=13.9.4
   Requires-Dist: s5cmd>=0.2.0
+ Requires-Dist: sagemaker-mlflow>=0.1.0
   Requires-Dist: seedir>=0.5.0
   Requires-Dist: tqdm>=4.67.1
   Requires-Dist: xxhash>=3.5.0
@@ -26,13 +28,13 @@ Description-Content-Type: text/markdown

   # Hafnia

- The `hafnia` python package is a collection of tools to create and run model training recipes on
+ The `hafnia` python sdk and cli is a collection of tools to create and run model trainer packages on
   the [Hafnia Platform](https://hafnia.milestonesys.com/).

   The package includes the following interfaces:

   - `cli`: A Command Line Interface (CLI) to 1) configure/connect to Hafnia's [Training-aaS](https://hafnia.readme.io/docs/training-as-a-service) and 2) create and
- launch recipe scripts.
+ launch trainer packages.
   - `hafnia`: A python package including `HafniaDataset` to manage datasets and `HafniaLogger` to do
   experiment tracking.
@@ -42,19 +44,19 @@ experiment tracking.
   and *hidden* datasets. Hidden datasets refers to datasets that can be used for
   training, but are not available for download or direct access.

- This is a key feature of the Hafnia platform, as a hidden dataset ensures data
+ This is a key for the Hafnia platform, as a hidden dataset ensures data
   privacy, and allow models to be trained compliantly and ethically by third parties (you).

   The `script2model` approach is a Training-aaS concept, where you package your custom training
- script as a *training recipe* and use the recipe to train models on the hidden datasets.
+ project or script as a *trainer package* and use the package to train models on the hidden datasets.

- To support local development of a training recipe, we have introduced a **sample dataset**
+ To support local development of a trainer package, we have introduced a **sample dataset**
   for each dataset available in the Hafnia [data library](https://hafnia.milestonesys.com/training-aas/datasets). The sample dataset is a small
- and anonymized subset of the full dataset and available for download.
+ and an anonymized subset of the full dataset and available for download.

   With the sample dataset, you can seamlessly switch between local development and Training-aaS.
- Locally, you can create, validate and debug your training recipe. The recipe is then
- launched with Training-aaS, where the recipe runs on the full dataset and can be scaled to run on
+ Locally, you can create, validate and debug your trainer package. The trainer package is then
+ launched with Training-aaS, where the package runs on the full dataset and can be scaled to run on
   multiple GPUs and instances if needed.

   ## Getting started: Configuration
@@ -122,19 +124,19 @@ midwest-vehicle-detection
   You can interact with data as you want, but we also provide `HafniaDataset`
   for loading/saving, managing and interacting with the dataset.

- We recommend to visit and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
- to see how to use the `HafniaDataset` class and its methods.
+ We recommend the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
+ for a short introduction on the `HafniaDataset`.

   Below is a short introduction to the `HafniaDataset` class.

   ```python
   from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample

- # Load dataset
+ # Load dataset from path
   dataset = HafniaDataset.read_from_path(path_dataset)

- # Alternatively, you can use the 'load_dataset' function to download and load dataset in one go.
- # dataset = load_dataset("midwest-vehicle-detection")
+ # Or get dataset directly by name
+ dataset = HafniaDataset.from_name("midwest-vehicle-detection")

   # Print dataset information
   dataset.print_stats()
@@ -199,6 +201,8 @@ DatasetInfo(
   'duration_average': 120.0,
   ...
   }
+ "format_version": "0.0.2",
+ "updated_at": "2025-09-24T21:50:20.231263"
   )
   ```

@@ -238,7 +242,7 @@ Sample(
   height=1080,
   width=1920,
   split='train',
- is_sample=True,
+ tags=["sample"],
   collection_index=None,
   collection_id=None,
   remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
@@ -302,10 +306,10 @@ Sample(
   )
   ```

- To learn more, view and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).
+ To learn more, we recommend the `HafniaDataset` example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).

   ### Dataset Locally vs. Training-aaS
- An important feature of `load_dataset` is that it will return the full dataset
+ An important feature of `HafniaDataset.from_name` is that it will return the full dataset
   when loaded with Training-aaS on the Hafnia platform.

   This enables seamlessly switching between running/validating a training script
@@ -316,7 +320,7 @@ Available datasets with corresponding sample datasets can be found in [data libr


   ## Getting started: Experiment Tracking with HafniaLogger
- The `HafniaLogger` is an important part of the recipe script and enables you to track, log and
+ The `HafniaLogger` is an important part of the trainer and enables you to track, log and
   reproduce your experiments.

   When integrated into your training script, the `HafniaLogger` is responsible for collecting:
@@ -422,25 +426,25 @@ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=


   ## Example: Training-aaS
- By combining logging and dataset loading, we can now construct our model training recipe.
+ By combining logging and dataset loading, we can now construct our model trainer package.

- To demonstrate this, we have provided a recipe project that serves as a template for creating and structuring training recipes
- [recipe-classification](https://github.com/milestone-hafnia/recipe-classification)
+ To demonstrate this, we have provided a trainer package project that serves as a template for creating and structuring trainers. The example repo is called
+ [trainer-classification](https://github.com/milestone-hafnia/trainer-classification)

- The project also contains additional information on how to structure your training recipe, use the `HafniaLogger`, the `load_dataset` function and different approach for launching
- the training recipe on the Hafnia platform.
+ The project also contains additional information on how to structure your trainer package, use the `HafniaLogger`, loading a dataset and different approach for launching
+ the trainer on the Hafnia platform.


- ## Create, Build and Run `recipe.zip` locally
- In order to test recipe compatibility with Hafnia cloud use the following command to build and
+ ## Create, Build and Run `trainer.zip` locally
+ In order to test trainer package compatibility with Hafnia cloud use the following command to build and
   start the job locally.

   ```bash
- # Create 'recipe.zip' from source folder '.'
- hafnia recipe create .
-
- # Build the docker image locally from a 'recipe.zip' file
- hafnia runc build-local recipe.zip
+ # Create 'trainer.zip' in the root folder of your training trainer project '../trainer/classification'
+ hafnia trainer create-zip ../trainer-classification
+
+ # Build the docker image locally from a 'trainer.zip' file
+ hafnia runc build-local trainer.zip

   # Execute the docker image locally with a desired dataset
   hafnia runc launch-local --dataset mnist "python scripts/train.py"
README.md
@@ -1,12 +1,12 @@
   # Hafnia

- The `hafnia` python package is a collection of tools to create and run model training recipes on
+ The `hafnia` python sdk and cli is a collection of tools to create and run model trainer packages on
   the [Hafnia Platform](https://hafnia.milestonesys.com/).

   The package includes the following interfaces:

   - `cli`: A Command Line Interface (CLI) to 1) configure/connect to Hafnia's [Training-aaS](https://hafnia.readme.io/docs/training-as-a-service) and 2) create and
- launch recipe scripts.
+ launch trainer packages.
   - `hafnia`: A python package including `HafniaDataset` to manage datasets and `HafniaLogger` to do
   experiment tracking.

@@ -16,19 +16,19 @@ experiment tracking.
   and *hidden* datasets. Hidden datasets refers to datasets that can be used for
   training, but are not available for download or direct access.

- This is a key feature of the Hafnia platform, as a hidden dataset ensures data
+ This is a key for the Hafnia platform, as a hidden dataset ensures data
   privacy, and allow models to be trained compliantly and ethically by third parties (you).

   The `script2model` approach is a Training-aaS concept, where you package your custom training
- script as a *training recipe* and use the recipe to train models on the hidden datasets.
+ project or script as a *trainer package* and use the package to train models on the hidden datasets.

- To support local development of a training recipe, we have introduced a **sample dataset**
+ To support local development of a trainer package, we have introduced a **sample dataset**
   for each dataset available in the Hafnia [data library](https://hafnia.milestonesys.com/training-aas/datasets). The sample dataset is a small
- and anonymized subset of the full dataset and available for download.
+ and an anonymized subset of the full dataset and available for download.

   With the sample dataset, you can seamlessly switch between local development and Training-aaS.
- Locally, you can create, validate and debug your training recipe. The recipe is then
- launched with Training-aaS, where the recipe runs on the full dataset and can be scaled to run on
+ Locally, you can create, validate and debug your trainer package. The trainer package is then
+ launched with Training-aaS, where the package runs on the full dataset and can be scaled to run on
   multiple GPUs and instances if needed.

   ## Getting started: Configuration
@@ -96,19 +96,19 @@ midwest-vehicle-detection
   You can interact with data as you want, but we also provide `HafniaDataset`
   for loading/saving, managing and interacting with the dataset.

- We recommend to visit and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
- to see how to use the `HafniaDataset` class and its methods.
+ We recommend the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
+ for a short introduction on the `HafniaDataset`.

   Below is a short introduction to the `HafniaDataset` class.

   ```python
   from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample

- # Load dataset
+ # Load dataset from path
   dataset = HafniaDataset.read_from_path(path_dataset)

- # Alternatively, you can use the 'load_dataset' function to download and load dataset in one go.
- # dataset = load_dataset("midwest-vehicle-detection")
+ # Or get dataset directly by name
+ dataset = HafniaDataset.from_name("midwest-vehicle-detection")

   # Print dataset information
   dataset.print_stats()
@@ -173,6 +173,8 @@ DatasetInfo(
   'duration_average': 120.0,
   ...
   }
+ "format_version": "0.0.2",
+ "updated_at": "2025-09-24T21:50:20.231263"
   )
   ```

@@ -212,7 +214,7 @@ Sample(
   height=1080,
   width=1920,
   split='train',
- is_sample=True,
+ tags=["sample"],
   collection_index=None,
   collection_id=None,
   remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
@@ -276,10 +278,10 @@ Sample(
   )
   ```

- To learn more, view and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).
+ To learn more, we recommend the `HafniaDataset` example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).

   ### Dataset Locally vs. Training-aaS
- An important feature of `load_dataset` is that it will return the full dataset
+ An important feature of `HafniaDataset.from_name` is that it will return the full dataset
   when loaded with Training-aaS on the Hafnia platform.

   This enables seamlessly switching between running/validating a training script
@@ -290,7 +292,7 @@ Available datasets with corresponding sample datasets can be found in [data libr


   ## Getting started: Experiment Tracking with HafniaLogger
- The `HafniaLogger` is an important part of the recipe script and enables you to track, log and
+ The `HafniaLogger` is an important part of the trainer and enables you to track, log and
   reproduce your experiments.

   When integrated into your training script, the `HafniaLogger` is responsible for collecting:
@@ -396,25 +398,25 @@ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=


   ## Example: Training-aaS
- By combining logging and dataset loading, we can now construct our model training recipe.
+ By combining logging and dataset loading, we can now construct our model trainer package.

- To demonstrate this, we have provided a recipe project that serves as a template for creating and structuring training recipes
- [recipe-classification](https://github.com/milestone-hafnia/recipe-classification)
+ To demonstrate this, we have provided a trainer package project that serves as a template for creating and structuring trainers. The example repo is called
+ [trainer-classification](https://github.com/milestone-hafnia/trainer-classification)

- The project also contains additional information on how to structure your training recipe, use the `HafniaLogger`, the `load_dataset` function and different approach for launching
- the training recipe on the Hafnia platform.
+ The project also contains additional information on how to structure your trainer package, use the `HafniaLogger`, loading a dataset and different approach for launching
+ the trainer on the Hafnia platform.


- ## Create, Build and Run `recipe.zip` locally
- In order to test recipe compatibility with Hafnia cloud use the following command to build and
+ ## Create, Build and Run `trainer.zip` locally
+ In order to test trainer package compatibility with Hafnia cloud use the following command to build and
   start the job locally.

   ```bash
- # Create 'recipe.zip' from source folder '.'
- hafnia recipe create .
-
- # Build the docker image locally from a 'recipe.zip' file
- hafnia runc build-local recipe.zip
+ # Create 'trainer.zip' in the root folder of your training trainer project '../trainer/classification'
+ hafnia trainer create-zip ../trainer-classification
+
+ # Build the docker image locally from a 'trainer.zip' file
+ hafnia runc build-local trainer.zip

   # Execute the docker image locally with a desired dataset
   hafnia runc launch-local --dataset mnist "python scripts/train.py"
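
For users upgrading from 0.2.4, the README changes above boil down to a handful of renames. The sketch below is assembled only from lines visible in this diff (`HafniaDataset.from_name`, the `tags=["sample"]` field, and the `trainer`/`runc` CLI commands); treat it as an illustration of the renamed entry points rather than an exhaustive migration guide:

```python
# Renames visible in the README diff (0.2.4 -> 0.3.0), collected in one place.
from hafnia.dataset.hafnia_dataset import HafniaDataset

# 0.2.4 README: dataset = load_dataset("midwest-vehicle-detection")
# 0.3.0 README: load a dataset (sample locally, full dataset on Training-aaS) by name.
dataset = HafniaDataset.from_name("midwest-vehicle-detection")

# Sample records: the boolean field 'is_sample=True' is replaced by a tag list, tags=["sample"].
# CLI: 'hafnia recipe create .' and 'hafnia runc build-local recipe.zip' become
#      'hafnia trainer create-zip <trainer-dir>' and 'hafnia runc build-local trainer.zip'.
```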
examples/example_dataset_recipe.py
@@ -2,6 +2,7 @@ from pathlib import Path

   from rich import print as rprint

+ from hafnia import utils
   from hafnia.data.factory import load_dataset
   from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe
   from hafnia.dataset.dataset_recipe.recipe_transforms import (
@@ -15,10 +16,6 @@ from hafnia.dataset.hafnia_dataset import HafniaDataset
   # A DatasetRecipe is a recipe for the dataset you want to create.
   # The recipe itself is not executed - this is just a specification of the dataset you want!

- # A DatasetRecipe is an important concept in Hafnia as it allows you to merge multiple datasets
- # and transformations in a single recipe. This is especially useful for Training as a Service (TaaS)
- # where you need to define the dataset you want as a configuration and load it in the TaaS platform.
-
   # The 'DatasetRecipe' interface is similar to the 'HafniaDataset' interface.
   # To demonstrate, we will first create a dataset with the regular 'HafniaDataset' interface.
   # This line will get the "mnist" dataset, shuffle it, and select 20 samples.
@@ -34,30 +31,38 @@ dataset = dataset_recipe.build()
   # You can print the dataset recipe to the operations that were applied to it.
   rprint(dataset_recipe)

- # Or as a JSON string:
- json_str: str = dataset_recipe.as_json_str()
- rprint(json_str)
-
- # This is an important feature of a 'DatasetRecipe' it only registers operations and that the recipe itself
- # - and not the dataset - can be saved as a file and loaded from file.
- # Meaning you can easily save, share, load and build the dataset later or in a different environment.
- # For TaaS, this is the only way to include multiple datasets during training.
-
+ # The key for recipes is that they can be saved and loaded as a JSON.
+ # This also allows the recipe to be saved, shared, loaded and used later to build a dataset
+ # in a different environment.

- # 2) The recipe can be loaded from json string
- dataset_recipe_again: DatasetRecipe = DatasetRecipe.from_json_str(json_str)
- # dataset_recipe_again.build()
+ # Example: Saving and loading a dataset recipe from file.
+ path_recipe = Path(".data/dataset_recipes/example_recipe.json")
+ json_str: str = dataset_recipe.as_json_file(path_recipe)
+ dataset_recipe_again: DatasetRecipe = DatasetRecipe.from_json_file(path_recipe)

- # We can verify that the loaded recipe is the same as the original recipe.
+ # Verify that the loaded recipe is identical to the original recipe.
   assert dataset_recipe_again == dataset_recipe

- # Additionally, you can get the python code for creating the same recipe.
+ # It is also possible to generate the recipe as python code
   dataset_recipe.as_python_code()

- # Example: DatasetRecipe from Path
- dataset_recipe = DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+ # The recipe also allows you to combine multiple datasets and transformations that can be
+ # executed in the TaaS platform. This is demonstrated below:
+ if utils.is_hafnia_configured(): # First ensure you are connected to the hafnia platform
+ # Upload the dataset recipe - this will make it available for TaaS and for users of your organization
+ dataset_recipe.as_platform_recipe(recipe_name="example-mnist-recipe")
+
+ # The recipe is now available in TaaS, for different environments and other users in your organization
+ dataset_recipe_again = DatasetRecipe.from_recipe_name(name="example-mnist-recipe")
+
+ # Launch an experiment with the dataset recipe using the CLI:
+ # hafnia experiment create --dataset-recipe example-mnist-recipe --trainer-path ../trainer-classification
+
+ # Coming soon: Dataset recipes will be included in the web platform to them to be shared, managed
+ # and used in experiments.

- # Example: DatasetRecipe by merging multiple dataset recipes
+ ### More examples dataset recipes ###
+ # Example: 'DatasetRecipe' by merging multiple dataset recipes
   dataset_recipe = DatasetRecipe.from_merger(
   recipes=[
   DatasetRecipe.from_name(name="mnist"),
@@ -166,4 +171,4 @@ rprint(explicit_recipe_from_implicit)

   # Verify that the conversion produces the same result
   assert explicit_recipe_from_implicit == explicit_recipe
- rprint("Conversion successful - recipes are equivalent!")
+ rprint("Conversion successful - recipes are equivalent!")
examples/example_hafnia_dataset.py
@@ -5,7 +5,7 @@ import numpy as np
   from PIL import Image
   from rich import print as rprint

- from hafnia.data import get_dataset_path, load_dataset
+ from hafnia.data import load_dataset
   from hafnia.dataset.dataset_names import SplitName
   from hafnia.dataset.hafnia_dataset import DatasetInfo, HafniaDataset, Sample, TaskInfo
   from hafnia.dataset.primitives.bbox import Bbox
@@ -20,11 +20,7 @@ from hafnia.dataset.primitives.polygon import Polygon
   # hafnia configure

   # Load dataset
- path_dataset = get_dataset_path("midwest-vehicle-detection")
- dataset = HafniaDataset.from_path(path_dataset)
-
- # Alternatively, you can use the 'load_dataset' function
- dataset = load_dataset("midwest-vehicle-detection")
+ dataset = HafniaDataset.from_name("mnist")

   # Dataset information is stored in 'dataset.info'
   rprint(dataset.info)
@@ -33,7 +29,13 @@ rprint(dataset.info)
   dataset.samples.head(2)

   # Print dataset information
- dataset.print_stats()
+ dataset.print_sample_and_task_counts()
+ dataset.print_class_distribution()
+ dataset.print_stats() # Print verbose dataset statistics
+
+ # Get dataset stats
+ dataset.class_counts_all() # Get class counts for all tasks
+ dataset.class_counts_for_task(primitive=Classification) # Get class counts for a specific task

   # Create a dataset split for training
   dataset_train = dataset.create_split_dataset("train")
@@ -41,13 +43,32 @@ dataset_train = dataset.create_split_dataset("train")
   # Checkout built-in transformations in 'operations/dataset_transformations' or 'HafniaDataset'
   dataset_val = dataset.create_split_dataset(SplitName.VAL) # Use 'SplitName' to avoid magic strings

-
   small_dataset = dataset.select_samples(n_samples=10, seed=42) # Selects 10 samples from the dataset
   shuffled_dataset = dataset.shuffle(seed=42) # Shuffle the dataset

+ # Create dataset splits by ratios
   split_ratios = {SplitName.TRAIN: 0.8, SplitName.VAL: 0.1, SplitName.TEST: 0.1}
   new_dataset_splits = dataset.splits_by_ratios(split_ratios)

+ # Get only samples with specific class names
+ dataset_ones = dataset.select_samples_by_class_name(name="1 - one", primitive=Classification)
+
+ # Rename class names with mapping
+ class_mapping_strict = {
+ "0 - zero": "even", # "0 - zero" will be renamed to "even". "even" appear first and get class index 0
+ "1 - one": "odd", # "1 - one" will be renamed to "odd". "odd" appear second and will get class index 1
+ "2 - two": "even",
+ "3 - three": "odd",
+ "4 - four": "even",
+ "5 - five": "odd",
+ "6 - six": "even",
+ "7 - seven": "odd",
+ "8 - eight": "even",
+ "9 - nine": "__REMOVE__", # Remove all samples with class "9 - nine"
+ }
+ dataset_mapped = dataset.class_mapper(class_mapping=class_mapping_strict)
+ dataset_mapped.print_class_distribution()
+
   # Support Chaining Operations (load, shuffle, select samples)
   dataset = load_dataset("midwest-vehicle-detection").shuffle(seed=42).select_samples(n_samples=10)

@@ -70,7 +91,6 @@ class_counts = dataset.samples[Classification.column_name()].explode().struct.fi
   class_counts = dataset.samples[Bbox.column_name()].explode().struct.field("class_name").value_counts()
   rprint(dict(class_counts.iter_rows()))

-
   # Access the first sample in the training split - data is stored in a dictionary
   sample_dict = dataset_train[0]

@@ -86,6 +106,7 @@ bitmasks: List[Bitmask] = sample.bitmasks # Use 'sample.bitmasks' to access bit
   polygons: List[Polygon] = sample.polygons # Use 'sample.polygons' to access polygons as a list of Polygon objects
   classifications: List[Classification] = sample.classifications # As a list of Classification objects

+
   # Read image using the sample object
   image: np.ndarray = sample.read_image()

@@ -107,7 +128,7 @@ for i_fake_sample in range(5):
   height=480,
   width=640,
   split="train",
- is_sample=True,
+ tags=["sample"],
   objects=bboxes,
   classifications=classifications,
   )
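
The example-script changes above add several statistics and filtering helpers to `HafniaDataset`. The block below gathers them into one hedged sketch; the calls and the `"__REMOVE__"` sentinel mirror the `+` lines in the diff, while the `Classification` import path is assumed from the package layout (`src/hafnia/dataset/primitives/classification.py`) rather than shown explicitly in this diff:

```python
from hafnia.dataset.hafnia_dataset import HafniaDataset
from hafnia.dataset.primitives.classification import Classification  # assumed import path

# Load the sample dataset by name, as in the updated example script.
dataset = HafniaDataset.from_name("mnist")

# New statistics helpers introduced alongside print_stats().
dataset.print_sample_and_task_counts()
dataset.print_class_distribution()
counts_all = dataset.class_counts_all()
counts_cls = dataset.class_counts_for_task(primitive=Classification)

# Keep only samples of a given class.
dataset_ones = dataset.select_samples_by_class_name(name="1 - one", primitive=Classification)

# Remap class names; "__REMOVE__" drops all samples of that class.
class_mapping = {
    "0 - zero": "even", "1 - one": "odd", "2 - two": "even", "3 - three": "odd",
    "4 - four": "even", "5 - five": "odd", "6 - six": "even", "7 - seven": "odd",
    "8 - eight": "even", "9 - nine": "__REMOVE__",
}
dataset_even_odd = dataset.class_mapper(class_mapping=class_mapping)
dataset_even_odd.print_class_distribution()
```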