hafnia 0.5.0__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/build.yaml +3 -3
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/check_release.yaml +1 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/ci_cd.yaml +1 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/lint.yaml +2 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/publish_docker.yaml +4 -4
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/publish_pypi.yaml +1 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/tests.yaml +3 -5
- {hafnia-0.5.0 → hafnia-0.5.1}/PKG-INFO +2 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_dataset_recipe.py +15 -87
- {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_hafnia_dataset.py +15 -3
- {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_torchvision_dataloader.py +2 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/pyproject.toml +2 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_helpers.py +59 -1
- hafnia-0.5.1/src/hafnia/dataset/dataset_names.py +123 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +48 -4
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/torchvision_datasets.py +2 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/hafnia_dataset.py +163 -69
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/hafnia_dataset_types.py +142 -18
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_s3_storage.py +7 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/table_transformations.py +0 -18
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/datasets.py +32 -132
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/download.py +1 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/s5cmd_utils.py +122 -3
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/dataset_cmds.py +19 -13
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/runc_cmds.py +7 -2
- hafnia-0.5.1/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
- hafnia-0.5.1/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
- hafnia-0.5.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
- hafnia-0.5.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/helper_testing.py +25 -14
- hafnia-0.5.1/tests/helper_testing_datasets.py +73 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_bring_your_own_data.py +9 -8
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_cli_integration.py +6 -3
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_dataset_merges.py +10 -7
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_dataset_recipes_with_platform.py +9 -6
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_samples.py +57 -22
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_dataset_recipe_helpers.py +37 -28
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_dataset_recipes.py +16 -7
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_helpers.py +44 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_hafnia_dataset.py +8 -8
- hafnia-0.5.1/tests/unit/dataset/test_hafnia_dataset_types.py +61 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/uv.lock +5 -5
- hafnia-0.5.0/src/hafnia/dataset/dataset_names.py +0 -230
- hafnia-0.5.0/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
- hafnia-0.5.0/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
- hafnia-0.5.0/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
- hafnia-0.5.0/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.devcontainer/devcontainer.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.devcontainer/hooks/post_create +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/dependabot.yaml +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/Dockerfile +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.gitignore +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.pre-commit-config.yaml +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.python-version +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.trivyignore +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/extensions.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/launch.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/settings.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/LICENSE +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/README.md +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/docs/cli.md +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/docs/release.md +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_logger.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/data/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/data/factory.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_details_uploader.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/recipe_types.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_coco.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_helpers.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_image_classification_folder.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_yolo.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/license_types.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_stats.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_transformations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/bbox.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/bitmask.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/classification.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/point.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/polygon.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/primitive.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/segmentation.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/utils.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/experiment/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/experiment/hafnia_logger.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/http.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/log.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/builder.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/dataset_recipe.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/experiment.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/trainer_package.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/torch_helpers.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/utils.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/visualizations/colors.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/visualizations/image_visualizations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/__main__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/config.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/consts.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/dataset_recipe_cmds.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/experiment_cmds.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/keychain.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/profile_cmds.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/trainer_package_cmds.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/conftest.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/000000000632.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/000000000724.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/_annotations.coco.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000139.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000285.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/_annotations.coco.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/obj.names +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000139.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000139.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000285.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000285.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/images.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/images.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_image_metadata_schema.yaml +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_dataset_transformations/test_video_storage_format_read_image.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[polygon].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_as_ints].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_bytes].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_str].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_from_coco_format_visualized.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_to_coco_format_visualized.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_yolo/test_import_yolo_format_visualized.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_polygon_to_bitmask_conversion.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/253/253925d334c002ce6662d8133535dd4c.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/b1a/b1a09f4d922f8f6904bab0c1caf172ab.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/f67/f675c8a1e862b5e00203ab888ac7fff4.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/25c/25c3a206e7b60ab50245ee3d52d97f11.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/962/962fd865fdd45f169d5ca8c8f284d68d.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/ec6/ec60f2f4fb854b59c97e16b45c713de0.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/dataset_info.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_check_example_scripts.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_torchvision_datasets.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_coco.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_image_classification_folder.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_yolo.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_dataset_stats.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_dataset_transformations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_table_transformations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_colors.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_details_uploader.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_names.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_shape_primitives.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_builder.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_cli.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_hafnia_logger.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_utils.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_visualizations.py +0 -0
@@ -21,8 +21,8 @@ jobs:
     outputs:
       package-version: ${{ steps.extract-version.outputs.package_version }}
     steps:
-      - uses: actions/checkout@
-      - uses: actions/setup-python@v6.
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
         with:
           python-version-file: ${{ inputs.python-version-file }}
 

@@ -49,7 +49,7 @@ jobs:
         run: uv build
 
       - name: Upload package artifact
-        uses: actions/upload-artifact@
+        uses: actions/upload-artifact@v6.0.0
         with:
           name: python-package
           path: dist/

@@ -10,8 +10,8 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@
-      - uses: actions/setup-python@v6.
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
         with:
           python-version-file: ${{ inputs.python-version-file }}
       - uses: pre-commit/action@v3.0.1

@@ -24,14 +24,14 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@
-      - uses: actions/setup-python@v6.
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
         id: python
         with:
          python-version-file: ${{ inputs.python-version-file }}
 
      - name: Download package artifact
-        uses: actions/download-artifact@
+        uses: actions/download-artifact@v7.0.0
        with:
          name: python-package
          path: dist/

@@ -47,7 +47,7 @@ jobs:
            echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
          fi
      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v5.1.
+        uses: aws-actions/configure-aws-credentials@v5.1.1
        with:
          role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
          aws-region: ${{ steps.env-vars.outputs.aws_region }}

@@ -15,8 +15,8 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest]
     steps:
-      - uses: actions/checkout@
-      - uses: actions/setup-python@v6.
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
         with:
           python-version-file: ${{ inputs.python-version-file }}
       - name: Install uv

@@ -32,9 +32,7 @@ jobs:
         run: |
           mkdir -p ~/.hafnia
           echo "$HAFNIA_CONFIG" | jq . > ~/.hafnia/config.json
-      - name: Check hafnia
-        run: uv run hafnia profile active
-      - name: Check hafnia by download
+      - name: Check hafnia configuration by download
        run: uv run hafnia dataset download mnist --force
      - name: Run tests
        run: uv run pytest tests
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.5.0
+Version: 0.5.1
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE

@@ -10,7 +10,7 @@ Requires-Dist: click>=8.1.8
 Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
 Requires-Dist: keyring>=25.6.0
-Requires-Dist: mcp>=1.
+Requires-Dist: mcp>=1.23.0
 Requires-Dist: mlflow>=3.4.0
 Requires-Dist: more-itertools>=10.7.0
 Requires-Dist: opencv-python-headless>=4.11.0.86
@@ -5,13 +5,12 @@ from rich import print as rprint
 from hafnia import utils
 from hafnia.dataset.dataset_names import OPS_REMOVE_CLASS
 from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe
-from hafnia.dataset.dataset_recipe.recipe_transforms import (
-    SelectSamples,
-    Shuffle,
-    SplitsByRatios,
-)
 from hafnia.dataset.hafnia_dataset import HafniaDataset
 
+COCO_VERSION = "1.0.0"
+MIDWEST_VERSION = "1.0.0"
+MNIST_VERSION = "1.0.0"
+
 ### Introducing DatasetRecipe ###
 # A DatasetRecipe is a recipe for the dataset you want to create.
 # The recipe itself is not executed - this is just a specification of the dataset you want!

@@ -19,10 +18,10 @@ from hafnia.dataset.hafnia_dataset import HafniaDataset
 # The 'DatasetRecipe' interface is similar to the 'HafniaDataset' interface.
 # To demonstrate, we will first create a dataset with the regular 'HafniaDataset' interface.
 # This line will get the "mnist" dataset, shuffle it, and select 20 samples.
-dataset = HafniaDataset.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+dataset = HafniaDataset.from_name(name="mnist", version=MNIST_VERSION).shuffle().select_samples(n_samples=20)
 
 # Now the same dataset is created using the 'DatasetRecipe' interface.
-dataset_recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+dataset_recipe = DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION).shuffle().select_samples(n_samples=20)
 dataset = dataset_recipe.build()
 # Note that the interface is similar, but to actually create the dataset you need to call `build()` on the recipe.
 

@@ -65,8 +64,8 @@ if utils.is_hafnia_configured():  # First ensure you are connected to the hafnia
 # Example: 'DatasetRecipe' by merging multiple dataset recipes
 dataset_recipe = DatasetRecipe.from_merger(
     recipes=[
-        DatasetRecipe.from_name(name="mnist"),
-        DatasetRecipe.from_name(name="mnist"),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
     ]
 )
 

@@ -75,14 +74,14 @@ dataset_recipe = DatasetRecipe.from_merger(
     recipes=[
         DatasetRecipe.from_merger(
             recipes=[
-                DatasetRecipe.from_name(name="mnist"),
-                DatasetRecipe.from_name(name="mnist"),
+                DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
+                DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
             ]
         ),
         DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
         .select_samples(n_samples=30)
         .splits_by_ratios(split_ratios={"train": 0.8, "val": 0.1, "test": 0.1}),
-        DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION).select_samples(n_samples=20).shuffle(),
     ]
 )
 

@@ -99,14 +98,14 @@ print(dataset_recipe.as_json_str())  # as a JSON string
 # 1) The first step is to use the regular 'HafniaDataset' interface to investigate and understand the datasets
 
 # 1a) Explore 'coco-2017'
-coco = HafniaDataset.from_name("coco-2017")
+coco = HafniaDataset.from_name("coco-2017", version=COCO_VERSION)
 coco.print_stats()  # Print dataset statistics
 coco_class_names = coco.info.get_task_by_primitive("Bbox").class_names  # Get the class names for the bbox task
 # You will notice coco has 80 classes including 'person' and various vehicle classes such as 'car', 'bus', 'truck', etc.
 # but also many unrelated classes such as 'toaster', 'hair drier', etc.
 
 # 1b) Explore 'midwest-vehicle-detection'
-midwest = HafniaDataset.from_name("midwest-vehicle-detection")
+midwest = HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
 midwest.print_stats()  # Print dataset statistics
 midwest_class_names = midwest.info.get_task_by_primitive("Bbox").class_names
 # You will also notice midwest has similar classes, but they are named differently, e.g. 'Persons',

@@ -144,10 +143,10 @@ merged_dataset.print_stats()
 # 3) Once you have verified operations using the 'HafniaDataset' interface, you can convert
 #    the operations to a single 'DatasetRecipe'
 merged_recipe = DatasetRecipe.from_merge(
-    recipe0=DatasetRecipe.from_name("coco-2017").class_mapper(
+    recipe0=DatasetRecipe.from_name("coco-2017", version=COCO_VERSION).class_mapper(
         class_mapping=mappings_coco, method="remove_undefined", task_name="object_detection"
     ),
-    recipe1=DatasetRecipe.from_name("midwest-vehicle-detection").class_mapper(
+    recipe1=DatasetRecipe.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION).class_mapper(
         class_mapping=mapping_midwest, task_name="object_detection"
     ),
 ).select_samples_by_class_name(name=["Person", "Vehicle"], task_name="object_detection")

@@ -176,74 +175,3 @@ if utils.is_hafnia_configured():
 # 6) Monitor and manage your experiments
 # 6a) View experiments using the web platform https://staging02.mdi.milestonesys.com/training-aas/experiments
 # 6b) Or use the CLI: 'hafnia experiment ls'
-### DatasetRecipe Implicit Form ###
-# Below we demonstrate the difference between implicit and explicit forms of dataset recipes.
-# Example: Get dataset by name with implicit and explicit forms
-recipe_implicit_form = "mnist"
-recipe_explicit_form = DatasetRecipe.from_name(name="mnist")
-
-# The implicit form can now be loaded and built as a dataset
-dataset_implicit = DatasetRecipe.from_implicit_form(recipe_implicit_form).build()
-# Or directly as a dataset
-dataset_implicit = HafniaDataset.from_recipe(recipe_implicit_form)
-
-
-# Example: Get dataset from path with implicit and explicit forms:
-recipe_implicit_form = Path(".data/datasets/mnist")
-recipe_explicit_form = DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
-
-# Example: Merge datasets with implicit and explicit forms
-recipe_implicit_form = ("mnist", "mnist")
-recipe_explicit_form = DatasetRecipe.from_merger(
-    recipes=[
-        DatasetRecipe.from_name(name="mnist"),
-        DatasetRecipe.from_name(name="mnist"),
-    ]
-)
-
-# Example: Define a dataset with transformations using implicit and explicit forms
-recipe_implicit_form = ["mnist", SelectSamples(n_samples=20), Shuffle()]
-recipe_explicit_form = DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle()
-
-
-# Example: Complex nested example with implicit vs explicit forms
-# Implicit form of a complex dataset recipe
-split_ratio = {"train": 0.8, "val": 0.1, "test": 0.1}
-recipe_implicit_complex = (
-    ("mnist", "mnist"),
-    [Path(".data/datasets/mnist"), SelectSamples(n_samples=30), SplitsByRatios(split_ratios=split_ratio)],
-    ["mnist", SelectSamples(n_samples=20), Shuffle()],
-)
-
-# Explicit form of the same complex dataset recipe
-recipe_explicit_complex = DatasetRecipe.from_merger(
-    recipes=[
-        DatasetRecipe.from_merger(
-            recipes=[
-                DatasetRecipe.from_name(name="mnist"),
-                DatasetRecipe.from_name(name="mnist"),
-            ]
-        ),
-        DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
-        .select_samples(n_samples=30)
-        .splits_by_ratios(split_ratios=split_ratio),
-        DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
-    ]
-)
-
-# The implicit form uses the following rules:
-# str: Will get a dataset by name -> In explicit form it becomes 'DatasetRecipe.from_name'
-# Path: Will get a dataset from path -> In explicit form it becomes 'DatasetRecipe.from_path'
-# tuple: Will merge datasets specified in the tuple -> In explicit form it becomes 'DatasetRecipe.from_merger'
-# list: Will define a dataset followed by a list of transformations -> In explicit form it becomes chained method calls
-# Generally, we recommend using the explicit form over the implicit form when multiple datasets and transformations are involved.
-
-
-# To convert from implicit to explicit recipe form, you can use the `from_implicit_form` method.
-explicit_recipe_from_implicit = DatasetRecipe.from_implicit_form(recipe_implicit_complex)
-rprint("Converted explicit recipe:")
-rprint(explicit_recipe_from_implicit)
-
-# Verify that the conversion produces the same result
-assert explicit_recipe_from_implicit == recipe_explicit_complex
-rprint("Conversion successful - recipes are equivalent!")
@@ -13,6 +13,10 @@ from hafnia.dataset.primitives.bitmask import Bitmask
 from hafnia.dataset.primitives.classification import Classification
 from hafnia.dataset.primitives.polygon import Polygon
 
+MNIST_VERSION = "1.0.0"
+MIDWEST_VERSION = "1.0.0"
+COCO_VERSION = "1.0.0"
+
 # First ensure that you have the Hafnia CLI installed and configured.
 # You can install it via pip:
 # pip install hafnia

@@ -20,7 +24,11 @@ from hafnia.dataset.primitives.polygon import Polygon
 # hafnia configure
 
 # Load sample dataset
-dataset = HafniaDataset.from_name("mnist")
+dataset = HafniaDataset.from_name("mnist", version=MNIST_VERSION)
+
+# Use 'from_name' without version-argument to get available versions:
+# dataset = HafniaDataset.from_name("mnist")
+# >>> ValueError: Version must be specified. Available versions: ['1.0.0', '0.0.1']
 
 # Dataset information is stored in 'dataset.info'
 rprint(dataset.info)

@@ -76,7 +84,11 @@ dataset_mapped = dataset.class_mapper(class_mapping=class_mapping_strict)
 dataset_mapped.print_class_distribution()
 
 # Support Chaining Operations (load, shuffle, select samples)
-dataset =
+dataset = (
+    HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
+    .shuffle(seed=42)
+    .select_samples(n_samples=10)
+)
 
 
 # Write dataset to disk

@@ -88,7 +100,7 @@ dataset.write(path_dataset)
 dataset_again = HafniaDataset.from_path(path_dataset)
 
 ## Dataset importers and exporters ##
-dataset_od = HafniaDataset.from_name("coco-2017").select_samples(n_samples=5, seed=42)
+dataset_od = HafniaDataset.from_name("coco-2017", version=COCO_VERSION).select_samples(n_samples=5, seed=42)
 
 # Export/import dataset to YOLO format
 path_yolo_format = Path(".data/tmp/yolo_dataset")
@@ -12,7 +12,8 @@ from hafnia.dataset.hafnia_dataset import HafniaDataset
 if __name__ == "__main__":
     torch.manual_seed(1)
     # Load Hugging Face dataset
-
+    MIDWEST_VERSION = "1.0.0"
+    dataset = HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
 
     # Define transforms
     train_transforms = v2.Compose(
@@ -1,6 +1,6 @@
 [project]
 name = "hafnia"
-version = "0.5.0"
+version = "0.5.1"
 description = "Python SDK for communication with Hafnia platform."
 readme = "README.md"
 authors = [

@@ -28,7 +28,7 @@ dependencies = [
     "xxhash>=3.5.0",
     "mlflow>=3.4.0",
     "sagemaker-mlflow>=0.1.0",
-    "mcp>=1.
+    "mcp>=1.23.0",
 ]
 
 [dependency-groups]
@@ -3,12 +3,70 @@ import math
 import random
 import shutil
 from pathlib import Path
-from typing import Dict, List
+from typing import Dict, List, Optional, Tuple
 
 import numpy as np
 import xxhash
+from packaging.version import InvalidVersion, Version
 from PIL import Image
 
+from hafnia.log import user_logger
+
+
+def is_valid_version_string(version: Optional[str], allow_none: bool = False, allow_latest: bool = False) -> bool:
+    if allow_none and version is None:
+        return True
+    if allow_latest and version == "latest":
+        return True
+    return version_from_string(version, raise_error=False) is not None
+
+
+def version_from_string(version: Optional[str], raise_error: bool = True) -> Optional[Version]:
+    if version is None:
+        if raise_error:
+            raise ValueError("Version is 'None'. A valid version string is required e.g '1.0.0'")
+        return None
+
+    try:
+        version_casted = Version(version)
+    except (InvalidVersion, TypeError) as e:
+        if raise_error:
+            raise ValueError(f"Invalid version string/type: {version}") from e
+        return None
+
+    # Check if version is semantic versioning (MAJOR.MINOR.PATCH)
+    if len(version_casted.release) < 3:
+        if raise_error:
+            raise ValueError(f"Version string '{version}' is not semantic versioning (MAJOR.MINOR.PATCH)")
+        return None
+    return version_casted
+
+
+def dataset_name_and_version_from_string(
+    string: str,
+    resolve_missing_version: bool = True,
+) -> Tuple[str, Optional[str]]:
+    if not isinstance(string, str):
+        raise TypeError(f"'{type(string)}' for '{string}' is an unsupported type. Expected 'str' e.g 'mnist:1.0.0'")
+
+    parts = string.split(":")
+    if len(parts) == 1:
+        dataset_name = parts[0]
+        if resolve_missing_version:
+            version = "latest"  # Default to 'latest' if version is missing. This will be resolved to a specific version later.
+            user_logger.info(f"Version is missing in dataset name: {string}. Defaulting to version='latest'.")
+        else:
+            raise ValueError(f"Version is missing in dataset name: {string}. Use 'name:version'")
+    elif len(parts) == 2:
+        dataset_name, version = parts
+    else:
+        raise ValueError(f"Invalid dataset name format: {string}. Use 'name' or 'name:version' ")
+
+    if not is_valid_version_string(version, allow_none=True, allow_latest=True):
+        raise ValueError(f"Invalid version string: {version}. Use semantic versioning e.g. '1.0.0' or 'latest'")
+
+    return dataset_name, version
+
 
 def create_split_name_list_from_ratios(split_ratios: Dict[str, float], n_items: int, seed: int = 42) -> List[str]:
     samples_per_split = split_sizes_from_ratios(split_ratios=split_ratios, n_items=n_items)
@@ -0,0 +1,123 @@
+from enum import Enum
+from typing import List
+
+FILENAME_RECIPE_JSON = "recipe.json"
+FILENAME_DATASET_INFO = "dataset_info.json"
+FILENAME_ANNOTATIONS_JSONL = "annotations.jsonl"
+FILENAME_ANNOTATIONS_PARQUET = "annotations.parquet"
+
+DATASET_FILENAMES_REQUIRED = [
+    FILENAME_DATASET_INFO,
+    FILENAME_ANNOTATIONS_JSONL,
+    FILENAME_ANNOTATIONS_PARQUET,
+]
+
+
+class DeploymentStage(Enum):
+    STAGING = "staging"
+    PRODUCTION = "production"
+
+
+TAG_IS_SAMPLE = "sample"
+
+OPS_REMOVE_CLASS = "__REMOVE__"
+
+
+class PrimitiveField:
+    CLASS_NAME: str = "class_name"  # Name of the class this primitive is associated with, e.g. "car" for Bbox
+    CLASS_IDX: str = "class_idx"  # Index of the class this primitive is associated with, e.g. 0 for "car" if it is the first class  # noqa: E501
+    OBJECT_ID: str = "object_id"  # Unique identifier for the object, e.g. "12345123"
+    CONFIDENCE: str = "confidence"  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
+
+    META: str = "meta"  # Contains metadata about each primitive, e.g. attributes color, occluded, iscrowd, etc.
+    TASK_NAME: str = "task_name"  # Name of the task this primitive is associated with, e.g. "bboxes" for Bbox
+
+    @staticmethod
+    def fields() -> List[str]:
+        """
+        Returns a list of expected field names for primitives.
+        """
+        return [
+            PrimitiveField.CLASS_NAME,
+            PrimitiveField.CLASS_IDX,
+            PrimitiveField.OBJECT_ID,
+            PrimitiveField.CONFIDENCE,
+            PrimitiveField.META,
+            PrimitiveField.TASK_NAME,
+        ]
+
+
+class SampleField:
+    FILE_PATH: str = "file_path"
+    HEIGHT: str = "height"
+    WIDTH: str = "width"
+    SPLIT: str = "split"
+    TAGS: str = "tags"
+
+    CLASSIFICATIONS: str = "classifications"
+    BBOXES: str = "bboxes"
+    BITMASKS: str = "bitmasks"
+    POLYGONS: str = "polygons"
+
+    STORAGE_FORMAT: str = "storage_format"  # E.g. "image", "video", "zip"
+    COLLECTION_INDEX: str = "collection_index"
+    COLLECTION_ID: str = "collection_id"
+    REMOTE_PATH: str = "remote_path"  # Path to the file in remote storage, e.g. S3
+    SAMPLE_INDEX: str = "sample_index"
+
+    ATTRIBUTION: str = "attribution"  # Attribution for the sample (image/video), e.g. creator, license, source, etc.
+    META: str = "meta"
+    DATASET_NAME: str = "dataset_name"
+
+
+class StorageFormat:
+    IMAGE: str = "image"
+    VIDEO: str = "video"
+    ZIP: str = "zip"
+
+
+class SplitName:
+    TRAIN: str = "train"
+    VAL: str = "validation"
+    TEST: str = "test"
+    UNDEFINED: str = "UNDEFINED"
+
+    @staticmethod
+    def valid_splits() -> List[str]:
+        return [SplitName.TRAIN, SplitName.VAL, SplitName.TEST]
+
+    @staticmethod
+    def all_split_names() -> List[str]:
+        return [*SplitName.valid_splits(), SplitName.UNDEFINED]
+
+    @staticmethod
+    def map_split_name(potential_split_name: str, strict: bool = True) -> str:
+        normalized = potential_split_name.strip().lower()
+
+        if normalized in SPLIT_NAME_MAPPINGS:
+            return SPLIT_NAME_MAPPINGS[normalized]
+
+        if strict:
+            raise ValueError(f"Unrecognized split name: {potential_split_name}")
+        else:
+            return SplitName.UNDEFINED
+
+
+SPLIT_NAME_MAPPINGS = {
+    # Train variations
+    "train": SplitName.TRAIN,
+    "training": SplitName.TRAIN,
+    # Validation variations
+    "validation": SplitName.VAL,
+    "val": SplitName.VAL,
+    "valid": SplitName.VAL,
+    # Test variations
+    "test": SplitName.TEST,
+    "testing": SplitName.TEST,
+}
+
+
+class DatasetVariant(Enum):
+    DUMP = "dump"
+    SAMPLE = "sample"
+    HIDDEN = "hidden"
@@ -11,14 +11,19 @@ from pydantic import (
 )
 
 from hafnia import utils
+from hafnia.dataset.dataset_helpers import dataset_name_and_version_from_string
 from hafnia.dataset.dataset_recipe import recipe_transforms
 from hafnia.dataset.dataset_recipe.recipe_types import (
     RecipeCreation,
     RecipeTransform,
     Serializable,
 )
-from hafnia.dataset.hafnia_dataset import
+from hafnia.dataset.hafnia_dataset import (
+    HafniaDataset,
+    available_dataset_versions_from_name,
+)
 from hafnia.dataset.primitives.primitive import Primitive
+from hafnia.log import user_logger
 
 
 class DatasetRecipe(Serializable):

@@ -41,8 +46,31 @@ class DatasetRecipe(Serializable):
 
     ### Creation Methods (using the 'from_X' )###
     @staticmethod
-    def from_name(
-
+    def from_name(
+        name: str,
+        version: Optional[str] = None,
+        force_redownload: bool = False,
+        download_files: bool = True,
+    ) -> DatasetRecipe:
+        if version == "latest":
+            user_logger.info(
+                f"The dataset '{name}' in a dataset recipe uses 'latest' as version. For dataset recipes the "
+                "version is pinned to a specific version. Consider specifying a specific version to ensure "
+                "reproducibility of your experiments. "
+            )
+            available_versions = available_dataset_versions_from_name(name)
+            version = str(max(available_versions))
+        if version is None:
+            available_versions = available_dataset_versions_from_name(name)
+            str_versions = ", ".join([str(v) for v in available_versions])
+            raise ValueError(
+                f"Version must be specified when creating a DatasetRecipe from name. "
+                f"Available versions are: {str_versions}"
+            )
+
+        creation = FromName(
+            name=name, version=version, force_redownload=force_redownload, download_files=download_files
+        )
         return DatasetRecipe(creation=creation)
 
     @staticmethod

@@ -125,6 +153,21 @@ class DatasetRecipe(Serializable):
         recipe_id = recipe["id"]
         return DatasetRecipe.from_recipe_id(recipe_id)
 
+    @staticmethod
+    def from_name_and_version_string(string: str, resolve_missing_version: bool = False) -> "DatasetRecipe":
+        """
+        Validates and converts a dataset name and version string (name:version) to a DatasetRecipe.from_name recipe.
+        If version is missing and 'resolve_missing_version' is True, it will default to 'latest'.
+        If resolve_missing_version is False, it will raise an error if version is missing.
+        """
+
+        dataset_name, version = dataset_name_and_version_from_string(
+            string=string,
+            resolve_missing_version=resolve_missing_version,
+        )
+
+        return DatasetRecipe.from_name(name=dataset_name, version=version)
+
     @staticmethod
     def from_implicit_form(recipe: Any) -> DatasetRecipe:
         """

@@ -180,7 +223,7 @@ class DatasetRecipe(Serializable):
             return recipe
 
         if isinstance(recipe, str):  # str-type is convert to DatasetFromName
-            return DatasetRecipe.
+            return DatasetRecipe.from_name_and_version_string(string=recipe, resolve_missing_version=True)
 
         if isinstance(recipe, Path):  # Path-type is convert to DatasetFromPath
             return DatasetRecipe.from_path(path_folder=recipe)

@@ -409,6 +452,7 @@ class FromPath(RecipeCreation):
 
 class FromName(RecipeCreation):
     name: str
+    version: Optional[str] = None
     force_redownload: bool = False
     download_files: bool = True
 
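
For reference, an illustrative sketch (not part of the package diff) of how dataset versions are handled by the updated DatasetRecipe.from_name shown above; resolving 'latest' or listing available versions assumes a configured Hafnia connection:

from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

# Pin an explicit version for reproducible recipes.
recipe = DatasetRecipe.from_name(name="mnist", version="1.0.0")

# "latest" is resolved to the newest available version and then pinned (with an info log).
recipe = DatasetRecipe.from_name(name="mnist", version="latest")

# Omitting the version raises a ValueError that lists the available versions.
# The implicit string form now also accepts "name:version".
recipe = DatasetRecipe.from_implicit_form("mnist:1.0.0")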