hafnia 0.4.3__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/tests.yaml +4 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.vscode/settings.json +1 -1
- {hafnia-0.4.3 → hafnia-0.5.0}/PKG-INFO +3 -3
- {hafnia-0.4.3 → hafnia-0.5.0}/README.md +1 -1
- {hafnia-0.4.3 → hafnia-0.5.0}/examples/example_hafnia_dataset.py +39 -24
- {hafnia-0.4.3 → hafnia-0.5.0}/pyproject.toml +2 -2
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_details_uploader.py +41 -54
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_helpers.py +1 -15
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_names.py +17 -3
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/torchvision_datasets.py +6 -3
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/hafnia_dataset.py +99 -24
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/hafnia_dataset_types.py +3 -1
- hafnia-0.5.0/src/hafnia/dataset/operations/dataset_s3_storage.py +211 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/operations/table_transformations.py +2 -1
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/http.py +2 -1
- hafnia-0.5.0/src/hafnia/platform/datasets.py +334 -0
- hafnia-0.5.0/src/hafnia/platform/s5cmd_utils.py +147 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/utils.py +4 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/dataset_cmds.py +18 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/profile_cmds.py +0 -1
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/helper_testing.py +5 -0
- hafnia-0.5.0/tests/integration/test_bring_your_own_data.py +93 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_torchvision_datasets.py +1 -5
- hafnia-0.5.0/tests/unit/dataset/test_dataset_details_uploader.py +55 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/uv.lock +1290 -1176
- hafnia-0.4.3/src/hafnia/platform/datasets.py +0 -243
- hafnia-0.4.3/tests/unit/dataset/test_dataset_details_uploader.py +0 -29
- {hafnia-0.4.3 → hafnia-0.5.0}/.devcontainer/devcontainer.json +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.devcontainer/hooks/post_create +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/dependabot.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/Dockerfile +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/build.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/check_release.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/ci_cd.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/lint.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/publish_docker.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/publish_pypi.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.gitignore +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.pre-commit-config.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.python-version +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.trivyignore +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.vscode/extensions.json +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/.vscode/launch.json +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/LICENSE +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/docs/cli.md +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/docs/release.md +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/examples/example_dataset_recipe.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/examples/example_logger.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/examples/example_torchvision_dataloader.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/__init__.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/data/__init__.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/data/factory.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_recipe/recipe_types.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/format_coco.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/format_helpers.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/format_image_classification_folder.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/format_yolo.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/license_types.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/operations/dataset_stats.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/operations/dataset_transformations.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/__init__.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/bbox.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/bitmask.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/classification.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/point.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/polygon.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/primitive.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/segmentation.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/primitives/utils.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/experiment/__init__.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/experiment/hafnia_logger.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/log.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/__init__.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/builder.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/dataset_recipe.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/download.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/experiment.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/platform/trainer_package.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/torch_helpers.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/visualizations/colors.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/visualizations/image_visualizations.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/__init__.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/__main__.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/config.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/consts.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/dataset_recipe_cmds.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/experiment_cmds.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/keychain.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/runc_cmds.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia_cli/trainer_package_cmds.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/__init__.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/conftest.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/train/000000000632.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/train/000000000724.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/train/_annotations.coco.json +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000139.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000285.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_coco_roboflow/valid/_annotations.coco.json +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/obj.names +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/data/000000000139.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/data/000000000139.txt +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/data/000000000285.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/data/000000000285.txt +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/train/images.txt +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.txt +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_formats/format_yolo/validation/images.txt +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/dataset_image_metadata_schema.yaml +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_dataset_transformations/test_video_storage_format_read_image.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[polygon].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_as_ints].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_bytes].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_str].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_from_coco_format_visualized.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_coco/test_to_coco_format_visualized.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_format_yolo/test_import_yolo_format_visualized.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/expected_images/test_visualizations/test_polygon_to_bitmask_conversion.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/data/253/253925d334c002ce6662d8133535dd4c.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/data/b1a/b1a09f4d922f8f6904bab0c1caf172ab.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/data/f67/f675c8a1e862b5e00203ab888ac7fff4.jpg +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/data/25c/25c3a206e7b60ab50245ee3d52d97f11.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/data/962/962fd865fdd45f169d5ca8c8f284d68d.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/data/ec6/ec60f2f4fb854b59c97e16b45c713de0.png +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/data/micro_test_datasets/micro-tiny-dataset/dataset_info.json +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_check_example_scripts.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_cli_integration.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_dataset_merges.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_dataset_recipes_with_platform.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/integration/test_samples.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/dataset_recipe/test_dataset_recipes.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/format_conversions/test_format_coco.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/format_conversions/test_format_image_classification_folder.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/format_conversions/test_format_yolo.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/operations/test_dataset_stats.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/operations/test_dataset_transformations.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/operations/test_table_transformations.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_colors.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_dataset_helpers.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_dataset_names.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_hafnia_dataset.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/dataset/test_shape_primitives.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_builder.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_cli.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_hafnia_logger.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_utils.py +0 -0
- {hafnia-0.4.3 → hafnia-0.5.0}/tests/unit/test_visualizations.py +0 -0
{hafnia-0.4.3 → hafnia-0.5.0}/.github/workflows/tests.yaml
@@ -32,6 +32,10 @@ jobs:
         run: |
           mkdir -p ~/.hafnia
           echo "$HAFNIA_CONFIG" | jq . > ~/.hafnia/config.json
+      - name: Check hafnia configured
+        run: uv run hafnia profile active
+      - name: Check hafnia by download
+        run: uv run hafnia dataset download mnist --force
       - name: Run tests
         run: uv run pytest tests

{hafnia-0.4.3 → hafnia-0.5.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.4.3
+Version: 0.5.0
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -10,7 +10,7 @@ Requires-Dist: click>=8.1.8
 Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
 Requires-Dist: keyring>=25.6.0
-Requires-Dist: mcp
+Requires-Dist: mcp>=1.16.0
 Requires-Dist: mlflow>=3.4.0
 Requires-Dist: more-itertools>=10.7.0
 Requires-Dist: opencv-python-headless>=4.11.0.86
@@ -209,7 +209,7 @@ DatasetInfo(
 ```
 
 You can iterate and access samples in the dataset using the `HafniaDataset` object.
-Each sample contain image and annotations information. 
+Each sample contain image and annotations information.
 
 ```python
 from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
{hafnia-0.4.3 → hafnia-0.5.0}/README.md
@@ -180,7 +180,7 @@ DatasetInfo(
 ```
 
 You can iterate and access samples in the dataset using the `HafniaDataset` object.
-Each sample contain image and annotations information. 
+Each sample contain image and annotations information.
 
 ```python
 from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
{hafnia-0.4.3 → hafnia-0.5.0}/examples/example_hafnia_dataset.py
@@ -138,43 +138,58 @@ path_tmp.mkdir(parents=True, exist_ok=True)
 Image.fromarray(image_with_annotations).save(path_tmp / "sample_with_annotations.png")
 
 
-##
+## Create a hafnia dataset from scratch ##
+path_yolo_dataset = Path("tests/data/dataset_formats/format_yolo/train")
+path_class_names = path_yolo_dataset.parent / "obj.names"
+class_names = [line.strip() for line in path_class_names.read_text().splitlines() if line.strip()]
+path_images_file = path_yolo_dataset / "images.txt"
+image_files = [line.strip() for line in path_images_file.read_text().splitlines() if line.strip()]
+
 fake_samples = []
-for
-
-
-
-
-
-
-
-
-
-
-
+for image_file in image_files:
+    path_image = path_yolo_dataset / image_file
+    path_bboxes = path_yolo_dataset / image_file.replace(".jpg", ".txt")
+    bboxes: List[Bbox] = []
+    for bboxes_line in path_bboxes.read_text().splitlines():
+        str_parts = bboxes_line.strip().split()
+        class_idx = int(str_parts[0])
+        x_center, y_center, bbox_width, bbox_height = (float(value) for value in str_parts[1:5])
+        bbox = Bbox(
+            top_left_x=x_center - bbox_width / 2,
+            top_left_y=y_center - bbox_height / 2,
+            width=bbox_width,
+            height=bbox_height,
+            class_idx=class_idx,
+            class_name=class_names[class_idx],
+        )
+        bboxes.append(bbox)
+    image = Image.open(path_image)
+    height, width = image.size[1], image.size[0]
+    sample = Sample(file_path=str(path_image), height=height, width=width, split="train", bboxes=bboxes)
     fake_samples.append(sample)
 
 
 fake_dataset_info = DatasetInfo(
-    dataset_name="
+    dataset_name="custom-dataset",
     version="0.0.1",
-    tasks=[
-        TaskInfo(primitive=Bbox, class_names=["car", "truck", "bus"]),
-        TaskInfo(primitive=Classification, class_names=["vehicle", "pedestrian", "cyclist"]),
-    ],
+    tasks=[TaskInfo(primitive=Bbox, class_names=class_names)],
 )
-
+custom_dataset = HafniaDataset.from_samples_list(samples_list=fake_samples, info=fake_dataset_info)
+
+sample = Sample(**custom_dataset[0])
+
+# To visualize and verify dataset is formatted correctly store image with annotations
+image_with_annotations = sample.draw_annotations()
+Image.fromarray(image_with_annotations).save(path_tmp / "custom_dataset_sample.png")  # Save visualization to TM
 
-#
-#
+# To upload the dataset to Hafnia platform
+# custom_dataset.upload_to_platform(interactive=True, allow_version_overwrite=False)
 
-# Coming soon! Create your own dataset details page in Hafnia
-# fake_dataset.upload_dataset_details()
 
 ## Storing predictions: A hafnia dataset can also be used for storing predictions per sample
 # set 'ground_truth=False' and add 'confidence'.
 bboxes_predictions = [
-    Bbox(top_left_x=
+    Bbox(top_left_x=0.1, top_left_y=0.2, width=0.3, height=0.4, class_name="car", ground_truth=False, confidence=0.9)
 ]
 
 classifications_predictions = [Classification(class_name="vehicle", class_idx=0, ground_truth=False, confidence=0.95)]
{hafnia-0.4.3 → hafnia-0.5.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "hafnia"
-version = "0.4.3"
+version = "0.5.0"
 description = "Python SDK for communication with Hafnia platform."
 readme = "README.md"
 authors = [
@@ -28,7 +28,7 @@ dependencies = [
     "xxhash>=3.5.0",
     "mlflow>=3.4.0",
     "sagemaker-mlflow>=0.1.0",
-    "mcp",
+    "mcp>=1.16.0",
 ]
 
 [dependency-groups]
{hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_details_uploader.py
@@ -4,7 +4,7 @@ import base64
 from datetime import datetime
 from enum import Enum
 from pathlib import Path
-from typing import Any, Dict, List, Optional,
+from typing import Any, Dict, List, Optional, Type, Union
 
 import boto3
 import polars as pl
@@ -13,7 +13,6 @@ from pydantic import BaseModel, ConfigDict, field_validator
 
 from hafnia.dataset.dataset_names import (
     DatasetVariant,
-    DeploymentStage,
     PrimitiveField,
     SampleField,
     SplitName,
@@ -29,26 +28,21 @@ from hafnia.dataset.primitives import (
     Segmentation,
 )
 from hafnia.dataset.primitives.primitive import Primitive
-from hafnia.
-from hafnia.
-from hafnia.platform.datasets import get_dataset_id
+from hafnia.platform.datasets import upload_dataset_details
+from hafnia.utils import get_path_dataset_gallery_images
 from hafnia_cli.config import Config
 
 
-def generate_bucket_name(dataset_name: str, deployment_stage: DeploymentStage) -> str:
-    # TODO: When moving to versioning we do NOT need 'staging' and 'production' specific buckets
-    # and the new name convention should be: f"hafnia-dataset-{dataset_name}"
-    return f"mdi-{deployment_stage.value}-{dataset_name}"
-
-
 class DatasetDetails(BaseModel, validate_assignment=True):  # type: ignore[call-arg]
     model_config = ConfigDict(use_enum_values=True)  # To parse Enum values as strings
     name: str
+    title: Optional[str] = None
+    overview: Optional[str] = None
     data_captured_start: Optional[datetime] = None
     data_captured_end: Optional[datetime] = None
     data_received_start: Optional[datetime] = None
     data_received_end: Optional[datetime] = None
-
+    dataset_updated_at: Optional[datetime] = None
     license_citation: Optional[str] = None
     version: Optional[str] = None
     s3_bucket_name: Optional[str] = None
@@ -281,26 +275,32 @@ def get_folder_size(path: Path) -> int:
     return sum([path.stat().st_size for path in path.rglob("*")])
 
 
-def
-
-
-
-
-
-
-
-
-
-
-
-
+def upload_dataset_details_to_platform(
+    dataset: HafniaDataset,
+    path_gallery_images: Optional[Path] = None,
+    gallery_image_names: Optional[List[str]] = None,
+    distribution_task_names: Optional[List[str]] = None,
+    update_platform: bool = True,
+    cfg: Optional[Config] = None,
+) -> dict:
+    cfg = cfg or Config()
+    dataset_details = dataset_details_from_hafnia_dataset(
+        dataset=dataset,
+        path_gallery_images=path_gallery_images,
+        gallery_image_names=gallery_image_names,
+        distribution_task_names=distribution_task_names,
+    )
 
-
-
+    if update_platform:
+        dataset_details_exclude_none = dataset_details.model_dump(exclude_none=True, mode="json")
+        upload_dataset_details(
+            cfg=cfg,
+            data=dataset_details_exclude_none,
+            dataset_name=dataset_details.name,
+        )
 
-
-
-    return response  # type: ignore[return-value]
+    dataset_details_dict = dataset_details.model_dump(exclude_none=False, mode="json")
+    return dataset_details_dict
 
 
 def get_resolutions(dataset: HafniaDataset, max_resolutions_selected: int = 8) -> List[DbResolution]:
@@ -360,9 +360,6 @@ def s3_based_fields(bucket_name: str, variant_type: DatasetVariant, session: bot
 
 def dataset_details_from_hafnia_dataset(
     dataset: HafniaDataset,
-    deployment_stage: DeploymentStage,
-    path_sample: Optional[Path],
-    path_hidden: Optional[Path],
     path_gallery_images: Optional[Path] = None,
     gallery_image_names: Optional[List[str]] = None,
     distribution_task_names: Optional[List[str]] = None,
@@ -371,33 +368,24 @@ def dataset_details_from_hafnia_dataset(
     dataset_reports = []
     dataset_meta_info = dataset.info.meta or {}
 
-    path_and_variant
-    if path_sample is not None:
-        path_and_variant.append((path_sample, DatasetVariant.SAMPLE))
-
-    if path_hidden is not None:
-        path_and_variant.append((path_hidden, DatasetVariant.HIDDEN))
-
-    if len(path_and_variant) == 0:
-        raise ValueError("At least one path must be provided for sample or hidden dataset.")
-
+    path_and_variant = [DatasetVariant.SAMPLE, DatasetVariant.HIDDEN]
     gallery_images = create_gallery_images(
         dataset=dataset,
         path_gallery_images=path_gallery_images,
         gallery_image_names=gallery_image_names,
     )
 
-    for
+    for variant_type in path_and_variant:
         if variant_type == DatasetVariant.SAMPLE:
             dataset_variant = dataset.create_sample_dataset()
         else:
            dataset_variant = dataset
 
-
+        files_paths = dataset_variant.samples[SampleField.FILE_PATH].to_list()
+        size_bytes = sum([Path(file_path).stat().st_size for file_path in files_paths])
        dataset_variants.append(
             DbDatasetVariant(
                 variant_type=VARIANT_TYPE_MAPPING[variant_type],  # type: ignore[index]
-                # upload_date: Optional[datetime] = None
                 size_bytes=size_bytes,
                 data_type=DataTypeChoices.images,
                 number_of_data_items=len(dataset_variant),
@@ -405,7 +393,6 @@ def dataset_details_from_hafnia_dataset(
                 duration=dataset_meta_info.get("duration", None),
                 duration_average=dataset_meta_info.get("duration_average", None),
                 frame_rate=dataset_meta_info.get("frame_rate", None),
-                # bit_rate: Optional[float] = None
                 n_cameras=dataset_meta_info.get("n_cameras", None),
             )
         )
@@ -435,19 +422,19 @@ def dataset_details_from_hafnia_dataset(
         object_reports = sorted(object_reports, key=lambda x: x.obj.name)  # Sort object reports by name
         report.annotated_object_reports = object_reports
 
-
-
+        if report.distribution_values is None:
+            report.distribution_values = []
 
-
+        dataset_reports.append(report)
     dataset_name = dataset.info.dataset_name
-    bucket_sample = generate_bucket_name(dataset_name, deployment_stage=deployment_stage)
     dataset_info = DatasetDetails(
         name=dataset_name,
+        title=dataset.info.dataset_title,
+        overview=dataset.info.description,
         version=dataset.info.version,
-        s3_bucket_name=bucket_sample,
         dataset_variants=dataset_variants,
         split_annotations_reports=dataset_reports,
-
+        dataset_updated_at=dataset.info.updated_at,
         dataset_format_version=dataset.info.format_version,
         license_citation=dataset.info.reference_bibtex,
         data_captured_start=dataset_meta_info.get("data_captured_start", None),
@@ -565,7 +552,7 @@ def create_gallery_images(
     gallery_images = None
     if (gallery_image_names is not None) and (len(gallery_image_names) > 0):
         if path_gallery_images is None:
-
+            path_gallery_images = get_path_dataset_gallery_images(dataset.info.dataset_name)
         path_gallery_images.mkdir(parents=True, exist_ok=True)
         COL_IMAGE_NAME = "image_name"
         samples = dataset.samples.with_columns(
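The uploader's public entry point is now `upload_dataset_details_to_platform`, which no longer needs a deployment stage or explicit sample/hidden paths. Below is a minimal sketch (not part of the diff) of how the new `update_platform` flag could be used for a local dry run; the `DatasetInfo`/`TaskInfo` import path and the exact `Sample`/`Bbox` fields follow the example script above and are otherwise assumptions.

```python
from pathlib import Path

from PIL import Image

from hafnia.dataset.dataset_details_uploader import upload_dataset_details_to_platform
from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
from hafnia.dataset.hafnia_dataset_types import DatasetInfo, TaskInfo  # import path assumed
from hafnia.dataset.primitives import Bbox

# Build a tiny throw-away dataset from an image that ships with the repo's test data.
path_image = Path("tests/data/micro_test_datasets/micro-tiny-dataset/data/25c/25c3a206e7b60ab50245ee3d52d97f11.png")
image = Image.open(path_image)
sample = Sample(
    file_path=str(path_image),
    height=image.size[1],
    width=image.size[0],
    split="train",
    bboxes=[Bbox(top_left_x=0.1, top_left_y=0.2, width=0.3, height=0.4, class_idx=0, class_name="car")],
)
info = DatasetInfo(
    dataset_name="uploader-smoke-test",  # hypothetical dataset name
    version="0.0.1",
    tasks=[TaskInfo(primitive=Bbox, class_names=["car"])],
)
dataset = HafniaDataset.from_samples_list(samples_list=[sample], info=info)

# Dry run: with update_platform=False the function only builds the DatasetDetails
# payload locally and returns it as a dict; nothing is sent to the platform.
details = upload_dataset_details_to_platform(dataset=dataset, update_platform=False)
print(details["name"], details["version"])
```

With `update_platform=True` (the default), the same call posts the payload through `hafnia.platform.datasets.upload_dataset_details`.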
{hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_helpers.py
@@ -57,20 +57,6 @@ def save_pil_image_with_hash_name(image: Image.Image, path_folder: Path, allow_s
 def copy_and_rename_file_to_hash_value(path_source: Path, path_dataset_root: Path) -> Path:
     """
     Copies a file to a dataset root directory with a hash-based name and sub-directory structure.
-
-    E.g. for an "image.png" with hash "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", the image will be copied to
-    'path_dataset_root / "data" / "dfe" / "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png"'
-    Notice that the hash is used for both the filename and the subfolder name.
-
-    Placing image/video files into multiple sub-folders (instead of one large folder) is seemingly
-    unnecessary, but it is actually a requirement when the dataset is later downloaded from S3.
-
-    The reason is that AWS has a rate limit of 3500 ops/sec per prefix (sub-folder) in S3 - meaning we can "only"
-    download 3500 files per second from a single folder (prefix) in S3.
-
-    For even a single user, we found that this limit was being reached when files are stored in single folder (prefix)
-    in S3. To support multiple users and concurrent experiments, we are required to separate files into
-    multiple sub-folders (prefixes) in S3 to not hit the rate limit.
     """
 
     if not path_source.exists():
@@ -86,7 +72,7 @@ def copy_and_rename_file_to_hash_value(path_source: Path, path_dataset_root: Pat
 
 
 def relative_path_from_hash(hash: str, suffix: str) -> Path:
-    path_file = Path("data") /
+    path_file = Path("data") / f"{hash}{suffix}"
    return path_file
 
 
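The docstring removed above described a `data/<first-3-chars-of-hash>/<hash><suffix>` layout motivated by S3 per-prefix rate limits; after this change `relative_path_from_hash` returns a flat `data/<hash><suffix>` path. A small illustration of the new behaviour (the hash value here is made up):

```python
from hafnia.dataset.dataset_helpers import relative_path_from_hash

# The function now simply joins "data" with "<hash><suffix>".
print(relative_path_from_hash(hash="dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", suffix=".png"))
# data/dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png
```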
{hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/dataset_names.py
@@ -2,6 +2,7 @@ from enum import Enum
 from typing import Dict, List, Optional
 
 import boto3
+from botocore.exceptions import UnauthorizedSSOTokenError
 from pydantic import BaseModel, field_validator
 
 FILENAME_RECIPE_JSON = "recipe.json"
@@ -21,6 +22,7 @@ class DeploymentStage(Enum):
     PRODUCTION = "production"
 
 
+ARN_PREFIX = "arn:aws:s3:::"
 TAG_IS_SAMPLE = "sample"
 
 OPS_REMOVE_CLASS = "__REMOVE__"
@@ -151,7 +153,14 @@ class AwsCredentials(BaseModel):
         """
         Creates AwsCredentials from a Boto3 session.
         """
-
+        try:
+            frozen_credentials = session.get_credentials().get_frozen_credentials()
+        except UnauthorizedSSOTokenError as e:
+            raise RuntimeError(
+                f"Failed to get AWS credentials from the session for profile '{session.profile_name}'.\n"
+                f"Ensure the profile exists in your AWS config in '~/.aws/config' and that you are logged in via AWS SSO.\n"
+                f"\tUse 'aws sso login --profile {session.profile_name}' to log in."
+            ) from e
         return AwsCredentials(
             access_key=frozen_credentials.access_key,
             secret_key=frozen_credentials.secret_key,
@@ -159,8 +168,13 @@ class AwsCredentials(BaseModel):
             region=session.region_name,
         )
 
-
-
+    def to_resource_credentials(self, bucket_name: str) -> "ResourceCredentials":
+        """
+        Converts AwsCredentials to ResourceCredentials by adding the S3 ARN.
+        """
+        payload = self.model_dump()
+        payload["s3_arn"] = f"{ARN_PREFIX}{bucket_name}"
+        return ResourceCredentials(**payload)
 
 
 class ResourceCredentials(AwsCredentials):
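For reference, a hedged sketch of how the new SSO error handling and `to_resource_credentials` helper might be exercised. The classmethod name used here (`from_session`) is an assumption, since the `def` line sits outside the hunk; only the body and the `to_resource_credentials` signature are shown in the diff, and the profile and bucket names are placeholders.

```python
import boto3

from hafnia.dataset.dataset_names import AwsCredentials

# Profile name is hypothetical; use the AWS SSO profile you normally log in with.
session = boto3.Session(profile_name="my-sso-profile")

# The constructor body shown in the hunk now raises a RuntimeError with an
# "aws sso login --profile ..." hint if the SSO token is missing or expired.
credentials = AwsCredentials.from_session(session)  # classmethod name assumed

# New helper: attach the S3 ARN for a bucket to produce ResourceCredentials.
resource_credentials = credentials.to_resource_credentials(bucket_name="my-dataset-bucket")
print(resource_credentials.s3_arn)  # arn:aws:s3:::my-dataset-bucket
```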
{hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/format_conversions/torchvision_datasets.py
@@ -40,7 +40,7 @@ def mnist_as_hafnia_dataset(force_redownload=False, n_samples: Optional[int] = N
 
     dataset_info = DatasetInfo(
         dataset_name="mnist",
-        version="1.
+        version="1.0.0",
         tasks=tasks,
         reference_bibtex=textwrap.dedent("""\
             @article{lecun2010mnist,
@@ -150,7 +150,7 @@ def cifar_as_hafnia_dataset(
 
     dataset_info = DatasetInfo(
         dataset_name=dataset_name,
-        version="1.
+        version="1.0.0",
         tasks=tasks,
         reference_bibtex=textwrap.dedent("""\
             @@TECHREPORT{Krizhevsky09learningmultiple,
@@ -268,7 +268,10 @@ def _download_and_extract_caltech_dataset(dataset_name: str, force_redownload: b
         path_output_extracted = path_tmp_output / "caltech-101"
         for gzip_file in os.listdir(path_output_extracted):
             if gzip_file.endswith(".gz"):
-                extract_archive(
+                extract_archive(
+                    from_path=os.path.join(path_output_extracted, gzip_file),
+                    to_path=path_output_extracted,
+                )
         path_org = path_output_extracted / "101_ObjectCategories"
 
     elif dataset_name == "caltech-256":
{hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/hafnia_dataset.py
@@ -12,7 +12,6 @@ from packaging.version import Version
 
 from hafnia.dataset import dataset_helpers
 from hafnia.dataset.dataset_names import (
-    DATASET_FILENAMES_REQUIRED,
     FILENAME_ANNOTATIONS_JSONL,
     FILENAME_ANNOTATIONS_PARQUET,
     FILENAME_DATASET_INFO,
@@ -38,6 +37,7 @@ from hafnia.dataset.operations import (
 from hafnia.dataset.primitives.primitive import Primitive
 from hafnia.log import user_logger
 from hafnia.utils import progress_bar
+from hafnia_cli.config import Config
 
 
 @dataclass
@@ -434,7 +434,7 @@ class HafniaDataset:
         aws_credentials: AwsCredentials,
         force_redownload: bool = False,
     ) -> HafniaDataset:
-        from hafnia.platform.
+        from hafnia.platform.s5cmd_utils import fast_copy_files
 
         remote_src_paths = dataset.samples[SampleField.REMOTE_PATH].unique().to_list()
         update_rows = []
@@ -470,7 +470,7 @@ class HafniaDataset:
             return dataset
 
         environment_vars = aws_credentials.aws_credentials()
-
+        fast_copy_files(
            src_paths=remote_src_paths,
            dst_paths=local_dst_paths,
            append_envs=environment_vars,
@@ -563,7 +563,7 @@ class HafniaDataset:
             keep_sample_data=keep_sample_data,
         )
 
-    def write(self, path_folder: Path,
+    def write(self, path_folder: Path, drop_null_cols: bool = True) -> None:
         user_logger.info(f"Writing dataset to {path_folder}...")
         path_folder = path_folder.absolute()
         if not path_folder.exists():
@@ -578,18 +578,9 @@ class HafniaDataset:
             )
             new_paths.append(str(new_path))
         hafnia_dataset.samples = hafnia_dataset.samples.with_columns(pl.Series(new_paths).alias(SampleField.FILE_PATH))
-        hafnia_dataset.write_annotations(
-            path_folder=path_folder,
-            drop_null_cols=drop_null_cols,
-            add_version=add_version,
-        )
+        hafnia_dataset.write_annotations(path_folder=path_folder, drop_null_cols=drop_null_cols)
 
-    def write_annotations(
-        dataset: HafniaDataset,
-        path_folder: Path,
-        drop_null_cols: bool = True,
-        add_version: bool = False,
-    ) -> None:
+    def write_annotations(dataset: HafniaDataset, path_folder: Path, drop_null_cols: bool = True) -> None:
         """
         Writes only the annotations files (JSONL and Parquet) to the specified folder.
         """
@@ -604,18 +595,102 @@ class HafniaDataset:
         samples = samples.drop(pl.selectors.by_dtype(pl.Null))
 
         # Store only relative paths in the annotations files
-
-
-
-
+        if SampleField.FILE_PATH in samples.columns:  # We drop column for remote datasets
+            absolute_paths = samples[SampleField.FILE_PATH].to_list()
+            relative_paths = [str(Path(path).relative_to(path_folder)) for path in absolute_paths]
+            samples = samples.with_columns(pl.Series(relative_paths).alias(SampleField.FILE_PATH))
+        else:
+            samples = samples.with_columns(pl.lit("").alias(SampleField.FILE_PATH))
         samples.write_ndjson(path_folder / FILENAME_ANNOTATIONS_JSONL)  # Json for readability
         samples.write_parquet(path_folder / FILENAME_ANNOTATIONS_PARQUET)  # Parquet for speed
 
-
-
-
-
-
+    def delete_on_platform(dataset: HafniaDataset, interactive: bool = True) -> None:
+        """
+        Delete this dataset from the Hafnia platform.
+        This is a thin wrapper around `hafnia.platform.datasets.delete_dataset_completely_by_name`.
+
+        Args:
+            dataset (HafniaDataset): The :class:`HafniaDataset` instance to delete from the platform. The
+                dataset name is taken from `dataset.info.dataset_name`.
+            interactive (bool): If ``True``, perform the deletion in interactive mode (for example,
+                prompting the user for confirmation where supported). If ``False``,
+                run non-interactively, suitable for automated scripts or CI usage. Defaults to True.
+        """
+        from hafnia.platform.datasets import delete_dataset_completely_by_name
+
+        delete_dataset_completely_by_name(dataset_name=dataset.info.dataset_name, interactive=interactive)
+
+    def upload_to_platform(
+        dataset: HafniaDataset,
+        dataset_sample: Optional[HafniaDataset] = None,
+        allow_version_overwrite: bool = False,
+        interactive: bool = True,
+        gallery_images: Optional[Any] = None,
+        distribution_task_names: Optional[List[str]] = None,
+        cfg: Optional[Config] = None,
+    ) -> dict:
+        """
+        Upload the dataset and dataset details to the Hafnia platform.
+        This method ensures the dataset exists on the platform, synchronizes the
+        dataset files to remote storage, and uploads dataset details and optional gallery images
+        distributions.
+        Args:
+            dataset: The full :class:`HafniaDataset` instance that should be uploaded
+                to the platform.
+            dataset_sample: Optional sample :class:`HafniaDataset` used as a smaller
+                preview or subset of the main dataset on the platform. If provided,
+                it is uploaded alongside the full dataset for demonstration or
+                inspection purposes. Use only this if the sample dataset uses different
+                image files than the main dataset. Otherwise it is sufficient to just provide
+                the main dataset and the platform will create a sample automatically.
+            allow_version_overwrite: If ``True``, allows an existing dataset version
+                with the same name to be overwritten on the platform. If ``False``,
+                an error or confirmation may be required when a version conflict is
+                detected.
+            interactive: If ``True``, the upload process may prompt the user for
+                confirmation or additional input (for example when overwriting
+                existing versions). If ``False``, the upload is performed without
+                interactive prompts.
+            gallery_images: Optional collection of image identifiers or file names
+                that should be marked or displayed as gallery images for the dataset
+                on the platform. These are forwarded as ``gallery_image_names`` to
+                the platform API.
+            distribution_task_names: Optional list of task names associated with the
+                dataset that should be considered when configuring how the dataset is
+                distributed or exposed on the platform.
+            cfg: Optional :class:`hafnia_cli.config.Config` instance providing
+                configuration for platform access and storage. If not supplied, a
+                default configuration is created.
+        Returns:
+            dict: The response returned by the platform after uploading the dataset
+                details. The exact contents depend on the platform API but typically
+                include information about the created or updated dataset (such as
+                identifiers and status).
+        """
+
+        from hafnia.dataset.dataset_details_uploader import upload_dataset_details_to_platform
+        from hafnia.dataset.operations.dataset_s3_storage import sync_dataset_files_to_platform
+        from hafnia.platform.datasets import get_or_create_dataset
+
+        cfg = cfg or Config()
+        get_or_create_dataset(dataset.info.dataset_name, cfg=cfg)
+
+        sync_dataset_files_to_platform(
+            dataset=dataset,
+            sample_dataset=dataset_sample,
+            interactive=interactive,
+            allow_version_overwrite=allow_version_overwrite,
+            cfg=cfg,
+        )
+
+        response = upload_dataset_details_to_platform(
+            dataset=dataset,
+            distribution_task_names=distribution_task_names,
+            gallery_image_names=gallery_images,
+            cfg=cfg,
+        )
+
+        return response
 
     def __eq__(self, value) -> bool:
         if not isinstance(value, HafniaDataset):
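Together with the example script change above, the end-to-end flow suggested by this release is: build or load a `HafniaDataset`, optionally write it locally, then call `upload_to_platform`. A hedged sketch follows; the output folder is hypothetical and the dataset is assumed to be the `custom_dataset` built in `examples/example_hafnia_dataset.py`.

```python
from pathlib import Path

from hafnia.dataset.hafnia_dataset import HafniaDataset


def publish(custom_dataset: HafniaDataset) -> dict:
    # New write() signature: drop_null_cols defaults to True and add_version is gone.
    custom_dataset.write(Path(".data/custom-dataset"))  # hypothetical output folder

    # Ensures the dataset exists on the platform, syncs files to remote storage and
    # uploads the dataset details page; set interactive=False for CI usage.
    return custom_dataset.upload_to_platform(
        interactive=True,
        allow_version_overwrite=False,
    )


# To remove the dataset from the platform again (wraps delete_dataset_completely_by_name):
# custom_dataset.delete_on_platform(interactive=True)
```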
{hafnia-0.4.3 → hafnia-0.5.0}/src/hafnia/dataset/hafnia_dataset_types.py
@@ -51,7 +51,7 @@ class TaskInfo(BaseModel):
         return self.class_names.index(class_name)
 
     # The 'primitive'-field of type 'Type[Primitive]' is not supported by pydantic out-of-the-box as
-    # the 'Primitive' class is an abstract base class and for the actual
+    # the 'Primitive' class is an abstract base class and for the actual primitives such as Bbox, Bitmask, Classification.
     # Below magic functions ('ensure_primitive' and 'serialize_primitive') ensures that the 'primitive' field can
     # correctly validate and serialize sub-classes (Bbox, Classification, ...).
     @field_validator("primitive", mode="plain")
@@ -103,6 +103,8 @@ class TaskInfo(BaseModel):
 class DatasetInfo(BaseModel):
     dataset_name: str = Field(description="Name of the dataset, e.g. 'coco'")
     version: Optional[str] = Field(default=None, description="Version of the dataset")
+    dataset_title: Optional[str] = Field(default=None, description="Optional, human-readable title of the dataset")
+    description: Optional[str] = Field(default=None, description="Optional, description of the dataset")
     tasks: List[TaskInfo] = Field(default=None, description="List of tasks in the dataset")
     reference_bibtex: Optional[str] = Field(
         default=None,