hafnia 0.4.1__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/Dockerfile +4 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/build.yaml +5 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/ci_cd.yaml +7 -6
- {hafnia-0.4.1 → hafnia-0.4.3}/.vscode/launch.json +6 -6
- {hafnia-0.4.1 → hafnia-0.4.3}/PKG-INFO +2 -2
- {hafnia-0.4.1 → hafnia-0.4.3}/README.md +1 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/examples/example_hafnia_dataset.py +11 -6
- {hafnia-0.4.1 → hafnia-0.4.3}/examples/example_logger.py +1 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/pyproject.toml +3 -3
- hafnia-0.4.1/src/hafnia/dataset/dataset_upload_helper.py → hafnia-0.4.3/src/hafnia/dataset/dataset_details_uploader.py +115 -192
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_names.py +26 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +3 -3
- hafnia-0.4.3/src/hafnia/dataset/format_conversions/format_coco.py +490 -0
- hafnia-0.4.3/src/hafnia/dataset/format_conversions/format_helpers.py +33 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/format_conversions/format_image_classification_folder.py +95 -14
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/format_conversions/format_yolo.py +115 -25
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/format_conversions/torchvision_datasets.py +10 -8
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/hafnia_dataset.py +20 -466
- hafnia-0.4.3/src/hafnia/dataset/hafnia_dataset_types.py +477 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/license_types.py +4 -4
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/operations/dataset_stats.py +3 -3
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/operations/dataset_transformations.py +14 -17
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/operations/table_transformations.py +20 -13
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/bbox.py +6 -2
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/bitmask.py +21 -46
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/classification.py +1 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/polygon.py +43 -2
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/primitive.py +1 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/segmentation.py +1 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/experiment/hafnia_logger.py +13 -4
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/datasets.py +3 -4
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/torch_helpers.py +48 -4
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/utils.py +35 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/visualizations/image_visualizations.py +3 -1
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/__main__.py +2 -2
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/config.py +2 -2
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/dataset_cmds.py +2 -2
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/dataset_recipe_cmds.py +1 -1
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/experiment_cmds.py +1 -1
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/profile_cmds.py +2 -2
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/runc_cmds.py +1 -1
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/trainer_package_cmds.py +2 -2
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/conftest.py +7 -1
- hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/train/000000000724.jpg +0 -0
- hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/train/_annotations.coco.json +2967 -0
- hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/valid/_annotations.coco.json +1423 -0
- hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train/data/000000000139.jpg +0 -0
- hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train/data/000000000285.jpg +0 -0
- {hafnia-0.4.1/tests/data/dataset_formats/format_yolo → hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train}/images.txt +0 -1
- hafnia-0.4.3/tests/data/dataset_formats/format_yolo/validation/data/000000000632.jpg +0 -0
- hafnia-0.4.3/tests/data/dataset_formats/format_yolo/validation/images.txt +1 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/dataset_image_metadata_schema.yaml +19 -4
- hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[polygon].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_as_ints].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_bytes].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_str].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_from_coco_format_visualized.png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_format_coco/test_to_coco_format_visualized.png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
- hafnia-0.4.3/tests/data/expected_images/test_visualizations/test_polygon_to_bitmask_conversion.png +0 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +3 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/data/253/253925d334c002ce6662d8133535dd4c.jpg +0 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/data/b1a/b1a09f4d922f8f6904bab0c1caf172ab.jpg +0 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/data/f67/f675c8a1e862b5e00203ab888ac7fff4.jpg +0 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +184 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +3 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/data/25c/25c3a206e7b60ab50245ee3d52d97f11.png +0 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/data/962/962fd865fdd45f169d5ca8c8f284d68d.png +0 -0
- hafnia-0.4.3/tests/data/micro_test_datasets/micro-tiny-dataset/data/ec6/ec60f2f4fb854b59c97e16b45c713de0.png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/micro_test_datasets/micro-tiny-dataset/dataset_info.json +17 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/helper_testing.py +18 -6
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_cli_integration.py +1 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_dataset_recipes_with_platform.py +1 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_samples.py +9 -3
- hafnia-0.4.3/tests/unit/dataset/format_conversions/test_format_coco.py +153 -0
- hafnia-0.4.1/tests/unit/dataset/format_conversions/test_image_classification_directory.py → hafnia-0.4.3/tests/unit/dataset/format_conversions/test_format_image_classification_folder.py +6 -9
- hafnia-0.4.3/tests/unit/dataset/format_conversions/test_format_yolo.py +102 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/operations/test_dataset_transformations.py +2 -1
- hafnia-0.4.3/tests/unit/dataset/test_dataset_details_uploader.py +29 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_dataset_names.py +1 -1
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_hafnia_dataset.py +3 -19
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_shape_primitives.py +2 -2
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_cli.py +9 -9
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_hafnia_logger.py +2 -2
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_visualizations.py +15 -14
- {hafnia-0.4.1 → hafnia-0.4.3}/uv.lock +1 -1
- hafnia-0.4.1/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
- hafnia-0.4.1/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +0 -3
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/657/657dff54d5175e2ae9f4b9629cf57646.jpg +0 -0
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/825/825fa2d2d9416694b8e81a47ca38f580.jpg +0 -0
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/data/aa3/aa3cc40b5cde88e5bd189c0b3e6c223c.jpg +0 -0
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +0 -325
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +0 -3
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/data/2da/2da1d8dbf2b60bdab8dff1d7f5c2dfb5.png +0 -0
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/data/3dd/3ddec2275a02e79e3251d85443622e4c.png +0 -0
- hafnia-0.4.1/tests/data/micro_test_datasets/micro-tiny-dataset/data/4d8/4d8450b045e60e8f3657ababa44af9b6.png +0 -0
- hafnia-0.4.1/tests/unit/dataset/format_conversions/test_format_yolo.py +0 -85
- {hafnia-0.4.1 → hafnia-0.4.3}/.devcontainer/devcontainer.json +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.devcontainer/hooks/post_create +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/dependabot.yaml +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/check_release.yaml +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/lint.yaml +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/publish_docker.yaml +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/publish_pypi.yaml +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/tests.yaml +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.gitignore +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.pre-commit-config.yaml +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.python-version +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.trivyignore +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.vscode/extensions.json +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/.vscode/settings.json +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/LICENSE +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/docs/cli.md +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/docs/release.md +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/examples/example_dataset_recipe.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/examples/example_torchvision_dataloader.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/__init__.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/data/__init__.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/data/factory.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_helpers.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_recipe/recipe_types.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/__init__.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/point.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/primitives/utils.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/experiment/__init__.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/http.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/log.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/__init__.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/builder.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/dataset_recipe.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/download.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/experiment.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/platform/trainer_package.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/visualizations/colors.py +0 -0
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/__init__.py +0 -0
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/consts.py +0 -0
- {hafnia-0.4.1/src/cli → hafnia-0.4.3/src/hafnia_cli}/keychain.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/__init__.py +0 -0
- {hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data → hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/train}/000000000632.jpg +0 -0
- {hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data → hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/valid}/000000000139.jpg +0 -0
- {hafnia-0.4.1/tests/data/dataset_formats/format_yolo/data → hafnia-0.4.3/tests/data/dataset_formats/format_coco_roboflow/valid}/000000000285.jpg +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/dataset_formats/format_yolo/obj.names +0 -0
- {hafnia-0.4.1/tests/data/dataset_formats/format_yolo → hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train}/data/000000000139.txt +0 -0
- {hafnia-0.4.1/tests/data/dataset_formats/format_yolo → hafnia-0.4.3/tests/data/dataset_formats/format_yolo/train}/data/000000000285.txt +0 -0
- {hafnia-0.4.1/tests/data/dataset_formats/format_yolo → hafnia-0.4.3/tests/data/dataset_formats/format_yolo/validation}/data/000000000632.txt +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_dataset_transformations/test_video_storage_format_read_image.png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_format_yolo/test_import_yolo_format_visualized.png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_check_example_scripts.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_dataset_merges.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/integration/test_torchvision_datasets.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/dataset_recipe/test_dataset_recipes.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/operations/test_dataset_stats.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/operations/test_table_transformations.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_colors.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/dataset/test_dataset_helpers.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_builder.py +0 -0
- {hafnia-0.4.1 → hafnia-0.4.3}/tests/unit/test_utils.py +0 -0
{hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/Dockerfile
@@ -1,5 +1,8 @@
 ARG PYTHON_VERSION
 FROM python:${PYTHON_VERSION}-slim
+
+RUN apt-get update && apt-get install -y pigz && rm -rf /var/lib/apt/lists/*
+
 WORKDIR /opt/ml/processing
 
 COPY dist/*.whl .
@@ -8,4 +11,4 @@ RUN pip install --no-cache-dir *.whl && \
 
 RUN mkdir -p /opt/ml/processing/input \
     /opt/ml/processing/output \
-    /opt/ml/processing/tmp
+    /opt/ml/processing/tmp

{hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/build.yaml
@@ -6,6 +6,10 @@ on:
       python-version-file:
         required: true
         type: string
+      pyproject-toml-file:
+        required: false
+        type: string
+        default: "pyproject.toml"
     outputs:
       package-version:
         description: "The extracted package version"
@@ -25,7 +29,7 @@ jobs:
       - name: Extract package version
        id: extract-version
        run: |
-          VERSION=$(grep -m 1 'version = ' ${{ inputs.
+          VERSION=$(grep -m 1 'version = ' ${{ inputs.pyproject-toml-file }} | sed -e 's/version = "\(.*\)"/\1/')
          echo "package_version=$VERSION" >> $GITHUB_OUTPUT
 
      - name: Install uv

{hafnia-0.4.1 → hafnia-0.4.3}/.github/workflows/ci_cd.yaml
@@ -5,14 +5,14 @@ on:
     branches: [main]
   pull_request:
     branches: [main]
-    paths: ["src/**", "tests/**", "pyproject.toml"]
+    paths: ["src/**", "tests/**", "pyproject.toml", ".python-version", "uv.lock", ".github/workflows/**"]
 
 jobs:
   lint:
     name: Lint Code
     uses: ./.github/workflows/lint.yaml
     with:
-      python-version-file: "
+      python-version-file: ".python-version"
 
   security-scan:
     name: Security Scan
@@ -36,14 +36,15 @@ jobs:
     secrets: inherit
     uses: ./.github/workflows/tests.yaml
     with:
-      python-version-file: "
+      python-version-file: ".python-version"
 
   build:
     name: Build Package
     needs: [test, security-scan]
     uses: ./.github/workflows/build.yaml
     with:
-      python-version-file: "
+      python-version-file: ".python-version"
+      pyproject-toml-file: "pyproject.toml"
 
   publish-docker-staging:
     name: Publish Docker Image to Staging
@@ -52,7 +53,7 @@ jobs:
     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
     uses: ./.github/workflows/publish_docker.yaml
     with:
-      python-version-file: "
+      python-version-file: ".python-version"
      package-version: ${{ needs.build.outputs.package-version }}
      environment: "staging"
 
@@ -80,7 +81,7 @@ jobs:
     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
     uses: ./.github/workflows/publish_docker.yaml
     with:
-      python-version-file: "
+      python-version-file: ".python-version"
      package-version: ${{ needs.build.outputs.package-version }}
      environment: "production"
 

{hafnia-0.4.1 → hafnia-0.4.3}/.vscode/launch.json
@@ -15,7 +15,7 @@
       "name": "debug (list profiles)",
       "type": "debugpy",
       "request": "launch",
-      "program": "${workspaceFolder}/src/
+      "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
       "args": [
         "profile",
         "ls"
@@ -25,7 +25,7 @@
       "name": "cmd: hafnia runc launch-local",
       "type": "debugpy",
       "request": "launch",
-      "program": "${workspaceFolder}/src/
+      "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
       "args": [
         "runc",
         "launch-local",
@@ -38,7 +38,7 @@
       "name": "cmd: hafnia runc build-local",
       "type": "debugpy",
       "request": "launch",
-      "program": "${workspaceFolder}/src/
+      "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
       "args": [
         "runc",
         "build-local",
@@ -51,7 +51,7 @@
       "name": "cmd: 'hafnia dataset [X]'",
       "type": "debugpy",
       "request": "launch",
-      "program": "${workspaceFolder}/src/
+      "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
       "args": [
         "dataset",
         //"ls",
@@ -64,7 +64,7 @@
       "name": "cmd: 'hafnia experiment [X]'",
       "type": "debugpy",
       "request": "launch",
-      "program": "${workspaceFolder}/src/
+      "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
       "args": [
         "experiment",
         "create",
@@ -82,7 +82,7 @@
       "name": "cmd: 'hafnia train-recipe [X]'",
       "type": "debugpy",
       "request": "launch",
-      "program": "${workspaceFolder}/src/
+      "program": "${workspaceFolder}/src/hafnia_cli/__main__.py",
       "args": [
         "trainer",
         "ls"

{hafnia-0.4.1 → hafnia-0.4.3}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.4.
+Version: 0.4.3
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -343,7 +343,7 @@ batch_size = 128
 learning_rate = 0.001
 
 # Initialize Hafnia logger
-logger = HafniaLogger()
+logger = HafniaLogger(project_name="my_classification_project")
 
 # Log experiment parameters
 logger.log_configuration({"batch_size": 128, "learning_rate": 0.001})

{hafnia-0.4.1 → hafnia-0.4.3}/README.md
@@ -314,7 +314,7 @@ batch_size = 128
 learning_rate = 0.001
 
 # Initialize Hafnia logger
-logger = HafniaLogger()
+logger = HafniaLogger(project_name="my_classification_project")
 
 # Log experiment parameters
 logger.log_configuration({"batch_size": 128, "learning_rate": 0.001})

{hafnia-0.4.1 → hafnia-0.4.3}/examples/example_hafnia_dataset.py
@@ -6,7 +6,8 @@ from PIL import Image
 from rich import print as rprint
 
 from hafnia.dataset.dataset_names import SplitName
-from hafnia.dataset.hafnia_dataset import
+from hafnia.dataset.hafnia_dataset import HafniaDataset
+from hafnia.dataset.hafnia_dataset_types import DatasetInfo, Sample, TaskInfo
 from hafnia.dataset.primitives.bbox import Bbox
 from hafnia.dataset.primitives.bitmask import Bitmask
 from hafnia.dataset.primitives.classification import Classification
@@ -87,14 +88,18 @@ dataset.write(path_dataset)
 dataset_again = HafniaDataset.from_path(path_dataset)
 
 ## Dataset importers and exporters ##
-
+dataset_od = HafniaDataset.from_name("coco-2017").select_samples(n_samples=5, seed=42)
+
+# Export/import dataset to YOLO format
 path_yolo_format = Path(".data/tmp/yolo_dataset")
+dataset_od.to_yolo_format(path_output=path_yolo_format)  # Export to YOLO format
+dataset_od_imported = HafniaDataset.from_yolo_format(path_yolo_format)  # Import dataset from YOLO format
 
-# Export dataset to YOLO format
-dataset_coco.to_yolo_format(path_export_yolo_dataset=path_yolo_format)
 
-#
-
+# Export/import dataset to YOLO format
+path_coco_format = Path(".data/tmp/coco_dataset")
+dataset_od.to_coco_format(path_output=path_coco_format)  # Export to COCO format
+dataset_od_imported = HafniaDataset.from_coco_format(path_coco_format)  # Import dataset from COCO format
 
 ## Custom dataset operations and statistics ##
 # Want custom dataset transformations or statistics? Use the polars table (dataset.samples) directly
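
A minimal round-trip sketch based on the updated example above. The method names `from_name`, `select_samples`, `to_coco_format(path_output=...)`, and `from_coco_format` are taken from the diff; the final length check is illustrative only, since the diff does not specify equality semantics for re-imported datasets.

from pathlib import Path

from hafnia.dataset.hafnia_dataset import HafniaDataset

# Small object-detection subset, as in the updated example
dataset_od = HafniaDataset.from_name("coco-2017").select_samples(n_samples=5, seed=42)

path_coco_format = Path(".data/tmp/coco_dataset")
dataset_od.to_coco_format(path_output=path_coco_format)                 # export to COCO format
dataset_reimported = HafniaDataset.from_coco_format(path_coco_format)   # import it back

# Illustrative sanity check: same number of samples after the round trip
assert len(dataset_reimported.samples) == len(dataset_od.samples)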

{hafnia-0.4.1 → hafnia-0.4.3}/examples/example_logger.py
@@ -4,7 +4,7 @@ batch_size = 128
 learning_rate = 0.001
 
 # Initialize Hafnia logger
-logger = HafniaLogger()
+logger = HafniaLogger(project_name="example_classification_project")
 
 # Log experiment parameters
 logger.log_configuration({"batch_size": 128, "learning_rate": 0.001})

{hafnia-0.4.1 → hafnia-0.4.3}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "hafnia"
-version = "0.4.
+version = "0.4.3"
 description = "Python SDK for communication with Hafnia platform."
 readme = "README.md"
 authors = [
@@ -44,14 +44,14 @@ dev = [
 test = ["pytest>=8.3.4", "pre-commit>=4.2.0", "ruff>=0.9.1"]
 
 [project.scripts]
-hafnia = '
+hafnia = 'hafnia_cli.__main__:main'
 
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build.targets.wheel]
-packages = ["src/
+packages = ["src/hafnia_cli", "src/hafnia"]
 
 [tool.uv]
 default-groups = ["test"]

hafnia-0.4.1/src/hafnia/dataset/dataset_upload_helper.py → hafnia-0.4.3/src/hafnia/dataset/dataset_details_uploader.py
@@ -11,8 +11,6 @@ import polars as pl
 from PIL import Image
 from pydantic import BaseModel, ConfigDict, field_validator
 
-from cli.config import Config
-from hafnia.dataset import primitives
 from hafnia.dataset.dataset_names import (
     DatasetVariant,
     DeploymentStage,
@@ -20,7 +18,8 @@ from hafnia.dataset.dataset_names import (
     SampleField,
     SplitName,
 )
-from hafnia.dataset.hafnia_dataset import
+from hafnia.dataset.hafnia_dataset import HafniaDataset
+from hafnia.dataset.hafnia_dataset_types import Attribution, Sample, TaskInfo
 from hafnia.dataset.operations import table_transformations
 from hafnia.dataset.primitives import (
     Bbox,
@@ -33,6 +32,7 @@ from hafnia.dataset.primitives.primitive import Primitive
 from hafnia.http import post
 from hafnia.log import user_logger
 from hafnia.platform.datasets import get_dataset_id
+from hafnia_cli.config import Config
 
 
 def generate_bucket_name(dataset_name: str, deployment_stage: DeploymentStage) -> str:
@@ -41,7 +41,7 @@ def generate_bucket_name(dataset_name: str, deployment_stage: DeploymentStage) -
     return f"mdi-{deployment_stage.value}-{dataset_name}"
 
 
-class
+class DatasetDetails(BaseModel, validate_assignment=True):  # type: ignore[call-arg]
     model_config = ConfigDict(use_enum_values=True)  # To parse Enum values as strings
     name: str
     data_captured_start: Optional[datetime] = None
@@ -150,14 +150,6 @@ class DbAnnotationType(BaseModel, validate_assignment=True):  # type: ignore[cal
     name: str
 
 
-class AnnotationType(Enum):
-    ImageClassification = "Image Classification"
-    ObjectDetection = "Object Detection"
-    SegmentationMask = "Segmentation Mask"
-    ImageCaptioning = "Image Captioning"
-    InstanceSegmentation = "Instance Segmentation"
-
-
 class DbResolution(BaseModel, validate_assignment=True):  # type: ignore[call-arg]
     height: int
     width: int
@@ -289,7 +281,7 @@ def get_folder_size(path: Path) -> int:
     return sum([path.stat().st_size for path in path.rglob("*")])
 
 
-def upload_to_hafnia_dataset_detail_page(dataset_update:
+def upload_to_hafnia_dataset_detail_page(dataset_update: DatasetDetails, upload_gallery_images: bool) -> dict:
     if not upload_gallery_images:
         dataset_update.imgs = None
 
@@ -322,18 +314,6 @@ def get_resolutions(dataset: HafniaDataset, max_resolutions_selected: int = 8) -
     return resolutions
 
 
-def has_primitive(dataset: Union[HafniaDataset, pl.DataFrame], PrimitiveType: Type[Primitive]) -> bool:
-    col_name = PrimitiveType.column_name()
-    table = dataset.samples if isinstance(dataset, HafniaDataset) else dataset
-    if col_name not in table.columns:
-        return False
-
-    if table[col_name].dtype == pl.Null:
-        return False
-
-    return True
-
-
 def calculate_distribution_values(
     dataset_split: pl.DataFrame, distribution_tasks: Optional[List[TaskInfo]]
 ) -> List[DbDistributionValue]:
@@ -378,15 +358,15 @@ def s3_based_fields(bucket_name: str, variant_type: DatasetVariant, session: bot
     return last_modified, size
 
 
-def
+def dataset_details_from_hafnia_dataset(
     dataset: HafniaDataset,
     deployment_stage: DeploymentStage,
     path_sample: Optional[Path],
     path_hidden: Optional[Path],
     path_gallery_images: Optional[Path] = None,
     gallery_image_names: Optional[List[str]] = None,
-    distribution_task_names: Optional[List[
-) ->
+    distribution_task_names: Optional[List[str]] = None,
+) -> DatasetDetails:
     dataset_variants = []
     dataset_reports = []
     dataset_meta_info = dataset.info.meta or {}
@@ -448,177 +428,20 @@ def dataset_info_from_dataset(
         )
 
         object_reports: List[DbAnnotatedObjectReport] = []
-
-
-        df_per_instance = table_transformations.create_primitive_table(
-            dataset_split, PrimitiveType=Bbox, keep_sample_data=True
-        )
-        if df_per_instance is None:
-            raise ValueError(f"Expected {Bbox.__name__} primitive column to be present in the dataset split.")
-        # Calculate area of bounding boxes
-        df_per_instance = df_per_instance.with_columns(
-            (pl.col("height") * pl.col("width")).alias("area"),
-        ).with_columns(
-            (pl.col("height") * pl.col("image.height")).alias("height_px"),
-            (pl.col("width") * pl.col("image.width")).alias("width_px"),
-            (pl.col("area") * (pl.col("image.height") * pl.col("image.width"))).alias("area_px"),
-        )
-
-        annotation_type = DbAnnotationType(name=AnnotationType.ObjectDetection.value)
-        for (class_name, task_name), class_group in df_per_instance.group_by(
-            PrimitiveField.CLASS_NAME, PrimitiveField.TASK_NAME
-        ):
-            if class_name is None:
-                continue
-            object_reports.append(
-                DbAnnotatedObjectReport(
-                    obj=DbAnnotatedObject(
-                        name=class_name,
-                        entity_type=EntityTypeChoices.OBJECT.value,
-                        annotation_type=annotation_type,
-                        task_name=task_name,
-                    ),
-                    unique_obj_ids=class_group[PrimitiveField.OBJECT_ID].n_unique(),
-                    obj_instances=len(class_group),
-                    annotation_type=[annotation_type],
-                    images_with_obj=class_group[SampleField.SAMPLE_INDEX].n_unique(),
-                    area_avg_ratio=class_group["area"].mean(),
-                    area_min_ratio=class_group["area"].min(),
-                    area_max_ratio=class_group["area"].max(),
-                    height_avg_ratio=class_group["height"].mean(),
-                    height_min_ratio=class_group["height"].min(),
-                    height_max_ratio=class_group["height"].max(),
-                    width_avg_ratio=class_group["width"].mean(),
-                    width_min_ratio=class_group["width"].min(),
-                    width_max_ratio=class_group["width"].max(),
-                    area_avg_px=class_group["area_px"].mean(),
-                    area_min_px=int(class_group["area_px"].min()),
-                    area_max_px=int(class_group["area_px"].max()),
-                    height_avg_px=class_group["height_px"].mean(),
-                    height_min_px=int(class_group["height_px"].min()),
-                    height_max_px=int(class_group["height_px"].max()),
-                    width_avg_px=class_group["width_px"].mean(),
-                    width_min_px=int(class_group["width_px"].min()),
-                    width_max_px=int(class_group["width_px"].max()),
-                    average_count_per_image=len(class_group) / class_group[SampleField.SAMPLE_INDEX].n_unique(),
-                )
-            )
-
-        if has_primitive(dataset_split, PrimitiveType=Classification):
-            annotation_type = DbAnnotationType(name=AnnotationType.ImageClassification.value)
-            col_name = Classification.column_name()
-            classification_tasks = [task.name for task in dataset.info.tasks if task.primitive == Classification]
-            has_classification_data = dataset_split[col_name].dtype != pl.List(pl.Null)
-            if has_classification_data:
-                classification_df = dataset_split.select(col_name).explode(col_name).unnest(col_name)
-
-                # Include only classification tasks that are defined in the dataset info
-                classification_df = classification_df.filter(
-                    pl.col(PrimitiveField.TASK_NAME).is_in(classification_tasks)
-                )
-
-                for (
-                    task_name,
-                    class_name,
-                ), class_group in classification_df.group_by(PrimitiveField.TASK_NAME, PrimitiveField.CLASS_NAME):
-                    if class_name is None:
-                        continue
-                    if task_name == Classification.default_task_name():
-                        display_name = class_name  # Prefix class name with task name
-                    else:
-                        display_name = f"{task_name}.{class_name}"
-                    object_reports.append(
-                        DbAnnotatedObjectReport(
-                            obj=DbAnnotatedObject(
-                                name=display_name,
-                                entity_type=EntityTypeChoices.EVENT.value,
-                                annotation_type=annotation_type,
-                                task_name=task_name,
-                            ),
-                            unique_obj_ids=len(
-                                class_group
-                            ),  # Unique object IDs are not applicable for classification
-                            obj_instances=len(class_group),
-                            annotation_type=[annotation_type],
-                        )
-                    )
-
-        if has_primitive(dataset_split, PrimitiveType=Segmentation):
-            raise NotImplementedError("Not Implemented yet")
-
-        if has_primitive(dataset_split, PrimitiveType=Bitmask):
-            col_name = Bitmask.column_name()
-            drop_columns = [col for col in primitive_columns if col != col_name]
-            drop_columns.append(PrimitiveField.META)
-
-            df_per_instance = table_transformations.create_primitive_table(
-                dataset_split, PrimitiveType=Bitmask, keep_sample_data=True
-            )
-            if df_per_instance is None:
-                raise ValueError(
-                    f"Expected {Bitmask.__name__} primitive column to be present in the dataset split."
-                )
-            df_per_instance = df_per_instance.rename({"height": "height_px", "width": "width_px"})
-            df_per_instance = df_per_instance.with_columns(
-                (pl.col("image.height") * pl.col("image.width") * pl.col("area")).alias("area_px"),
-                (pl.col("height_px") / pl.col("image.height")).alias("height"),
-                (pl.col("width_px") / pl.col("image.width")).alias("width"),
-            )
-
-            annotation_type = DbAnnotationType(name=AnnotationType.InstanceSegmentation)
-            for (class_name, task_name), class_group in df_per_instance.group_by(
-                PrimitiveField.CLASS_NAME, PrimitiveField.TASK_NAME
-            ):
-                if class_name is None:
-                    continue
-                object_reports.append(
-                    DbAnnotatedObjectReport(
-                        obj=DbAnnotatedObject(
-                            name=class_name,
-                            entity_type=EntityTypeChoices.OBJECT.value,
-                            annotation_type=annotation_type,
-                            task_name=task_name,
-                        ),
-                        unique_obj_ids=class_group[PrimitiveField.OBJECT_ID].n_unique(),
-                        obj_instances=len(class_group),
-                        annotation_type=[annotation_type],
-                        average_count_per_image=len(class_group) / class_group[SampleField.SAMPLE_INDEX].n_unique(),
-                        images_with_obj=class_group[SampleField.SAMPLE_INDEX].n_unique(),
-                        area_avg_ratio=class_group["area"].mean(),
-                        area_min_ratio=class_group["area"].min(),
-                        area_max_ratio=class_group["area"].max(),
-                        height_avg_ratio=class_group["height"].mean(),
-                        height_min_ratio=class_group["height"].min(),
-                        height_max_ratio=class_group["height"].max(),
-                        width_avg_ratio=class_group["width"].mean(),
-                        width_min_ratio=class_group["width"].min(),
-                        width_max_ratio=class_group["width"].max(),
-                        area_avg_px=class_group["area_px"].mean(),
-                        area_min_px=int(class_group["area_px"].min()),
-                        area_max_px=int(class_group["area_px"].max()),
-                        height_avg_px=class_group["height_px"].mean(),
-                        height_min_px=int(class_group["height_px"].min()),
-                        height_max_px=int(class_group["height_px"].max()),
-                        width_avg_px=class_group["width_px"].mean(),
-                        width_min_px=int(class_group["width_px"].min()),
-                        width_max_px=int(class_group["width_px"].max()),
-                    )
-                )
-
-        if has_primitive(dataset_split, PrimitiveType=Polygon):
-            raise NotImplementedError("Not Implemented yet")
+        for PrimitiveType in [Classification, Bbox, Bitmask, Polygon, Segmentation]:
+            object_reports.extend(create_reports_from_primitive(dataset_split, PrimitiveType=PrimitiveType))  # type: ignore[type-abstract]
 
         # Sort object reports by name to more easily compare between versions
         object_reports = sorted(object_reports, key=lambda x: x.obj.name)  # Sort object reports by name
         report.annotated_object_reports = object_reports
 
-
-
+        if report.distribution_values is None:
+            report.distribution_values = []
 
-
+        dataset_reports.append(report)
     dataset_name = dataset.info.dataset_name
     bucket_sample = generate_bucket_name(dataset_name, deployment_stage=deployment_stage)
-    dataset_info =
+    dataset_info = DatasetDetails(
         name=dataset_name,
         version=dataset.info.version,
         s3_bucket_name=bucket_sample,
@@ -639,6 +462,101 @@ def dataset_info_from_dataset(
     return dataset_info
 
 
+def create_reports_from_primitive(
+    dataset_split: pl.DataFrame, PrimitiveType: Type[Primitive]
+) -> List[DbAnnotatedObjectReport]:
+    if not table_transformations.has_primitive(dataset_split, PrimitiveType=PrimitiveType):
+        return []
+
+    if PrimitiveType == Segmentation:
+        raise NotImplementedError("Not Implemented yet")
+
+    df_per_instance = table_transformations.create_primitive_table(
+        dataset_split, PrimitiveType=PrimitiveType, keep_sample_data=True
+    )
+    if df_per_instance is None:
+        raise ValueError(f"Expected {PrimitiveType.__name__} primitive column to be present in the dataset split.")
+
+    entity_type = EntityTypeChoices.OBJECT.value
+    if PrimitiveType == Classification:
+        entity_type = EntityTypeChoices.EVENT.value
+
+    if PrimitiveType == Bbox:
+        df_per_instance = df_per_instance.with_columns(area=pl.col("height") * pl.col("width"))
+
+    if PrimitiveType == Bitmask:
+        # width and height are in pixel format for Bitmask convert to ratio
+        df_per_instance = df_per_instance.with_columns(
+            width=pl.col("width") / pl.col("image.width"),
+            height=pl.col("height") / pl.col("image.height"),
+        )
+
+    has_height_field = "height" in df_per_instance.columns and df_per_instance["height"].dtype != pl.Null
+    if has_height_field:
+        df_per_instance = df_per_instance.with_columns(
+            height_px=pl.col("height") * pl.col("image.height"),
+        )
+
+    has_width_field = "width" in df_per_instance.columns and df_per_instance["width"].dtype != pl.Null
+    if has_width_field:
+        df_per_instance = df_per_instance.with_columns(
+            width_px=pl.col("width") * pl.col("image.width"),
+        )
+
+    has_area_field = "area" in df_per_instance.columns and df_per_instance["area"].dtype != pl.Null
+    if has_area_field:
+        df_per_instance = df_per_instance.with_columns(
+            area_px=pl.col("image.height") * pl.col("image.width") * pl.col("area")
+        )
+    object_reports: List[DbAnnotatedObjectReport] = []
+    annotation_type = DbAnnotationType(name=PrimitiveType.__name__)
+    for (class_name, task_name), class_group in df_per_instance.group_by(
+        PrimitiveField.CLASS_NAME, PrimitiveField.TASK_NAME
+    ):
+        if class_name is None:
+            continue
+
+        object_report = DbAnnotatedObjectReport(
+            obj=DbAnnotatedObject(
+                name=class_name,
+                entity_type=entity_type,
+                annotation_type=annotation_type,
+                task_name=task_name,
+            ),
+            unique_obj_ids=class_group[PrimitiveField.OBJECT_ID].n_unique(),
+            obj_instances=len(class_group),
+            annotation_type=[annotation_type],
+            average_count_per_image=len(class_group) / class_group[SampleField.SAMPLE_INDEX].n_unique(),
+            images_with_obj=class_group[SampleField.SAMPLE_INDEX].n_unique(),
+        )
+        if has_height_field:
+            object_report.height_avg_ratio = class_group["height"].mean()
+            object_report.height_min_ratio = class_group["height"].min()
+            object_report.height_max_ratio = class_group["height"].max()
+            object_report.height_avg_px = class_group["height_px"].mean()
+            object_report.height_min_px = int(class_group["height_px"].min())
+            object_report.height_max_px = int(class_group["height_px"].max())
+
+        if has_width_field:
+            object_report.width_avg_ratio = class_group["width"].mean()
+            object_report.width_min_ratio = class_group["width"].min()
+            object_report.width_max_ratio = class_group["width"].max()
+            object_report.width_avg_px = class_group["width_px"].mean()
+            object_report.width_min_px = int(class_group["width_px"].min())
+            object_report.width_max_px = int(class_group["width_px"].max())
+
+        if has_area_field:
+            object_report.area_avg_ratio = class_group["area"].mean()
+            object_report.area_min_ratio = class_group["area"].min()
+            object_report.area_max_ratio = class_group["area"].max()
+            object_report.area_avg_px = class_group["area_px"].mean()
+            object_report.area_min_px = int(class_group["area_px"].min())
+            object_report.area_max_px = int(class_group["area_px"].max())
+
+        object_reports.append(object_report)
+    return object_reports
+
+
 def create_gallery_images(
     dataset: HafniaDataset,
     path_gallery_images: Optional[Path],
@@ -657,7 +575,12 @@ def create_gallery_images(
 
     missing_gallery_samples = set(gallery_image_names) - set(gallery_samples[COL_IMAGE_NAME])
     if len(missing_gallery_samples):
-
+        potential_samples = samples[COL_IMAGE_NAME].sort().to_list()
+        formatted_samples = ", ".join([f'"{s}"' for s in potential_samples[:9]])
+        raise ValueError(
+            f"Gallery images not found in dataset: {missing_gallery_samples}. "
+            f"Consider adding this to dataset definition: \ngallery_image_names=[{formatted_samples}]"
+        )
     gallery_images = []
    for gallery_sample in gallery_samples.iter_rows(named=True):
        sample = Sample(**gallery_sample)
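
Note that the `has_primitive` helper removed from the uploader above is now invoked as `table_transformations.has_primitive(...)` inside the new `create_reports_from_primitive`. The relocated helper itself is not shown in this diff, so the following is only a sketch of what it presumably looks like in `table_transformations`, reusing the body of the removed function verbatim:

from typing import Type, Union

import polars as pl

from hafnia.dataset.hafnia_dataset import HafniaDataset
from hafnia.dataset.primitives.primitive import Primitive


def has_primitive(dataset: Union[HafniaDataset, pl.DataFrame], PrimitiveType: Type[Primitive]) -> bool:
    # Same logic as the function removed from the uploader: a primitive counts as
    # present when its column exists and is not entirely null.
    col_name = PrimitiveType.column_name()
    table = dataset.samples if isinstance(dataset, HafniaDataset) else dataset
    if col_name not in table.columns:
        return False
    if table[col_name].dtype == pl.Null:
        return False
    return True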

{hafnia-0.4.1 → hafnia-0.4.3}/src/hafnia/dataset/dataset_names.py
@@ -93,6 +93,32 @@ class SplitName:
     def all_split_names() -> List[str]:
         return [*SplitName.valid_splits(), SplitName.UNDEFINED]
 
+    @staticmethod
+    def map_split_name(potential_split_name: str, strict: bool = True) -> str:
+        normalized = potential_split_name.strip().lower()
+
+        if normalized in SPLIT_NAME_MAPPINGS:
+            return SPLIT_NAME_MAPPINGS[normalized]
+
+        if strict:
+            raise ValueError(f"Unrecognized split name: {potential_split_name}")
+        else:
+            return SplitName.UNDEFINED
+
+
+SPLIT_NAME_MAPPINGS = {
+    # Train variations
+    "train": SplitName.TRAIN,
+    "training": SplitName.TRAIN,
+    # Validation variations
+    "validation": SplitName.VAL,
+    "val": SplitName.VAL,
+    "valid": SplitName.VAL,
+    # Test variations
+    "test": SplitName.TEST,
+    "testing": SplitName.TEST,
+}
+
 
 class DatasetVariant(Enum):
     DUMP = "dump"
|