hafnia 0.2.2.tar.gz → 0.2.3.tar.gz

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (125)
  1. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/publish_docker.yaml +1 -1
  2. {hafnia-0.2.2 → hafnia-0.2.3}/PKG-INFO +1 -1
  3. {hafnia-0.2.2 → hafnia-0.2.3}/examples/example_dataset_recipe.py +15 -11
  4. {hafnia-0.2.2 → hafnia-0.2.3}/examples/example_hafnia_dataset.py +26 -16
  5. {hafnia-0.2.2 → hafnia-0.2.3}/pyproject.toml +1 -1
  6. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/__main__.py +6 -10
  7. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/config.py +19 -5
  8. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/profile_cmds.py +2 -1
  9. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_helpers.py +39 -6
  10. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +59 -1
  11. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/recipe_types.py +4 -0
  12. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/hafnia_dataset.py +5 -17
  13. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/datasets.py +6 -1
  14. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/dataset_recipe/test_dataset_recipes.py +46 -3
  15. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_dataset_helpers.py +10 -5
  16. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_cli.py +38 -1
  17. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_samples.py +7 -0
  18. {hafnia-0.2.2 → hafnia-0.2.3}/uv.lock +1 -1
  19. {hafnia-0.2.2 → hafnia-0.2.3}/.devcontainer/devcontainer.json +0 -0
  20. {hafnia-0.2.2 → hafnia-0.2.3}/.devcontainer/hooks/post_create +0 -0
  21. {hafnia-0.2.2 → hafnia-0.2.3}/.github/dependabot.yaml +0 -0
  22. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/Dockerfile +0 -0
  23. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/build.yaml +0 -0
  24. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/check_release.yaml +0 -0
  25. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/ci_cd.yaml +0 -0
  26. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/lint.yaml +0 -0
  27. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/publish_pypi.yaml +0 -0
  28. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/tests.yaml +0 -0
  29. {hafnia-0.2.2 → hafnia-0.2.3}/.gitignore +0 -0
  30. {hafnia-0.2.2 → hafnia-0.2.3}/.pre-commit-config.yaml +0 -0
  31. {hafnia-0.2.2 → hafnia-0.2.3}/.python-version +0 -0
  32. {hafnia-0.2.2 → hafnia-0.2.3}/.vscode/extensions.json +0 -0
  33. {hafnia-0.2.2 → hafnia-0.2.3}/.vscode/launch.json +0 -0
  34. {hafnia-0.2.2 → hafnia-0.2.3}/.vscode/settings.json +0 -0
  35. {hafnia-0.2.2 → hafnia-0.2.3}/LICENSE +0 -0
  36. {hafnia-0.2.2 → hafnia-0.2.3}/README.md +0 -0
  37. {hafnia-0.2.2 → hafnia-0.2.3}/docs/cli.md +0 -0
  38. {hafnia-0.2.2 → hafnia-0.2.3}/docs/release.md +0 -0
  39. {hafnia-0.2.2 → hafnia-0.2.3}/examples/example_logger.py +0 -0
  40. {hafnia-0.2.2 → hafnia-0.2.3}/examples/example_torchvision_dataloader.py +0 -0
  41. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/__init__.py +0 -0
  42. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/consts.py +0 -0
  43. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/dataset_cmds.py +0 -0
  44. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/experiment_cmds.py +0 -0
  45. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/recipe_cmds.py +0 -0
  46. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/runc_cmds.py +0 -0
  47. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/__init__.py +0 -0
  48. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/data/__init__.py +0 -0
  49. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/data/factory.py +0 -0
  50. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_names.py +0 -0
  51. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
  52. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_upload_helper.py +0 -0
  53. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/operations/dataset_stats.py +0 -0
  54. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/operations/dataset_transformations.py +0 -0
  55. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/operations/table_transformations.py +0 -0
  56. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/__init__.py +0 -0
  57. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/bbox.py +0 -0
  58. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/bitmask.py +0 -0
  59. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/classification.py +0 -0
  60. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/point.py +0 -0
  61. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/polygon.py +0 -0
  62. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/primitive.py +0 -0
  63. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/segmentation.py +0 -0
  64. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/utils.py +0 -0
  65. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/experiment/__init__.py +0 -0
  66. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/experiment/hafnia_logger.py +0 -0
  67. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/http.py +0 -0
  68. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/log.py +0 -0
  69. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/__init__.py +0 -0
  70. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/builder.py +0 -0
  71. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/download.py +0 -0
  72. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/experiment.py +0 -0
  73. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/torch_helpers.py +0 -0
  74. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/utils.py +0 -0
  75. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/visualizations/colors.py +0 -0
  76. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/visualizations/image_visualizations.py +0 -0
  77. {hafnia-0.2.2 → hafnia-0.2.3}/tests/__init__.py +0 -0
  78. {hafnia-0.2.2 → hafnia-0.2.3}/tests/conftest.py +0 -0
  79. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  80. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  81. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  82. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  83. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  84. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  85. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  86. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  87. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  88. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  89. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  90. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  91. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  92. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  93. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  94. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  95. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_blur_anonymization[coco-2017].png +0 -0
  96. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_blur_anonymization[tiny-dataset].png +0 -0
  97. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_draw_annotations[coco-2017].png +0 -0
  98. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_draw_annotations[tiny-dataset].png +0 -0
  99. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_mask_region[coco-2017].png +0 -0
  100. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_mask_region[tiny-dataset].png +0 -0
  101. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/annotations.jsonl +0 -0
  102. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/annotations.parquet +0 -0
  103. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/data/182a2c0a3ce312cf.jpg +0 -0
  104. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/data/4e95c6eb6209880a.jpg +0 -0
  105. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/data/cf86c7a23edb55ce.jpg +0 -0
  106. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/dataset_info.json +0 -0
  107. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/annotations.jsonl +0 -0
  108. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/annotations.parquet +0 -0
  109. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/data/222bbd5721a8a86e.png +0 -0
  110. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/data/3251d85443622e4c.png +0 -0
  111. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/data/3657ababa44af9b6.png +0 -0
  112. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/dataset_info.json +0 -0
  113. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
  114. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
  115. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/operations/test_dataset_transformations.py +0 -0
  116. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/operations/test_table_transformations.py +0 -0
  117. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_colors.py +0 -0
  118. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_hafnia_dataset.py +0 -0
  119. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_shape_primitives.py +0 -0
  120. {hafnia-0.2.2 → hafnia-0.2.3}/tests/helper_testing.py +0 -0
  121. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_builder.py +0 -0
  122. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_check_example_scripts.py +0 -0
  123. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_hafnia_logger.py +0 -0
  124. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_utils.py +0 -0
  125. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_visualizations.py +0 -0
{hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/publish_docker.yaml

@@ -47,7 +47,7 @@ jobs:
  echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
  fi
  - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v4.2.1
+ uses: aws-actions/configure-aws-credentials@v4.3.0
  with:
  role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
  aws-region: ${{ steps.env-vars.outputs.aws_region }}
{hafnia-0.2.2 → hafnia-0.2.3}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.2.2
+ Version: 0.2.3
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
{hafnia-0.2.2 → hafnia-0.2.3}/examples/example_dataset_recipe.py

@@ -29,20 +29,24 @@ dataset_recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(
  dataset = dataset_recipe.build()
  # Note that the interface is similar, but to actually create the dataset you need to call `build()` on the recipe.

- # An important feature of a 'DatasetRecipe' is that the recipe itself - and not the dataset - can be saved as a file
- # and loaded from file. Meaning you can easily save, share, load and build the dataset later or in a different
- # environment.
- # In programming language, the recipe can be serialized to JSON and deserialized back to the original python object
- # recipe.
+ # Unlike the HafniaDataset, a DatasetRecipe does not execute operations. It only registers
+ # the operations applied to the recipe and can be used to build the dataset later.
+ # You can print the dataset recipe to the operations that were applied to it.
+ rprint(dataset_recipe)
+
+ # Or as a JSON string:
+ json_str: str = dataset_recipe.as_json_str()
+ rprint(json_str)
+
+ # This is an important feature of a 'DatasetRecipe' it only registers operations and that the recipe itself
+ # - and not the dataset - can be saved as a file and loaded from file.
+ # Meaning you can easily save, share, load and build the dataset later or in a different environment.
  # For TaaS, this is the only way to include multiple datasets during training.

- # This is how it looks like in practice:
- # 1) Save the dataset recipe to a file
- path_json = Path(".data/tmp/dataset_recipe.json")
- dataset_recipe.as_json_file(path_json)

- # 2) The recipe can be loaded from the file
- dataset_recipe_again = DatasetRecipe.from_json_file(path_json)
+ # 2) The recipe can be loaded from json string
+ dataset_recipe_again: DatasetRecipe = DatasetRecipe.from_json_str(json_str)
+ # dataset_recipe_again.build()

  # We can verify that the loaded recipe is the same as the original recipe.
  assert dataset_recipe_again == dataset_recipe
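The updated example serializes the recipe to a JSON string instead of a file. A minimal round-trip sketch, using only the 0.2.3 API surface that appears in this diff (from_name, shuffle, select_samples, as_json_str, from_json_str, build); the sample count is illustrative:

    from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

    # Build a recipe lazily; nothing is downloaded or transformed yet.
    recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(n_samples=100)

    # Serialize the recipe (not the dataset) and restore it elsewhere.
    json_str = recipe.as_json_str()
    restored = DatasetRecipe.from_json_str(json_str)
    assert restored == recipe

    # Only build() executes the registered operations and returns a HafniaDataset.
    dataset = restored.build()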
{hafnia-0.2.2 → hafnia-0.2.3}/examples/example_hafnia_dataset.py

@@ -26,11 +26,10 @@ dataset = HafniaDataset.from_path(path_dataset)
  # Alternatively, you can use the 'load_dataset' function
  dataset = load_dataset("midwest-vehicle-detection")

-
  # Dataset information is stored in 'dataset.info'
  rprint(dataset.info)

- # Annotations are stored in 'dataset.table' as a Polars DataFrame
+ # Annotations are stored in 'dataset.samples' as a Polars DataFrame
  dataset.samples.head(2)

  # Print dataset information
@@ -49,14 +48,29 @@ shuffled_dataset = dataset.shuffle(seed=42) # Shuffle the dataset
  split_ratios = {SplitName.TRAIN: 0.8, SplitName.VAL: 0.1, SplitName.TEST: 0.1}
  new_dataset_splits = dataset.splits_by_ratios(split_ratios)

+ # Support Chaining Operations (load, shuffle, select samples)
+ dataset = load_dataset("midwest-vehicle-detection").shuffle(seed=42).select_samples(n_samples=10)
+
+
  # Write dataset to disk
  path_tmp = Path(".data/tmp")
  path_dataset = path_tmp / "hafnia_dataset"
- dataset.write(path_dataset) # --> Check that data is human readable
+ dataset.write(path_dataset)

  # Load dataset from disk
  dataset_again = HafniaDataset.from_path(path_dataset)

+
+ # Want custom dataset transformations or statistics? Use the polars table (dataset.samples) directly
+ n_objects = dataset.samples["objects"].list.len().sum()
+ n_objects = dataset.samples[Bbox.column_name()].list.len().sum() # Use Bbox.column_name() to avoid magic variables
+ n_classifications = dataset.samples[Classification.column_name()].list.len().sum()
+
+ class_counts = dataset.samples[Classification.column_name()].explode().struct.field("class_name").value_counts()
+ class_counts = dataset.samples[Bbox.column_name()].explode().struct.field("class_name").value_counts()
+ rprint(dict(class_counts.iter_rows()))
+
+
  # Access the first sample in the training split - data is stored in a dictionary
  sample_dict = dataset_train[0]

@@ -78,25 +92,15 @@ image: np.ndarray = sample.read_image()
  # Visualize sample and annotations
  image_with_annotations = sample.draw_annotations()

-
+ # Save the image with annotations to a temporary directory
  path_tmp.mkdir(parents=True, exist_ok=True)
  Image.fromarray(image_with_annotations).save(path_tmp / "sample_with_annotations.png")


- # Do dataset transformations and statistics on the Polars DataFrame
- n_objects = dataset.samples["objects"].list.len().sum()
- n_objects = dataset.samples[Bbox.column_name()].list.len().sum() # Use Bbox.column_name() to avoid magic variables
- n_classifications = dataset.samples[Classification.column_name()].list.len().sum()
-
- class_counts = dataset.samples[Classification.column_name()].explode().struct.field("class_name").value_counts()
- class_counts = dataset.samples[Bbox.column_name()].explode().struct.field("class_name").value_counts()
- rprint(dict(class_counts.iter_rows()))
-
-
  ## Bring-your-own-data: Create a new dataset from samples
  fake_samples = []
  for i_fake_sample in range(5):
- bboxes = [Bbox(top_left_x=10, top_left_y=20, width=100, height=200, class_name="car")]
+ bboxes = [Bbox(top_left_x=0.1, top_left_y=0.20, width=0.1, height=0.2, class_name="car")]
  classifications = [Classification(class_name="vehicle", class_idx=0)]
  sample = Sample(
  file_name=f"path/to/image_{i_fake_sample:05}.jpg",
@@ -120,8 +124,14 @@ fake_dataset_info = DatasetInfo(
  )
  fake_dataset = HafniaDataset.from_samples_list(samples_list=fake_samples, info=fake_dataset_info)

+ # Coming soon! Upload your dataset to the Hafnia Platform
+ # fake_dataset.upload_to_hafnia()
+
+ # Coming soon! Create your own dataset details page in Hafnia
+ # fake_dataset.upload_dataset_details()

- ## A hafnia dataset can also be used for storing predictions per sample set 'ground_truth=False' and add 'confidence'.
+ ## Storing predictions: A hafnia dataset can also be used for storing predictions per sample
+ # set 'ground_truth=False' and add 'confidence'.
  bboxes_predictions = [
  Bbox(top_left_x=10, top_left_y=20, width=100, height=200, class_name="car", ground_truth=False, confidence=0.9)
  ]
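Read together, the example now demonstrates chained loading and direct Polars queries on dataset.samples. A condensed sketch of that workflow; the import paths for load_dataset and Bbox are assumptions based on the package layout in the file list above:

    from pathlib import Path

    from hafnia.data import load_dataset                  # assumed import path
    from hafnia.dataset.primitives.bbox import Bbox       # assumed import path

    # Chain load, shuffle and sub-sampling, then query annotations via Polars.
    dataset = load_dataset("midwest-vehicle-detection").shuffle(seed=42).select_samples(n_samples=10)
    n_boxes = dataset.samples[Bbox.column_name()].list.len().sum()
    class_counts = dataset.samples[Bbox.column_name()].explode().struct.field("class_name").value_counts()

    # write() now shards files into hashed sub-folders (see dataset_helpers.py further down).
    dataset.write(Path(".data/tmp/hafnia_dataset"))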
{hafnia-0.2.2 → hafnia-0.2.3}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "hafnia"
- version = "0.2.2"
+ version = "0.2.3"
  description = "Python SDK for communication with Hafnia platform."
  readme = "README.md"
  authors = [
{hafnia-0.2.2 → hafnia-0.2.3}/src/cli/__main__.py

@@ -20,19 +20,15 @@ def configure(cfg: Config) -> None:

  profile_name = click.prompt("Profile Name", type=str, default=consts.DEFAULT_PROFILE_NAME)
  profile_name = profile_name.strip()
- try:
- cfg.add_profile(profile_name, ConfigSchema(), set_active=True)
- except ValueError:
- raise click.ClickException(consts.ERROR_CREATE_PROFILE)
+
+ cfg.check_profile_name(profile_name)

  api_key = click.prompt("Hafnia API Key", type=str, hide_input=True)
- try:
- cfg.api_key = api_key.strip()
- except ValueError as e:
- click.echo(f"Error: {str(e)}", err=True)
- return
+
  platform_url = click.prompt("Hafnia Platform URL", type=str, default=consts.DEFAULT_API_URL)
- cfg.platform_url = platform_url.strip()
+
+ cfg_profile = ConfigSchema(api_key=api_key, platform_url=platform_url)
+ cfg.add_profile(profile_name, cfg_profile, set_active=True)
  cfg.save_config()
  profile_cmds.profile_show(cfg)

{hafnia-0.2.2 → hafnia-0.2.3}/src/cli/config.py

@@ -6,7 +6,7 @@ from typing import Dict, List, Optional
  from pydantic import BaseModel, field_validator

  import cli.consts as consts
- from hafnia.log import user_logger
+ from hafnia.log import sys_logger, user_logger

  PLATFORM_API_MAPPING = {
  "recipes": "/api/v1/recipes",
@@ -23,9 +23,17 @@ class ConfigSchema(BaseModel):
  api_key: Optional[str] = None

  @field_validator("api_key")
- def validate_api_key(cls, value: str) -> str:
- if value is not None and len(value) < 10:
+ def validate_api_key(cls, value: Optional[str]) -> Optional[str]:
+ if value is None:
+ return value
+
+ if len(value) < 10:
  raise ValueError("API key is too short.")
+
+ if not value.startswith("ApiKey "):
+ sys_logger.warning("API key is missing the 'ApiKey ' prefix. Prefix is being added automatically.")
+ value = f"ApiKey {value}"
+
  return value


@@ -51,6 +59,7 @@ class Config:
  if profile_name not in self.config_data.profiles:
  raise ValueError(f"Profile '{profile_name}' does not exist.")
  self.config_data.active_profile = profile_name
+ self.save_config()

  @property
  def config(self) -> ConfigSchema:
@@ -92,13 +101,18 @@ class Config:

  return Path.home() / ".hafnia" / "config.json"

- def add_profile(self, profile_name: str, profile: ConfigSchema, set_active: bool = False) -> None:
- profile_name = profile_name.strip()
+ def check_profile_name(self, profile_name: str) -> None:
+ if not profile_name or not isinstance(profile_name, str):
+ raise ValueError("Profile name must be a non-empty string.")
+
  if profile_name in self.config_data.profiles:
  user_logger.warning(
  f"Profile with name '{profile_name}' already exists, it will be overwritten by the new one."
  )

+ def add_profile(self, profile_name: str, profile: ConfigSchema, set_active: bool = False) -> None:
+ profile_name = profile_name.strip()
+ self.check_profile_name(profile_name)
  self.config_data.profiles[profile_name] = profile
  if set_active:
  self.config_data.active_profile = profile_name
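The net effect of the ConfigSchema change is that API keys are normalized at validation time rather than rejected later. A small illustration, assuming the cli.config module path used elsewhere in this diff and the validator logic shown in the hunk above:

    from cli.config import ConfigSchema  # module path assumed from "import cli.consts as consts"

    # A key without the "ApiKey " prefix is accepted, a warning is logged, and the prefix is added.
    schema = ConfigSchema(api_key="0123456789abcdef")
    assert schema.api_key == "ApiKey 0123456789abcdef"

    # A key shorter than 10 characters still fails validation ("API key is too short.").
    # ConfigSchema(api_key="short")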
{hafnia-0.2.2 → hafnia-0.2.3}/src/cli/profile_cmds.py

@@ -56,6 +56,7 @@ def profile_create(cfg: Config, name: str, api_url: str, api_key: str, activate:
  cfg_profile = ConfigSchema(platform_url=api_url, api_key=api_key)

  cfg.add_profile(profile_name=name, profile=cfg_profile, set_active=activate)
+ profile_show(cfg)


  @profile.command("rm")
@@ -87,7 +88,7 @@ def profile_active(cfg: Config) -> None:


  def profile_show(cfg: Config) -> None:
- masked_key = f"{cfg.api_key[:4]}...{cfg.api_key[-4:]}" if len(cfg.api_key) > 8 else "****"
+ masked_key = f"{cfg.api_key[:11]}...{cfg.api_key[-4:]}" if len(cfg.api_key) > 20 else "****"
  console = Console()

  table = Table(title=f"{consts.PROFILE_TABLE_HEADER} {cfg.active_profile}", show_header=False)
@@ -1,6 +1,7 @@
1
1
  import io
2
2
  import math
3
3
  import random
4
+ import shutil
4
5
  from pathlib import Path
5
6
  from typing import Dict, List
6
7
 
@@ -21,7 +22,7 @@ def create_split_name_list_from_ratios(split_ratios: Dict[str, float], n_items:
21
22
 
22
23
 
23
24
  def hash_file_xxhash(path: Path, chunk_size: int = 262144) -> str:
24
- hasher = xxhash.xxh3_64()
25
+ hasher = xxhash.xxh3_128()
25
26
 
26
27
  with open(path, "rb") as f:
27
28
  for chunk in iter(lambda: f.read(chunk_size), b""): # 8192, 16384, 32768, 65536
@@ -30,7 +31,7 @@ def hash_file_xxhash(path: Path, chunk_size: int = 262144) -> str:
30
31
 
31
32
 
32
33
  def hash_from_bytes(data: bytes) -> str:
33
- hasher = xxhash.xxh3_64()
34
+ hasher = xxhash.xxh3_128()
34
35
  hasher.update(data)
35
36
  return hasher.hexdigest()
36
37
 
@@ -40,14 +41,46 @@ def save_image_with_hash_name(image: np.ndarray, path_folder: Path) -> Path:
40
41
  buffer = io.BytesIO()
41
42
  pil_image.save(buffer, format="PNG")
42
43
  hash_value = hash_from_bytes(buffer.getvalue())
43
- path_image = Path(path_folder) / f"{hash_value}.png"
44
+ path_image = Path(path_folder) / relative_path_from_hash(hash=hash_value, suffix=".png")
45
+ path_image.parent.mkdir(parents=True, exist_ok=True)
44
46
  pil_image.save(path_image)
45
47
  return path_image
46
48
 
47
49
 
48
- def filename_as_hash_from_path(path_image: Path) -> str:
49
- hash = hash_file_xxhash(path_image)
50
- return f"{hash}{path_image.suffix}"
50
+ def copy_and_rename_file_to_hash_value(path_source: Path, path_dataset_root: Path) -> Path:
51
+ """
52
+ Copies a file to a dataset root directory with a hash-based name and sub-directory structure.
53
+
54
+ E.g. for an "image.png" with hash "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", the image will be copied to
55
+ 'path_dataset_root / "data" / "dfe" / "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png"'
56
+ Notice that the hash is used for both the filename and the subfolder name.
57
+
58
+ Placing image/video files into multiple sub-folders (instead of one large folder) is seemingly
59
+ unnecessary, but it is actually a requirement when the dataset is later downloaded from S3.
60
+
61
+ The reason is that AWS has a rate limit of 3500 ops/sec per prefix (sub-folder) in S3 - meaning we can "only"
62
+ download 3500 files per second from a single folder (prefix) in S3.
63
+
64
+ For even a single user, we found that this limit was being reached when files are stored in single folder (prefix)
65
+ in S3. To support multiple users and concurrent experiments, we are required to separate files into
66
+ multiple sub-folders (prefixes) in S3 to not hit the rate limit.
67
+ """
68
+
69
+ if not path_source.exists():
70
+ raise FileNotFoundError(f"Source file {path_source} does not exist.")
71
+
72
+ hash_value = hash_file_xxhash(path_source)
73
+ path_file = path_dataset_root / relative_path_from_hash(hash=hash_value, suffix=path_source.suffix)
74
+ path_file.parent.mkdir(parents=True, exist_ok=True)
75
+ if not path_file.exists():
76
+ shutil.copy2(path_source, path_file)
77
+
78
+ return path_file
79
+
80
+
81
+ def relative_path_from_hash(hash: str, suffix: str) -> Path:
82
+ path_file = Path("data") / hash[:3] / f"{hash}{suffix}"
83
+ return path_file
51
84
 
52
85
 
53
86
  def split_sizes_from_ratios(n_items: int, split_ratios: Dict[str, float]) -> Dict[str, int]:
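The docstring above explains why files are sharded; the path construction itself is a one-liner, and since xxh3_128 produces hex digests, the first three characters give up to 16^3 = 4096 distinct prefixes over which the 3500 requests/sec-per-prefix S3 limit is spread. A quick sketch mirroring the relative_path_from_hash helper from this hunk:

    from pathlib import Path

    def relative_path_from_hash(hash: str, suffix: str) -> Path:
        # The first three hex characters of the hash become the sub-folder (the S3 prefix).
        return Path("data") / hash[:3] / f"{hash}{suffix}"

    print(relative_path_from_hash("dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", ".png"))
    # data/dfe/dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png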
{hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py

@@ -216,6 +216,16 @@ class DatasetRecipe(Serializable):
  json_str = self.as_json_str(indent=indent)
  path_json.write_text(json_str, encoding="utf-8")

+ ### Helper methods ###
+ def get_dataset_names(self) -> List[str]:
+ """
+ Get all dataset names added with 'from_name'.
+ Function recursively gathers dataset names.
+ """
+ if self.creation is None:
+ return []
+ return self.creation.get_dataset_names()
+
  ### Validation and Serialization ###
  @field_validator("creation", mode="plain")
  @classmethod
@@ -282,7 +292,10 @@ class FromPath(RecipeCreation):
  return HafniaDataset.from_path

  def as_short_name(self) -> str:
- return f"'{self.path_folder}'".replace(os.sep, "|")
+ return f"'{self.path_folder}'".replace(os.sep, "-")
+
+ def get_dataset_names(self) -> List[str]:
+ return [] # Only counts 'from_name' datasets


  class FromName(RecipeCreation):
@@ -297,6 +310,9 @@ class FromName(RecipeCreation):
  def as_short_name(self) -> str:
  return self.name

+ def get_dataset_names(self) -> List[str]:
+ return [self.name]
+

  class FromMerge(RecipeCreation):
  recipe0: DatasetRecipe
@@ -310,6 +326,11 @@ class FromMerge(RecipeCreation):
  merger = FromMerger(recipes=[self.recipe0, self.recipe1])
  return merger.as_short_name()

+ def get_dataset_names(self) -> List[str]:
+ """Get the dataset names from the merged recipes."""
+ names = [*self.recipe0.creation.get_dataset_names(), *self.recipe1.creation.get_dataset_names()]
+ return names
+

  class FromMerger(RecipeCreation):
  recipes: List[DatasetRecipe]
@@ -325,3 +346,40 @@ class FromMerger(RecipeCreation):

  def as_short_name(self) -> str:
  return f"Merger({','.join(recipe.as_short_name() for recipe in self.recipes)})"
+
+ def get_dataset_names(self) -> List[str]:
+ """Get the dataset names from the merged recipes."""
+ names = []
+ for recipe in self.recipes:
+ names.extend(recipe.creation.get_dataset_names())
+ return names
+
+
+ def extract_dataset_names_from_json_dict(data: dict) -> list[str]:
+ """
+ Extract dataset names recursively from a JSON dictionary added with 'from_name'.
+
+ Even if the same functionality is achieved with `DatasetRecipe.get_dataset_names()`,
+ we want to keep this function in 'dipdatalib' to extract dataset names from json dictionaries
+ directly.
+ """
+ creation_field = data.get("creation")
+ if creation_field is None:
+ return []
+ if creation_field.get("__type__") == "FromName":
+ return [creation_field["name"]]
+ elif creation_field.get("__type__") == "FromMerge":
+ recipe_names = ["recipe0", "recipe1"]
+ dataset_name = []
+ for recipe_name in recipe_names:
+ recipe = creation_field.get(recipe_name)
+ if recipe is None:
+ continue
+ dataset_name.extend(extract_dataset_names_from_json_dict(recipe))
+ return dataset_name
+ elif creation_field.get("__type__") == "FromMerger":
+ dataset_name = []
+ for recipe in creation_field.get("recipes", []):
+ dataset_name.extend(extract_dataset_names_from_json_dict(recipe))
+ return dataset_name
+ return []
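The recursion added above is easiest to see on a small nested recipe: FromName contributes its name, FromPath contributes nothing, and FromMerge/FromMerger concatenate their children. A sketch using only constructors that appear in this diff; the dataset names are illustrative:

    from pathlib import Path

    from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

    recipe = DatasetRecipe.from_merge(
        recipe0=DatasetRecipe.from_name(name="mnist"),
        recipe1=DatasetRecipe.from_merger(
            recipes=[
                DatasetRecipe.from_name(name="coco-2017"),
                DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist")),  # contributes no name
            ]
        ),
    )

    assert set(recipe.get_dataset_names()) == {"mnist", "coco-2017"}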
{hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/recipe_types.py

@@ -108,6 +108,10 @@ class RecipeCreation(Serializable):
  def get_function() -> Callable[..., "HafniaDataset"]:
  pass

+ @abstractmethod
+ def get_dataset_names(self) -> List[str]:
+ pass
+
  def build(self) -> "HafniaDataset":
  from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

{hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/hafnia_dataset.py

@@ -411,30 +411,18 @@ class HafniaDataset:

  return True

- def write(self, path_folder: Path, name_by_hash: bool = True, add_version: bool = False) -> None:
+ def write(self, path_folder: Path, add_version: bool = False) -> None:
  user_logger.info(f"Writing dataset to {path_folder}...")
  if not path_folder.exists():
  path_folder.mkdir(parents=True)
- path_folder_images = path_folder / "data"
- path_folder_images.mkdir(parents=True, exist_ok=True)

  new_relative_paths = []
  for org_path in tqdm(self.samples["file_name"].to_list(), desc="- Copy images"):
- org_path = Path(org_path)
- if not org_path.exists():
- raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
- if name_by_hash:
- filename = dataset_helpers.filename_as_hash_from_path(org_path)
- else:
- filename = Path(org_path).name
- new_path = path_folder_images / filename
- if not new_path.exists():
- shutil.copy2(org_path, new_path)
-
- if not new_path.exists():
- raise FileNotFoundError(f"File {new_path} does not exist in the dataset.")
+ new_path = dataset_helpers.copy_and_rename_file_to_hash_value(
+ path_source=Path(org_path),
+ path_dataset_root=path_folder,
+ )
  new_relative_paths.append(str(new_path.relative_to(path_folder)))
-
  table = self.samples.with_columns(pl.Series(new_relative_paths).alias("file_name"))
  table.write_ndjson(path_folder / FILENAME_ANNOTATIONS_JSONL) # Json for readability
  table.write_parquet(path_folder / FILENAME_ANNOTATIONS_PARQUET) # Parquet for speed
{hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/datasets.py

@@ -62,7 +62,12 @@ def download_or_get_dataset_path(
  dataset_id = get_dataset_id(dataset_name=dataset_name, endpoint=endpoint_dataset, api_key=api_key)
  if dataset_id is None:
  sys_logger.error(f"Dataset '{dataset_name}' not found on the Hafnia platform.")
- access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/temporary-credentials"
+
+ if utils.is_hafnia_cloud_job():
+ credentials_endpoint_suffix = "temporary-credentials-hidden" # Access to hidden datasets
+ else:
+ credentials_endpoint_suffix = "temporary-credentials" # Access to sample dataset
+ access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/{credentials_endpoint_suffix}"

  download_dataset_from_access_endpoint(
  endpoint=access_dataset_endpoint,
{hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/dataset_recipe/test_dataset_recipes.py

@@ -1,4 +1,5 @@
  import inspect
+ import json
  import tempfile
  from dataclasses import dataclass
  from pathlib import Path
@@ -9,6 +10,7 @@ import pytest
  from hafnia.dataset.dataset_recipe.dataset_recipe import (
  DatasetRecipe,
  FromMerger,
+ extract_dataset_names_from_json_dict,
  get_dataset_path_from_recipe,
  )
  from hafnia.dataset.dataset_recipe.recipe_transforms import SelectSamples, Shuffle
@@ -171,7 +173,7 @@ class IntegrationTestUseCase:
  ),
  IntegrationTestUseCase(
  recipe=DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"), check_for_images=False),
- short_name="'.data|datasets|mnist'",
+ short_name="'.data-datasets-mnist'",
  ),
  IntegrationTestUseCase(
  recipe=DatasetRecipe.from_merger(
@@ -180,14 +182,14 @@ class IntegrationTestUseCase:
  DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"), check_for_images=False),
  ]
  ),
- short_name="Merger(mnist,'.data|datasets|mnist')",
+ short_name="Merger(mnist,'.data-datasets-mnist')",
  ),
  IntegrationTestUseCase(
  recipe=DatasetRecipe.from_merge(
  recipe0=DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"), check_for_images=False),
  recipe1=DatasetRecipe.from_name(name="mnist", force_redownload=False),
  ),
- short_name="Merger('.data|datasets|mnist',mnist)",
+ short_name="Merger('.data-datasets-mnist',mnist)",
  ),
  IntegrationTestUseCase(
  recipe=DatasetRecipe.from_name(name="mnist", force_redownload=False)
@@ -258,3 +260,44 @@ def test_cases_integration_tests(recipe_use_case: IntegrationTestUseCase):

  assert isinstance(dataset, HafniaDataset), "Dataset is not an instance of HafniaDataset"
  # assert isinstance(dataset, HafniaDataset), "Dataset is not an instance of HafniaDataset"
+
+
+ def test_get_dataset_names():
+ expected_dataset_names = {"dataset0", "dataset1", "dataset2", "dataset3", "dataset4", "dataset5", "dataset6"}
+ nested_recipe = DatasetRecipe.from_merger(
+ recipes=[
+ DatasetRecipe.from_merger(
+ recipes=[
+ DatasetRecipe.from_name(name="dataset0"),
+ DatasetRecipe.from_name(name="dataset1"),
+ DatasetRecipe.from_merge(
+ recipe0=DatasetRecipe.from_name(name="dataset2"),
+ recipe1=DatasetRecipe.from_name(name="dataset3"),
+ ),
+ ]
+ ),
+ DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+ .select_samples(n_samples=30)
+ .splits_by_ratios(split_ratios={"train": 0.8, "val": 0.1, "test": 0.1}),
+ DatasetRecipe.from_name(name="dataset4").select_samples(n_samples=20).shuffle(),
+ DatasetRecipe.from_merger(
+ recipes=[
+ DatasetRecipe.from_name(name="dataset5"),
+ DatasetRecipe.from_name(name="dataset6"),
+ ]
+ ),
+ ]
+ )
+
+ assert set(nested_recipe.get_dataset_names()) == expected_dataset_names, "Dataset names do not match expected names"
+
+ json_str = nested_recipe.as_json_str()
+ nested_recipe.as_json_file(path_json=Path("nested_recipe.json"))
+ data_dict = json.loads(json_str)
+
+ dataset_names = extract_dataset_names_from_json_dict(data_dict)
+ assert set(dataset_names) == expected_dataset_names, (
+ f"If this function fails, you should be concerned !! The '{extract_dataset_names_from_json_dict.__name__}' "
+ "function is copy/pasted to 'dipdatalib' to extract dataset names from json dictionaries directly. "
+ "If this test fails, please fix the function and copy/paste the function to dipdatalib as well."
+ )
{hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_dataset_helpers.py

@@ -72,8 +72,13 @@ def test_create_split_name_list_from_ratios(test_case: CreateSplitNameListFromRa

  def test_save_image_with_hash_name(tmp_path: Path):
  dummy_image = (255 * np.random.rand(100, 100, 3)).astype(np.uint8) # Create a dummy image
- path_image = dataset_helpers.save_image_with_hash_name(dummy_image, tmp_path)
- filename_from_path = dataset_helpers.filename_as_hash_from_path(path_image)
- assert filename_from_path == path_image.name
- assert path_image.exists()
- assert path_image.suffix in [".png"]
+ tmp_path0 = tmp_path / "folder0"
+ path_image0 = dataset_helpers.save_image_with_hash_name(dummy_image, tmp_path0)
+
+ tmp_path1 = tmp_path / "folder1"
+ path_image1 = dataset_helpers.copy_and_rename_file_to_hash_value(path_image0, tmp_path1)
+ assert path_image1.relative_to(tmp_path1) == path_image0.relative_to(tmp_path0)
+ assert path_image0.exists()
+ assert path_image1.exists()
+ assert path_image0.suffix in [".png"]
+ assert path_image1.suffix in [".png"]
{hafnia-0.2.2 → hafnia-0.2.3}/tests/test_cli.py

@@ -47,10 +47,47 @@ def config_with_profiles(test_config_path: Path, profile_data: dict) -> Config:


  def test_configure(cli_runner: CliRunner, empty_config: Config, api_key: str) -> None:
- inputs = f"default\ntest-api-key\n{consts.DEFAULT_API_URL}\n"
+ inputs = f"default\nApiKey some-fake-test-api-key\n{consts.DEFAULT_API_URL}\n"
  result = cli_runner.invoke(cli.main, ["configure"], input="".join(inputs))
  assert result.exit_code == 0
  assert f"{consts.PROFILE_TABLE_HEADER} default" in result.output
+ assert "ApiKey some" in result.output
+
+
+ def test_configure_api_key_autofix(cli_runner: CliRunner, empty_config: Config, api_key: str) -> None:
+ """
+ The submitted api key should always contain an "ApiKey " prefix.
+ Namely the submitted api key should be in this form "ApiKey [HASH_VALUE]"
+ Many users submit the api key without the prefix.
+ This test ensures that the CLI will automatically add the prefix if missing.
+ """
+ inputs = f"default\nfake-api-key-with-out-prefix\n{consts.DEFAULT_API_URL}\n"
+ result = cli_runner.invoke(cli.main, ["configure"], input="".join(inputs))
+ assert result.exit_code == 0
+ assert f"{consts.PROFILE_TABLE_HEADER} default" in result.output
+ assert "ApiKey fake" in result.output, (
+ "'ApiKey ' was not added automatically. API key should be automatically prefixed with 'ApiKey ' when missing"
+ )
+
+
+ def test_create_profile(cli_runner: CliRunner, empty_config: Config, api_key: str) -> None:
+ fake_api_key = "SomeFakeApiKey123"
+ args = [
+ "profile",
+ "create",
+ fake_api_key,
+ "--name",
+ "test_profile",
+ "--api-url",
+ consts.DEFAULT_API_URL,
+ "--activate",
+ ]
+
+ result = cli_runner.invoke(cli.main, args)
+ assert result.exit_code == 0
+ assert f"ApiKey {fake_api_key[:3]}" in result.output, (
+ "'ApiKey ' was not added automatically. API key should be automatically prefixed with 'ApiKey ' when missing"
+ )


  class TestProfile:
{hafnia-0.2.2 → hafnia-0.2.3}/tests/test_samples.py

@@ -1,3 +1,5 @@
+ import collections
+ from pathlib import Path
  from typing import Any, Dict

  import numpy as np
@@ -94,6 +96,11 @@ def test_check_dataset(loaded_dataset, compare_to_expected_image):

  compare_to_expected_image(image)

+ # We are arranging dataset files in multiple sub-folders to avoid S3 rate limits.
+ # This test checks that the dataset files are distributed across multiple sub-folders.
+ unique_sub_folders = collections.Counter([Path(path).parent.name for path in dataset.samples[ColumnName.FILE_NAME]])
+ assert len(unique_sub_folders) > 1, "Expected dataset files to be distributed across sub-folders"
+

  @pytest.mark.slow
  def test_dataset_draw_image_and_target(loaded_dataset, compare_to_expected_image):
{hafnia-0.2.2 → hafnia-0.2.3}/uv.lock

@@ -269,7 +269,7 @@ wheels = [

  [[package]]
  name = "hafnia"
- version = "0.2.2"
+ version = "0.2.3"
  source = { editable = "." }
  dependencies = [
  { name = "boto3" },