hafnia 0.2.0.tar.gz → 0.2.1.tar.gz
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/workflows/ci_cd.yaml +1 -1
- {hafnia-0.2.0 → hafnia-0.2.1}/PKG-INFO +17 -20
- {hafnia-0.2.0 → hafnia-0.2.1}/README.md +16 -19
- hafnia-0.2.1/examples/example_dataset_recipe.py +165 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/examples/example_hafnia_dataset.py +6 -5
- {hafnia-0.2.0 → hafnia-0.2.1}/examples/example_torchvision_dataloader.py +7 -2
- {hafnia-0.2.0 → hafnia-0.2.1}/pyproject.toml +1 -1
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/config.py +17 -4
- hafnia-0.2.1/src/hafnia/data/factory.py +23 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/dataset_names.py +2 -1
- hafnia-0.2.1/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +327 -0
- hafnia-0.2.1/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +53 -0
- hafnia-0.2.1/src/hafnia/dataset/dataset_recipe/recipe_types.py +140 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/hafnia_dataset.py +202 -31
- hafnia-0.2.1/src/hafnia/dataset/operations/dataset_stats.py +15 -0
- hafnia-0.2.1/src/hafnia/dataset/operations/dataset_transformations.py +82 -0
- {hafnia-0.2.0/src/hafnia/dataset → hafnia-0.2.1/src/hafnia/dataset/operations}/table_transformations.py +1 -1
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/experiment/hafnia_logger.py +5 -5
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/helper_testing.py +48 -3
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/platform/datasets.py +26 -13
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/utils.py +20 -1
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/visualizations/image_visualizations.py +1 -1
- hafnia-0.2.1/tests/dataset/dataset_recipe/test_dataset_recipe_helpers.py +120 -0
- hafnia-0.2.1/tests/dataset/dataset_recipe/test_dataset_recipes.py +260 -0
- hafnia-0.2.1/tests/dataset/dataset_recipe/test_recipe_transformations.py +224 -0
- {hafnia-0.2.0/tests/dataset → hafnia-0.2.1/tests/dataset/operations}/test_table_transformations.py +2 -2
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/dataset/test_hafnia_dataset.py +35 -4
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/test_check_example_scripts.py +3 -2
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/test_cli.py +49 -1
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/test_samples.py +2 -2
- {hafnia-0.2.0 → hafnia-0.2.1}/uv.lock +1 -1
- hafnia-0.2.0/src/hafnia/data/factory.py +0 -20
- hafnia-0.2.0/src/hafnia/dataset/dataset_transformation.py +0 -187
- {hafnia-0.2.0 → hafnia-0.2.1}/.devcontainer/devcontainer.json +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.devcontainer/hooks/post_create +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/dependabot.yaml +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/workflows/Dockerfile +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/workflows/build.yaml +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/workflows/check_release.yaml +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/workflows/lint.yaml +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/workflows/publish_docker.yaml +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/workflows/publish_pypi.yaml +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.github/workflows/tests.yaml +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.gitignore +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.pre-commit-config.yaml +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.python-version +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.vscode/extensions.json +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.vscode/launch.json +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/.vscode/settings.json +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/LICENSE +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/docs/cli.md +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/docs/release.md +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/examples/example_logger.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/__init__.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/__main__.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/consts.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/dataset_cmds.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/experiment_cmds.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/profile_cmds.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/recipe_cmds.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/cli/runc_cmds.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/__init__.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/data/__init__.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/dataset_helpers.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/dataset_upload_helper.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/__init__.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/bbox.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/bitmask.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/classification.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/point.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/polygon.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/primitive.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/segmentation.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/primitives/utils.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/experiment/__init__.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/http.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/log.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/platform/__init__.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/platform/builder.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/platform/download.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/platform/experiment.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/torch_helpers.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/visualizations/colors.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/conftest.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_visualizations/test_blur_anonymization[coco-2017].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_visualizations/test_blur_anonymization[tiny-dataset].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_visualizations/test_draw_annotations[coco-2017].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_visualizations/test_draw_annotations[tiny-dataset].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_visualizations/test_mask_region[coco-2017].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/expected_images/test_visualizations/test_mask_region[tiny-dataset].png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/coco-2017/annotations.jsonl +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/coco-2017/annotations.parquet +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/coco-2017/data/182a2c0a3ce312cf.jpg +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/coco-2017/data/4e95c6eb6209880a.jpg +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/coco-2017/data/cf86c7a23edb55ce.jpg +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/coco-2017/dataset_info.json +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/tiny-dataset/annotations.jsonl +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/tiny-dataset/annotations.parquet +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/tiny-dataset/data/222bbd5721a8a86e.png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/tiny-dataset/data/3251d85443622e4c.png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/tiny-dataset/data/3657ababa44af9b6.png +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/data/micro_test_datasets/tiny-dataset/dataset_info.json +0 -0
- {hafnia-0.2.0/tests/dataset → hafnia-0.2.1/tests/dataset/operations}/test_dataset_transformations.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/dataset/test_colors.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/dataset/test_dataset_helpers.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/dataset/test_shape_primitives.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/test_builder.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/test_hafnia_logger.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/test_utils.py +0 -0
- {hafnia-0.2.0 → hafnia-0.2.1}/tests/test_visualizations.py +0 -0
{hafnia-0.2.0 → hafnia-0.2.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.2.0
+Version: 0.2.1
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -147,22 +147,20 @@ The `HafniaDataset` object provides a convenient way to interact with the datase
 creating splits, accessing samples, printing statistics, saving to and loading from disk.

 In essence, the `HafniaDataset` class contains `dataset.info` with dataset information
-and `dataset.…
+and `dataset.samples` with annotations as a polars DataFrame

 ```python
 # Annotations are stored in a polars DataFrame
-print(dataset.…
+print(dataset.samples.head(2))
 shape: (2, 14)
-…
-│…
-│ ---…
-│…
-…
-│…
-│ ┆ ……
-…
-│ ┆ … ┆ ┆ ┆ ┆ .… ┆ ┆ ┆ 0… │
-└──────────┴────────────────────────────────┴────────┴───────┴───┴───────────────────────────────┴──────────┴──────────┴───────────────────────────────┘
+┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
+│ sample_index ┆ file_name                       ┆ height ┆ width ┆ … ┆ objects                         ┆ bitmasks ┆ polygons ┆ meta                            │
+│ ---          ┆ ---                             ┆ ---    ┆ ---   ┆   ┆ ---                             ┆ ---      ┆ ---      ┆ ---                             │
+│ u32          ┆ str                             ┆ i64    ┆ i64   ┆   ┆ list[struct[11]]                ┆ null     ┆ null     ┆ struct[5]                       │
+╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
+│ 0            ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23,"V… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+│ 100          ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0.… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+└──────────────┴─────────────────────────────────┴────────┴───────┴───┴─────────────────────────────────┴──────────┴──────────┴─────────────────────────────────┘
 ```

 ```python
@@ -235,16 +233,15 @@ It also contain annotations as primitive types such as `Bbox`, `Classification`.
 ```python
 rich.print(sample)
 Sample(
-…
-    file_name='data/…
-    0000.png',
+    sample_index=120,
+    file_name='/home/ubuntu/code/hafnia/.data/datasets/midwest-vehicle-detection/data/343403325f27e390.png',
     height=1080,
     width=1920,
-    split='…
+    split='train',
     is_sample=True,
-…
-…
-    remote_path=…
+    collection_index=None,
+    collection_id=None,
+    remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
     classifications=[
         Classification(
             class_name='Clear',
{hafnia-0.2.0 → hafnia-0.2.1}/README.md

@@ -121,22 +121,20 @@ The `HafniaDataset` object provides a convenient way to interact with the datase
 creating splits, accessing samples, printing statistics, saving to and loading from disk.

 In essence, the `HafniaDataset` class contains `dataset.info` with dataset information
-and `dataset.…
+and `dataset.samples` with annotations as a polars DataFrame

 ```python
 # Annotations are stored in a polars DataFrame
-print(dataset.…
+print(dataset.samples.head(2))
 shape: (2, 14)
-…
-│…
-│ ---…
-│…
-…
-│…
-│ ┆ ……
-…
-│ ┆ … ┆ ┆ ┆ ┆ .… ┆ ┆ ┆ 0… │
-└──────────┴────────────────────────────────┴────────┴───────┴───┴───────────────────────────────┴──────────┴──────────┴───────────────────────────────┘
+┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
+│ sample_index ┆ file_name                       ┆ height ┆ width ┆ … ┆ objects                         ┆ bitmasks ┆ polygons ┆ meta                            │
+│ ---          ┆ ---                             ┆ ---    ┆ ---   ┆   ┆ ---                             ┆ ---      ┆ ---      ┆ ---                             │
+│ u32          ┆ str                             ┆ i64    ┆ i64   ┆   ┆ list[struct[11]]                ┆ null     ┆ null     ┆ struct[5]                       │
+╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
+│ 0            ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.0492,0.0357,0.2083,0.23,"V… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+│ 100          ┆ /home/ubuntu/code/hafnia/.data… ┆ 1080   ┆ 1920  ┆ … ┆ [{0.146382,0.078704,0.42963,0.… ┆ null     ┆ null     ┆ {120.0,1.0,"2024-07-10T18:30:0… │
+└──────────────┴─────────────────────────────────┴────────┴───────┴───┴─────────────────────────────────┴──────────┴──────────┴─────────────────────────────────┘
 ```

 ```python
@@ -209,16 +207,15 @@ It also contain annotations as primitive types such as `Bbox`, `Classification`.
 ```python
 rich.print(sample)
 Sample(
-…
-    file_name='data/…
-    0000.png',
+    sample_index=120,
+    file_name='/home/ubuntu/code/hafnia/.data/datasets/midwest-vehicle-detection/data/343403325f27e390.png',
     height=1080,
     width=1920,
-    split='…
+    split='train',
    is_sample=True,
-…
-…
-    remote_path=…
+    collection_index=None,
+    collection_id=None,
+    remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
     classifications=[
         Classification(
             class_name='Clear',
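The README hunks above document the new `dataset.samples` accessor (a polars DataFrame) and the extended `Sample` fields (`sample_index`, `collection_index`, `collection_id`). As an illustration only, here is a minimal sketch of how that accessor could be used; it assumes nothing beyond the names visible in the diff (`load_dataset`, `HafniaDataset.samples`, the printed columns) plus standard polars calls, and is not copied from the package itself:

```python
import polars as pl

from hafnia.data.factory import load_dataset

dataset = load_dataset("midwest-vehicle-detection")

# dataset.samples is a plain polars DataFrame, so the usual polars API applies.
samples: pl.DataFrame = dataset.samples
print(samples.head(2))                                     # same call as in the README snippet
print(samples.select("sample_index", "height", "width"))   # columns shown in the printed schema
print(len(samples.filter(pl.col("height") == 1080)))       # number of 1080p samples
```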
hafnia-0.2.1/examples/example_dataset_recipe.py

@@ -0,0 +1,165 @@
+from pathlib import Path
+
+from rich import print as rprint
+
+from hafnia.data.factory import load_dataset
+from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe
+from hafnia.dataset.dataset_recipe.recipe_transforms import (
+    SelectSamples,
+    Shuffle,
+    SplitsByRatios,
+)
+from hafnia.dataset.hafnia_dataset import HafniaDataset
+
+### Introducing DatasetRecipe ###
+# A DatasetRecipe is a recipe for the dataset you want to create.
+# The recipe itself is not executed - this is just a specification of the dataset you want!
+
+# A DatasetRecipe is an important concept in Hafnia as it allows you to merge multiple datasets
+# and transformations in a single recipe. This is especially useful for Training as a Service (TaaS)
+# where you need to define the dataset you want as a configuration and load it in the TaaS platform.
+
+# The 'DatasetRecipe' interface is similar to the 'HafniaDataset' interface.
+# To demonstrate, we will first create a dataset with the regular 'HafniaDataset' interface.
+# This line will get the "mnist" dataset, shuffle it, and select 20 samples.
+dataset = HafniaDataset.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+
+# Now the same dataset is created using the 'DatasetRecipe' interface.
+dataset_recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+dataset = dataset_recipe.build()
+# Note that the interface is similar, but to actually create the dataset you need to call `build()` on the recipe.
+
+# An important feature of a 'DatasetRecipe' is that the recipe itself - and not the dataset - can be saved as a file
+# and loaded from file. Meaning you can easily save, share, load and build the dataset later or in a different
+# environment.
+# In programming language, the recipe can be serialized to JSON and deserialized back to the original python object
+# recipe.
+# For TaaS, this is the only way to include multiple datasets during training.
+
+# This is how it looks like in practice:
+# 1) Save the dataset recipe to a file
+path_json = Path(".data/tmp/dataset_recipe.json")
+dataset_recipe.as_json_file(path_json)
+
+# 2) The recipe can be loaded from the file
+dataset_recipe_again = DatasetRecipe.from_json_file(path_json)
+
+# We can verify that the loaded recipe is the same as the original recipe.
+assert dataset_recipe_again == dataset_recipe
+
+# Additionally, you can get the python code for creating the same recipe.
+dataset_recipe.as_python_code()
+
+# Example: DatasetRecipe from Path
+dataset_recipe = DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+
+# Example: DatasetRecipe by merging multiple dataset recipes
+dataset_recipe = DatasetRecipe.from_merger(
+    recipes=[
+        DatasetRecipe.from_name(name="mnist"),
+        DatasetRecipe.from_name(name="mnist"),
+    ]
+)
+
+# Example: Recipes can be infinitely nested and combined.
+dataset_recipe = DatasetRecipe.from_merger(
+    recipes=[
+        DatasetRecipe.from_merger(
+            recipes=[
+                DatasetRecipe.from_name(name="mnist"),
+                DatasetRecipe.from_name(name="mnist"),
+            ]
+        ),
+        DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+        .select_samples(n_samples=30)
+        .splits_by_ratios(split_ratios={"train": 0.8, "val": 0.1, "test": 0.1}),
+        DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
+    ]
+)
+
+# Now you can build the dataset from the recipe.
+dataset: HafniaDataset = dataset_recipe.build()
+assert len(dataset) == 450  # 2x200 + 30 + 20
+
+# Finally, you can print the dataset recipe to see what it contains.
+rprint(dataset_recipe)  # as a python object
+print(dataset_recipe.as_json_str())  # as a JSON string
+
+
+# Example: Using the 'load_dataset' function
+merged_dataset: HafniaDataset = load_dataset(dataset_recipe)
+# You get a few extra things when using `load_dataset`.
+# 1) You get the dataset directly - you don't have to call `build()` on the recipe.
+# 2) The dataset is cached if it already exists, so you don't have to
+# download or rebuild the dataset on the second run.
+# 3) You can use an implicit form of the recipe. One example of this is that you just specify
+# the dataset name `load_dataset("mnist")` or path `load_dataset(Path(".data/datasets/mnist"))`
+
+
+### DatasetRecipe Implicit Form ###
+# Below we demonstrate the difference between implicit and explicit forms of dataset recipes.
+# Example: Get dataset by name with implicit and explicit forms
+dataset = load_dataset("mnist")  # Implicit form
+dataset = load_dataset(DatasetRecipe.from_name(name="mnist"))  # Explicit form
+
+# Example: Get dataset from path with implicit and explicit forms:
+dataset = load_dataset(Path(".data/datasets/mnist"))  # Implicit form
+dataset = load_dataset(DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist")))  # Explicit form
+
+# Example: Merge datasets with implicit and explicit forms
+dataset = load_dataset(("mnist", "mnist"))  # Implicit form
+dataset = load_dataset(  # Explicit form
+    DatasetRecipe.from_merger(
+        recipes=[
+            DatasetRecipe.from_name(name="mnist"),
+            DatasetRecipe.from_name(name="mnist"),
+        ]
+    )
+)
+
+# Example: Define a dataset with transformations using implicit and explicit forms
+dataset = load_dataset(["mnist", SelectSamples(n_samples=20), Shuffle()])  # Implicit form
+dataset = load_dataset(DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle())  # Explicit form
+
+
+# Example: Complex nested example with implicit vs explicit forms
+# Implicit form of a complex dataset recipe
+split_ratio = {"train": 0.8, "val": 0.1, "test": 0.1}
+implicit_recipe = (
+    ("mnist", "mnist"),
+    [Path(".data/datasets/mnist"), SelectSamples(n_samples=30), SplitsByRatios(split_ratios=split_ratio)],
+    ["mnist", SelectSamples(n_samples=20), Shuffle()],
+)
+
+# Explicit form of the same complex dataset recipe
+explicit_recipe = DatasetRecipe.from_merger(
+    recipes=[
+        DatasetRecipe.from_merger(
+            recipes=[
+                DatasetRecipe.from_name(name="mnist"),
+                DatasetRecipe.from_name(name="mnist"),
+            ]
+        ),
+        DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+        .select_samples(n_samples=30)
+        .splits_by_ratios(split_ratios=split_ratio),
+        DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
+    ]
+)
+
+# The implicit form uses the following rules:
+# str: Will get a dataset by name -> In explicit form it becomes 'DatasetRecipe.from_name'
+# Path: Will get a dataset from path -> In explicit form it becomes 'DatasetRecipe.from_path'
+# tuple: Will merge datasets specified in the tuple -> In explicit form it becomes 'DatasetRecipe.from_merger'
+# list: Will define a dataset followed by a list of transformations -> In explicit form it becomes chained method calls
+# Generally, we recommend using the explicit form over the implicit form when multiple datasets and transformations are involved.
+
+
+# To convert from implicit to explicit recipe form, you can use the `from_implicit_form` method.
+explicit_recipe_from_implicit = DatasetRecipe.from_implicit_form(implicit_recipe)
+rprint("Converted explicit recipe:")
+rprint(explicit_recipe_from_implicit)
+
+# Verify that the conversion produces the same result
+assert explicit_recipe_from_implicit == explicit_recipe
+rprint("✓ Conversion successful - recipes are equivalent!")
{hafnia-0.2.0 → hafnia-0.2.1}/examples/example_hafnia_dataset.py

@@ -21,7 +21,7 @@ from hafnia.dataset.primitives.polygon import Polygon

 # Load dataset
 path_dataset = get_dataset_path("midwest-vehicle-detection")
-dataset = HafniaDataset.…
+dataset = HafniaDataset.from_path(path_dataset)

 # Alternatively, you can use the 'load_dataset' function
 dataset = load_dataset("midwest-vehicle-detection")
@@ -39,14 +39,15 @@ dataset.print_stats()
 # Create a dataset split for training
 dataset_train = dataset.create_split_dataset("train")

-# Checkout built-in transformations in '…
+# Checkout built-in transformations in 'operations/dataset_transformations' or 'HafniaDataset'
 dataset_val = dataset.create_split_dataset(SplitName.VAL)  # Use 'SplitName' to avoid magic strings

-…
+
+small_dataset = dataset.select_samples(n_samples=10, seed=42)  # Selects 10 samples from the dataset
 shuffled_dataset = dataset.shuffle(seed=42)  # Shuffle the dataset

 split_ratios = {SplitName.TRAIN: 0.8, SplitName.VAL: 0.1, SplitName.TEST: 0.1}
-new_dataset_splits = dataset.…
+new_dataset_splits = dataset.splits_by_ratios(split_ratios)

 # Write dataset to disk
 path_tmp = Path(".data/tmp")
@@ -54,7 +55,7 @@ path_dataset = path_tmp / "hafnia_dataset"
 dataset.write(path_dataset)  # --> Check that data is human readable

 # Load dataset from disk
-dataset_again = HafniaDataset.…
+dataset_again = HafniaDataset.from_path(path_dataset)

 # Access the first sample in the training split - data is stored in a dictionary
 sample_dict = dataset_train[0]
{hafnia-0.2.0 → hafnia-0.2.1}/examples/example_torchvision_dataloader.py

@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import torch
 import torchvision
 import torchvision.transforms.functional
@@ -43,11 +45,14 @@ if __name__ == "__main__":
     image, targets = train_dataset[0]
     visualize_image = torch_helpers.draw_image_and_targets(image=image, targets=targets)
     pil_image = torchvision.transforms.functional.to_pil_image(visualize_image)
-…
+
+    path_tmp = Path(".data/tmp")
+    path_tmp.mkdir(parents=True, exist_ok=True)
+    pil_image.save(path_tmp / "visualized_labels.png")

     # Create DataLoaders - using TorchVisionCollateFn
     collate_fn = torch_helpers.TorchVisionCollateFn()
-    train_loader = DataLoader(train_dataset, batch_size=…
+    train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True, collate_fn=collate_fn)

     for images, targets in train_loader:
         print(f"Batch of images: {len(images)}")
{hafnia-0.2.0 → hafnia-0.2.1}/src/cli/config.py

@@ -80,7 +80,7 @@ class Config:
     def __init__(self, config_path: Optional[Path] = None) -> None:
         self.config_path = self.resolve_config_path(config_path)
         self.config_path.parent.mkdir(parents=True, exist_ok=True)
-        self.config_data =…
+        self.config_data = Config.load_config(self.config_path)

     def resolve_config_path(self, path: Optional[Path] = None) -> Path:
         if path:
@@ -111,12 +111,25 @@ class Config:
         endpoint = self.config.platform_url + PLATFORM_API_MAPPING[method]
         return endpoint

-…
+    @staticmethod
+    def load_config(config_path: Path) -> ConfigFileSchema:
         """Load configuration from file."""
-…
+
+        # Environment variables has higher priority than config file
+        HAFNIA_API_KEY = os.getenv("HAFNIA_API_KEY")
+        HAFNIA_PLATFORM_URL = os.getenv("HAFNIA_PLATFORM_URL")
+        if HAFNIA_API_KEY and HAFNIA_PLATFORM_URL:
+            HAFNIA_PROFILE_NAME = os.getenv("HAFNIA_PROFILE_NAME", "default").strip()
+            cfg = ConfigFileSchema(
+                active_profile=HAFNIA_PROFILE_NAME,
+                profiles={HAFNIA_PROFILE_NAME: ConfigSchema(platform_url=HAFNIA_PLATFORM_URL, api_key=HAFNIA_API_KEY)},
+            )
+            return cfg
+
+        if not config_path.exists():
             return ConfigFileSchema()
         try:
-            with open(…
+            with open(config_path.as_posix(), "r") as f:
                 data = json.load(f)
             return ConfigFileSchema(**data)
         except json.JSONDecodeError:
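The hunk above makes `HAFNIA_API_KEY` and `HAFNIA_PLATFORM_URL` take priority over the JSON config file, with `HAFNIA_PROFILE_NAME` falling back to "default". A hedged sketch of how this could be exercised is shown below; the `cli.config` import path is assumed from the `src/cli/` layout in the file list, and the values are placeholders:

```python
import os

# Placeholder credentials; with both variables set, Config.load_config() builds the
# active profile from the environment and never reads the JSON config file.
os.environ["HAFNIA_API_KEY"] = "<api-key>"
os.environ["HAFNIA_PLATFORM_URL"] = "https://<platform-host>"
os.environ["HAFNIA_PROFILE_NAME"] = "ci"  # optional; falls back to "default"

from cli.config import Config  # import path assumed, not confirmed by the diff

cfg = Config()
print(cfg.config_data.active_profile)  # expected to print "ci"
```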
hafnia-0.2.1/src/hafnia/data/factory.py

@@ -0,0 +1,23 @@
+import os
+from pathlib import Path
+from typing import Any
+
+from hafnia import utils
+from hafnia.dataset.hafnia_dataset import HafniaDataset, get_or_create_dataset_path_from_recipe
+
+
+def load_dataset(recipe: Any, force_redownload: bool = False) -> HafniaDataset:
+    """Load a dataset either from a local path or from the Hafnia platform."""
+
+    path_dataset = get_dataset_path(recipe, force_redownload=force_redownload)
+    dataset = HafniaDataset.from_path(path_dataset)
+    return dataset
+
+
+def get_dataset_path(recipe: Any, force_redownload: bool = False) -> Path:
+    if utils.is_hafnia_cloud_job():
+        return Path(os.getenv("MDI_DATASET_DIR", "/opt/ml/input/data/training"))
+
+    path_dataset = get_or_create_dataset_path_from_recipe(recipe, force_redownload=force_redownload)
+
+    return path_dataset
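The new factory module above is small enough to read directly; the sketch below only restates its behaviour with concrete calls, using nothing beyond what the added file shows:

```python
from hafnia.data.factory import get_dataset_path, load_dataset

# Outside a Hafnia cloud job, the recipe (here just a dataset name) is resolved to a local
# dataset folder via get_or_create_dataset_path_from_recipe and then loaded from disk.
dataset = load_dataset("mnist")
print(get_dataset_path("mnist"))

# force_redownload=True bypasses the cached copy and resolves the recipe again.
dataset = load_dataset("mnist", force_redownload=True)

# Inside a cloud job (utils.is_hafnia_cloud_job() is True), get_dataset_path ignores the
# recipe and returns MDI_DATASET_DIR, defaulting to /opt/ml/input/data/training.
```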
{hafnia-0.2.0 → hafnia-0.2.1}/src/hafnia/dataset/dataset_names.py

@@ -1,11 +1,12 @@
 from enum import Enum
 from typing import List

+FILENAME_RECIPE_JSON = "recipe.json"
 FILENAME_DATASET_INFO = "dataset_info.json"
 FILENAME_ANNOTATIONS_JSONL = "annotations.jsonl"
 FILENAME_ANNOTATIONS_PARQUET = "annotations.parquet"

-…
+DATASET_FILENAMES_REQUIRED = [
     FILENAME_DATASET_INFO,
     FILENAME_ANNOTATIONS_JSONL,
     FILENAME_ANNOTATIONS_PARQUET,