hafnia 0.1.25__tar.gz → 0.1.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/ci_cd.yaml +1 -1
  2. {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/publish_docker.yaml +2 -2
  3. {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/tests.yaml +1 -1
  4. {hafnia-0.1.25 → hafnia-0.1.26}/.gitignore +1 -1
  5. hafnia-0.1.26/.pre-commit-config.yaml +32 -0
  6. {hafnia-0.1.25 → hafnia-0.1.26}/PKG-INFO +157 -12
  7. {hafnia-0.1.25 → hafnia-0.1.26}/README.md +156 -7
  8. {hafnia-0.1.25 → hafnia-0.1.26}/docs/cli.md +4 -3
  9. {hafnia-0.1.25 → hafnia-0.1.26}/examples/dataset_builder.py +1 -2
  10. hafnia-0.1.26/examples/example_load_dataset.py +14 -0
  11. {hafnia-0.1.25 → hafnia-0.1.26}/pyproject.toml +28 -22
  12. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/__main__.py +4 -9
  13. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/config.py +20 -27
  14. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/consts.py +3 -1
  15. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/experiment_cmds.py +3 -17
  16. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/profile_cmds.py +16 -2
  17. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/recipe_cmds.py +2 -6
  18. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/runc_cmds.py +50 -49
  19. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/data/factory.py +3 -3
  20. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/experiment/hafnia_logger.py +5 -5
  21. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/http.py +2 -2
  22. hafnia-0.1.26/src/hafnia/log.py +23 -0
  23. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/platform/__init__.py +0 -2
  24. hafnia-0.1.26/src/hafnia/platform/builder.py +144 -0
  25. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/platform/download.py +8 -8
  26. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/platform/experiment.py +31 -25
  27. hafnia-0.1.26/src/hafnia/utils.py +137 -0
  28. hafnia-0.1.26/tests/test_builder.py +101 -0
  29. {hafnia-0.1.25 → hafnia-0.1.26}/tests/test_check_example_scripts.py +1 -0
  30. {hafnia-0.1.25 → hafnia-0.1.26}/tests/test_cli.py +8 -28
  31. {hafnia-0.1.25 → hafnia-0.1.26}/tests/test_samples.py +4 -0
  32. hafnia-0.1.26/tests/test_utils.py +84 -0
  33. {hafnia-0.1.25 → hafnia-0.1.26}/uv.lock +11 -16
  34. hafnia-0.1.25/.pre-commit-config.yaml +0 -16
  35. hafnia-0.1.25/examples/example_load_dataset.py +0 -4
  36. hafnia-0.1.25/src/hafnia/log.py +0 -32
  37. hafnia-0.1.25/src/hafnia/platform/api.py +0 -12
  38. hafnia-0.1.25/src/hafnia/platform/builder.py +0 -184
  39. hafnia-0.1.25/src/hafnia/platform/executor.py +0 -111
  40. hafnia-0.1.25/src/hafnia/utils.py +0 -135
  41. hafnia-0.1.25/tests/test_builder.py +0 -198
  42. hafnia-0.1.25/tests/test_executor.py +0 -84
  43. {hafnia-0.1.25 → hafnia-0.1.26}/.devcontainer/devcontainer.json +0 -0
  44. {hafnia-0.1.25 → hafnia-0.1.26}/.devcontainer/hooks/post_create +0 -0
  45. {hafnia-0.1.25 → hafnia-0.1.26}/.github/dependabot.yaml +0 -0
  46. {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/Dockerfile +0 -0
  47. {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/build.yaml +0 -0
  48. {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/check_release.yaml +0 -0
  49. {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/lint.yaml +0 -0
  50. {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/publish_pypi.yaml +0 -0
  51. {hafnia-0.1.25 → hafnia-0.1.26}/.python-version +0 -0
  52. {hafnia-0.1.25 → hafnia-0.1.26}/.vscode/extensions.json +0 -0
  53. {hafnia-0.1.25 → hafnia-0.1.26}/.vscode/launch.json +0 -0
  54. {hafnia-0.1.25 → hafnia-0.1.26}/.vscode/settings.json +0 -0
  55. {hafnia-0.1.25 → hafnia-0.1.26}/LICENSE +0 -0
  56. {hafnia-0.1.25 → hafnia-0.1.26}/docs/release.md +0 -0
  57. {hafnia-0.1.25 → hafnia-0.1.26}/examples/example_logger.py +0 -0
  58. {hafnia-0.1.25 → hafnia-0.1.26}/examples/example_torchvision_dataloader.py +0 -0
  59. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/__init__.py +0 -0
  60. {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/data_cmds.py +0 -0
  61. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/__init__.py +0 -0
  62. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/data/__init__.py +0 -0
  63. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/experiment/__init__.py +0 -0
  64. {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/torch_helpers.py +0 -0
  65. /hafnia-0.1.25/tests/test_mdi_logger.py → /hafnia-0.1.26/tests/test_hafnia_logger.py +0 -0

{hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/ci_cd.yaml
@@ -21,7 +21,7 @@ jobs:
  steps:
  - uses: actions/checkout@v4.2.2
  - name: Run Trivy vulnerability scanner
- uses: aquasecurity/trivy-action@0.30.0
+ uses: aquasecurity/trivy-action@0.31.0
  with:
  scan-type: 'fs'
  scan-ref: '.'

{hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/publish_docker.yaml
@@ -47,7 +47,7 @@ jobs:
  echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
  fi
  - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v4.1.0
+ uses: aws-actions/configure-aws-credentials@v4.2.1
  with:
  role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
  aws-region: ${{ steps.env-vars.outputs.aws_region }}
@@ -60,7 +60,7 @@ jobs:
  uses: docker/setup-buildx-action@v3.10.0

  - name: Build and push
- uses: docker/build-push-action@v6.16.0
+ uses: docker/build-push-action@v6.18.0
  env:
  ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
  ECR_REPOSITORY: mdi-runtime

{hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/tests.yaml
@@ -20,6 +20,6 @@ jobs:
  with:
  version: 0.6.8
  - name: Install the project
- run: uv sync --all-extras --dev
+ run: uv sync --group dev
  - name: Run tests
  run: uv run pytest tests

{hafnia-0.1.25 → hafnia-0.1.26}/.gitignore
@@ -163,4 +163,4 @@ cython_debug/
  /pypiprivate/
  /packaging/
  /.data/
- *.zip
+ /recipe.zip

hafnia-0.1.26/.pre-commit-config.yaml
@@ -0,0 +1,32 @@
+ repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.11.13
+ hooks:
+ - id: ruff-check
+ types_or: [python, pyi]
+ args: [--show-fixes]
+ files: ^(src|tests)/
+ - id: ruff-format
+ types_or: [python, pyi]
+ files: ^(src|tests)/
+ - repo: https://github.com/astral-sh/uv-pre-commit
+ rev: 0.7.13
+ hooks:
+ # Update the uv lockfile
+ - id: uv-lock
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v1.16.0
+ hooks:
+ - id: mypy
+ args: ["src/", "tests/"]
+ pass_filenames: false
+ - repo: local
+ hooks:
+ - id: pytest
+ stages: [manual]
+ name: pytest
+ entry: pytest
+ language: system
+ types: [python]
+ pass_filenames: false
+ args: ["-m", "not slow", "--tb=short", "-q"]

{hafnia-0.1.25 → hafnia-0.1.26}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.1.25
+ Version: 0.1.26
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
@@ -17,10 +17,6 @@ Requires-Dist: pydantic>=2.10.4
  Requires-Dist: rich>=13.9.4
  Requires-Dist: seedir>=0.5.0
  Requires-Dist: tqdm>=4.67.1
- Provides-Extra: torch
- Requires-Dist: flatten-dict>=0.4.2; extra == 'torch'
- Requires-Dist: torch>=2.6.0; extra == 'torch'
- Requires-Dist: torchvision>=0.21.0; extra == 'torch'
  Description-Content-Type: text/markdown

  # Hafnia
@@ -91,14 +87,117 @@ and explore the dataset sample with a python script:
  from hafnia.data import load_dataset

  dataset_splits = load_dataset("mnist")
- print(dataset_splits)
- print(dataset_splits["train"])
  ```
+
+ ### Dataset Format
  The returned sample dataset is a [hugging face dataset](https://huggingface.co/docs/datasets/index)
  and contains train, validation and test splits.

+ ```python
+ print(dataset_splits)
+
+ # Output:
+ >>> DatasetDict({
+ train: Dataset({
+ features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+ num_rows: 172
+ })
+ validation: Dataset({
+ features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+ num_rows: 21
+ })
+ test: Dataset({
+ features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+ num_rows: 21
+ })
+ })
+
+ ```
+
+ A Hugging Face dataset is a dictionary with splits, where each split is a `Dataset` object.
+ Each `Dataset` is structured as a table with a set of columns (also called features) and a row for each sample.
+
+ The features of the dataset can be viewed with the `features` attribute.
+ ```python
+ # View features of the train split
+ pprint.pprint(dataset["train"].features)
+ {'Surface Conditions': ClassLabel(names=['Dry', 'Wet'], id=None),
+ 'Weather': ClassLabel(names=['Clear', 'Foggy'], id=None),
+ 'height': Value(dtype='int64', id=None),
+ 'image': Image(mode=None, decode=True, id=None),
+ 'image_id': Value(dtype='int64', id=None),
+ 'objects': Sequence(feature={'bbox': Sequence(feature=Value(dtype='int64',
+ id=None),
+ length=-1,
+ id=None),
+ 'class_idx': ClassLabel(names=['Vehicle.Bicycle',
+ 'Vehicle.Motorcycle',
+ 'Vehicle.Car',
+ 'Vehicle.Van',
+ 'Vehicle.RV',
+ 'Vehicle.Single_Truck',
+ 'Vehicle.Combo_Truck',
+ 'Vehicle.Pickup_Truck',
+ 'Vehicle.Trailer',
+ 'Vehicle.Emergency_Vehicle',
+ 'Vehicle.Bus',
+ 'Vehicle.Heavy_Duty_Vehicle'],
+ id=None),
+ 'class_name': Value(dtype='string', id=None),
+ 'id': Value(dtype='string', id=None)},
+ length=-1,
+ id=None),
+ 'width': Value(dtype='int64', id=None)}
+ ```
+
+ View the first sample in the training set:
+ ```python
+ # Print sample from the training set
+ pprint.pprint(dataset["train"][0])
+
+ {'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1920x1080 at 0x79D6292C5ED0>,
+ 'image_id': 4920,
+ 'height': 1080,
+ 'Weather': 0,
+ 'Surface Conditions': 0,
+ 'objects': {'bbox': [[441, 180, 121, 126],
+ [549, 151, 131, 103],
+ [1845, 722, 68, 130],
+ [1810, 571, 110, 149]],
+ 'class_idx': [7, 7, 2, 2],
+ 'class_name': ['Vehicle.Pickup_Truck',
+ 'Vehicle.Pickup_Truck',
+ 'Vehicle.Car',
+ 'Vehicle.Car'],
+ 'id': ['HW6WiLAJ', 'T/ccFpRi', 'CS0O8B6W', 'DKrJGzjp']},
+ 'width': 1920}
+
+ ```
+
+ For hafnia based datasets, we want to standardized how a dataset and dataset tasks are represented.
+ We have defined a set of features that are common across all datasets in the Hafnia data library.
+
+ - `image`: The image itself, stored as a PIL image
+ - `height`: The height of the image in pixels
+ - `width`: The width of the image in pixels
+ - `[IMAGE_CLASSIFICATION_TASK]`: [Optional] Image classification tasks are top-level `ClassLabel` feature.
+ `ClassLabel` is a Hugging Face feature that maps class indices to class names.
+ In above example we have two classification tasks:
+ - `Weather`: Classifies the weather conditions in the image, with possible values `Clear` and `Foggy`
+ - `Surface Conditions`: Classifies the surface conditions in the image, with possible values `Dry` and `Wet`
+ - `objects`: A dictionary containing information about objects in the image, including:
+ - `bbox`: Bounding boxes for each object, represented with a list of bounding box coordinates
+ `[xmin, ymin, bbox_width, bbox_height]`. Each bounding box is defined with a top-left corner coordinate
+ `(xmin, ymin)` and bounding box width and height `(bbox_width, bbox_height)` in pixels.
+ - `class_idx`: Class indices for each detected object. This is a
+ `ClassLabel` feature that maps to the `class_name` feature.
+ - `class_name`: Class names for each detected object
+ - `id`: Unique identifiers for each detected object
+
+ ### Dataset Locally vs. Training-aaS
  An important feature of `load_dataset` is that it will return the full dataset
- when loaded on the Hafnia platform.
+ when loaded with Training-aaS on the Hafnia platform.
+
  This enables seamlessly switching between running/validating a training script
  locally (on the sample dataset) and running full model trainings with Training-aaS (on the full dataset).
  without changing code or configurations for the training script.
@@ -160,12 +259,58 @@ with a dataloader that performs data augmentations and batching of the dataset a
  To support this, we have provided a torch dataloader example script
  [example_torchvision_dataloader.py](./examples/example_torchvision_dataloader.py).

- The script demonstrates how to make a dataloader with data augmentation (`torchvision.transforms.v2`)
- and a helper function for visualizing image and labels.
+ The script demonstrates how to load a dataset sample, apply data augmentations using
+ `torchvision.transforms.v2`, and visualize the dataset with `torch_helpers.draw_image_and_targets`.
+
+ Note also how `torch_helpers.TorchVisionCollateFn` is used in combination with the `DataLoader` from
+ `torch.utils.data` to handle the dataset's collate function.

  The dataloader and visualization function supports computer vision tasks
  and datasets available in the data library.

+ ```python
+ # Load Hugging Face dataset
+ dataset_splits = load_dataset("midwest-vehicle-detection")
+
+ # Define transforms
+ train_transforms = v2.Compose(
+ [
+ v2.RandomResizedCrop(size=(224, 224), antialias=True),
+ v2.RandomHorizontalFlip(p=0.5),
+ v2.ToDtype(torch.float32, scale=True),
+ v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+ )
+ test_transforms = v2.Compose(
+ [
+ v2.Resize(size=(224, 224), antialias=True),
+ v2.ToDtype(torch.float32, scale=True),
+ v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+ )
+
+ keep_metadata = True
+ train_dataset = torch_helpers.TorchvisionDataset(
+ dataset_splits["train"], transforms=train_transforms, keep_metadata=keep_metadata
+ )
+ test_dataset = torch_helpers.TorchvisionDataset(
+ dataset_splits["test"], transforms=test_transforms, keep_metadata=keep_metadata
+ )
+
+ # Visualize sample
+ image, targets = train_dataset[0]
+ visualize_image = torch_helpers.draw_image_and_targets(image=image, targets=targets)
+ pil_image = torchvision.transforms.functional.to_pil_image(visualize_image)
+ pil_image.save("visualized_labels.png")
+
+ # Create DataLoaders - using TorchVisionCollateFn
+ collate_fn = torch_helpers.TorchVisionCollateFn(
+ skip_stacking=["objects.bbox", "objects.class_idx"]
+ )
+ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
+ ```
+
+
  ## Example: Training-aaS
  By combining logging and dataset loading, we can now construct our model training recipe.

@@ -206,10 +351,10 @@ Install uv
  curl -LsSf https://astral.sh/uv/install.sh | sh
  ```

- Install python dependencies including developer (`--dev`) and optional dependencies (`--all-extras`).
+ Create virtual environment and install python dependencies

  ```bash
- uv sync --all-extras --dev
+ uv sync
  ```

  Run tests:
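
The README text added above fixes a standardized feature layout: top-level `ClassLabel` classification tasks, and an `objects` field whose `bbox` entries use `[xmin, ymin, bbox_width, bbox_height]` and whose `class_idx` maps to `class_name`. The following is a minimal sketch of working with that layout, not code from the package: `to_xyxy` is a hypothetical helper, and `int2str` is the standard Hugging Face `ClassLabel` method for mapping indices back to names.

```python
from hafnia.data import load_dataset

# Dataset name follows the example shown in the diff above.
dataset_splits = load_dataset("midwest-vehicle-detection")
train = dataset_splits["train"]

# `objects` is a Sequence feature; its inner `class_idx` is a ClassLabel,
# so indices can be translated back to names with `int2str`.
class_label = train.features["objects"].feature["class_idx"]

sample = train[0]
class_names = class_label.int2str(sample["objects"]["class_idx"])


def to_xyxy(bbox):
    # Hypothetical helper: convert [xmin, ymin, bbox_width, bbox_height]
    # to corner coordinates [xmin, ymin, xmax, ymax].
    xmin, ymin, bbox_width, bbox_height = bbox
    return [xmin, ymin, xmin + bbox_width, ymin + bbox_height]


corner_boxes = [to_xyxy(bbox) for bbox in sample["objects"]["bbox"]]
print(class_names, corner_boxes)
```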

{hafnia-0.1.25 → hafnia-0.1.26}/README.md
@@ -66,14 +66,117 @@ and explore the dataset sample with a python script:
  from hafnia.data import load_dataset

  dataset_splits = load_dataset("mnist")
- print(dataset_splits)
- print(dataset_splits["train"])
  ```
+
+ ### Dataset Format
  The returned sample dataset is a [hugging face dataset](https://huggingface.co/docs/datasets/index)
  and contains train, validation and test splits.

+ ```python
+ print(dataset_splits)
+
+ # Output:
+ >>> DatasetDict({
+ train: Dataset({
+ features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+ num_rows: 172
+ })
+ validation: Dataset({
+ features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+ num_rows: 21
+ })
+ test: Dataset({
+ features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+ num_rows: 21
+ })
+ })
+
+ ```
+
+ A Hugging Face dataset is a dictionary with splits, where each split is a `Dataset` object.
+ Each `Dataset` is structured as a table with a set of columns (also called features) and a row for each sample.
+
+ The features of the dataset can be viewed with the `features` attribute.
+ ```python
+ # View features of the train split
+ pprint.pprint(dataset["train"].features)
+ {'Surface Conditions': ClassLabel(names=['Dry', 'Wet'], id=None),
+ 'Weather': ClassLabel(names=['Clear', 'Foggy'], id=None),
+ 'height': Value(dtype='int64', id=None),
+ 'image': Image(mode=None, decode=True, id=None),
+ 'image_id': Value(dtype='int64', id=None),
+ 'objects': Sequence(feature={'bbox': Sequence(feature=Value(dtype='int64',
+ id=None),
+ length=-1,
+ id=None),
+ 'class_idx': ClassLabel(names=['Vehicle.Bicycle',
+ 'Vehicle.Motorcycle',
+ 'Vehicle.Car',
+ 'Vehicle.Van',
+ 'Vehicle.RV',
+ 'Vehicle.Single_Truck',
+ 'Vehicle.Combo_Truck',
+ 'Vehicle.Pickup_Truck',
+ 'Vehicle.Trailer',
+ 'Vehicle.Emergency_Vehicle',
+ 'Vehicle.Bus',
+ 'Vehicle.Heavy_Duty_Vehicle'],
+ id=None),
+ 'class_name': Value(dtype='string', id=None),
+ 'id': Value(dtype='string', id=None)},
+ length=-1,
+ id=None),
+ 'width': Value(dtype='int64', id=None)}
+ ```
+
+ View the first sample in the training set:
+ ```python
+ # Print sample from the training set
+ pprint.pprint(dataset["train"][0])
+
+ {'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1920x1080 at 0x79D6292C5ED0>,
+ 'image_id': 4920,
+ 'height': 1080,
+ 'Weather': 0,
+ 'Surface Conditions': 0,
+ 'objects': {'bbox': [[441, 180, 121, 126],
+ [549, 151, 131, 103],
+ [1845, 722, 68, 130],
+ [1810, 571, 110, 149]],
+ 'class_idx': [7, 7, 2, 2],
+ 'class_name': ['Vehicle.Pickup_Truck',
+ 'Vehicle.Pickup_Truck',
+ 'Vehicle.Car',
+ 'Vehicle.Car'],
+ 'id': ['HW6WiLAJ', 'T/ccFpRi', 'CS0O8B6W', 'DKrJGzjp']},
+ 'width': 1920}
+
+ ```
+
+ For hafnia based datasets, we want to standardized how a dataset and dataset tasks are represented.
+ We have defined a set of features that are common across all datasets in the Hafnia data library.
+
+ - `image`: The image itself, stored as a PIL image
+ - `height`: The height of the image in pixels
+ - `width`: The width of the image in pixels
+ - `[IMAGE_CLASSIFICATION_TASK]`: [Optional] Image classification tasks are top-level `ClassLabel` feature.
+ `ClassLabel` is a Hugging Face feature that maps class indices to class names.
+ In above example we have two classification tasks:
+ - `Weather`: Classifies the weather conditions in the image, with possible values `Clear` and `Foggy`
+ - `Surface Conditions`: Classifies the surface conditions in the image, with possible values `Dry` and `Wet`
+ - `objects`: A dictionary containing information about objects in the image, including:
+ - `bbox`: Bounding boxes for each object, represented with a list of bounding box coordinates
+ `[xmin, ymin, bbox_width, bbox_height]`. Each bounding box is defined with a top-left corner coordinate
+ `(xmin, ymin)` and bounding box width and height `(bbox_width, bbox_height)` in pixels.
+ - `class_idx`: Class indices for each detected object. This is a
+ `ClassLabel` feature that maps to the `class_name` feature.
+ - `class_name`: Class names for each detected object
+ - `id`: Unique identifiers for each detected object
+
+ ### Dataset Locally vs. Training-aaS
  An important feature of `load_dataset` is that it will return the full dataset
- when loaded on the Hafnia platform.
+ when loaded with Training-aaS on the Hafnia platform.
+
  This enables seamlessly switching between running/validating a training script
  locally (on the sample dataset) and running full model trainings with Training-aaS (on the full dataset).
  without changing code or configurations for the training script.
@@ -135,12 +238,58 @@ with a dataloader that performs data augmentations and batching of the dataset a
  To support this, we have provided a torch dataloader example script
  [example_torchvision_dataloader.py](./examples/example_torchvision_dataloader.py).

- The script demonstrates how to make a dataloader with data augmentation (`torchvision.transforms.v2`)
- and a helper function for visualizing image and labels.
+ The script demonstrates how to load a dataset sample, apply data augmentations using
+ `torchvision.transforms.v2`, and visualize the dataset with `torch_helpers.draw_image_and_targets`.
+
+ Note also how `torch_helpers.TorchVisionCollateFn` is used in combination with the `DataLoader` from
+ `torch.utils.data` to handle the dataset's collate function.

  The dataloader and visualization function supports computer vision tasks
  and datasets available in the data library.

+ ```python
+ # Load Hugging Face dataset
+ dataset_splits = load_dataset("midwest-vehicle-detection")
+
+ # Define transforms
+ train_transforms = v2.Compose(
+ [
+ v2.RandomResizedCrop(size=(224, 224), antialias=True),
+ v2.RandomHorizontalFlip(p=0.5),
+ v2.ToDtype(torch.float32, scale=True),
+ v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+ )
+ test_transforms = v2.Compose(
+ [
+ v2.Resize(size=(224, 224), antialias=True),
+ v2.ToDtype(torch.float32, scale=True),
+ v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ ]
+ )
+
+ keep_metadata = True
+ train_dataset = torch_helpers.TorchvisionDataset(
+ dataset_splits["train"], transforms=train_transforms, keep_metadata=keep_metadata
+ )
+ test_dataset = torch_helpers.TorchvisionDataset(
+ dataset_splits["test"], transforms=test_transforms, keep_metadata=keep_metadata
+ )
+
+ # Visualize sample
+ image, targets = train_dataset[0]
+ visualize_image = torch_helpers.draw_image_and_targets(image=image, targets=targets)
+ pil_image = torchvision.transforms.functional.to_pil_image(visualize_image)
+ pil_image.save("visualized_labels.png")
+
+ # Create DataLoaders - using TorchVisionCollateFn
+ collate_fn = torch_helpers.TorchVisionCollateFn(
+ skip_stacking=["objects.bbox", "objects.class_idx"]
+ )
+ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
+ ```
+
+
  ## Example: Training-aaS
  By combining logging and dataset loading, we can now construct our model training recipe.

@@ -181,10 +330,10 @@ Install uv
  curl -LsSf https://astral.sh/uv/install.sh | sh
  ```

- Install python dependencies including developer (`--dev`) and optional dependencies (`--all-extras`).
+ Create virtual environment and install python dependencies

  ```bash
- uv sync --all-extras --dev
+ uv sync
  ```

  Run tests:

{hafnia-0.1.25 → hafnia-0.1.26}/docs/cli.md
@@ -28,8 +28,8 @@ A command-line interface tool for managing data science experiments and resource
  ### Experiment Management

  - `hafnia runc launch <task>` - Launch a job within the image
- - `hafnia runc build <recipe_url> [state_file] [ecr_repository] [image_name]` - Build docker image with a given recipe
- - `hafnia runc build-local <recipe> [state_file] [image_name]` - Build recipe from local path as image with prefix - localhost
+ - `hafnia runc build <recipe_url> --st [state_file] --repo [registry/repo]` - Build docker image with a given recipe
+ - `hafnia runc build-local <recipe> --st [state_file] --repo [registry/repo | localhost]` - Build recipe from local path as image with prefix - localhost

  ## Configuration

@@ -96,4 +96,5 @@ Available environment variables:
  - `MDI_API_KEY_SECRET_NAME` - Name of the AWS Secrets Manager secret containing the API key
  - `AWS_REGION` - AWS region for ECR and Secrets Manager operations
  - `RECIPE_DIR` - Directory containing recipe code (used by the `runc launch` command
- - `HAFNIA_CLOUD` – Allow emulate cloud behaviour
+ - `HAFNIA_CLOUD` – Allow emulate cloud behaviour
+ - `HAFNIA_LOG` – Allow changing log level for messages
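
The hunk above documents a new `HAFNIA_LOG` variable alongside `HAFNIA_CLOUD`. Below is a minimal sketch of setting these variables from Python before using the SDK; the values `DEBUG` and `true` are assumptions, since the accepted values are not shown anywhere in this diff.

```python
import os

# Assumed values -- the diff documents only the variable names, not the accepted values.
os.environ["HAFNIA_LOG"] = "DEBUG"   # raise log verbosity (assumed level name)
os.environ["HAFNIA_CLOUD"] = "true"  # emulate cloud behaviour locally (assumed flag value)

# Import after the environment is configured so any import-time setup sees the variables.
from hafnia.data import load_dataset

dataset_splits = load_dataset("mnist")
print(dataset_splits)
```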

{hafnia-0.1.25 → hafnia-0.1.26}/examples/dataset_builder.py
@@ -10,9 +10,8 @@ from pathlib import Path
  from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

  import datasets
- from datasets import ClassLabel, DatasetDict
-
  import mdi
+ from datasets import ClassLabel, DatasetDict

  # dataset = mdi.load_dataset("mnist")
  # dataset = mdi.load_dataset("[ADD SOME CUSTOM DATASET SPECIFICATION]")

hafnia-0.1.26/examples/example_load_dataset.py
@@ -0,0 +1,14 @@
+ import pprint
+
+ from hafnia.data import load_dataset
+
+ dataset = load_dataset("midwest-vehicle-detection")
+
+ # Print information on each dataset split
+ print(dataset)
+
+ # View features of the train split
+ pprint.pprint(dataset["train"].features)
+
+ # Print sample from the training set
+ pprint.pprint(dataset["train"][0])

{hafnia-0.1.25 → hafnia-0.1.26}/pyproject.toml
@@ -1,10 +1,13 @@
  [project]
  name = "hafnia"
- version = "0.1.25"
+ version = "0.1.26"
  description = "Python SDK for communication with Hafnia platform."
  readme = "README.md"
- authors = [{ name = "Milestone Systems", email = "hafniaplatform@milestone.dk" }]
+ authors = [
+ { name = "Milestone Systems", email = "hafniaplatform@milestone.dk" },
+ ]
  requires-python = ">=3.10"
+
  dependencies = [
  "boto3>=1.35.91",
  "click>=8.1.8",
@@ -24,38 +27,41 @@ dependencies = [
  dev = [
  "ipykernel>=6.29.5",
  "lark>=1.2.2",
- "pytest>=8.3.4",
- ]
- lint = [
- "pre-commit>=4.2.0",
- "ruff>=0.9.1",
+ "torch>=2.6.0",
+ "torchvision>=0.21.0",
+ "flatten-dict>=0.4.2",
  ]

- [tool.uv]
- default-groups = "all"
+ test = ["pytest>=8.3.4", "pre-commit>=4.2.0", "ruff>=0.9.1"]

  [project.scripts]
  hafnia = 'cli.__main__:main'

- [project.optional-dependencies]
- # Use "uv sync --extra torch" to install torch dependencies
- torch = [
- "torch>=2.6.0",
- "torchvision>=0.21.0",
- "flatten-dict>=0.4.2",
- ]
-
  [build-system]
  requires = ["hatchling"]
  build-backend = "hatchling.build"

+ [tool.hatch.build.targets.wheel]
+ packages = ["src/cli", "src/hafnia"]
+
+ [tool.uv]
+ default-groups = ["test"]
+
  [tool.ruff]
- select = ["I", "E", "F"]
- ignore = ["E501"]
+ lint.select = ["I", "E", "F"]
+ lint.ignore = ["E501"]
  line-length = 120

- [tool.ruff.isort]
+ [tool.ruff.lint.isort]
  relative-imports-order = "closest-to-furthest"

- [tool.hatch.build.targets.wheel]
- packages = ["src/cli", "src/hafnia"]
+ [tool.mypy]
+ ignore_missing_imports = true
+ check_untyped_defs = false
+ disallow_untyped_defs = false
+ disallow_incomplete_defs = false
+ disallow_untyped_calls = false
+ warn_unused_ignores = false
+
+ [tool.pytest.ini_options]
+ markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]
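
The new `[tool.pytest.ini_options]` table registers a `slow` marker, which the pre-commit `pytest` hook added earlier in this release deselects with `-m "not slow"`. A minimal sketch of how a test module might use that marker follows; the test names and bodies are hypothetical, not taken from the package's test suite.

```python
import time

import pytest


def test_fast_path():
    # Runs in the default pre-commit selection ("-m 'not slow'").
    assert 1 + 1 == 2


@pytest.mark.slow
def test_full_pipeline():
    # Deselected by "-m 'not slow'"; run explicitly with `pytest -m slow`.
    time.sleep(2)  # stand-in for an expensive end-to-end check
    assert True
```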

{hafnia-0.1.25 → hafnia-0.1.26}/src/cli/__main__.py
@@ -10,6 +10,7 @@ from cli.config import Config, ConfigSchema
  def main(ctx: click.Context) -> None:
  """Hafnia CLI."""
  ctx.obj = Config()
+ ctx.max_content_width = 120


  @main.command("configure")
@@ -17,9 +18,7 @@ def main(ctx: click.Context) -> None:
  def configure(cfg: Config) -> None:
  """Configure Hafnia CLI settings."""

- from hafnia.platform.api import get_organization_id
-
- profile_name = click.prompt("Profile Name", type=str, default="default")
+ profile_name = click.prompt("Profile Name", type=str, default=consts.DEFAULT_PROFILE_NAME)
  profile_name = profile_name.strip()
  try:
  cfg.add_profile(profile_name, ConfigSchema(), set_active=True)
@@ -32,12 +31,8 @@ def configure(cfg: Config) -> None:
  except ValueError as e:
  click.echo(f"Error: {str(e)}", err=True)
  return
- platform_url = click.prompt("Hafnia Platform URL", type=str, default="https://api.mdi.milestonesys.com")
+ platform_url = click.prompt("Hafnia Platform URL", type=str, default=consts.DEFAULT_API_URL)
  cfg.platform_url = platform_url.strip()
- try:
- cfg.organization_id = get_organization_id(cfg.get_platform_endpoint("organizations"), cfg.api_key)
- except Exception:
- raise click.ClickException(consts.ERROR_ORG_ID)
  cfg.save_config()
  profile_cmds.profile_show(cfg)

@@ -57,4 +52,4 @@ main.add_command(experiment_cmds.experiment)
  main.add_command(recipe_cmds.recipe)

  if __name__ == "__main__":
- main()
+ main(max_content_width=120)