hafnia 0.1.25__tar.gz → 0.1.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/ci_cd.yaml +1 -1
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/publish_docker.yaml +2 -2
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/tests.yaml +1 -1
- {hafnia-0.1.25 → hafnia-0.1.26}/.gitignore +1 -1
- hafnia-0.1.26/.pre-commit-config.yaml +32 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/PKG-INFO +157 -12
- {hafnia-0.1.25 → hafnia-0.1.26}/README.md +156 -7
- {hafnia-0.1.25 → hafnia-0.1.26}/docs/cli.md +4 -3
- {hafnia-0.1.25 → hafnia-0.1.26}/examples/dataset_builder.py +1 -2
- hafnia-0.1.26/examples/example_load_dataset.py +14 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/pyproject.toml +28 -22
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/__main__.py +4 -9
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/config.py +20 -27
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/consts.py +3 -1
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/experiment_cmds.py +3 -17
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/profile_cmds.py +16 -2
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/recipe_cmds.py +2 -6
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/runc_cmds.py +50 -49
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/data/factory.py +3 -3
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/experiment/hafnia_logger.py +5 -5
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/http.py +2 -2
- hafnia-0.1.26/src/hafnia/log.py +23 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/platform/__init__.py +0 -2
- hafnia-0.1.26/src/hafnia/platform/builder.py +144 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/platform/download.py +8 -8
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/platform/experiment.py +31 -25
- hafnia-0.1.26/src/hafnia/utils.py +137 -0
- hafnia-0.1.26/tests/test_builder.py +101 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/tests/test_check_example_scripts.py +1 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/tests/test_cli.py +8 -28
- {hafnia-0.1.25 → hafnia-0.1.26}/tests/test_samples.py +4 -0
- hafnia-0.1.26/tests/test_utils.py +84 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/uv.lock +11 -16
- hafnia-0.1.25/.pre-commit-config.yaml +0 -16
- hafnia-0.1.25/examples/example_load_dataset.py +0 -4
- hafnia-0.1.25/src/hafnia/log.py +0 -32
- hafnia-0.1.25/src/hafnia/platform/api.py +0 -12
- hafnia-0.1.25/src/hafnia/platform/builder.py +0 -184
- hafnia-0.1.25/src/hafnia/platform/executor.py +0 -111
- hafnia-0.1.25/src/hafnia/utils.py +0 -135
- hafnia-0.1.25/tests/test_builder.py +0 -198
- hafnia-0.1.25/tests/test_executor.py +0 -84
- {hafnia-0.1.25 → hafnia-0.1.26}/.devcontainer/devcontainer.json +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.devcontainer/hooks/post_create +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/dependabot.yaml +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/Dockerfile +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/build.yaml +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/check_release.yaml +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/lint.yaml +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/publish_pypi.yaml +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.python-version +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.vscode/extensions.json +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.vscode/launch.json +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/.vscode/settings.json +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/LICENSE +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/docs/release.md +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/examples/example_logger.py +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/examples/example_torchvision_dataloader.py +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/__init__.py +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/src/cli/data_cmds.py +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/__init__.py +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/data/__init__.py +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/experiment/__init__.py +0 -0
- {hafnia-0.1.25 → hafnia-0.1.26}/src/hafnia/torch_helpers.py +0 -0
- /hafnia-0.1.25/tests/test_mdi_logger.py → /hafnia-0.1.26/tests/test_hafnia_logger.py +0 -0
{hafnia-0.1.25 → hafnia-0.1.26}/.github/workflows/publish_docker.yaml

@@ -47,7 +47,7 @@ jobs:
             echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
           fi
       - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4.1
+        uses: aws-actions/configure-aws-credentials@v4.2.1
         with:
           role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
          aws-region: ${{ steps.env-vars.outputs.aws_region }}
@@ -60,7 +60,7 @@ jobs:
         uses: docker/setup-buildx-action@v3.10.0
 
       - name: Build and push
-        uses: docker/build-push-action@v6.
+        uses: docker/build-push-action@v6.18.0
         env:
           ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
           ECR_REPOSITORY: mdi-runtime

hafnia-0.1.26/.pre-commit-config.yaml

@@ -0,0 +1,32 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.11.13
+    hooks:
+      - id: ruff-check
+        types_or: [python, pyi]
+        args: [--show-fixes]
+        files: ^(src|tests)/
+      - id: ruff-format
+        types_or: [python, pyi]
+        files: ^(src|tests)/
+  - repo: https://github.com/astral-sh/uv-pre-commit
+    rev: 0.7.13
+    hooks:
+      # Update the uv lockfile
+      - id: uv-lock
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.16.0
+    hooks:
+      - id: mypy
+        args: ["src/", "tests/"]
+        pass_filenames: false
+  - repo: local
+    hooks:
+      - id: pytest
+        stages: [manual]
+        name: pytest
+        entry: pytest
+        language: system
+        types: [python]
+        pass_filenames: false
+        args: ["-m", "not slow", "--tb=short", "-q"]

{hafnia-0.1.25 → hafnia-0.1.26}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.1.25
+Version: 0.1.26
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -17,10 +17,6 @@ Requires-Dist: pydantic>=2.10.4
 Requires-Dist: rich>=13.9.4
 Requires-Dist: seedir>=0.5.0
 Requires-Dist: tqdm>=4.67.1
-Provides-Extra: torch
-Requires-Dist: flatten-dict>=0.4.2; extra == 'torch'
-Requires-Dist: torch>=2.6.0; extra == 'torch'
-Requires-Dist: torchvision>=0.21.0; extra == 'torch'
 Description-Content-Type: text/markdown
 
 # Hafnia
@@ -91,14 +87,117 @@ and explore the dataset sample with a python script:
 from hafnia.data import load_dataset
 
 dataset_splits = load_dataset("mnist")
-print(dataset_splits)
-print(dataset_splits["train"])
 ```
+
+### Dataset Format
 The returned sample dataset is a [hugging face dataset](https://huggingface.co/docs/datasets/index)
 and contains train, validation and test splits.
 
+```python
+print(dataset_splits)
+
+# Output:
+>>> DatasetDict({
+    train: Dataset({
+        features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+        num_rows: 172
+    })
+    validation: Dataset({
+        features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+        num_rows: 21
+    })
+    test: Dataset({
+        features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+        num_rows: 21
+    })
+})
+
+```
+
+A Hugging Face dataset is a dictionary with splits, where each split is a `Dataset` object.
+Each `Dataset` is structured as a table with a set of columns (also called features) and a row for each sample.
+
+The features of the dataset can be viewed with the `features` attribute.
+```python
+# View features of the train split
+pprint.pprint(dataset["train"].features)
+{'Surface Conditions': ClassLabel(names=['Dry', 'Wet'], id=None),
+ 'Weather': ClassLabel(names=['Clear', 'Foggy'], id=None),
+ 'height': Value(dtype='int64', id=None),
+ 'image': Image(mode=None, decode=True, id=None),
+ 'image_id': Value(dtype='int64', id=None),
+ 'objects': Sequence(feature={'bbox': Sequence(feature=Value(dtype='int64',
+                                                             id=None),
+                                               length=-1,
+                                               id=None),
+                              'class_idx': ClassLabel(names=['Vehicle.Bicycle',
+                                                             'Vehicle.Motorcycle',
+                                                             'Vehicle.Car',
+                                                             'Vehicle.Van',
+                                                             'Vehicle.RV',
+                                                             'Vehicle.Single_Truck',
+                                                             'Vehicle.Combo_Truck',
+                                                             'Vehicle.Pickup_Truck',
+                                                             'Vehicle.Trailer',
+                                                             'Vehicle.Emergency_Vehicle',
+                                                             'Vehicle.Bus',
+                                                             'Vehicle.Heavy_Duty_Vehicle'],
+                                                      id=None),
+                              'class_name': Value(dtype='string', id=None),
+                              'id': Value(dtype='string', id=None)},
+                     length=-1,
+                     id=None),
+ 'width': Value(dtype='int64', id=None)}
+```
+
+View the first sample in the training set:
+```python
+# Print sample from the training set
+pprint.pprint(dataset["train"][0])
+
+{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1920x1080 at 0x79D6292C5ED0>,
+ 'image_id': 4920,
+ 'height': 1080,
+ 'Weather': 0,
+ 'Surface Conditions': 0,
+ 'objects': {'bbox': [[441, 180, 121, 126],
+                      [549, 151, 131, 103],
+                      [1845, 722, 68, 130],
+                      [1810, 571, 110, 149]],
+             'class_idx': [7, 7, 2, 2],
+             'class_name': ['Vehicle.Pickup_Truck',
+                            'Vehicle.Pickup_Truck',
+                            'Vehicle.Car',
+                            'Vehicle.Car'],
+             'id': ['HW6WiLAJ', 'T/ccFpRi', 'CS0O8B6W', 'DKrJGzjp']},
+ 'width': 1920}
+
+```
+
+For hafnia based datasets, we want to standardized how a dataset and dataset tasks are represented.
+We have defined a set of features that are common across all datasets in the Hafnia data library.
+
+- `image`: The image itself, stored as a PIL image
+- `height`: The height of the image in pixels
+- `width`: The width of the image in pixels
+- `[IMAGE_CLASSIFICATION_TASK]`: [Optional] Image classification tasks are top-level `ClassLabel` feature.
+  `ClassLabel` is a Hugging Face feature that maps class indices to class names.
+  In above example we have two classification tasks:
+  - `Weather`: Classifies the weather conditions in the image, with possible values `Clear` and `Foggy`
+  - `Surface Conditions`: Classifies the surface conditions in the image, with possible values `Dry` and `Wet`
+- `objects`: A dictionary containing information about objects in the image, including:
+  - `bbox`: Bounding boxes for each object, represented with a list of bounding box coordinates
+    `[xmin, ymin, bbox_width, bbox_height]`. Each bounding box is defined with a top-left corner coordinate
+    `(xmin, ymin)` and bounding box width and height `(bbox_width, bbox_height)` in pixels.
+  - `class_idx`: Class indices for each detected object. This is a
+    `ClassLabel` feature that maps to the `class_name` feature.
+  - `class_name`: Class names for each detected object
+  - `id`: Unique identifiers for each detected object
+
+### Dataset Locally vs. Training-aaS
 An important feature of `load_dataset` is that it will return the full dataset
-when loaded on the Hafnia platform.
+when loaded with Training-aaS on the Hafnia platform.
+
 This enables seamlessly switching between running/validating a training script
 locally (on the sample dataset) and running full model trainings with Training-aaS (on the full dataset).
 without changing code or configurations for the training script.
@@ -160,12 +259,58 @@ with a dataloader that performs data augmentations and batching of the dataset a
 To support this, we have provided a torch dataloader example script
 [example_torchvision_dataloader.py](./examples/example_torchvision_dataloader.py).
 
-The script demonstrates how to
-and
+The script demonstrates how to load a dataset sample, apply data augmentations using
+`torchvision.transforms.v2`, and visualize the dataset with `torch_helpers.draw_image_and_targets`.
+
+Note also how `torch_helpers.TorchVisionCollateFn` is used in combination with the `DataLoader` from
+`torch.utils.data` to handle the dataset's collate function.
 
 The dataloader and visualization function supports computer vision tasks
 and datasets available in the data library.
 
+```python
+# Load Hugging Face dataset
+dataset_splits = load_dataset("midwest-vehicle-detection")
+
+# Define transforms
+train_transforms = v2.Compose(
+    [
+        v2.RandomResizedCrop(size=(224, 224), antialias=True),
+        v2.RandomHorizontalFlip(p=0.5),
+        v2.ToDtype(torch.float32, scale=True),
+        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+test_transforms = v2.Compose(
+    [
+        v2.Resize(size=(224, 224), antialias=True),
+        v2.ToDtype(torch.float32, scale=True),
+        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+
+keep_metadata = True
+train_dataset = torch_helpers.TorchvisionDataset(
+    dataset_splits["train"], transforms=train_transforms, keep_metadata=keep_metadata
+)
+test_dataset = torch_helpers.TorchvisionDataset(
+    dataset_splits["test"], transforms=test_transforms, keep_metadata=keep_metadata
+)
+
+# Visualize sample
+image, targets = train_dataset[0]
+visualize_image = torch_helpers.draw_image_and_targets(image=image, targets=targets)
+pil_image = torchvision.transforms.functional.to_pil_image(visualize_image)
+pil_image.save("visualized_labels.png")
+
+# Create DataLoaders - using TorchVisionCollateFn
+collate_fn = torch_helpers.TorchVisionCollateFn(
+    skip_stacking=["objects.bbox", "objects.class_idx"]
+)
+train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
+```
+
+
 ## Example: Training-aaS
 By combining logging and dataset loading, we can now construct our model training recipe.
 
@@ -206,10 +351,10 @@ Install uv
 curl -LsSf https://astral.sh/uv/install.sh | sh
 ```
 
-
+Create virtual environment and install python dependencies
 
 ```bash
-uv sync
+uv sync
 ```
 
 Run tests:

{hafnia-0.1.25 → hafnia-0.1.26}/README.md

@@ -66,14 +66,117 @@ and explore the dataset sample with a python script:
 from hafnia.data import load_dataset
 
 dataset_splits = load_dataset("mnist")
-print(dataset_splits)
-print(dataset_splits["train"])
 ```
+
+### Dataset Format
 The returned sample dataset is a [hugging face dataset](https://huggingface.co/docs/datasets/index)
 and contains train, validation and test splits.
 
+```python
+print(dataset_splits)
+
+# Output:
+>>> DatasetDict({
+    train: Dataset({
+        features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+        num_rows: 172
+    })
+    validation: Dataset({
+        features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+        num_rows: 21
+    })
+    test: Dataset({
+        features: ['image_id', 'image', 'height', 'width', 'objects', 'Weather', 'Surface Conditions'],
+        num_rows: 21
+    })
+})
+
+```
+
+A Hugging Face dataset is a dictionary with splits, where each split is a `Dataset` object.
+Each `Dataset` is structured as a table with a set of columns (also called features) and a row for each sample.
+
+The features of the dataset can be viewed with the `features` attribute.
+```python
+# View features of the train split
+pprint.pprint(dataset["train"].features)
+{'Surface Conditions': ClassLabel(names=['Dry', 'Wet'], id=None),
+ 'Weather': ClassLabel(names=['Clear', 'Foggy'], id=None),
+ 'height': Value(dtype='int64', id=None),
+ 'image': Image(mode=None, decode=True, id=None),
+ 'image_id': Value(dtype='int64', id=None),
+ 'objects': Sequence(feature={'bbox': Sequence(feature=Value(dtype='int64',
+                                                             id=None),
+                                               length=-1,
+                                               id=None),
+                              'class_idx': ClassLabel(names=['Vehicle.Bicycle',
+                                                             'Vehicle.Motorcycle',
+                                                             'Vehicle.Car',
+                                                             'Vehicle.Van',
+                                                             'Vehicle.RV',
+                                                             'Vehicle.Single_Truck',
+                                                             'Vehicle.Combo_Truck',
+                                                             'Vehicle.Pickup_Truck',
+                                                             'Vehicle.Trailer',
+                                                             'Vehicle.Emergency_Vehicle',
+                                                             'Vehicle.Bus',
+                                                             'Vehicle.Heavy_Duty_Vehicle'],
+                                                      id=None),
+                              'class_name': Value(dtype='string', id=None),
+                              'id': Value(dtype='string', id=None)},
+                     length=-1,
+                     id=None),
+ 'width': Value(dtype='int64', id=None)}
+```
+
+View the first sample in the training set:
+```python
+# Print sample from the training set
+pprint.pprint(dataset["train"][0])
+
+{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1920x1080 at 0x79D6292C5ED0>,
+ 'image_id': 4920,
+ 'height': 1080,
+ 'Weather': 0,
+ 'Surface Conditions': 0,
+ 'objects': {'bbox': [[441, 180, 121, 126],
+                      [549, 151, 131, 103],
+                      [1845, 722, 68, 130],
+                      [1810, 571, 110, 149]],
+             'class_idx': [7, 7, 2, 2],
+             'class_name': ['Vehicle.Pickup_Truck',
+                            'Vehicle.Pickup_Truck',
+                            'Vehicle.Car',
+                            'Vehicle.Car'],
+             'id': ['HW6WiLAJ', 'T/ccFpRi', 'CS0O8B6W', 'DKrJGzjp']},
+ 'width': 1920}
+
+```
+
+For hafnia based datasets, we want to standardized how a dataset and dataset tasks are represented.
+We have defined a set of features that are common across all datasets in the Hafnia data library.
+
+- `image`: The image itself, stored as a PIL image
+- `height`: The height of the image in pixels
+- `width`: The width of the image in pixels
+- `[IMAGE_CLASSIFICATION_TASK]`: [Optional] Image classification tasks are top-level `ClassLabel` feature.
+  `ClassLabel` is a Hugging Face feature that maps class indices to class names.
+  In above example we have two classification tasks:
+  - `Weather`: Classifies the weather conditions in the image, with possible values `Clear` and `Foggy`
+  - `Surface Conditions`: Classifies the surface conditions in the image, with possible values `Dry` and `Wet`
+- `objects`: A dictionary containing information about objects in the image, including:
+  - `bbox`: Bounding boxes for each object, represented with a list of bounding box coordinates
+    `[xmin, ymin, bbox_width, bbox_height]`. Each bounding box is defined with a top-left corner coordinate
+    `(xmin, ymin)` and bounding box width and height `(bbox_width, bbox_height)` in pixels.
+  - `class_idx`: Class indices for each detected object. This is a
+    `ClassLabel` feature that maps to the `class_name` feature.
+  - `class_name`: Class names for each detected object
+  - `id`: Unique identifiers for each detected object
+
+### Dataset Locally vs. Training-aaS
 An important feature of `load_dataset` is that it will return the full dataset
-when loaded on the Hafnia platform.
+when loaded with Training-aaS on the Hafnia platform.
+
 This enables seamlessly switching between running/validating a training script
 locally (on the sample dataset) and running full model trainings with Training-aaS (on the full dataset).
 without changing code or configurations for the training script.
@@ -135,12 +238,58 @@ with a dataloader that performs data augmentations and batching of the dataset a
 To support this, we have provided a torch dataloader example script
 [example_torchvision_dataloader.py](./examples/example_torchvision_dataloader.py).
 
-The script demonstrates how to
-and
+The script demonstrates how to load a dataset sample, apply data augmentations using
+`torchvision.transforms.v2`, and visualize the dataset with `torch_helpers.draw_image_and_targets`.
+
+Note also how `torch_helpers.TorchVisionCollateFn` is used in combination with the `DataLoader` from
+`torch.utils.data` to handle the dataset's collate function.
 
 The dataloader and visualization function supports computer vision tasks
 and datasets available in the data library.
 
+```python
+# Load Hugging Face dataset
+dataset_splits = load_dataset("midwest-vehicle-detection")
+
+# Define transforms
+train_transforms = v2.Compose(
+    [
+        v2.RandomResizedCrop(size=(224, 224), antialias=True),
+        v2.RandomHorizontalFlip(p=0.5),
+        v2.ToDtype(torch.float32, scale=True),
+        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+test_transforms = v2.Compose(
+    [
+        v2.Resize(size=(224, 224), antialias=True),
+        v2.ToDtype(torch.float32, scale=True),
+        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+
+keep_metadata = True
+train_dataset = torch_helpers.TorchvisionDataset(
+    dataset_splits["train"], transforms=train_transforms, keep_metadata=keep_metadata
+)
+test_dataset = torch_helpers.TorchvisionDataset(
+    dataset_splits["test"], transforms=test_transforms, keep_metadata=keep_metadata
+)
+
+# Visualize sample
+image, targets = train_dataset[0]
+visualize_image = torch_helpers.draw_image_and_targets(image=image, targets=targets)
+pil_image = torchvision.transforms.functional.to_pil_image(visualize_image)
+pil_image.save("visualized_labels.png")
+
+# Create DataLoaders - using TorchVisionCollateFn
+collate_fn = torch_helpers.TorchVisionCollateFn(
+    skip_stacking=["objects.bbox", "objects.class_idx"]
+)
+train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
+```
+
+
 ## Example: Training-aaS
 By combining logging and dataset loading, we can now construct our model training recipe.
 
@@ -181,10 +330,10 @@ Install uv
 curl -LsSf https://astral.sh/uv/install.sh | sh
 ```
 
-
+Create virtual environment and install python dependencies
 
 ```bash
-uv sync
+uv sync
 ```
 
 Run tests:

{hafnia-0.1.25 → hafnia-0.1.26}/docs/cli.md

@@ -28,8 +28,8 @@ A command-line interface tool for managing data science experiments and resource
 ### Experiment Management
 
 - `hafnia runc launch <task>` - Launch a job within the image
-- `hafnia runc build <recipe_url> [state_file]
-- `hafnia runc build-local <recipe> [state_file] [
+- `hafnia runc build <recipe_url> --st [state_file] --repo [registry/repo]` - Build docker image with a given recipe
+- `hafnia runc build-local <recipe> --st [state_file] --repo [registry/repo | localhost]` - Build recipe from local path as image with prefix - localhost
 
 ## Configuration
 
@@ -96,4 +96,5 @@ Available environment variables:
 - `MDI_API_KEY_SECRET_NAME` - Name of the AWS Secrets Manager secret containing the API key
 - `AWS_REGION` - AWS region for ECR and Secrets Manager operations
 - `RECIPE_DIR` - Directory containing recipe code (used by the `runc launch` command
-- `HAFNIA_CLOUD` – Allow emulate cloud behaviour
+- `HAFNIA_CLOUD` – Allow emulate cloud behaviour
+- `HAFNIA_LOG` – Allow changing log level for messages

{hafnia-0.1.25 → hafnia-0.1.26}/examples/dataset_builder.py

@@ -10,9 +10,8 @@ from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 
 import datasets
-from datasets import ClassLabel, DatasetDict
-
 import mdi
+from datasets import ClassLabel, DatasetDict
 
 # dataset = mdi.load_dataset("mnist")
 # dataset = mdi.load_dataset("[ADD SOME CUSTOM DATASET SPECIFICATION]")

hafnia-0.1.26/examples/example_load_dataset.py

@@ -0,0 +1,14 @@
+import pprint
+
+from hafnia.data import load_dataset
+
+dataset = load_dataset("midwest-vehicle-detection")
+
+# Print information on each dataset split
+print(dataset)
+
+# View features of the train split
+pprint.pprint(dataset["train"].features)
+
+# Print sample from the training set
+pprint.pprint(dataset["train"][0])

{hafnia-0.1.25 → hafnia-0.1.26}/pyproject.toml

@@ -1,10 +1,13 @@
 [project]
 name = "hafnia"
-version = "0.1.25"
+version = "0.1.26"
 description = "Python SDK for communication with Hafnia platform."
 readme = "README.md"
-authors = [
+authors = [
+    { name = "Milestone Systems", email = "hafniaplatform@milestone.dk" },
+]
 requires-python = ">=3.10"
+
 dependencies = [
     "boto3>=1.35.91",
     "click>=8.1.8",
@@ -24,38 +27,41 @@ dependencies = [
 dev = [
     "ipykernel>=6.29.5",
     "lark>=1.2.2",
-    "
-
-
-    "pre-commit>=4.2.0",
-    "ruff>=0.9.1",
+    "torch>=2.6.0",
+    "torchvision>=0.21.0",
+    "flatten-dict>=0.4.2",
 ]
 
-[
-default-groups = "all"
+test = ["pytest>=8.3.4", "pre-commit>=4.2.0", "ruff>=0.9.1"]
 
 [project.scripts]
 hafnia = 'cli.__main__:main'
 
-[project.optional-dependencies]
-# Use "uv sync --extra torch" to install torch dependencies
-torch = [
-    "torch>=2.6.0",
-    "torchvision>=0.21.0",
-    "flatten-dict>=0.4.2",
-]
-
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
 
+[tool.hatch.build.targets.wheel]
+packages = ["src/cli", "src/hafnia"]
+
+[tool.uv]
+default-groups = ["test"]
+
 [tool.ruff]
-select = ["I", "E", "F"]
-ignore = ["E501"]
+lint.select = ["I", "E", "F"]
+lint.ignore = ["E501"]
 line-length = 120
 
-[tool.ruff.isort]
+[tool.ruff.lint.isort]
 relative-imports-order = "closest-to-furthest"
 
-[tool.
-
+[tool.mypy]
+ignore_missing_imports = true
+check_untyped_defs = false
+disallow_untyped_defs = false
+disallow_incomplete_defs = false
+disallow_untyped_calls = false
+warn_unused_ignores = false
+
+[tool.pytest.ini_options]
+markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"]

{hafnia-0.1.25 → hafnia-0.1.26}/src/cli/__main__.py

@@ -10,6 +10,7 @@ from cli.config import Config, ConfigSchema
 def main(ctx: click.Context) -> None:
     """Hafnia CLI."""
     ctx.obj = Config()
+    ctx.max_content_width = 120
 
 
 @main.command("configure")
@@ -17,9 +18,7 @@ def main(ctx: click.Context) -> None:
 def configure(cfg: Config) -> None:
     """Configure Hafnia CLI settings."""
 
-
-
-    profile_name = click.prompt("Profile Name", type=str, default="default")
+    profile_name = click.prompt("Profile Name", type=str, default=consts.DEFAULT_PROFILE_NAME)
     profile_name = profile_name.strip()
     try:
         cfg.add_profile(profile_name, ConfigSchema(), set_active=True)
@@ -32,12 +31,8 @@ def configure(cfg: Config) -> None:
     except ValueError as e:
         click.echo(f"Error: {str(e)}", err=True)
         return
-    platform_url = click.prompt("Hafnia Platform URL", type=str, default=
+    platform_url = click.prompt("Hafnia Platform URL", type=str, default=consts.DEFAULT_API_URL)
     cfg.platform_url = platform_url.strip()
-    try:
-        cfg.organization_id = get_organization_id(cfg.get_platform_endpoint("organizations"), cfg.api_key)
-    except Exception:
-        raise click.ClickException(consts.ERROR_ORG_ID)
     cfg.save_config()
     profile_cmds.profile_show(cfg)
 
@@ -57,4 +52,4 @@ main.add_command(experiment_cmds.experiment)
 main.add_command(recipe_cmds.recipe)
 
 if __name__ == "__main__":
-    main()
+    main(max_content_width=120)