hafnia-0.1.24.tar.gz → hafnia-0.1.25.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hafnia-0.1.24 → hafnia-0.1.25}/.pre-commit-config.yaml +1 -1
- {hafnia-0.1.24 → hafnia-0.1.25}/.vscode/launch.json +26 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/LICENSE +1 -1
- {hafnia-0.1.24 → hafnia-0.1.25}/PKG-INFO +32 -11
- {hafnia-0.1.24 → hafnia-0.1.25}/README.md +26 -8
- {hafnia-0.1.24 → hafnia-0.1.25}/docs/cli.md +2 -1
- {hafnia-0.1.24 → hafnia-0.1.25}/pyproject.toml +6 -3
- {hafnia-0.1.24 → hafnia-0.1.25}/src/cli/__main__.py +2 -1
- {hafnia-0.1.24 → hafnia-0.1.25}/src/cli/consts.py +1 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/cli/data_cmds.py +5 -6
- {hafnia-0.1.24 → hafnia-0.1.25}/src/cli/experiment_cmds.py +0 -26
- hafnia-0.1.25/src/cli/recipe_cmds.py +49 -0
- hafnia-0.1.25/src/cli/runc_cmds.py +143 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/data/factory.py +10 -29
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/experiment/hafnia_logger.py +6 -2
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/platform/builder.py +5 -5
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/platform/executor.py +6 -6
- hafnia-0.1.25/src/hafnia/utils.py +135 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/tests/test_builder.py +84 -4
- {hafnia-0.1.24 → hafnia-0.1.25}/tests/test_executor.py +1 -1
- {hafnia-0.1.24 → hafnia-0.1.25}/tests/test_mdi_logger.py +13 -6
- {hafnia-0.1.24 → hafnia-0.1.25}/uv.lock +1254 -1209
- hafnia-0.1.24/docs/s2m.md +0 -84
- hafnia-0.1.24/examples/script2model/pytorch/Dockerfile +0 -10
- hafnia-0.1.24/examples/script2model/pytorch/src/lib/train_utils.py +0 -252
- hafnia-0.1.24/examples/script2model/pytorch/src/scripts/train.py +0 -60
- hafnia-0.1.24/src/cli/runc_cmds.py +0 -68
- hafnia-0.1.24/src/hafnia/utils.py +0 -83
- {hafnia-0.1.24 → hafnia-0.1.25}/.devcontainer/devcontainer.json +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.devcontainer/hooks/post_create +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/dependabot.yaml +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/workflows/Dockerfile +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/workflows/build.yaml +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/workflows/check_release.yaml +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/workflows/ci_cd.yaml +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/workflows/lint.yaml +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/workflows/publish_docker.yaml +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/workflows/publish_pypi.yaml +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.github/workflows/tests.yaml +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.gitignore +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.python-version +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.vscode/extensions.json +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/.vscode/settings.json +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/docs/release.md +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/examples/dataset_builder.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/examples/example_load_dataset.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/examples/example_logger.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/examples/example_torchvision_dataloader.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/cli/__init__.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/cli/config.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/cli/profile_cmds.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/__init__.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/data/__init__.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/experiment/__init__.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/http.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/log.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/platform/__init__.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/platform/api.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/platform/download.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/platform/experiment.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/torch_helpers.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/tests/test_check_example_scripts.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/tests/test_cli.py +0 -0
- {hafnia-0.1.24 → hafnia-0.1.25}/tests/test_samples.py +0 -0
{hafnia-0.1.24 → hafnia-0.1.25}/.vscode/launch.json

@@ -21,6 +21,32 @@
         "ls"
       ],
     },
+    {
+      "name": "cmd: hafnia runc launch-local",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${workspaceFolder}/src/cli/__main__.py",
+      "args": [
+        "runc",
+        "launch-local",
+        "--dataset",
+        "midwest-vehicle-detection-tiny",
+        "train --config-name yolov4-hafnia.yaml"
+      ],
+    },
+    {
+      "name": "cmd: hafnia runc build-local",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${workspaceFolder}/src/cli/__main__.py",
+      "args": [
+        "runc",
+        "build-local",
+        "train",
+        "--dataset",
+        "mnist",
+      ],
+    },
     {
       "name": "debug (hafnia data download mnist)",
       "type": "debugpy",

{hafnia-0.1.24 → hafnia-0.1.25}/PKG-INFO

@@ -1,18 +1,21 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.1.24
-Summary: Python
-Author-email:
+Version: 0.1.25
+Summary: Python SDK for communication with Hafnia platform.
+Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
 Requires-Python: >=3.10
 Requires-Dist: boto3>=1.35.91
 Requires-Dist: click>=8.1.8
 Requires-Dist: datasets>=3.2.0
+Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
+Requires-Dist: pathspec>=0.12.1
 Requires-Dist: pillow>=11.1.0
 Requires-Dist: pyarrow>=18.1.0
 Requires-Dist: pydantic>=2.10.4
 Requires-Dist: rich>=13.9.4
+Requires-Dist: seedir>=0.5.0
 Requires-Dist: tqdm>=4.67.1
 Provides-Extra: torch
 Requires-Dist: flatten-dict>=0.4.2; extra == 'torch'

@@ -167,31 +170,49 @@ and datasets available in the data library.
 By combining logging and dataset loading, we can now construct our model training recipe.
 
 To demonstrate this, we have provided a recipe project that serves as a template for creating and structuring training recipes
-[recipe-classification](https://github.com/
+[recipe-classification](https://github.com/milestone-hafnia/recipe-classification)
 
 The project also contains additional information on how to structure your training recipe, use the `HafniaLogger`, the `load_dataset` function and different approach for launching
 the training recipe on the Hafnia platform.
 
+
+## Create, Build and Run `recipe.zip` locally
+In order to test recipe compatibility with Hafnia cloud use the following command to build and
+start the job locally.
+
+```bash
+# Create 'recipe.zip' from source folder '.'
+hafnia recipe create .
+
+# Build the docker image locally from a 'recipe.zip' file
+hafnia runc build-local recipe.zip
+
+# Execute the docker image locally with a desired dataset
+hafnia runc launch-local --dataset mnist "python scripts/train.py"
+```
+
 ## Detailed Documentation
 For more information, go to our [documentation page](https://hafnia.readme.io/docs/welcome-to-hafnia)
 or in below markdown pages.
 
 - [CLI](docs/cli.md) - Detailed guide for the Hafnia command-line interface
-- [Script2Model Documentation](docs/s2m.md) - Detailed guide for script2model
 - [Release lifecycle](docs/release.md) - Details about package release lifecycle.
 
 ## Development
 For development, we are using an uv based virtual python environment.
 
 Install uv
-
-
-
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
 
 Install python dependencies including developer (`--dev`) and optional dependencies (`--all-extras`).
 
-
+```bash
+uv sync --all-extras --dev
+```
 
 Run tests:
-
-
+```bash
+uv run pytest tests
+```

{hafnia-0.1.24 → hafnia-0.1.25}/README.md

@@ -145,31 +145,49 @@ and datasets available in the data library.
 By combining logging and dataset loading, we can now construct our model training recipe.
 
 To demonstrate this, we have provided a recipe project that serves as a template for creating and structuring training recipes
-[recipe-classification](https://github.com/
+[recipe-classification](https://github.com/milestone-hafnia/recipe-classification)
 
 The project also contains additional information on how to structure your training recipe, use the `HafniaLogger`, the `load_dataset` function and different approach for launching
 the training recipe on the Hafnia platform.
 
+
+## Create, Build and Run `recipe.zip` locally
+In order to test recipe compatibility with Hafnia cloud use the following command to build and
+start the job locally.
+
+```bash
+# Create 'recipe.zip' from source folder '.'
+hafnia recipe create .
+
+# Build the docker image locally from a 'recipe.zip' file
+hafnia runc build-local recipe.zip
+
+# Execute the docker image locally with a desired dataset
+hafnia runc launch-local --dataset mnist "python scripts/train.py"
+```
+
 ## Detailed Documentation
 For more information, go to our [documentation page](https://hafnia.readme.io/docs/welcome-to-hafnia)
 or in below markdown pages.
 
 - [CLI](docs/cli.md) - Detailed guide for the Hafnia command-line interface
-- [Script2Model Documentation](docs/s2m.md) - Detailed guide for script2model
 - [Release lifecycle](docs/release.md) - Details about package release lifecycle.
 
 ## Development
 For development, we are using an uv based virtual python environment.
 
 Install uv
-
-
-
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
 
 Install python dependencies including developer (`--dev`) and optional dependencies (`--all-extras`).
 
-
+```bash
+uv sync --all-extras --dev
+```
 
 Run tests:
-
-
+```bash
+uv run pytest tests
+```

{hafnia-0.1.24 → hafnia-0.1.25}/docs/cli.md

@@ -95,4 +95,5 @@ Available environment variables:
 - `MDI_CONFIG_PATH` - Custom path to the configuration file
 - `MDI_API_KEY_SECRET_NAME` - Name of the AWS Secrets Manager secret containing the API key
 - `AWS_REGION` - AWS region for ECR and Secrets Manager operations
-- `RECIPE_DIR` - Directory containing recipe code (used by the `runc launch` command
+- `RECIPE_DIR` - Directory containing recipe code (used by the `runc launch` command
+- `HAFNIA_CLOUD` – Allow emulate cloud behaviour

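The new `HAFNIA_CLOUD` variable is the switch that `hafnia runc launch-local` sets inside the container (see `runc_cmds.py` further down in this diff). A minimal sketch of how a training script could react to it; the helper name is hypothetical, and the mount-point default is taken from the variables shown elsewhere in this diff:

```python
import os
from pathlib import Path


def resolve_training_data_dir() -> Path:
    # Hypothetical helper: when HAFNIA_CLOUD=true (set by `hafnia runc launch-local`),
    # read data from the container mount point used in this diff; otherwise use a local folder.
    if os.getenv("HAFNIA_CLOUD", "false").lower() == "true":
        return Path(os.getenv("MDI_DATASET_DIR", "/opt/ml/input/data/training"))
    return Path("./data")
```
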
{hafnia-0.1.24 → hafnia-0.1.25}/pyproject.toml

@@ -1,19 +1,22 @@
 [project]
 name = "hafnia"
-version = "0.1.24"
-description = "Python
+version = "0.1.25"
+description = "Python SDK for communication with Hafnia platform."
 readme = "README.md"
-authors = [{ name = "
+authors = [{ name = "Milestone Systems", email = "hafniaplatform@milestone.dk" }]
 requires-python = ">=3.10"
 dependencies = [
     "boto3>=1.35.91",
     "click>=8.1.8",
     "datasets>=3.2.0",
+    "emoji>=2.14.1",
     "flatten-dict>=0.4.2",
+    "pathspec>=0.12.1",
     "pillow>=11.1.0",
     "pyarrow>=18.1.0",
     "pydantic>=2.10.4",
     "rich>=13.9.4",
+    "seedir>=0.5.0",
     "tqdm>=4.67.1",
 ]
 
{hafnia-0.1.24 → hafnia-0.1.25}/src/cli/__main__.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import click
 
-from cli import consts, data_cmds, experiment_cmds, profile_cmds, runc_cmds
+from cli import consts, data_cmds, experiment_cmds, profile_cmds, recipe_cmds, runc_cmds
 from cli.config import Config, ConfigSchema
 
 

@@ -54,6 +54,7 @@ main.add_command(profile_cmds.profile)
 main.add_command(data_cmds.data)
 main.add_command(runc_cmds.runc)
 main.add_command(experiment_cmds.experiment)
+main.add_command(recipe_cmds.recipe)
 
 if __name__ == "__main__":
     main()

{hafnia-0.1.24 → hafnia-0.1.25}/src/cli/consts.py

@@ -8,6 +8,7 @@ ERROR_CREATE_PROFILE: str = "Failed to create profile. Profile name must be uniq
 ERROR_GET_RESOURCE: str = "Failed to get the data from platform. Verify url or api key."
 
 ERROR_EXPERIMENT_DIR: str = "Source directory does not exist"
+ERROR_RECIPE_FILE_FORMAT: str = "Recipe filename must be a '.zip' file"
 
 PROFILE_SWITCHED_SUCCESS: str = "Switched to profile:"
 PROFILE_REMOVED_SUCCESS: str = "Removed profile:"

{hafnia-0.1.24 → hafnia-0.1.25}/src/cli/data_cmds.py

@@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Optional
 
 import click

@@ -35,20 +36,18 @@ def data_get(cfg: Config, url: str, destination: click.Path) -> None:
 @click.argument("destination", default=None, required=False)
 @click.option("--force", is_flag=True, default=False, help="Force download")
 @click.pass_obj
-def data_download(cfg: Config, dataset_name: str, destination: Optional[click.Path], force: bool) ->
+def data_download(cfg: Config, dataset_name: str, destination: Optional[click.Path], force: bool) -> Path:
     """Download dataset from Hafnia platform"""
 
     from hafnia.data.factory import download_or_get_dataset_path
 
     try:
-
-        api_key = cfg.api_key
-        download_or_get_dataset_path(
+        path_dataset = download_or_get_dataset_path(
            dataset_name=dataset_name,
-
-            api_key=api_key,
+            cfg=cfg,
            output_dir=destination,
            force_redownload=force,
        )
     except Exception:
         raise click.ClickException(consts.ERROR_GET_RESOURCE)
+    return path_dataset

{hafnia-0.1.24 → hafnia-0.1.25}/src/cli/experiment_cmds.py

@@ -13,32 +13,6 @@ def experiment() -> None:
     pass
 
 
-@experiment.command(name="create_recipe")
-@click.option("--source_folder", default=".", type=Path, help="Path to the source folder", show_default=True)
-@click.option(
-    "--recipe_filename",
-    default="recipe.zip",
-    type=Path,
-    help="Recipe filename. Should have a '.zip' suffix",
-    show_default=True,
-)
-def create_recipe(source_folder: str, recipe_filename: str) -> None:
-    """Build recipe from local path as image with prefix - localhost"""
-
-    from hafnia.platform.builder import validate_recipe
-    from hafnia.utils import archive_dir
-
-    path_output_zip = Path(recipe_filename)
-
-    if path_output_zip.suffix != ".zip":
-        raise click.ClickException("Recipe filename must be a '.zip' file")
-
-    path_source = Path(source_folder)
-
-    path_output_zip = archive_dir(path_source, path_output_zip)
-    validate_recipe(path_output_zip)
-
-
 @experiment.command(name="create")
 @click.argument("name")
 @click.argument("source_dir", type=Path)

hafnia-0.1.25/src/cli/recipe_cmds.py

@@ -0,0 +1,49 @@
+from pathlib import Path
+
+import click
+
+import cli.consts as consts
+
+
+@click.group(name="recipe")
+def recipe() -> None:
+    """Hafnia Recipe management commands"""
+    pass
+
+
+@recipe.command(name="create")
+@click.argument("source")
+@click.option(
+    "--output", type=click.Path(writable=True), default="./recipe.zip", show_default=True, help="Output recipe path."
+)
+def create(source: str, output: str) -> None:
+    """Create HRF from local path"""
+
+    from hafnia.platform.builder import validate_recipe
+    from hafnia.utils import archive_dir
+
+    path_output_zip = Path(output)
+    if path_output_zip.suffix != ".zip":
+        raise click.ClickException(consts.ERROR_RECIPE_FILE_FORMAT)
+
+    path_source = Path(source)
+    path_output_zip = archive_dir(path_source, path_output_zip)
+    validate_recipe(path_output_zip)
+
+
+@recipe.command(name="view")
+@click.option("--path", type=str, default="./recipe.zip", show_default=True, help="Path of recipe.zip.")
+@click.option("--depth-limit", type=int, default=3, help="Limit the depth of the tree view.", show_default=True)
+def view(path: str, depth_limit: int) -> None:
+    """View the content of a recipe zip file."""
+    from hafnia.utils import view_recipe_content
+
+    path_recipe = Path(path)
+    if not path_recipe.exists():
+        raise click.ClickException(
+            f"Recipe file '{path_recipe}' does not exist. Please provide a valid path. "
+            f"To create a recipe, use the 'hafnia recipe create' command."
+        )
+
+    tree_str = view_recipe_content(path_recipe, depth_limit=depth_limit)
+    click.echo(tree_str)

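A short usage sketch for the new `recipe` group, driven through click's test runner; it assumes the working directory already contains the `src/`, `scripts/` and `Dockerfile` layout that `validate_recipe` checks for, and the paths are examples only:

```python
from click.testing import CliRunner

from cli.recipe_cmds import recipe

runner = CliRunner()

# Package the current folder into recipe.zip (requires src/, scripts/ and a Dockerfile).
result = runner.invoke(recipe, ["create", ".", "--output", "./recipe.zip"])
print(result.exit_code, result.output)

# Print a tree view of the archive contents.
result = runner.invoke(recipe, ["view", "--path", "./recipe.zip", "--depth-limit", "2"])
print(result.output)
```
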
hafnia-0.1.25/src/cli/runc_cmds.py

@@ -0,0 +1,143 @@
+import json
+import subprocess
+import zipfile
+from hashlib import sha256
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Optional
+
+import click
+
+from cli.config import Config
+
+
+@click.group(name="runc")
+def runc():
+    """Experiment management commands"""
+    pass
+
+
+@runc.command(name="launch")
+@click.argument("task", required=True)
+def launch(task: str) -> None:
+    """Launch a job within the image."""
+    from hafnia.platform.executor import handle_launch
+
+    handle_launch(task)
+
+
+@runc.command(name="launch-local")
+@click.argument("exec_cmd", type=str)
+@click.option(
+    "--dataset",
+    type=str,
+    help="Hafnia dataset name e.g. mnist, midwest-vehicle-detection or a path to a local dataset",
+    required=True,
+)
+@click.option(
+    "--image_name",
+    type=Optional[str],
+    default=None,
+    help=(
+        "Docker image name to use for the launch. "
+        "By default, it will use image name from '.state.json' "
+        "file generated by the 'hafnia runc build-local' command"
+    ),
+)
+@click.pass_obj
+def launch_local(cfg: Config, exec_cmd: str, dataset: str, image_name: str) -> None:
+    """Launch a job within the image."""
+    from hafnia.data.factory import download_or_get_dataset_path
+
+    is_local_dataset = "/" in dataset
+    if is_local_dataset:
+        click.echo(f"Using local dataset: {dataset}")
+        path_dataset = Path(dataset)
+        if not path_dataset.exists():
+            raise click.ClickException(f"Dataset path does not exist: {path_dataset}")
+    else:
+        click.echo(f"Using Hafnia dataset: {dataset}")
+        path_dataset = download_or_get_dataset_path(dataset_name=dataset, cfg=cfg, force_redownload=False)
+
+    if image_name is None:
+        # Load image name from state.json
+        path_state_file = Path("state.json")
+        if not path_state_file.exists():
+            raise click.ClickException("State file does not exist. Please build the image first.")
+        state_dict = json.loads(path_state_file.read_text())
+        if "mdi_tag" not in state_dict:
+            raise click.ClickException("mdi_tag not found in state file. Please build the image first.")
+        image_name = state_dict["mdi_tag"]
+
+    docker_cmds = [
+        "docker",
+        "run",
+        "--rm",
+        "-v",
+        f"{path_dataset.absolute()}:/opt/ml/input/data/training",
+        "-e",
+        "HAFNIA_CLOUD=true",
+        "-e",
+        "PYTHONPATH=src",
+        "--runtime",
+        "nvidia",
+        image_name,
+    ] + exec_cmd.split(" ")
+
+    # Use the "hafnia runc launch" cmd when we have moved to the new folder structure and
+    # direct commands.
+    # Replace '+ exec_cmd.split(" ")' with '["hafnia", "runc", "launch"] + exec_cmd.split(" ")'
+
+    click.echo(f"Running command: \n\t{' '.join(docker_cmds)}")
+    subprocess.run(docker_cmds, check=True)
+
+
+@runc.command(name="build")
+@click.argument("recipe_url")
+@click.argument("state_file", default="state.json")
+@click.argument("ecr_repository", default="localhost")
+@click.argument("image_name", default="recipe")
+@click.pass_obj
+def build(cfg: Config, recipe_url: str, state_file: str, ecr_repository: str, image_name: str) -> None:
+    """Build docker image with a given recipe."""
+    from hafnia.platform.builder import build_image, prepare_recipe
+
+    with TemporaryDirectory() as temp_dir:
+        image_info = prepare_recipe(recipe_url, Path(temp_dir), cfg.api_key)
+        image_info["name"] = image_name
+        build_image(image_info, ecr_repository, state_file)
+
+
+@runc.command(name="build-local")
+@click.argument("recipe")
+@click.argument("state_file", default="state.json")
+@click.argument("image_name", default="recipe")
+def build_local(recipe: str, state_file: str, image_name: str) -> None:
+    """Build recipe from local path as image with prefix - localhost"""
+
+    from hafnia.platform.builder import build_image, validate_recipe
+    from hafnia.utils import archive_dir
+
+    recipe_zip = Path(recipe)
+    recipe_created = False
+    if not recipe_zip.suffix == ".zip" and recipe_zip.is_dir():
+        recipe_zip = archive_dir(recipe_zip)
+        recipe_created = True
+
+    validate_recipe(recipe_zip)
+    click.echo("Recipe successfully validated")
+    with TemporaryDirectory() as temp_dir:
+        temp_dir_path = Path(temp_dir)
+        with zipfile.ZipFile(recipe_zip, "r") as zip_ref:
+            zip_ref.extractall(temp_dir_path)
+
+        image_info = {
+            "name": image_name,
+            "dockerfile": (temp_dir_path / "Dockerfile").as_posix(),
+            "docker_context": temp_dir_path.as_posix(),
+            "hash": sha256(recipe_zip.read_bytes()).hexdigest()[:8],
+        }
+        click.echo("Start building image")
+        build_image(image_info, "localhost", state_file=state_file)
+    if recipe_created:
+        recipe_zip.unlink()

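Taken together with `recipe create`, the two local `runc` commands cover the README workflow end to end: `build-local` writes the `state.json` that `launch-local` reads its image tag from. A sketch of driving the same three steps from Python; the dataset name, recipe path and training command are examples only:

```python
import subprocess

# 1. Package the recipe, 2. build the image locally, 3. run it against a dataset.
subprocess.run(["hafnia", "recipe", "create", "."], check=True)
subprocess.run(["hafnia", "runc", "build-local", "recipe.zip"], check=True)
subprocess.run(
    ["hafnia", "runc", "launch-local", "--dataset", "mnist", "python scripts/train.py"],
    check=True,
)
```
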
{hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/data/factory.py

@@ -21,12 +21,16 @@ def load_local(dataset_path: Path) -> Union[Dataset, DatasetDict]:
 
 def download_or_get_dataset_path(
     dataset_name: str,
-
-    api_key: str,
+    cfg: Optional[Config] = None,
     output_dir: Optional[str] = None,
     force_redownload: bool = False,
 ) -> Path:
     """Download or get the path of the dataset."""
+
+    cfg = cfg or Config()
+    endpoint_dataset = cfg.get_platform_endpoint("datasets")
+    api_key = cfg.api_key
+
     output_dir = output_dir or str(utils.PATH_DATASET)
     dataset_path_base = Path(output_dir).absolute() / dataset_name
     dataset_path_base.mkdir(exist_ok=True, parents=True)

@@ -36,8 +40,8 @@ def download_or_get_dataset_path(
         logger.info("Dataset found locally. Set 'force=True' or add `--force` flag with cli to re-download")
         return dataset_path_sample
 
-    dataset_id = get_dataset_id(dataset_name,
-    dataset_access_info_url = f"{
+    dataset_id = get_dataset_id(dataset_name, endpoint_dataset, api_key)
+    dataset_access_info_url = f"{endpoint_dataset}/{dataset_id}/temporary-credentials"
 
     if force_redownload and dataset_path_sample.exists():
         # Remove old files to avoid old files conflicting with new files

@@ -48,23 +52,6 @@ def download_or_get_dataset_path(
         raise RuntimeError("Failed to download dataset")
 
 
-def load_from_platform(
-    dataset_name: str,
-    endpoint: str,
-    api_key: str,
-    output_dir: Optional[str] = None,
-    force_redownload: bool = False,
-) -> Union[Dataset, DatasetDict]:
-    path_dataset = download_or_get_dataset_path(
-        dataset_name=dataset_name,
-        endpoint=endpoint,
-        api_key=api_key,
-        output_dir=output_dir,
-        force_redownload=force_redownload,
-    )
-    return load_local(path_dataset)
-
-
 def load_dataset(dataset_name: str, force_redownload: bool = False) -> Union[Dataset, DatasetDict]:
     """Load a dataset either from a local path or from the Hafnia platform."""
 

@@ -72,15 +59,9 @@ def load_dataset(dataset_name: str, force_redownload: bool = False) -> Union[Dat
         path_dataset = Path(os.getenv("MDI_DATASET_DIR", "/opt/ml/input/data/training"))
         return load_local(path_dataset)
 
-
-    endpoint_dataset = cfg.get_platform_endpoint("datasets")
-    api_key = cfg.api_key
-    dataset = load_from_platform(
+    path_dataset = download_or_get_dataset_path(
         dataset_name=dataset_name,
-        endpoint=endpoint_dataset,
-        api_key=api_key,
-        output_dir=None,
         force_redownload=force_redownload,
     )
-
+    dataset = load_local(path_dataset)
     return dataset

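With this refactor, callers no longer pass `endpoint`/`api_key` explicitly; a `Config` is optional and resolved inside the function. A minimal sketch of the call shapes shown in this hunk (dataset names are examples only):

```python
from cli.config import Config
from hafnia.data.factory import download_or_get_dataset_path, load_dataset

# Resolve (or download) the dataset path with an explicit Config ...
cfg = Config()
path_dataset = download_or_get_dataset_path("mnist", cfg=cfg, force_redownload=False)

# ... or let the function build its own Config() and load the dataset directly.
dataset = load_dataset("mnist")
```
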
{hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/experiment/hafnia_logger.py

@@ -159,8 +159,12 @@ class HafniaLogger:
     def log_hparams(self, params: Dict, fname: str = "hparams.json"):
         file_path = self._path_artifacts() / fname
         try:
-
-            json.
+            if file_path.exists():  # New params are appended to existing params
+                existing_params = json.loads(file_path.read_text())
+            else:
+                existing_params = {}
+            existing_params.update(params)
+            file_path.write_text(json.dumps(existing_params, indent=2))
             logger.info(f"Saved parameters to {file_path}")
         except Exception as e:
             logger.error(f"Failed to save parameters to {file_path}: {e}")

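The updated `log_hparams` merges new parameters into an existing `hparams.json` rather than overwriting it. A standalone, runnable sketch of the same merge semantics (outside the `HafniaLogger` class):

```python
import json
from pathlib import Path
from tempfile import TemporaryDirectory


def log_hparams(artifacts: Path, params: dict, fname: str = "hparams.json") -> None:
    # Same merge logic as the diff above: existing keys are kept, new keys are appended.
    file_path = artifacts / fname
    existing_params = json.loads(file_path.read_text()) if file_path.exists() else {}
    existing_params.update(params)
    file_path.write_text(json.dumps(existing_params, indent=2))


with TemporaryDirectory() as tmp:
    log_hparams(Path(tmp), {"lr": 1e-3, "batch_size": 64})
    log_hparams(Path(tmp), {"epochs": 10})
    print(json.loads((Path(tmp) / "hparams.json").read_text()))
    # {'lr': 0.001, 'batch_size': 64, 'epochs': 10}
```
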
{hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/platform/builder.py

@@ -25,7 +25,7 @@ def validate_recipe(zip_path: Path, required_paths: Optional[set] = None) -> Non
     Raises:
         FileNotFoundError: If any required file or directory is missing.
     """
-    required_paths = {"src
+    required_paths = {"src", "scripts", "Dockerfile"} if required_paths is None else required_paths
     with ZipFile(zip_path, "r") as archive:
         archive_contents = {Path(file).as_posix() for file in archive.namelist()}
         missing_paths = {

@@ -35,10 +35,10 @@ def validate_recipe(zip_path: Path, required_paths: Optional[set] = None) -> Non
     if missing_paths:
         raise FileNotFoundError(f"The following required paths are missing in the zip archive: {missing_paths}")
 
-    script_files = [f for f in archive_contents if f.startswith("
+    script_files = [f for f in archive_contents if f.startswith("scripts/") and f.endswith(".py")]
 
     if not script_files:
-        raise ValueError("No Python script files found in the '
+        raise ValueError("No Python script files found in the 'scripts' directory.")
 
 
 def clean_up(files: List[Path], dirs: List[Path], prefix: str = "__") -> None:

@@ -82,11 +82,11 @@ def get_recipe_content(recipe_url: str, output_dir: Path, state_file: str, api_k
 
     tag = sha256(recipe_path.read_bytes()).hexdigest()[:8]
 
-    scripts_dir = output_dir / "
+    scripts_dir = output_dir / "scripts"
     valid_commands = [str(f.name)[:-3] for f in scripts_dir.iterdir() if f.is_file() and f.suffix.lower() == ".py"]
 
     if not valid_commands:
-        raise ValueError("No valid Python script commands found in the '
+        raise ValueError("No valid Python script commands found in the 'scripts' directory.")
 
     state = {
         "user_data": (output_dir / "src").as_posix(),

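`validate_recipe` now expects `src`, `scripts` and a `Dockerfile` at the archive root, plus at least one `.py` file under `scripts/`. A sketch that builds a minimal archive with that layout and validates it; the exact membership check is truncated in this hunk, so explicit directory entries are written to be on the safe side, and the file contents are placeholders:

```python
import zipfile
from pathlib import Path
from tempfile import TemporaryDirectory

from hafnia.platform.builder import validate_recipe

with TemporaryDirectory() as tmp:
    recipe_zip = Path(tmp) / "recipe.zip"
    with zipfile.ZipFile(recipe_zip, "w") as zf:
        zf.writestr("Dockerfile", "FROM python:3.10-slim\n")
        zf.writestr("src/", "")       # explicit directory entries
        zf.writestr("scripts/", "")
        zf.writestr("src/lib/train_utils.py", "")
        zf.writestr("scripts/train.py", "print('training')\n")
    validate_recipe(recipe_zip)  # raises FileNotFoundError/ValueError if the layout is wrong
```
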
{hafnia-0.1.24 → hafnia-0.1.25}/src/hafnia/platform/executor.py

@@ -21,22 +21,22 @@ def handle_mount(source: str) -> None:
     Mounts the Hafnia environment by adding source directories to PYTHONPATH.
 
     Args:
-        source (str): Path to the root directory containing '
+        source (str): Path to the root directory containing 'src' and 'scripts' subdirectories
 
     Raises:
         FileNotFoundError: If the required directory structure is not found
     """
     source_path = Path(source)
-
+    src_dir = source_path / "src"
     scripts_dir = source_path / "scripts"
 
-    if not
-        logger.error(f"Filestructure is not supported. Expected '
+    if not src_dir.exists() and not scripts_dir.exists():
+        logger.error(f"Filestructure is not supported. Expected 'src' and 'scripts' directories in {source_path}.")
         exit(1)
 
-    sys.path.extend([
+    sys.path.extend([src_dir.as_posix(), scripts_dir.as_posix()])
     python_path = os.getenv("PYTHONPATH", "")
-    os.environ["PYTHONPATH"] = f"{python_path}:{
+    os.environ["PYTHONPATH"] = f"{python_path}:{src_dir.as_posix()}:{scripts_dir.as_posix()}"
     logger.info(f"Mounted codebase from {source_path}")
 
 
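A usage sketch for `handle_mount` with the reworked layout check: point it at a folder that contains `src/` and `scripts/` (the structure extracted from `recipe.zip`) and it extends both `sys.path` and `PYTHONPATH`; the path below is an example only.

```python
import os

from hafnia.platform.executor import handle_mount

# The folder should hold 'src' and/or 'scripts'; if neither exists the process exits.
handle_mount("/opt/recipe")
print(os.environ["PYTHONPATH"])  # ...:/opt/recipe/src:/opt/recipe/scripts
```
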