hafnia 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__main__.py +13 -2
- cli/config.py +2 -1
- cli/consts.py +1 -1
- cli/dataset_cmds.py +6 -14
- cli/dataset_recipe_cmds.py +78 -0
- cli/experiment_cmds.py +226 -43
- cli/profile_cmds.py +6 -5
- cli/runc_cmds.py +5 -5
- cli/trainer_package_cmds.py +65 -0
- hafnia/__init__.py +2 -0
- hafnia/data/factory.py +1 -2
- hafnia/dataset/dataset_helpers.py +0 -12
- hafnia/dataset/dataset_names.py +8 -4
- hafnia/dataset/dataset_recipe/dataset_recipe.py +119 -33
- hafnia/dataset/dataset_recipe/recipe_transforms.py +32 -4
- hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
- hafnia/dataset/dataset_upload_helper.py +206 -53
- hafnia/dataset/hafnia_dataset.py +432 -194
- hafnia/dataset/license_types.py +63 -0
- hafnia/dataset/operations/dataset_stats.py +260 -3
- hafnia/dataset/operations/dataset_transformations.py +325 -4
- hafnia/dataset/operations/table_transformations.py +39 -2
- hafnia/dataset/primitives/__init__.py +8 -0
- hafnia/dataset/primitives/classification.py +1 -1
- hafnia/experiment/hafnia_logger.py +112 -0
- hafnia/http.py +16 -2
- hafnia/platform/__init__.py +9 -3
- hafnia/platform/builder.py +12 -10
- hafnia/platform/dataset_recipe.py +99 -0
- hafnia/platform/datasets.py +44 -6
- hafnia/platform/download.py +2 -1
- hafnia/platform/experiment.py +51 -56
- hafnia/platform/trainer_package.py +57 -0
- hafnia/utils.py +64 -13
- hafnia/visualizations/image_visualizations.py +3 -3
- {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/METADATA +34 -30
- hafnia-0.3.0.dist-info/RECORD +53 -0
- cli/recipe_cmds.py +0 -45
- hafnia-0.2.4.dist-info/RECORD +0 -49
- {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/WHEEL +0 -0
- {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/entry_points.txt +0 -0
- {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/licenses/LICENSE +0 -0
hafnia/platform/experiment.py
CHANGED
@@ -1,73 +1,68 @@
-from
-from typing import Optional
+from typing import Dict, List
 
-from hafnia
-from hafnia.
-from hafnia.utils import archive_dir, get_recipe_path, timed
+from hafnia import http
+from hafnia.utils import pretty_print_list_as_table, timed
 
 
-@timed("
-def
+@timed("Creating experiment.")
+def create_experiment(
+    experiment_name: str,
+    dataset_recipe_id: str,
+    trainer_id: str,
+    exec_cmd: str,
+    environment_id: str,
+    endpoint: str,
+    api_key: str,
+) -> Dict:
     headers = {"Authorization": api_key}
-
-
-
-
-
-
-
-
+    response = http.post(
+        endpoint,
+        headers=headers,
+        data={
+            "name": experiment_name,
+            "trainer": trainer_id,
+            "dataset_recipe": dataset_recipe_id,
+            "command": exec_cmd,
+            "environment": environment_id,
+        },
+    )
+    return response
 
 
-@timed("
-def
-
-
-
-    user_logger.info(f"Recipe created and stored in '{path_recipe}'")
+@timed("Fetching environment info.")
+def get_environments(endpoint: str, api_key: str) -> List[Dict]:
+    headers = {"Authorization": api_key}
+    envs: List[Dict] = http.fetch(endpoint, headers=headers)  # type: ignore[assignment]
+    return envs
 
-
-
-
-    "
-    "
+
+def pretty_print_training_environments(envs: List[Dict]) -> None:
+    ENV_FIELDS = {
+        "Name": "name",
+        "Instance": "instance",
+        "GPU": "gpu",
+        "GPU Count": "gpu_count",
+        "GPU RAM": "vram",
+        "CPU": "cpu",
+        "CPU Count": "cpu_count",
+        "RAM": "ram",
     }
-
-
+    pretty_print_list_as_table(
+        table_title="Available Training Environments",
+        dict_items=envs,
+        column_name_to_key_mapping=ENV_FIELDS,
+    )
 
 
-@timed("Fetching environment info.")
 def get_exp_environment_id(name: str, endpoint: str, api_key: str) -> str:
-
-    env_info = fetch(endpoint, headers=headers)
+    envs = get_environments(endpoint=endpoint, api_key=api_key)
 
-    for env in
+    for env in envs:
        if env["name"] == name:
            return env["id"]
 
-
+    pretty_print_training_environments(envs)
 
+    available_envs = [env["name"] for env in envs]
 
-
-def create_experiment(
-    exp_name: str,
-    dataset_id: str,
-    recipe_id: str,
-    exec_cmd: str,
-    environment_id: str,
-    endpoint: str,
-    api_key: str,
-) -> Optional[str]:
-    headers = {"Authorization": api_key}
-    response = post(
-        endpoint,
-        headers=headers,
-        data={
-            "name": exp_name,
-            "recipe": recipe_id,
-            "dataset": dataset_id,
-            "command": exec_cmd,
-            "environment": environment_id,
-        },
-    )
-    return response["id"]
+    raise ValueError(f"Environment '{name}' not found. Available environments are: {available_envs}")
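The new API splits environment lookup from experiment creation. A minimal sketch of how the two functions might be wired together; the endpoint URLs, API key, and the "free-tier" environment name are placeholders that do not appear in the diff:

```python
# Hypothetical usage of the new API; endpoints, API key and the
# "free-tier" environment name are placeholders, not real values.
from hafnia.platform.experiment import create_experiment, get_exp_environment_id

API_KEY = "my-api-key"                                 # placeholder
ENV_ENDPOINT = "https://api.example.com/environments"  # placeholder URL
EXP_ENDPOINT = "https://api.example.com/experiments"   # placeholder URL

# Resolves an environment name to its id. On a miss it now prints the
# table of available environments and raises ValueError with their names.
environment_id = get_exp_environment_id("free-tier", endpoint=ENV_ENDPOINT, api_key=API_KEY)

# create_experiment now takes dataset_recipe_id/trainer_id in place of the
# old dataset_id/recipe_id pair and returns the full response Dict, not "id".
response = create_experiment(
    experiment_name="my-experiment",
    dataset_recipe_id="<dataset-recipe-id>",
    trainer_id="<trainer-id>",
    exec_cmd="python scripts/train.py",
    environment_id=environment_id,
    endpoint=EXP_ENDPOINT,
    api_key=API_KEY,
)
```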
hafnia/platform/trainer_package.py
ADDED

@@ -0,0 +1,57 @@
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from hafnia import http
+from hafnia.log import user_logger
+from hafnia.utils import archive_dir, get_trainer_package_path, pretty_print_list_as_table, timed
+
+
+@timed("Uploading trainer package.")
+def create_trainer_package(source_dir: Path, endpoint: str, api_key: str) -> str:
+    source_dir = source_dir.resolve()  # Ensure the path is absolute to handle '.' paths are given an appropriate name.
+    path_trainer = get_trainer_package_path(trainer_name=source_dir.name)
+    zip_path = archive_dir(source_dir, output_path=path_trainer)
+    user_logger.info(f"Trainer package created and stored in '{path_trainer}'")
+
+    headers = {"Authorization": api_key, "accept": "application/json"}
+    data = {
+        "name": path_trainer.name,
+        "description": "Trainer package created by Hafnia CLI",
+        "file": (zip_path.name, Path(zip_path).read_bytes()),
+    }
+    response = http.post(endpoint, headers=headers, data=data, multipart=True)
+    return response["id"]
+
+
+@timed("Get trainer package.")
+def get_trainer_package_by_id(id: str, endpoint: str, api_key: str) -> Dict:
+    full_url = f"{endpoint}/{id}"
+    headers = {"Authorization": api_key}
+    response: Dict = http.fetch(full_url, headers=headers)  # type: ignore[assignment]
+    return response
+
+
+@timed("Get trainer packages")
+def get_trainer_packages(endpoint: str, api_key: str) -> List[Dict]:
+    headers = {"Authorization": api_key}
+    trainers: List[Dict] = http.fetch(endpoint, headers=headers)  # type: ignore[assignment]
+    return trainers
+
+
+def pretty_print_trainer_packages(trainers: List[Dict[str, str]], limit: Optional[int]) -> None:
+    # Sort trainer packages to have the most recent first
+    trainers = sorted(trainers, key=lambda x: x["created_at"], reverse=True)
+    if limit is not None:
+        trainers = trainers[:limit]
+
+    mapping = {
+        "ID": "id",
+        "Name": "name",
+        "Description": "description",
+        "Created At": "created_at",
+    }
+    pretty_print_list_as_table(
+        table_title="Available Trainer Packages (most recent first)",
+        dict_items=trainers,
+        column_name_to_key_mapping=mapping,
+    )
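A sketch of how this new module might be driven directly (presumably the new `cli/trainer_package_cmds.py` commands do something similar); the endpoint URL and API key are placeholders:

```python
# Hypothetical driver for the new module; endpoint and API key are placeholders.
from pathlib import Path

from hafnia.platform.trainer_package import (
    create_trainer_package,
    get_trainer_packages,
    pretty_print_trainer_packages,
)

API_KEY = "my-api-key"                                 # placeholder
ENDPOINT = "https://api.example.com/trainer-packages"  # placeholder URL

# Zips the source directory into .data/trainers/<name>_<timestamp>.zip
# (via get_trainer_package_path/archive_dir) and uploads it as multipart data.
trainer_id = create_trainer_package(
    source_dir=Path("../trainer-classification"),
    endpoint=ENDPOINT,
    api_key=API_KEY,
)

# List existing packages as a rich table, most recent first, ten rows max.
trainers = get_trainer_packages(endpoint=ENDPOINT, api_key=API_KEY)
pretty_print_trainer_packages(trainers, limit=10)
```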
hafnia/utils.py
CHANGED
@@ -5,10 +5,12 @@ import zipfile
 from datetime import datetime
 from functools import wraps
 from pathlib import Path
-from typing import Any, Callable, Iterator, Optional
+from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional
 from zipfile import ZipFile
 
+import more_itertools
 import pathspec
+import rich
 import seedir
 from rich import print as rprint
 

@@ -16,19 +18,21 @@ from hafnia.log import sys_logger, user_logger
 
 PATH_DATA = Path("./.data")
 PATH_DATASETS = PATH_DATA / "datasets"
-
+PATH_DATASET_RECIPES = PATH_DATA / "dataset_recipes"
+PATH_TRAINER_PACKAGES = PATH_DATA / "trainers"
 FILENAME_HAFNIAIGNORE = ".hafniaignore"
 DEFAULT_IGNORE_SPECIFICATION = [
     "*.jpg",
     "*.png",
     "*.py[cod]",
     "*_cache/",
+    "**.egg-info/",
     ".data",
     ".git",
     ".venv",
     ".vscode",
     "__pycache__",
-    "
+    "trainer.zip",
     "tests",
     "wandb",
 ]
@@ -49,6 +53,7 @@ def timed(label: str):
                return func(*args, **kwargs)
            except Exception as e:
                sys_logger.error(f"{operation_label} failed: {e}")
+               raise  # Re-raise the exception after logging
            finally:
                elapsed = time.perf_counter() - tik
                sys_logger.debug(f"{operation_label} took {elapsed:.2f} seconds.")
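The added `raise` is the only behavioural change to `timed`: a failure is still logged and still timed by the `finally` block, but it now propagates to the caller rather than being swallowed (previously the call fell through and returned None). A small illustration; the decorated function is invented:

```python
# Illustration only; `flaky` is a made-up function.
from hafnia.utils import timed


@timed("Flaky operation.")
def flaky() -> None:
    raise RuntimeError("boom")


try:
    flaky()
except RuntimeError:
    # Before this change the except block swallowed the error and the
    # call returned None; now the exception reaches the caller.
    print("caller sees the failure")
```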
@@ -63,14 +68,14 @@ def now_as_str() -> str:
     return datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
 
 
-def
+def get_trainer_package_path(trainer_name: str) -> Path:
     now = now_as_str()
-
-    return
+    path_trainer = PATH_TRAINER_PACKAGES / f"{trainer_name}_{now}.zip"
+    return path_trainer
 
 
-def
-    path_ignore_file = path_ignore_file or
+def filter_trainer_package_files(trainer_path: Path, path_ignore_file: Optional[Path] = None) -> Iterator:
+    path_ignore_file = path_ignore_file or trainer_path / FILENAME_HAFNIAIGNORE
     if not path_ignore_file.exists():
         ignore_specification_lines = DEFAULT_IGNORE_SPECIFICATION
         user_logger.info(

@@ -81,7 +86,7 @@ def filter_recipe_files(recipe_path: Path, path_ignore_file: Optional[Path] = No
     else:
         ignore_specification_lines = Path(path_ignore_file).read_text().splitlines()
     ignore_specification = pathspec.GitIgnoreSpec.from_lines(ignore_specification_lines)
-    include_files = ignore_specification.match_tree(
+    include_files = ignore_specification.match_tree(trainer_path, negate=True)
     return include_files
 
 

@@ -91,17 +96,17 @@ def archive_dir(
     output_path: Optional[Path] = None,
     path_ignore_file: Optional[Path] = None,
 ) -> Path:
-    recipe_zip_path = output_path or recipe_path / "
+    recipe_zip_path = output_path or recipe_path / "trainer.zip"
     assert recipe_zip_path.suffix == ".zip", "Output path must be a zip file"
     recipe_zip_path.parent.mkdir(parents=True, exist_ok=True)
 
     user_logger.info(f" Creating zip archive of '{recipe_path}'")
-    include_files =
+    include_files = filter_trainer_package_files(recipe_path, path_ignore_file)
     with ZipFile(recipe_zip_path, "w", compression=zipfile.ZIP_STORED, allowZip64=True) as zip_ref:
         for str_filepath in include_files:
             full_path = recipe_path / str_filepath
             zip_ref.write(full_path, str_filepath)
-
+    show_trainer_package_content(recipe_zip_path)
 
     return recipe_zip_path
 
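`filter_trainer_package_files` feeds gitignore-style rules into `pathspec`, and `match_tree(..., negate=True)` inverts the match so the iterator yields the files to *keep*. A standalone sketch of that pattern, independent of Hafnia:

```python
# Standalone sketch of the pathspec pattern used above.
import pathspec

ignore_lines = ["*.png", "__pycache__", "wandb", "trainer.zip"]
spec = pathspec.GitIgnoreSpec.from_lines(ignore_lines)

# match_tree() walks a directory tree; with negate=True the result is the
# complement of the ignore rules, i.e. the relative paths to archive.
for relative_path in spec.match_tree(".", negate=True):
    print(relative_path)
```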
@@ -115,7 +120,7 @@ def size_human_readable(size_bytes: int, suffix="B") -> str:
|
|
|
115
120
|
return f"{size_value:.1f}Yi{suffix}"
|
|
116
121
|
|
|
117
122
|
|
|
118
|
-
def
|
|
123
|
+
def show_trainer_package_content(recipe_path: Path, style: str = "emoji", depth_limit: int = 3) -> None:
|
|
119
124
|
def scan(parent: seedir.FakeDir, path: zipfile.Path, depth: int = 0) -> None:
|
|
120
125
|
if depth >= depth_limit:
|
|
121
126
|
return
|
|
@@ -133,6 +138,16 @@ def show_recipe_content(recipe_path: Path, style: str = "emoji", depth_limit: in
|
|
|
133
138
|
user_logger.info(f"Recipe size: {size_human_readable(os.path.getsize(recipe_path))}. Max size 800 MiB")
|
|
134
139
|
|
|
135
140
|
|
|
141
|
+
def get_dataset_path_in_hafnia_cloud() -> Path:
|
|
142
|
+
if not is_hafnia_cloud_job():
|
|
143
|
+
user_logger.error(
|
|
144
|
+
f"The function '{get_dataset_path_in_hafnia_cloud.__name__}' should only be called, when "
|
|
145
|
+
"running in HAFNIA cloud environment (HAFNIA_CLOUD-environment variable have been defined)"
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
return Path(os.getenv("MDI_DATASET_DIR", "/opt/ml/input/data/training"))
|
|
149
|
+
|
|
150
|
+
|
|
136
151
|
def is_hafnia_cloud_job() -> bool:
|
|
137
152
|
"""Check if the current job is running in HAFNIA cloud environment."""
|
|
138
153
|
return os.getenv("HAFNIA_CLOUD", "false").lower() == "true"
|
|
@@ -154,3 +169,39 @@ def snake_to_pascal_case(name: str) -> str:
|
|
|
154
169
|
|
|
155
170
|
def hash_from_string(s: str) -> str:
|
|
156
171
|
return hashlib.md5(s.encode("utf-8")).hexdigest()
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def pretty_print_list_as_table(
|
|
175
|
+
table_title: str,
|
|
176
|
+
dict_items: List[Dict],
|
|
177
|
+
column_name_to_key_mapping: Dict,
|
|
178
|
+
) -> None:
|
|
179
|
+
"""
|
|
180
|
+
Pretty print a list of dictionary elements as a table.
|
|
181
|
+
"""
|
|
182
|
+
|
|
183
|
+
table = rich.table.Table(title=table_title)
|
|
184
|
+
for i_dict, dictionary in enumerate(dict_items):
|
|
185
|
+
if i_dict == 0:
|
|
186
|
+
for column_name, _ in column_name_to_key_mapping.items():
|
|
187
|
+
table.add_column(column_name, justify="left", style="cyan", no_wrap=True)
|
|
188
|
+
row = [str(dictionary.get(field, "")) for field in column_name_to_key_mapping.values()]
|
|
189
|
+
table.add_row(*row)
|
|
190
|
+
|
|
191
|
+
rich.print(table)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def is_hafnia_configured() -> bool:
|
|
195
|
+
"""
|
|
196
|
+
Check if Hafnia is configured by verifying if the API key is set.
|
|
197
|
+
"""
|
|
198
|
+
from cli.config import Config
|
|
199
|
+
|
|
200
|
+
return Config().is_configured()
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def remove_duplicates_preserve_order(seq: Iterable) -> List:
|
|
204
|
+
"""
|
|
205
|
+
Remove duplicates from a list while preserving the order of elements.
|
|
206
|
+
"""
|
|
207
|
+
return list(more_itertools.unique_everseen(seq))
|
|
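`pretty_print_list_as_table` is the shared helper behind both the environment and trainer-package tables above. A quick sketch with made-up rows; note that a missing key renders as an empty cell via `dictionary.get(field, "")`:

```python
# Made-up rows to show the column-title -> dict-key mapping.
from hafnia.utils import pretty_print_list_as_table

rows = [
    {"id": "abc123", "name": "first", "created_at": "2025-01-01"},
    {"id": "def456", "name": "second"},  # no "created_at" -> empty cell
]
pretty_print_list_as_table(
    table_title="Example Table",
    dict_items=rows,
    column_name_to_key_mapping={"ID": "id", "Name": "name", "Created At": "created_at"},
)
```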
hafnia/visualizations/image_visualizations.py
CHANGED

@@ -102,10 +102,10 @@ def resize_width_by_padding(img0: np.ndarray, new_width: int) -> np.ndarray:
     return img0_padded
 
 
-def append_text_below_frame(frame: np.ndarray, text: str) -> np.ndarray:
-    font_size_px = int(frame.shape[0] *
+def append_text_below_frame(frame: np.ndarray, text: str, text_size_ratio: float = 0.1) -> np.ndarray:
+    font_size_px = int(frame.shape[0] * text_size_ratio)  # 10% of the frame height
     font_size_px = max(font_size_px, 7)  # Ensure a minimum font size
-    font_size_px = min(font_size_px,
+    font_size_px = min(font_size_px, 25)  # Ensure a maximum font size
 
     text_region = create_text_img(text, font_size_px=font_size_px)
     frame_with_text = concatenate_below_resize_by_padding(frame, text_region)
{hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.2.4
+Version: 0.3.0
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE

@@ -9,6 +9,7 @@ Requires-Dist: boto3>=1.35.91
 Requires-Dist: click>=8.1.8
 Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
+Requires-Dist: mlflow>=3.2.0
 Requires-Dist: more-itertools>=10.7.0
 Requires-Dist: opencv-python-headless>=4.11.0.86
 Requires-Dist: pathspec>=0.12.1

@@ -19,6 +20,7 @@ Requires-Dist: pycocotools>=2.0.10
 Requires-Dist: pydantic>=2.10.4
 Requires-Dist: rich>=13.9.4
 Requires-Dist: s5cmd>=0.2.0
+Requires-Dist: sagemaker-mlflow>=0.1.0
 Requires-Dist: seedir>=0.5.0
 Requires-Dist: tqdm>=4.67.1
 Requires-Dist: xxhash>=3.5.0

@@ -26,13 +28,13 @@ Description-Content-Type: text/markdown
 
 # Hafnia
 
-The `hafnia` python
+The `hafnia` python sdk and cli is a collection of tools to create and run model trainer packages on
 the [Hafnia Platform](https://hafnia.milestonesys.com/).
 
 The package includes the following interfaces:
 
 - `cli`: A Command Line Interface (CLI) to 1) configure/connect to Hafnia's [Training-aaS](https://hafnia.readme.io/docs/training-as-a-service) and 2) create and
-launch
+launch trainer packages.
 - `hafnia`: A python package including `HafniaDataset` to manage datasets and `HafniaLogger` to do
 experiment tracking.

@@ -42,19 +44,19 @@ experiment tracking.
 and *hidden* datasets. Hidden datasets refers to datasets that can be used for
 training, but are not available for download or direct access.
 
-This is a key
+This is a key for the Hafnia platform, as a hidden dataset ensures data
 privacy, and allow models to be trained compliantly and ethically by third parties (you).
 
 The `script2model` approach is a Training-aaS concept, where you package your custom training
-script as a *
+project or script as a *trainer package* and use the package to train models on the hidden datasets.
 
-To support local development of a
+To support local development of a trainer package, we have introduced a **sample dataset**
 for each dataset available in the Hafnia [data library](https://hafnia.milestonesys.com/training-aas/datasets). The sample dataset is a small
-and anonymized subset of the full dataset and available for download.
+and an anonymized subset of the full dataset and available for download.
 
 With the sample dataset, you can seamlessly switch between local development and Training-aaS.
-Locally, you can create, validate and debug your
-launched with Training-aaS, where the
+Locally, you can create, validate and debug your trainer package. The trainer package is then
+launched with Training-aaS, where the package runs on the full dataset and can be scaled to run on
 multiple GPUs and instances if needed.
 
 ## Getting started: Configuration

@@ -122,19 +124,19 @@ midwest-vehicle-detection
 You can interact with data as you want, but we also provide `HafniaDataset`
 for loading/saving, managing and interacting with the dataset.
 
-We recommend
-
+We recommend the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
+for a short introduction on the `HafniaDataset`.
 
 Below is a short introduction to the `HafniaDataset` class.
 
 ```python
 from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
 
-# Load dataset
+# Load dataset from path
 dataset = HafniaDataset.read_from_path(path_dataset)
 
-#
-
+# Or get dataset directly by name
+dataset = HafniaDataset.from_name("midwest-vehicle-detection")
 
 # Print dataset information
 dataset.print_stats()

@@ -199,6 +201,8 @@ DatasetInfo(
         'duration_average': 120.0,
         ...
     }
+    "format_version": "0.0.2",
+    "updated_at": "2025-09-24T21:50:20.231263"
 )
 ```

@@ -238,7 +242,7 @@ Sample(
     height=1080,
     width=1920,
     split='train',
-
+    tags=["sample"],
     collection_index=None,
     collection_id=None,
     remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',

@@ -302,10 +306,10 @@ Sample(
 )
 ```
 
-To learn more,
+To learn more, we recommend the `HafniaDataset` example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).
 
 ### Dataset Locally vs. Training-aaS
-An important feature of `
+An important feature of `HafniaDataset.from_name` is that it will return the full dataset
 when loaded with Training-aaS on the Hafnia platform.
 
 This enables seamlessly switching between running/validating a training script

@@ -316,7 +320,7 @@ Available datasets with corresponding sample datasets can be found in [data libr
 
 
 ## Getting started: Experiment Tracking with HafniaLogger
-The `HafniaLogger` is an important part of the
+The `HafniaLogger` is an important part of the trainer and enables you to track, log and
 reproduce your experiments.
 
 When integrated into your training script, the `HafniaLogger` is responsible for collecting:

@@ -422,25 +426,25 @@ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=
 
 
 ## Example: Training-aaS
-By combining logging and dataset loading, we can now construct our model
+By combining logging and dataset loading, we can now construct our model trainer package.
 
-To demonstrate this, we have provided a
-[
+To demonstrate this, we have provided a trainer package project that serves as a template for creating and structuring trainers. The example repo is called
+[trainer-classification](https://github.com/milestone-hafnia/trainer-classification)
 
-The project also contains additional information on how to structure your
-the
+The project also contains additional information on how to structure your trainer package, use the `HafniaLogger`, loading a dataset and different approach for launching
+the trainer on the Hafnia platform.
 
 
-## Create, Build and Run `
-In order to test
+## Create, Build and Run `trainer.zip` locally
+In order to test trainer package compatibility with Hafnia cloud use the following command to build and
 start the job locally.
 
 ```bash
-# Create '
-hafnia
-
-# Build the docker image locally from a '
-hafnia runc build-local
+# Create 'trainer.zip' in the root folder of your training trainer project '../trainer/classification'
+hafnia trainer create-zip ../trainer-classification
+
+# Build the docker image locally from a 'trainer.zip' file
+hafnia runc build-local trainer.zip
 
 # Execute the docker image locally with a desired dataset
 hafnia runc launch-local --dataset mnist "python scripts/train.py"
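The README's local-vs-cloud behaviour ties together `HafniaDataset.from_name` and the new `get_dataset_path_in_hafnia_cloud`/`is_hafnia_cloud_job` helpers from the utils.py diff above. A sketch of what that switch looks like in a training script; per the README, `from_name` may already perform the switch internally, so the explicit branch here is purely illustrative:

```python
# Sketch of the local-vs-cloud dataset switch; HAFNIA_CLOUD and
# MDI_DATASET_DIR are the environment variables from the utils.py diff.
from hafnia.dataset.hafnia_dataset import HafniaDataset
from hafnia.utils import get_dataset_path_in_hafnia_cloud, is_hafnia_cloud_job

if is_hafnia_cloud_job():
    # On the platform the full dataset is mounted locally, defaulting to
    # /opt/ml/input/data/training unless MDI_DATASET_DIR overrides it.
    dataset = HafniaDataset.read_from_path(get_dataset_path_in_hafnia_cloud())
else:
    # Locally this resolves to the downloadable sample dataset.
    dataset = HafniaDataset.from_name("midwest-vehicle-detection")

dataset.print_stats()
```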
hafnia-0.3.0.dist-info/RECORD
ADDED

@@ -0,0 +1,53 @@
+cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cli/__main__.py,sha256=04iqem4UAw6zSVujBt8ER0HQbEhno6rgmARmECUwRPA,1634
+cli/config.py,sha256=m9jrka0Rnh8_mnxnU4S_JgoYA7l1pvp2o2a7KgkYInY,6115
+cli/consts.py,sha256=uCpYX44NCu_Zvte0QwChunxOo-qqhcaJRSYDAIsoJ8A,972
+cli/dataset_cmds.py,sha256=KOPYdAJ1SyzTMlEi_J-70vSGIJ5acHPGIPOCKVIdlQ4,1418
+cli/dataset_recipe_cmds.py,sha256=O1Pd-VvFFsmZ-nE1Mh6sCC9x45ztCJEpPCZK93qz_HQ,2887
+cli/experiment_cmds.py,sha256=vc7J9JJog4EvRdgkpoMvr0kceb0QF_Rk7mn6H2KNvFE,7963
+cli/profile_cmds.py,sha256=eRo4FtPvXPG5LK2fINVFMsBd_HpkNygY468essBb57o,3285
+cli/runc_cmds.py,sha256=qV-LOwbFlbegH8XSELOo4h_2ajFAzdB5LtuVKKoRq8Y,5009
+cli/trainer_package_cmds.py,sha256=nL7yTtR41BKzo1DWZdBdpRXGPZZIbmBe0byHAi6_n2c,2312
+hafnia/__init__.py,sha256=xXUwwQ18P1YqmsZkvlkavaDqq8DbrrHf38pv5_JyV_M,179
+hafnia/http.py,sha256=bjXbV_3uKbBdudqMdYtnsMttUAsNRMsetYZ4F2xXlEs,3635
+hafnia/log.py,sha256=sWF8tz78yBtwZ9ddzm19L1MBSBJ3L4G704IGeT1_OEU,784
+hafnia/torch_helpers.py,sha256=ho65B0WIu_SjbaKPRL4wabDNrnVumWH8QSXVH4r7NAY,11605
+hafnia/utils.py,sha256=x2dPsiO0EPP6YnpQX4HBtbl29UN9hV4zHvOnDa9xYTg,6850
+hafnia/data/__init__.py,sha256=o9QjiGbEcNa6r-qDmwwmxPXf-1UitNl5-WxFNcujqsg,111
+hafnia/data/factory.py,sha256=kHkvOtBUbwaShZBGf1kZzocDJBn_1dHHLrQxnUpJmfY,778
+hafnia/dataset/dataset_helpers.py,sha256=HHm4KG_-upIEmxHWeSJO4m8RmrCUxKgseRNs4WD6kUQ,4781
+hafnia/dataset/dataset_names.py,sha256=wdLoH48ph1PjVpUYPEDnAfQYVDCvYGQCyqFmR0-ixDU,2286
+hafnia/dataset/dataset_upload_helper.py,sha256=QdJl92aKm3czpkgXt3G_AgwBjyOV9R4iKn4bjjlPyXA,28007
+hafnia/dataset/hafnia_dataset.py,sha256=XiCHv-ZSzjA4CImpyMevJ2qIJlFcKBLzwNB_HMuQGHo,36841
+hafnia/dataset/license_types.py,sha256=xpanTfui1pGG76mGQ9r6EywcUe1scI_zullEpmCO4GI,2190
+hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=A54N5qEPcM0Yswg26qaOvnr-uj3xUq-KbOInJayzbEI,19269
+hafnia/dataset/dataset_recipe/recipe_transforms.py,sha256=6tY4jG1Bzx15-BXp5RavjtVRWcms3o9svzfUO9-m9Ps,2154
+hafnia/dataset/dataset_recipe/recipe_types.py,sha256=AcrG6gpRt3Igl-CCJ60uyh-WkfI1NCnQ55M8yClSI9Q,5328
+hafnia/dataset/operations/dataset_stats.py,sha256=V2lCmTfIbJ-LeAyu1fZf0_1jSa4MMfP0psuXi77gOYk,11630
+hafnia/dataset/operations/dataset_transformations.py,sha256=JVxfw4fV51eGB7ekYfLB5FKQql6l1whTqRY_BwfX0Us,16593
+hafnia/dataset/operations/table_transformations.py,sha256=6LFQfFRAb1B25cS3QeliRzj26EgVyub5kE-6Sab5Ymo,9250
+hafnia/dataset/primitives/__init__.py,sha256=xFLJ3R7gpbuQnNJuFhuu836L3nicwoaY5aHkqk7Bbr8,927
+hafnia/dataset/primitives/bbox.py,sha256=HXYYy5BLNZwh-bO7aiAWg3z0OurUev8ISa-vYey8b8A,6055
+hafnia/dataset/primitives/bitmask.py,sha256=mq_wchMqGupJDc-a-mJh9uBO_mjHcXpLH49g591doAM,7619
+hafnia/dataset/primitives/classification.py,sha256=ri4lTtS5gBQX13vF07YbeN11rKl1CJaKeTIzCmoT9Iw,2363
+hafnia/dataset/primitives/point.py,sha256=JCRwb-E4sDafodkg6wqyuAS1Yj-yaJbwiD8aB69_Ros,635
+hafnia/dataset/primitives/polygon.py,sha256=vhPrYHv6TqQZMTAyv9r3NV8Hu6YRSSD0srB0wOCIwQ4,4289
+hafnia/dataset/primitives/primitive.py,sha256=7jxcyFADVGf95pjeQHEOqAnR9eucLpxA2h8Blz3ppXI,1253
+hafnia/dataset/primitives/segmentation.py,sha256=jUMjOmYr9j4An3YSCw5CJC1W8ihXAbus3CXaTOpc7Xw,1905
+hafnia/dataset/primitives/utils.py,sha256=3gT1as-xXEj8CamoIuBb9gQwUN9Ae9qnqtqF_uEe0zo,1993
+hafnia/experiment/__init__.py,sha256=OEFE6HqhO5zcTCLZcPcPVjIg7wMFFnvZ1uOtAVhRz7M,85
+hafnia/experiment/hafnia_logger.py,sha256=k8UIK3mXNyxpmfFndTD_u8pFA6TKbjmR-9EJ6JjvY-U,10729
+hafnia/platform/__init__.py,sha256=L_Q7CNpsJ0HMNPy_rLlLK5RhmuCU7IF4BchxKv6amYc,782
+hafnia/platform/builder.py,sha256=kUEuj5-qtL1uk5v2tUvOCREn5yV-G4Fr6F31haIAb5E,5808
+hafnia/platform/dataset_recipe.py,sha256=-scelPECr1eLn5tB_jFm5bJdygw_guktrWfoVquSu2A,3790
+hafnia/platform/datasets.py,sha256=orWdZUwrNk7BTJFJP_8TLhf0CB5PYopZug4u36w08FQ,9018
+hafnia/platform/download.py,sha256=Tzycmb6I6LmwACDHmJmR1zsrOX6OLgYWRbfkCXKEAdQ,6903
+hafnia/platform/experiment.py,sha256=SrEH0nuwwBXf1Iu4diB1BEPqL-TxW3aQkZWBbM1-tY0,1846
+hafnia/platform/trainer_package.py,sha256=w6JC7o-279ujcwtNTbUaQ9AnPcYRPPbD8EACa6XyUHA,2206
+hafnia/visualizations/colors.py,sha256=003eAJVnBal4abaYIIpsrT7erIOIjTUHHYVJ1Tj1CDc,5226
+hafnia/visualizations/image_visualizations.py,sha256=6mPnRAc0dMPZCUCTqnHjgtAhQdVL_QrtyToXUuJjwxQ,7355
+hafnia-0.3.0.dist-info/METADATA,sha256=nP2mgz_AtfOAq2OX5NgLq6lJG3fTLOog8AbKLGWYSyM,19235
+hafnia-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hafnia-0.3.0.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
+hafnia-0.3.0.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
+hafnia-0.3.0.dist-info/RECORD,,
cli/recipe_cmds.py
DELETED
@@ -1,45 +0,0 @@
-from pathlib import Path
-
-import click
-
-import cli.consts as consts
-
-
-@click.group(name="recipe")
-def recipe() -> None:
-    """Hafnia Recipe management commands"""
-    pass
-
-
-@recipe.command(name="create")
-@click.argument("source")
-@click.option(
-    "--output", type=click.Path(writable=True), default="./recipe.zip", show_default=True, help="Output recipe path."
-)
-def create(source: str, output: str) -> None:
-    """Create HRF from local path"""
-
-    from hafnia.utils import archive_dir
-
-    path_output_zip = Path(output)
-    if path_output_zip.suffix != ".zip":
-        raise click.ClickException(consts.ERROR_RECIPE_FILE_FORMAT)
-
-    path_source = Path(source)
-    path_output_zip = archive_dir(path_source, path_output_zip)
-
-
-@recipe.command(name="view")
-@click.option("--path", type=str, default="./recipe.zip", show_default=True, help="Path of recipe.zip.")
-@click.option("--depth-limit", type=int, default=3, help="Limit the depth of the tree view.", show_default=True)
-def view(path: str, depth_limit: int) -> None:
-    """View the content of a recipe zip file."""
-    from hafnia.utils import show_recipe_content
-
-    path_recipe = Path(path)
-    if not path_recipe.exists():
-        raise click.ClickException(
-            f"Recipe file '{path_recipe}' does not exist. Please provide a valid path. "
-            f"To create a recipe, use the 'hafnia recipe create' command."
-        )
-    show_recipe_content(path_recipe, depth_limit=depth_limit)