hafnia 0.2.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__main__.py +16 -3
- cli/config.py +45 -4
- cli/consts.py +1 -1
- cli/dataset_cmds.py +6 -14
- cli/dataset_recipe_cmds.py +78 -0
- cli/experiment_cmds.py +226 -43
- cli/keychain.py +88 -0
- cli/profile_cmds.py +10 -6
- cli/runc_cmds.py +5 -5
- cli/trainer_package_cmds.py +65 -0
- hafnia/__init__.py +2 -0
- hafnia/data/factory.py +1 -2
- hafnia/dataset/dataset_helpers.py +9 -14
- hafnia/dataset/dataset_names.py +10 -5
- hafnia/dataset/dataset_recipe/dataset_recipe.py +165 -67
- hafnia/dataset/dataset_recipe/recipe_transforms.py +48 -4
- hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
- hafnia/dataset/dataset_upload_helper.py +265 -56
- hafnia/dataset/format_conversions/image_classification_from_directory.py +106 -0
- hafnia/dataset/format_conversions/torchvision_datasets.py +281 -0
- hafnia/dataset/hafnia_dataset.py +577 -213
- hafnia/dataset/license_types.py +63 -0
- hafnia/dataset/operations/dataset_stats.py +259 -3
- hafnia/dataset/operations/dataset_transformations.py +332 -7
- hafnia/dataset/operations/table_transformations.py +43 -5
- hafnia/dataset/primitives/__init__.py +8 -0
- hafnia/dataset/primitives/bbox.py +25 -12
- hafnia/dataset/primitives/bitmask.py +26 -14
- hafnia/dataset/primitives/classification.py +16 -8
- hafnia/dataset/primitives/point.py +7 -3
- hafnia/dataset/primitives/polygon.py +16 -9
- hafnia/dataset/primitives/segmentation.py +10 -7
- hafnia/experiment/hafnia_logger.py +111 -8
- hafnia/http.py +16 -2
- hafnia/platform/__init__.py +9 -3
- hafnia/platform/builder.py +12 -10
- hafnia/platform/dataset_recipe.py +104 -0
- hafnia/platform/datasets.py +47 -9
- hafnia/platform/download.py +25 -19
- hafnia/platform/experiment.py +51 -56
- hafnia/platform/trainer_package.py +57 -0
- hafnia/utils.py +81 -13
- hafnia/visualizations/image_visualizations.py +4 -4
- {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/METADATA +40 -34
- hafnia-0.4.0.dist-info/RECORD +56 -0
- cli/recipe_cmds.py +0 -45
- hafnia-0.2.4.dist-info/RECORD +0 -49
- {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/WHEEL +0 -0
- {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/entry_points.txt +0 -0
- {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/licenses/LICENSE +0 -0
hafnia/platform/experiment.py
CHANGED
```diff
@@ -1,73 +1,68 @@
-from
-from typing import Optional
+from typing import Dict, List
 
-from hafnia
-from hafnia.
-from hafnia.utils import archive_dir, get_recipe_path, timed
+from hafnia import http
+from hafnia.utils import pretty_print_list_as_table, timed
 
 
-@timed("
-def
+@timed("Creating experiment.")
+def create_experiment(
+    experiment_name: str,
+    dataset_recipe_id: str,
+    trainer_id: str,
+    exec_cmd: str,
+    environment_id: str,
+    endpoint: str,
+    api_key: str,
+) -> Dict:
     headers = {"Authorization": api_key}
-
-
-
-
-
-
-
-
+    response = http.post(
+        endpoint,
+        headers=headers,
+        data={
+            "name": experiment_name,
+            "trainer": trainer_id,
+            "dataset_recipe": dataset_recipe_id,
+            "command": exec_cmd,
+            "environment": environment_id,
+        },
+    )
+    return response
 
 
-@timed("
-def
-
-
-
-    user_logger.info(f"Recipe created and stored in '{path_recipe}'")
+@timed("Fetching environment info.")
+def get_environments(endpoint: str, api_key: str) -> List[Dict]:
+    headers = {"Authorization": api_key}
+    envs: List[Dict] = http.fetch(endpoint, headers=headers)  # type: ignore[assignment]
+    return envs
 
-
-
-
-    "
-    "
+
+def pretty_print_training_environments(envs: List[Dict]) -> None:
+    ENV_FIELDS = {
+        "Name": "name",
+        "Instance": "instance",
+        "GPU": "gpu",
+        "GPU Count": "gpu_count",
+        "GPU RAM": "vram",
+        "CPU": "cpu",
+        "CPU Count": "cpu_count",
+        "RAM": "ram",
     }
-
-
+    pretty_print_list_as_table(
+        table_title="Available Training Environments",
+        dict_items=envs,
+        column_name_to_key_mapping=ENV_FIELDS,
+    )
 
 
-@timed("Fetching environment info.")
 def get_exp_environment_id(name: str, endpoint: str, api_key: str) -> str:
-
-    env_info = fetch(endpoint, headers=headers)
+    envs = get_environments(endpoint=endpoint, api_key=api_key)
 
-    for env in
+    for env in envs:
         if env["name"] == name:
             return env["id"]
 
-
+    pretty_print_training_environments(envs)
 
+    available_envs = [env["name"] for env in envs]
 
-
-def create_experiment(
-    exp_name: str,
-    dataset_id: str,
-    recipe_id: str,
-    exec_cmd: str,
-    environment_id: str,
-    endpoint: str,
-    api_key: str,
-) -> Optional[str]:
-    headers = {"Authorization": api_key}
-    response = post(
-        endpoint,
-        headers=headers,
-        data={
-            "name": exp_name,
-            "recipe": recipe_id,
-            "dataset": dataset_id,
-            "command": exec_cmd,
-            "environment": environment_id,
-        },
-    )
-    return response["id"]
+    raise ValueError(f"Environment '{name}' not found. Available environments are: {available_envs}")
```
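For orientation, a minimal sketch of how the reworked functions fit together; the base URL, endpoint paths, environment name and IDs below are illustrative placeholders, not values taken from the diff:

```python
# Sketch only: endpoint paths, the environment name and the IDs are placeholders.
from hafnia.platform.experiment import create_experiment, get_exp_environment_id

API_KEY = "<hafnia-api-key>"                   # hypothetical key
BASE_URL = "https://api.mdi.milestonesys.com"  # platform URL shown in the README example

# Resolve an environment name to its ID; on an unknown name the new code prints the
# available environments as a table and raises ValueError.
environment_id = get_exp_environment_id(
    name="<environment-name>", endpoint=f"{BASE_URL}/environments", api_key=API_KEY
)

# The payload now references a trainer package and a dataset recipe instead of the old
# recipe/dataset fields, and the full response dict is returned (not just an ID).
experiment = create_experiment(
    experiment_name="mnist-baseline",
    dataset_recipe_id="<dataset-recipe-id>",
    trainer_id="<trainer-package-id>",
    exec_cmd="python scripts/train.py",
    environment_id=environment_id,
    endpoint=f"{BASE_URL}/experiments",
    api_key=API_KEY,
)
print(experiment)
```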
hafnia/platform/trainer_package.py
ADDED
```diff
@@ -0,0 +1,57 @@
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from hafnia import http
+from hafnia.log import user_logger
+from hafnia.utils import archive_dir, get_trainer_package_path, pretty_print_list_as_table, timed
+
+
+@timed("Uploading trainer package.")
+def create_trainer_package(source_dir: Path, endpoint: str, api_key: str) -> str:
+    source_dir = source_dir.resolve()  # Ensure the path is absolute to handle '.' paths are given an appropriate name.
+    path_trainer = get_trainer_package_path(trainer_name=source_dir.name)
+    zip_path = archive_dir(source_dir, output_path=path_trainer)
+    user_logger.info(f"Trainer package created and stored in '{path_trainer}'")
+
+    headers = {"Authorization": api_key, "accept": "application/json"}
+    data = {
+        "name": path_trainer.name,
+        "description": "Trainer package created by Hafnia CLI",
+        "file": (zip_path.name, Path(zip_path).read_bytes()),
+    }
+    response = http.post(endpoint, headers=headers, data=data, multipart=True)
+    return response["id"]
+
+
+@timed("Get trainer package.")
+def get_trainer_package_by_id(id: str, endpoint: str, api_key: str) -> Dict:
+    full_url = f"{endpoint}/{id}"
+    headers = {"Authorization": api_key}
+    response: Dict = http.fetch(full_url, headers=headers)  # type: ignore[assignment]
+    return response
+
+
+@timed("Get trainer packages")
+def get_trainer_packages(endpoint: str, api_key: str) -> List[Dict]:
+    headers = {"Authorization": api_key}
+    trainers: List[Dict] = http.fetch(endpoint, headers=headers)  # type: ignore[assignment]
+    return trainers
+
+
+def pretty_print_trainer_packages(trainers: List[Dict[str, str]], limit: Optional[int]) -> None:
+    # Sort trainer packages to have the most recent first
+    trainers = sorted(trainers, key=lambda x: x["created_at"], reverse=True)
+    if limit is not None:
+        trainers = trainers[:limit]
+
+    mapping = {
+        "ID": "id",
+        "Name": "name",
+        "Description": "description",
+        "Created At": "created_at",
+    }
+    pretty_print_list_as_table(
+        table_title="Available Trainer Packages (most recent first)",
+        dict_items=trainers,
+        column_name_to_key_mapping=mapping,
+    )
```
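A short usage sketch for the new module; the endpoint path and the limit are assumptions for illustration only:

```python
# Sketch only: the endpoint path is a placeholder.
from pathlib import Path

from hafnia.platform.trainer_package import (
    create_trainer_package,
    get_trainer_packages,
    pretty_print_trainer_packages,
)

API_KEY = "<hafnia-api-key>"                                    # hypothetical key
ENDPOINT = "https://api.mdi.milestonesys.com/trainer-packages"  # assumed endpoint path

# Zip the local trainer project (honouring '.hafniaignore') and upload it as a trainer package.
trainer_id = create_trainer_package(Path("../trainer-classification"), endpoint=ENDPOINT, api_key=API_KEY)

# List existing trainer packages as a table, most recent first, capped at 10 rows.
packages = get_trainer_packages(endpoint=ENDPOINT, api_key=API_KEY)
pretty_print_trainer_packages(packages, limit=10)
```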
hafnia/utils.py
CHANGED
```diff
@@ -5,10 +5,12 @@ import zipfile
 from datetime import datetime
 from functools import wraps
 from pathlib import Path
-from typing import Any, Callable, Iterator, Optional
+from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional
 from zipfile import ZipFile
 
+import more_itertools
 import pathspec
+import rich
 import seedir
 from rich import print as rprint
 
@@ -16,19 +18,21 @@ from hafnia.log import sys_logger, user_logger
 
 PATH_DATA = Path("./.data")
 PATH_DATASETS = PATH_DATA / "datasets"
-
+PATH_DATASET_RECIPES = PATH_DATA / "dataset_recipes"
+PATH_TRAINER_PACKAGES = PATH_DATA / "trainers"
 FILENAME_HAFNIAIGNORE = ".hafniaignore"
 DEFAULT_IGNORE_SPECIFICATION = [
     "*.jpg",
     "*.png",
     "*.py[cod]",
     "*_cache/",
+    "**.egg-info/",
     ".data",
     ".git",
     ".venv",
     ".vscode",
     "__pycache__",
-    "
+    "trainer.zip",
     "tests",
     "wandb",
 ]
@@ -49,6 +53,7 @@ def timed(label: str):
                 return func(*args, **kwargs)
             except Exception as e:
                 sys_logger.error(f"{operation_label} failed: {e}")
+                raise  # Re-raise the exception after logging
             finally:
                 elapsed = time.perf_counter() - tik
                 sys_logger.debug(f"{operation_label} took {elapsed:.2f} seconds.")
@@ -58,19 +63,31 @@ def timed(label: str):
     return decorator
 
 
+def get_path_hafnia_cache() -> Path:
+    return Path.home() / "hafnia"
+
+
+def get_path_torchvision_downloads() -> Path:
+    return get_path_hafnia_cache() / "torchvision_downloads"
+
+
+def get_path_hafnia_conversions() -> Path:
+    return get_path_hafnia_cache() / "hafnia_conversions"
+
+
 def now_as_str() -> str:
     """Get the current date and time as a string."""
     return datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
 
 
-def
+def get_trainer_package_path(trainer_name: str) -> Path:
     now = now_as_str()
-
-    return
+    path_trainer = PATH_TRAINER_PACKAGES / f"{trainer_name}_{now}.zip"
+    return path_trainer
 
 
-def
-    path_ignore_file = path_ignore_file or
+def filter_trainer_package_files(trainer_path: Path, path_ignore_file: Optional[Path] = None) -> Iterator:
+    path_ignore_file = path_ignore_file or trainer_path / FILENAME_HAFNIAIGNORE
     if not path_ignore_file.exists():
         ignore_specification_lines = DEFAULT_IGNORE_SPECIFICATION
         user_logger.info(
@@ -81,7 +98,7 @@ def filter_recipe_files(recipe_path: Path, path_ignore_file: Optional[Path] = No
     else:
         ignore_specification_lines = Path(path_ignore_file).read_text().splitlines()
     ignore_specification = pathspec.GitIgnoreSpec.from_lines(ignore_specification_lines)
-    include_files = ignore_specification.match_tree(
+    include_files = ignore_specification.match_tree(trainer_path, negate=True)
     return include_files
 
 
@@ -91,17 +108,17 @@ def archive_dir(
     output_path: Optional[Path] = None,
     path_ignore_file: Optional[Path] = None,
 ) -> Path:
-    recipe_zip_path = output_path or recipe_path / "
+    recipe_zip_path = output_path or recipe_path / "trainer.zip"
     assert recipe_zip_path.suffix == ".zip", "Output path must be a zip file"
     recipe_zip_path.parent.mkdir(parents=True, exist_ok=True)
 
     user_logger.info(f" Creating zip archive of '{recipe_path}'")
-    include_files =
+    include_files = filter_trainer_package_files(recipe_path, path_ignore_file)
     with ZipFile(recipe_zip_path, "w", compression=zipfile.ZIP_STORED, allowZip64=True) as zip_ref:
         for str_filepath in include_files:
             full_path = recipe_path / str_filepath
             zip_ref.write(full_path, str_filepath)
-
+    show_trainer_package_content(recipe_zip_path)
 
     return recipe_zip_path
 
@@ -115,7 +132,7 @@ def size_human_readable(size_bytes: int, suffix="B") -> str:
     return f"{size_value:.1f}Yi{suffix}"
 
 
-def
+def show_trainer_package_content(recipe_path: Path, style: str = "emoji", depth_limit: int = 3) -> None:
     def scan(parent: seedir.FakeDir, path: zipfile.Path, depth: int = 0) -> None:
         if depth >= depth_limit:
             return
@@ -133,6 +150,16 @@ def show_recipe_content(recipe_path: Path, style: str = "emoji", depth_limit: in
     user_logger.info(f"Recipe size: {size_human_readable(os.path.getsize(recipe_path))}. Max size 800 MiB")
 
 
+def get_dataset_path_in_hafnia_cloud() -> Path:
+    if not is_hafnia_cloud_job():
+        user_logger.error(
+            f"The function '{get_dataset_path_in_hafnia_cloud.__name__}' should only be called, when "
+            "running in HAFNIA cloud environment (HAFNIA_CLOUD-environment variable have been defined)"
+        )
+
+    return Path(os.getenv("MDI_DATASET_DIR", "/opt/ml/input/data/training"))
+
+
 def is_hafnia_cloud_job() -> bool:
     """Check if the current job is running in HAFNIA cloud environment."""
     return os.getenv("HAFNIA_CLOUD", "false").lower() == "true"
@@ -154,3 +181,44 @@ def snake_to_pascal_case(name: str) -> str:
 
 def hash_from_string(s: str) -> str:
     return hashlib.md5(s.encode("utf-8")).hexdigest()
+
+
+def pretty_print_list_as_table(
+    table_title: str,
+    dict_items: List[Dict],
+    column_name_to_key_mapping: Dict,
+) -> None:
+    """
+    Pretty print a list of dictionary elements as a table.
+    """
+
+    table = rich.table.Table(title=table_title)
+    for i_dict, dictionary in enumerate(dict_items):
+        if i_dict == 0:
+            for column_name, _ in column_name_to_key_mapping.items():
+                table.add_column(column_name, justify="left", style="cyan", no_wrap=True)
+        row = [str(dictionary.get(field, "")) for field in column_name_to_key_mapping.values()]
+        table.add_row(*row)
+
+    rich.print(table)
+
+
+def is_hafnia_configured() -> bool:
+    """
+    Check if Hafnia is configured by verifying if the API key is set.
+    """
+    from cli.config import Config
+
+    return Config().is_configured()
+
+
+def remove_duplicates_preserve_order(seq: Iterable) -> List:
+    """
+    Remove duplicates from a list while preserving the order of elements.
+    """
+    return list(more_itertools.unique_everseen(seq))
+
+
+def is_image_file(file_path: Path) -> bool:
+    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif")
+    return file_path.suffix.lower() in image_extensions
```
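Both `pretty_print_training_environments` and `pretty_print_trainer_packages` are thin wrappers around the new `pretty_print_list_as_table` helper. A self-contained sketch of the call pattern with made-up rows:

```python
from hafnia.utils import pretty_print_list_as_table

# Made-up example rows; the mapping values are keys looked up in each dict and
# missing keys simply render as empty cells.
environments = [
    {"name": "small-gpu", "gpu": "T4", "gpu_count": 1, "ram": "16 GiB"},
    {"name": "multi-gpu", "gpu": "A10G", "gpu_count": 4, "ram": "192 GiB"},
]

pretty_print_list_as_table(
    table_title="Example environments",
    dict_items=environments,
    column_name_to_key_mapping={"Name": "name", "GPU": "gpu", "GPU Count": "gpu_count", "RAM": "ram"},
)
```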
hafnia/visualizations/image_visualizations.py
CHANGED
```diff
@@ -102,10 +102,10 @@ def resize_width_by_padding(img0: np.ndarray, new_width: int) -> np.ndarray:
     return img0_padded
 
 
-def append_text_below_frame(frame: np.ndarray, text: str) -> np.ndarray:
-    font_size_px = int(frame.shape[0] *
+def append_text_below_frame(frame: np.ndarray, text: str, text_size_ratio: float = 0.1) -> np.ndarray:
+    font_size_px = int(frame.shape[0] * text_size_ratio)  # 10% of the frame height
     font_size_px = max(font_size_px, 7)  # Ensure a minimum font size
-    font_size_px = min(font_size_px,
+    font_size_px = min(font_size_px, 25)  # Ensure a maximum font size
 
     text_region = create_text_img(text, font_size_px=font_size_px)
     frame_with_text = concatenate_below_resize_by_padding(frame, text_region)
@@ -193,7 +193,7 @@ def save_dataset_sample_set_visualizations(
         image = draw_annotations(image, annotations, draw_settings=draw_settings)
 
         pil_image = Image.fromarray(image)
-        path_image = path_output_folder / Path(sample.
+        path_image = path_output_folder / Path(sample.file_path).name
         pil_image.save(path_image)
         paths.append(path_image)
 
```
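`append_text_below_frame` now exposes the caption height as `text_size_ratio` (defaulting to the previous 10% behaviour, still clamped to the 7-25 px range enforced inside the function). A small sketch of calling it with a custom ratio, using a dummy numpy frame:

```python
import numpy as np

from hafnia.visualizations.image_visualizations import append_text_below_frame

# Any uint8 RGB image works; a blank 480x640 frame is used here for illustration.
frame = np.zeros((480, 640, 3), dtype=np.uint8)

# Reserve roughly 5% of the frame height for the caption instead of the default 10%.
annotated = append_text_below_frame(frame, "epoch 3 / step 100", text_size_ratio=0.05)
print(annotated.shape)  # taller than the input because the caption strip is appended below
```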
{hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/METADATA
CHANGED
````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.
+Version: 0.4.0
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -9,6 +9,9 @@ Requires-Dist: boto3>=1.35.91
 Requires-Dist: click>=8.1.8
 Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
+Requires-Dist: keyring>=25.6.0
+Requires-Dist: mcp==1.16.0
+Requires-Dist: mlflow>=3.4.0
 Requires-Dist: more-itertools>=10.7.0
 Requires-Dist: opencv-python-headless>=4.11.0.86
 Requires-Dist: pathspec>=0.12.1
@@ -19,20 +22,20 @@ Requires-Dist: pycocotools>=2.0.10
 Requires-Dist: pydantic>=2.10.4
 Requires-Dist: rich>=13.9.4
 Requires-Dist: s5cmd>=0.2.0
+Requires-Dist: sagemaker-mlflow>=0.1.0
 Requires-Dist: seedir>=0.5.0
-Requires-Dist: tqdm>=4.67.1
 Requires-Dist: xxhash>=3.5.0
 Description-Content-Type: text/markdown
 
 # Hafnia
 
-The `hafnia` python
+The `hafnia` python sdk and cli is a collection of tools to create and run model trainer packages on
 the [Hafnia Platform](https://hafnia.milestonesys.com/).
 
 The package includes the following interfaces:
 
 - `cli`: A Command Line Interface (CLI) to 1) configure/connect to Hafnia's [Training-aaS](https://hafnia.readme.io/docs/training-as-a-service) and 2) create and
-launch
+launch trainer packages.
 - `hafnia`: A python package including `HafniaDataset` to manage datasets and `HafniaLogger` to do
 experiment tracking.
 
@@ -42,19 +45,19 @@ experiment tracking.
 and *hidden* datasets. Hidden datasets refers to datasets that can be used for
 training, but are not available for download or direct access.
 
-This is a key
+This is a key for the Hafnia platform, as a hidden dataset ensures data
 privacy, and allow models to be trained compliantly and ethically by third parties (you).
 
 The `script2model` approach is a Training-aaS concept, where you package your custom training
-script as a *
+project or script as a *trainer package* and use the package to train models on the hidden datasets.
 
-To support local development of a
+To support local development of a trainer package, we have introduced a **sample dataset**
 for each dataset available in the Hafnia [data library](https://hafnia.milestonesys.com/training-aas/datasets). The sample dataset is a small
-and anonymized subset of the full dataset and available for download.
+and an anonymized subset of the full dataset and available for download.
 
 With the sample dataset, you can seamlessly switch between local development and Training-aaS.
-Locally, you can create, validate and debug your
-launched with Training-aaS, where the
+Locally, you can create, validate and debug your trainer package. The trainer package is then
+launched with Training-aaS, where the package runs on the full dataset and can be scaled to run on
 multiple GPUs and instances if needed.
 
 ## Getting started: Configuration
@@ -78,6 +81,7 @@ Copy the key and save it for later use.
    Hafnia API Key: # Pass your HAFNIA API key
    Hafnia Platform URL [https://api.mdi.milestonesys.com]: # Press [Enter]
    ```
+
 1. Download `mnist` from terminal to verify that your configuration is working.
 
    ```bash
@@ -89,7 +93,7 @@ With Hafnia configured on your local machine, it is now possible to download
 and explore the dataset sample with a python script:
 
 ```python
-from hafnia.data import
+from hafnia.data import get_dataset_path
 from hafnia.dataset.hafnia_dataset import HafniaDataset
 
 # To download the sample dataset use:
@@ -122,19 +126,19 @@ midwest-vehicle-detection
 You can interact with data as you want, but we also provide `HafniaDataset`
 for loading/saving, managing and interacting with the dataset.
 
-We recommend
-
+We recommend the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
+for a short introduction on the `HafniaDataset`.
 
 Below is a short introduction to the `HafniaDataset` class.
 
 ```python
 from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
 
-# Load dataset
+# Load dataset from path
 dataset = HafniaDataset.read_from_path(path_dataset)
 
-#
-
+# Or get dataset directly by name
+dataset = HafniaDataset.from_name("midwest-vehicle-detection")
 
 # Print dataset information
 dataset.print_stats()
@@ -199,6 +203,8 @@ DatasetInfo(
         'duration_average': 120.0,
         ...
     }
+    "format_version": "0.0.2",
+    "updated_at": "2025-09-24T21:50:20.231263"
 )
 ```
 
@@ -238,7 +244,7 @@ Sample(
     height=1080,
     width=1920,
     split='train',
-
+    tags=["sample"],
     collection_index=None,
     collection_id=None,
     remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
@@ -302,10 +308,10 @@ Sample(
 )
 ```
 
-To learn more,
+To learn more, we recommend the `HafniaDataset` example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).
 
 ### Dataset Locally vs. Training-aaS
-An important feature of `
+An important feature of `HafniaDataset.from_name` is that it will return the full dataset
 when loaded with Training-aaS on the Hafnia platform.
 
 This enables seamlessly switching between running/validating a training script
@@ -316,7 +322,7 @@ Available datasets with corresponding sample datasets can be found in [data libr
 
 
 ## Getting started: Experiment Tracking with HafniaLogger
-The `HafniaLogger` is an important part of the
+The `HafniaLogger` is an important part of the trainer and enables you to track, log and
 reproduce your experiments.
 
 When integrated into your training script, the `HafniaLogger` is responsible for collecting:
@@ -356,7 +362,7 @@ logger.log_scalar("validation/loss", value=0.1, step=100)
 logger.log_metric("validation/accuracy", value=0.95, step=100)
 ```
 
-
+The tracker behaves differently when running locally or in the cloud.
 Locally, experiment data is stored in a local folder `.data/experiments/{DATE_TIME}`.
 
 In the cloud, the experiment data will be available in the Hafnia platform under
@@ -380,7 +386,7 @@ and datasets available in the data library.
 
 ```python
 # Load Hugging Face dataset
-dataset_splits =
+dataset_splits = HafniaDataset.from_name("midwest-vehicle-detection")
 
 # Define transforms
 train_transforms = v2.Compose(
@@ -422,25 +428,25 @@ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=
 
 
 ## Example: Training-aaS
-By combining logging and dataset loading, we can now construct our model
+By combining logging and dataset loading, we can now construct our model trainer package.
 
-To demonstrate this, we have provided a
-[
+To demonstrate this, we have provided a trainer package project that serves as a template for creating and structuring trainers. The example repo is called
+[trainer-classification](https://github.com/milestone-hafnia/trainer-classification)
 
-The project also contains additional information on how to structure your
-the
+The project also contains additional information on how to structure your trainer package, use the `HafniaLogger`, loading a dataset and different approach for launching
+the trainer on the Hafnia platform.
 
 
-## Create, Build and Run `
-In order to test
+## Create, Build and Run `trainer.zip` locally
+In order to test trainer package compatibility with Hafnia cloud use the following command to build and
 start the job locally.
 
 ```bash
-# Create '
-hafnia
-
-# Build the docker image locally from a '
-hafnia runc build-local
+# Create 'trainer.zip' in the root folder of your training trainer project '../trainer/classification'
+hafnia trainer create-zip ../trainer-classification
+
+# Build the docker image locally from a 'trainer.zip' file
+hafnia runc build-local trainer.zip
 
 # Execute the docker image locally with a desired dataset
 hafnia runc launch-local --dataset mnist "python scripts/train.py"
````
hafnia-0.4.0.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,56 @@
+cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cli/__main__.py,sha256=1IOe1Ewd3QMeuzUMfoWv663_jQpd7_pTn_R1sDdEmbI,1747
+cli/config.py,sha256=xCLdgqBqFN3EInzJ1AO5Nj1e35jOjcc_jaukaodrrMc,7681
+cli/consts.py,sha256=uCpYX44NCu_Zvte0QwChunxOo-qqhcaJRSYDAIsoJ8A,972
+cli/dataset_cmds.py,sha256=KOPYdAJ1SyzTMlEi_J-70vSGIJ5acHPGIPOCKVIdlQ4,1418
+cli/dataset_recipe_cmds.py,sha256=O1Pd-VvFFsmZ-nE1Mh6sCC9x45ztCJEpPCZK93qz_HQ,2887
+cli/experiment_cmds.py,sha256=vc7J9JJog4EvRdgkpoMvr0kceb0QF_Rk7mn6H2KNvFE,7963
+cli/keychain.py,sha256=bNyjjULVQu7kV338wUC65UvbCwmSGOmEjKWPLIQjT0k,2555
+cli/profile_cmds.py,sha256=3OwpomV6Wb21ZG43xrwhvoB5S4IN1IqmVCxs-MItAho,3463
+cli/runc_cmds.py,sha256=qV-LOwbFlbegH8XSELOo4h_2ajFAzdB5LtuVKKoRq8Y,5009
+cli/trainer_package_cmds.py,sha256=nL7yTtR41BKzo1DWZdBdpRXGPZZIbmBe0byHAi6_n2c,2312
+hafnia/__init__.py,sha256=5_DWVYRRh3ZWUo5QSRpl3tKCDDoX7YNiB14o1aaBhfo,179
+hafnia/http.py,sha256=bjXbV_3uKbBdudqMdYtnsMttUAsNRMsetYZ4F2xXlEs,3635
+hafnia/log.py,sha256=sWF8tz78yBtwZ9ddzm19L1MBSBJ3L4G704IGeT1_OEU,784
+hafnia/torch_helpers.py,sha256=ho65B0WIu_SjbaKPRL4wabDNrnVumWH8QSXVH4r7NAY,11605
+hafnia/utils.py,sha256=ieNNL8IK3IqDsRf7NJnF_NU3eMLi8d3tSQzOgW5sVOw,7319
+hafnia/data/__init__.py,sha256=o9QjiGbEcNa6r-qDmwwmxPXf-1UitNl5-WxFNcujqsg,111
+hafnia/data/factory.py,sha256=kHkvOtBUbwaShZBGf1kZzocDJBn_1dHHLrQxnUpJmfY,778
+hafnia/dataset/dataset_helpers.py,sha256=0GbS6PfaiYBulDKRCbd0miN5LHaUIp-XzGt_wZay8xs,5044
+hafnia/dataset/dataset_names.py,sha256=ZFdxsc-R6yOusT0kyh0GppjZbrDycp2Pn4WbIR060Ss,2325
+hafnia/dataset/dataset_upload_helper.py,sha256=v7EwsatnD7HF3MxbGU7q7HNgRISqguKcx7qYZ56Ok-0,29911
+hafnia/dataset/hafnia_dataset.py,sha256=BvJ63luf06kleR-lkqfVgU5p0ZI_JxHBbBY0FdShYKI,41864
+hafnia/dataset/license_types.py,sha256=xpanTfui1pGG76mGQ9r6EywcUe1scI_zullEpmCO4GI,2190
+hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=mwNL7VMhbEFHC2p6L_OO7os7bVVb05YFKZxvzQ_BySk,19059
+hafnia/dataset/dataset_recipe/recipe_transforms.py,sha256=j3Oiytt3LI2rCaJid7Y44oT9MXvlZVqvZanngMebIWg,3088
+hafnia/dataset/dataset_recipe/recipe_types.py,sha256=AcrG6gpRt3Igl-CCJ60uyh-WkfI1NCnQ55M8yClSI9Q,5328
+hafnia/dataset/format_conversions/image_classification_from_directory.py,sha256=PVjvwjxMjvCLXSBzg0W-XRDRsZP4XkazPFK6I-S6CiE,4272
+hafnia/dataset/format_conversions/torchvision_datasets.py,sha256=akDB8JpeRGJgAVgZNWgg_a4mw3uJTbYNdcYy8eadz9A,11612
+hafnia/dataset/operations/dataset_stats.py,sha256=AKi17FfcnoSQLZzs64EZlIElUQ16PO2cCICkVHWj1kI,11565
+hafnia/dataset/operations/dataset_transformations.py,sha256=fHN5-0FPZZ_SeaJKcAHPsQPhqOlEQCX-9B54J8tiBwk,16868
+hafnia/dataset/operations/table_transformations.py,sha256=BoUmm0TDz7Iuw7942nzYjVLHGQAVDZmzI3CoCV0chR8,9305
+hafnia/dataset/primitives/__init__.py,sha256=xFLJ3R7gpbuQnNJuFhuu836L3nicwoaY5aHkqk7Bbr8,927
+hafnia/dataset/primitives/bbox.py,sha256=WMXnU5ISTkqkHapOxQlovU1PD9Ap_2WaSFE6fSEXOGk,6409
+hafnia/dataset/primitives/bitmask.py,sha256=zldW2SrJE-8I9qEpbUMTua_ARqg5OeUljJDGVbB4QJo,8045
+hafnia/dataset/primitives/classification.py,sha256=cg_ndGy5Pt1rkqV1oMN7oUZ6Y2shk-vwCod7uDMS0e4,2637
+hafnia/dataset/primitives/point.py,sha256=VzCNLTQOPA6wyJVVKddZHGhltkep6V_B7pg5pk7rd9Y,879
+hafnia/dataset/primitives/polygon.py,sha256=Y3c8lc54qEXB9K1netVz3BzbjMDGVukyFfxNb-7LKto,4518
+hafnia/dataset/primitives/primitive.py,sha256=7jxcyFADVGf95pjeQHEOqAnR9eucLpxA2h8Blz3ppXI,1253
+hafnia/dataset/primitives/segmentation.py,sha256=3kSEcz56xz149Y7WpJh-rYzdO8Oe-n3JvRIOXRgosgw,1970
+hafnia/dataset/primitives/utils.py,sha256=3gT1as-xXEj8CamoIuBb9gQwUN9Ae9qnqtqF_uEe0zo,1993
+hafnia/experiment/__init__.py,sha256=OEFE6HqhO5zcTCLZcPcPVjIg7wMFFnvZ1uOtAVhRz7M,85
+hafnia/experiment/hafnia_logger.py,sha256=Qia8orPu7PZk12geB97alJaq6AjxYedDmKri_tmyldo,10408
+hafnia/platform/__init__.py,sha256=L_Q7CNpsJ0HMNPy_rLlLK5RhmuCU7IF4BchxKv6amYc,782
+hafnia/platform/builder.py,sha256=kUEuj5-qtL1uk5v2tUvOCREn5yV-G4Fr6F31haIAb5E,5808
+hafnia/platform/dataset_recipe.py,sha256=ybfSSHVPG0eFUbzg_1McezPSOtMoDZEg7l6rFYndtb4,3857
+hafnia/platform/datasets.py,sha256=8liAntg1GCBqqS2l80_1jaWchjBhc2Y4aVHfEEo4gsU,9036
+hafnia/platform/download.py,sha256=IpxQNUABHHXTY42ET0K7qyVGlZZddYJRVRImuLWE6Ic,7337
+hafnia/platform/experiment.py,sha256=SrEH0nuwwBXf1Iu4diB1BEPqL-TxW3aQkZWBbM1-tY0,1846
+hafnia/platform/trainer_package.py,sha256=w6JC7o-279ujcwtNTbUaQ9AnPcYRPPbD8EACa6XyUHA,2206
+hafnia/visualizations/colors.py,sha256=003eAJVnBal4abaYIIpsrT7erIOIjTUHHYVJ1Tj1CDc,5226
+hafnia/visualizations/image_visualizations.py,sha256=wDWtJ7_5ACb83QtyCCslHJGK7NRWxHpRPbdop8ELE4o,7355
+hafnia-0.4.0.dist-info/METADATA,sha256=fyGToq0jzW7lOf21Cr2o0pyU4eVAimYbvfxzPjB4yj0,19236
+hafnia-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hafnia-0.4.0.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
+hafnia-0.4.0.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
+hafnia-0.4.0.dist-info/RECORD,,
```