hafnia 0.2.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. cli/__main__.py +16 -3
  2. cli/config.py +45 -4
  3. cli/consts.py +1 -1
  4. cli/dataset_cmds.py +6 -14
  5. cli/dataset_recipe_cmds.py +78 -0
  6. cli/experiment_cmds.py +226 -43
  7. cli/keychain.py +88 -0
  8. cli/profile_cmds.py +10 -6
  9. cli/runc_cmds.py +5 -5
  10. cli/trainer_package_cmds.py +65 -0
  11. hafnia/__init__.py +2 -0
  12. hafnia/data/factory.py +1 -2
  13. hafnia/dataset/dataset_helpers.py +9 -14
  14. hafnia/dataset/dataset_names.py +10 -5
  15. hafnia/dataset/dataset_recipe/dataset_recipe.py +165 -67
  16. hafnia/dataset/dataset_recipe/recipe_transforms.py +48 -4
  17. hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
  18. hafnia/dataset/dataset_upload_helper.py +265 -56
  19. hafnia/dataset/format_conversions/image_classification_from_directory.py +106 -0
  20. hafnia/dataset/format_conversions/torchvision_datasets.py +281 -0
  21. hafnia/dataset/hafnia_dataset.py +577 -213
  22. hafnia/dataset/license_types.py +63 -0
  23. hafnia/dataset/operations/dataset_stats.py +259 -3
  24. hafnia/dataset/operations/dataset_transformations.py +332 -7
  25. hafnia/dataset/operations/table_transformations.py +43 -5
  26. hafnia/dataset/primitives/__init__.py +8 -0
  27. hafnia/dataset/primitives/bbox.py +25 -12
  28. hafnia/dataset/primitives/bitmask.py +26 -14
  29. hafnia/dataset/primitives/classification.py +16 -8
  30. hafnia/dataset/primitives/point.py +7 -3
  31. hafnia/dataset/primitives/polygon.py +16 -9
  32. hafnia/dataset/primitives/segmentation.py +10 -7
  33. hafnia/experiment/hafnia_logger.py +111 -8
  34. hafnia/http.py +16 -2
  35. hafnia/platform/__init__.py +9 -3
  36. hafnia/platform/builder.py +12 -10
  37. hafnia/platform/dataset_recipe.py +104 -0
  38. hafnia/platform/datasets.py +47 -9
  39. hafnia/platform/download.py +25 -19
  40. hafnia/platform/experiment.py +51 -56
  41. hafnia/platform/trainer_package.py +57 -0
  42. hafnia/utils.py +81 -13
  43. hafnia/visualizations/image_visualizations.py +4 -4
  44. {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/METADATA +40 -34
  45. hafnia-0.4.0.dist-info/RECORD +56 -0
  46. cli/recipe_cmds.py +0 -45
  47. hafnia-0.2.4.dist-info/RECORD +0 -49
  48. {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/WHEEL +0 -0
  49. {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/entry_points.txt +0 -0
  50. {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/licenses/LICENSE +0 -0
hafnia/platform/experiment.py CHANGED
@@ -1,73 +1,68 @@
- from pathlib import Path
- from typing import Optional
+ from typing import Dict, List

- from hafnia.http import fetch, post
- from hafnia.log import user_logger
- from hafnia.utils import archive_dir, get_recipe_path, timed
+ from hafnia import http
+ from hafnia.utils import pretty_print_list_as_table, timed


- @timed("Fetching dataset info.")
- def get_dataset_id(dataset_name: str, endpoint: str, api_key: str) -> str:
+ @timed("Creating experiment.")
+ def create_experiment(
+     experiment_name: str,
+     dataset_recipe_id: str,
+     trainer_id: str,
+     exec_cmd: str,
+     environment_id: str,
+     endpoint: str,
+     api_key: str,
+ ) -> Dict:
      headers = {"Authorization": api_key}
-     full_url = f"{endpoint}?name__iexact={dataset_name}"
-     dataset_info = fetch(full_url, headers=headers)
-     if not dataset_info:
-         raise ValueError(f"Dataset '{dataset_name}' was not found in the dataset library.")
-     try:
-         return dataset_info[0]["id"]
-     except (IndexError, KeyError) as e:
-         raise ValueError("Dataset information is missing or invalid") from e
+     response = http.post(
+         endpoint,
+         headers=headers,
+         data={
+             "name": experiment_name,
+             "trainer": trainer_id,
+             "dataset_recipe": dataset_recipe_id,
+             "command": exec_cmd,
+             "environment": environment_id,
+         },
+     )
+     return response


- @timed("Uploading recipe.")
- def create_recipe(source_dir: Path, endpoint: str, api_key: str) -> str:
-     source_dir = source_dir.resolve() # Ensure the path is absolute to handle '.' paths are given an appropriate name.
-     path_recipe = get_recipe_path(recipe_name=source_dir.name)
-     zip_path = archive_dir(source_dir, output_path=path_recipe)
-     user_logger.info(f"Recipe created and stored in '{path_recipe}'")
+ @timed("Fetching environment info.")
+ def get_environments(endpoint: str, api_key: str) -> List[Dict]:
+     headers = {"Authorization": api_key}
+     envs: List[Dict] = http.fetch(endpoint, headers=headers) # type: ignore[assignment]
+     return envs

-     headers = {"Authorization": api_key, "accept": "application/json"}
-     data = {
-         "name": path_recipe.name,
-         "description": "Recipe created by Hafnia CLI",
-         "file": (zip_path.name, Path(zip_path).read_bytes()),
+
+ def pretty_print_training_environments(envs: List[Dict]) -> None:
+     ENV_FIELDS = {
+         "Name": "name",
+         "Instance": "instance",
+         "GPU": "gpu",
+         "GPU Count": "gpu_count",
+         "GPU RAM": "vram",
+         "CPU": "cpu",
+         "CPU Count": "cpu_count",
+         "RAM": "ram",
      }
-     response = post(endpoint, headers=headers, data=data, multipart=True)
-     return response["id"]
+     pretty_print_list_as_table(
+         table_title="Available Training Environments",
+         dict_items=envs,
+         column_name_to_key_mapping=ENV_FIELDS,
+     )


- @timed("Fetching environment info.")
  def get_exp_environment_id(name: str, endpoint: str, api_key: str) -> str:
-     headers = {"Authorization": api_key}
-     env_info = fetch(endpoint, headers=headers)
+     envs = get_environments(endpoint=endpoint, api_key=api_key)

-     for env in env_info:
+     for env in envs:
          if env["name"] == name:
              return env["id"]

-     raise ValueError(f"Environment '{name}' not found")
+     pretty_print_training_environments(envs)

+     available_envs = [env["name"] for env in envs]

- @timed("Creating exepriment.")
- def create_experiment(
-     exp_name: str,
-     dataset_id: str,
-     recipe_id: str,
-     exec_cmd: str,
-     environment_id: str,
-     endpoint: str,
-     api_key: str,
- ) -> Optional[str]:
-     headers = {"Authorization": api_key}
-     response = post(
-         endpoint,
-         headers=headers,
-         data={
-             "name": exp_name,
-             "recipe": recipe_id,
-             "dataset": dataset_id,
-             "command": exec_cmd,
-             "environment": environment_id,
-         },
-     )
-     return response["id"]
+     raise ValueError(f"Environment '{name}' not found. Available environments are: {available_envs}")
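A rough usage sketch of the new experiment helpers above; the module path is assumed from the file layout shown in the file list, and the endpoint URLs, API key, and IDs are placeholders rather than values taken from this diff:

```python
# Hypothetical sketch; endpoints, the API key, and the IDs are placeholders.
from hafnia.platform.experiment import create_experiment, get_exp_environment_id

API_KEY = "YOUR_API_KEY"  # placeholder
ENV_ENDPOINT = "https://api.example.com/environments"  # placeholder
EXP_ENDPOINT = "https://api.example.com/experiments"   # placeholder

# Resolve an environment name to its id (prints the available table if not found)
environment_id = get_exp_environment_id("gpu-small", endpoint=ENV_ENDPOINT, api_key=API_KEY)

experiment = create_experiment(
    experiment_name="my-experiment",
    dataset_recipe_id="<dataset-recipe-id>",   # placeholder id
    trainer_id="<trainer-package-id>",         # placeholder id
    exec_cmd="python scripts/train.py",
    environment_id=environment_id,
    endpoint=EXP_ENDPOINT,
    api_key=API_KEY,
)
print(experiment)  # create_experiment now returns the full response dict, not just an id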
hafnia/platform/trainer_package.py ADDED
@@ -0,0 +1,57 @@
+ from pathlib import Path
+ from typing import Dict, List, Optional
+
+ from hafnia import http
+ from hafnia.log import user_logger
+ from hafnia.utils import archive_dir, get_trainer_package_path, pretty_print_list_as_table, timed
+
+
+ @timed("Uploading trainer package.")
+ def create_trainer_package(source_dir: Path, endpoint: str, api_key: str) -> str:
+     source_dir = source_dir.resolve() # Ensure the path is absolute to handle '.' paths are given an appropriate name.
+     path_trainer = get_trainer_package_path(trainer_name=source_dir.name)
+     zip_path = archive_dir(source_dir, output_path=path_trainer)
+     user_logger.info(f"Trainer package created and stored in '{path_trainer}'")
+
+     headers = {"Authorization": api_key, "accept": "application/json"}
+     data = {
+         "name": path_trainer.name,
+         "description": "Trainer package created by Hafnia CLI",
+         "file": (zip_path.name, Path(zip_path).read_bytes()),
+     }
+     response = http.post(endpoint, headers=headers, data=data, multipart=True)
+     return response["id"]
+
+
+ @timed("Get trainer package.")
+ def get_trainer_package_by_id(id: str, endpoint: str, api_key: str) -> Dict:
+     full_url = f"{endpoint}/{id}"
+     headers = {"Authorization": api_key}
+     response: Dict = http.fetch(full_url, headers=headers) # type: ignore[assignment]
+     return response
+
+
+ @timed("Get trainer packages")
+ def get_trainer_packages(endpoint: str, api_key: str) -> List[Dict]:
+     headers = {"Authorization": api_key}
+     trainers: List[Dict] = http.fetch(endpoint, headers=headers) # type: ignore[assignment]
+     return trainers
+
+
+ def pretty_print_trainer_packages(trainers: List[Dict[str, str]], limit: Optional[int]) -> None:
+     # Sort trainer packages to have the most recent first
+     trainers = sorted(trainers, key=lambda x: x["created_at"], reverse=True)
+     if limit is not None:
+         trainers = trainers[:limit]
+
+     mapping = {
+         "ID": "id",
+         "Name": "name",
+         "Description": "description",
+         "Created At": "created_at",
+     }
+     pretty_print_list_as_table(
+         table_title="Available Trainer Packages (most recent first)",
+         dict_items=trainers,
+         column_name_to_key_mapping=mapping,
+     )
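A minimal sketch of uploading a local project with the new `create_trainer_package` helper; the module path is assumed from the file layout above, and the endpoint and API key are placeholders:

```python
from pathlib import Path

# Assumed module path based on the file list; not confirmed by the diff itself.
from hafnia.platform.trainer_package import create_trainer_package

trainer_id = create_trainer_package(
    source_dir=Path("../trainer-classification"),         # local trainer project folder
    endpoint="https://api.example.com/trainer-packages",   # placeholder endpoint
    api_key="YOUR_API_KEY",                                # placeholder key
)
print(f"Uploaded trainer package with id: {trainer_id}")
```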
hafnia/utils.py CHANGED
@@ -5,10 +5,12 @@ import zipfile
  from datetime import datetime
  from functools import wraps
  from pathlib import Path
- from typing import Any, Callable, Iterator, Optional
+ from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional
  from zipfile import ZipFile

+ import more_itertools
  import pathspec
+ import rich
  import seedir
  from rich import print as rprint

@@ -16,19 +18,21 @@ from hafnia.log import sys_logger, user_logger

  PATH_DATA = Path("./.data")
  PATH_DATASETS = PATH_DATA / "datasets"
- PATH_RECIPES = PATH_DATA / "recipes"
+ PATH_DATASET_RECIPES = PATH_DATA / "dataset_recipes"
+ PATH_TRAINER_PACKAGES = PATH_DATA / "trainers"
  FILENAME_HAFNIAIGNORE = ".hafniaignore"
  DEFAULT_IGNORE_SPECIFICATION = [
      "*.jpg",
      "*.png",
      "*.py[cod]",
      "*_cache/",
+     "**.egg-info/",
      ".data",
      ".git",
      ".venv",
      ".vscode",
      "__pycache__",
-     "recipe.zip",
+     "trainer.zip",
      "tests",
      "wandb",
  ]
@@ -49,6 +53,7 @@ def timed(label: str):
                  return func(*args, **kwargs)
              except Exception as e:
                  sys_logger.error(f"{operation_label} failed: {e}")
+                 raise # Re-raise the exception after logging
              finally:
                  elapsed = time.perf_counter() - tik
                  sys_logger.debug(f"{operation_label} took {elapsed:.2f} seconds.")
@@ -58,19 +63,31 @@ def timed(label: str):
      return decorator


+ def get_path_hafnia_cache() -> Path:
+     return Path.home() / "hafnia"
+
+
+ def get_path_torchvision_downloads() -> Path:
+     return get_path_hafnia_cache() / "torchvision_downloads"
+
+
+ def get_path_hafnia_conversions() -> Path:
+     return get_path_hafnia_cache() / "hafnia_conversions"
+
+
  def now_as_str() -> str:
      """Get the current date and time as a string."""
      return datetime.now().strftime("%Y-%m-%dT%H-%M-%S")


- def get_recipe_path(recipe_name: str) -> Path:
+ def get_trainer_package_path(trainer_name: str) -> Path:
      now = now_as_str()
-     path_recipe = PATH_RECIPES / f"{recipe_name}_{now}.zip"
-     return path_recipe
+     path_trainer = PATH_TRAINER_PACKAGES / f"{trainer_name}_{now}.zip"
+     return path_trainer


- def filter_recipe_files(recipe_path: Path, path_ignore_file: Optional[Path] = None) -> Iterator:
-     path_ignore_file = path_ignore_file or recipe_path / FILENAME_HAFNIAIGNORE
+ def filter_trainer_package_files(trainer_path: Path, path_ignore_file: Optional[Path] = None) -> Iterator:
+     path_ignore_file = path_ignore_file or trainer_path / FILENAME_HAFNIAIGNORE
      if not path_ignore_file.exists():
          ignore_specification_lines = DEFAULT_IGNORE_SPECIFICATION
          user_logger.info(
@@ -81,7 +98,7 @@ def filter_recipe_files(recipe_path: Path, path_ignore_file: Optional[Path] = No
      else:
          ignore_specification_lines = Path(path_ignore_file).read_text().splitlines()
      ignore_specification = pathspec.GitIgnoreSpec.from_lines(ignore_specification_lines)
-     include_files = ignore_specification.match_tree(recipe_path, negate=True)
+     include_files = ignore_specification.match_tree(trainer_path, negate=True)
      return include_files


@@ -91,17 +108,17 @@ def archive_dir(
      output_path: Optional[Path] = None,
      path_ignore_file: Optional[Path] = None,
  ) -> Path:
-     recipe_zip_path = output_path or recipe_path / "recipe.zip"
+     recipe_zip_path = output_path or recipe_path / "trainer.zip"
      assert recipe_zip_path.suffix == ".zip", "Output path must be a zip file"
      recipe_zip_path.parent.mkdir(parents=True, exist_ok=True)

      user_logger.info(f" Creating zip archive of '{recipe_path}'")
-     include_files = filter_recipe_files(recipe_path, path_ignore_file)
+     include_files = filter_trainer_package_files(recipe_path, path_ignore_file)
      with ZipFile(recipe_zip_path, "w", compression=zipfile.ZIP_STORED, allowZip64=True) as zip_ref:
          for str_filepath in include_files:
              full_path = recipe_path / str_filepath
              zip_ref.write(full_path, str_filepath)
-     show_recipe_content(recipe_zip_path)
+     show_trainer_package_content(recipe_zip_path)

      return recipe_zip_path

@@ -115,7 +132,7 @@ def size_human_readable(size_bytes: int, suffix="B") -> str:
      return f"{size_value:.1f}Yi{suffix}"


- def show_recipe_content(recipe_path: Path, style: str = "emoji", depth_limit: int = 3) -> None:
+ def show_trainer_package_content(recipe_path: Path, style: str = "emoji", depth_limit: int = 3) -> None:
      def scan(parent: seedir.FakeDir, path: zipfile.Path, depth: int = 0) -> None:
          if depth >= depth_limit:
              return
@@ -133,6 +150,16 @@ def show_recipe_content(recipe_path: Path, style: str = "emoji", depth_limit: in
      user_logger.info(f"Recipe size: {size_human_readable(os.path.getsize(recipe_path))}. Max size 800 MiB")


+ def get_dataset_path_in_hafnia_cloud() -> Path:
+     if not is_hafnia_cloud_job():
+         user_logger.error(
+             f"The function '{get_dataset_path_in_hafnia_cloud.__name__}' should only be called, when "
+             "running in HAFNIA cloud environment (HAFNIA_CLOUD-environment variable have been defined)"
+         )
+
+     return Path(os.getenv("MDI_DATASET_DIR", "/opt/ml/input/data/training"))
+
+
  def is_hafnia_cloud_job() -> bool:
      """Check if the current job is running in HAFNIA cloud environment."""
      return os.getenv("HAFNIA_CLOUD", "false").lower() == "true"
@@ -154,3 +181,44 @@ def snake_to_pascal_case(name: str) -> str:

  def hash_from_string(s: str) -> str:
      return hashlib.md5(s.encode("utf-8")).hexdigest()
+
+
+ def pretty_print_list_as_table(
+     table_title: str,
+     dict_items: List[Dict],
+     column_name_to_key_mapping: Dict,
+ ) -> None:
+     """
+     Pretty print a list of dictionary elements as a table.
+     """
+
+     table = rich.table.Table(title=table_title)
+     for i_dict, dictionary in enumerate(dict_items):
+         if i_dict == 0:
+             for column_name, _ in column_name_to_key_mapping.items():
+                 table.add_column(column_name, justify="left", style="cyan", no_wrap=True)
+         row = [str(dictionary.get(field, "")) for field in column_name_to_key_mapping.values()]
+         table.add_row(*row)
+
+     rich.print(table)
+
+
+ def is_hafnia_configured() -> bool:
+     """
+     Check if Hafnia is configured by verifying if the API key is set.
+     """
+     from cli.config import Config
+
+     return Config().is_configured()
+
+
+ def remove_duplicates_preserve_order(seq: Iterable) -> List:
+     """
+     Remove duplicates from a list while preserving the order of elements.
+     """
+     return list(more_itertools.unique_everseen(seq))
+
+
+ def is_image_file(file_path: Path) -> bool:
+     image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif")
+     return file_path.suffix.lower() in image_extensions
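A small sketch of the new `pretty_print_list_as_table` helper added above; the rows and column mapping are made-up example data, not taken from this diff:

```python
from hafnia.utils import pretty_print_list_as_table

# Made-up example rows; any list of dicts with matching keys works.
environments = [
    {"name": "gpu-small", "gpu": "T4", "gpu_count": 1},
    {"name": "gpu-large", "gpu": "A100", "gpu_count": 4},
]
pretty_print_list_as_table(
    table_title="Example Environments",
    dict_items=environments,
    column_name_to_key_mapping={"Name": "name", "GPU": "gpu", "GPU Count": "gpu_count"},
)
```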
hafnia/visualizations/image_visualizations.py CHANGED
@@ -102,10 +102,10 @@ def resize_width_by_padding(img0: np.ndarray, new_width: int) -> np.ndarray:
      return img0_padded


- def append_text_below_frame(frame: np.ndarray, text: str) -> np.ndarray:
-     font_size_px = int(frame.shape[0] * 0.1) # 10% of the frame height
+ def append_text_below_frame(frame: np.ndarray, text: str, text_size_ratio: float = 0.1) -> np.ndarray:
+     font_size_px = int(frame.shape[0] * text_size_ratio) # 10% of the frame height
      font_size_px = max(font_size_px, 7) # Ensure a minimum font size
-     font_size_px = min(font_size_px, 50) # Ensure a maximum font size
+     font_size_px = min(font_size_px, 25) # Ensure a maximum font size

      text_region = create_text_img(text, font_size_px=font_size_px)
      frame_with_text = concatenate_below_resize_by_padding(frame, text_region)
@@ -193,7 +193,7 @@ def save_dataset_sample_set_visualizations(
          image = draw_annotations(image, annotations, draw_settings=draw_settings)

          pil_image = Image.fromarray(image)
-         path_image = path_output_folder / Path(sample.file_name).name
+         path_image = path_output_folder / Path(sample.file_path).name
          pil_image.save(path_image)
          paths.append(path_image)

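For reference, a hedged sketch of calling `append_text_below_frame` with the new `text_size_ratio` argument; the module path is assumed from the file list and the input frame is dummy data:

```python
import numpy as np

# Assumed module path; the frame below is a dummy black image.
from hafnia.visualizations.image_visualizations import append_text_below_frame

frame = np.zeros((480, 640, 3), dtype=np.uint8)
labeled = append_text_below_frame(frame, "sample caption", text_size_ratio=0.05)
print(labeled.shape)  # taller than the input: a text strip is appended below
```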
{hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.2.4
+ Version: 0.4.0
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
@@ -9,6 +9,9 @@ Requires-Dist: boto3>=1.35.91
  Requires-Dist: click>=8.1.8
  Requires-Dist: emoji>=2.14.1
  Requires-Dist: flatten-dict>=0.4.2
+ Requires-Dist: keyring>=25.6.0
+ Requires-Dist: mcp==1.16.0
+ Requires-Dist: mlflow>=3.4.0
  Requires-Dist: more-itertools>=10.7.0
  Requires-Dist: opencv-python-headless>=4.11.0.86
  Requires-Dist: pathspec>=0.12.1
@@ -19,20 +22,20 @@ Requires-Dist: pycocotools>=2.0.10
  Requires-Dist: pydantic>=2.10.4
  Requires-Dist: rich>=13.9.4
  Requires-Dist: s5cmd>=0.2.0
+ Requires-Dist: sagemaker-mlflow>=0.1.0
  Requires-Dist: seedir>=0.5.0
- Requires-Dist: tqdm>=4.67.1
  Requires-Dist: xxhash>=3.5.0
  Description-Content-Type: text/markdown

  # Hafnia

- The `hafnia` python package is a collection of tools to create and run model training recipes on
+ The `hafnia` python sdk and cli is a collection of tools to create and run model trainer packages on
  the [Hafnia Platform](https://hafnia.milestonesys.com/).

  The package includes the following interfaces:

  - `cli`: A Command Line Interface (CLI) to 1) configure/connect to Hafnia's [Training-aaS](https://hafnia.readme.io/docs/training-as-a-service) and 2) create and
- launch recipe scripts.
+ launch trainer packages.
  - `hafnia`: A python package including `HafniaDataset` to manage datasets and `HafniaLogger` to do
  experiment tracking.

@@ -42,19 +45,19 @@ experiment tracking.
  and *hidden* datasets. Hidden datasets refers to datasets that can be used for
  training, but are not available for download or direct access.

- This is a key feature of the Hafnia platform, as a hidden dataset ensures data
+ This is a key for the Hafnia platform, as a hidden dataset ensures data
  privacy, and allow models to be trained compliantly and ethically by third parties (you).

  The `script2model` approach is a Training-aaS concept, where you package your custom training
- script as a *training recipe* and use the recipe to train models on the hidden datasets.
+ project or script as a *trainer package* and use the package to train models on the hidden datasets.

- To support local development of a training recipe, we have introduced a **sample dataset**
+ To support local development of a trainer package, we have introduced a **sample dataset**
  for each dataset available in the Hafnia [data library](https://hafnia.milestonesys.com/training-aas/datasets). The sample dataset is a small
- and anonymized subset of the full dataset and available for download.
+ and an anonymized subset of the full dataset and available for download.

  With the sample dataset, you can seamlessly switch between local development and Training-aaS.
- Locally, you can create, validate and debug your training recipe. The recipe is then
- launched with Training-aaS, where the recipe runs on the full dataset and can be scaled to run on
+ Locally, you can create, validate and debug your trainer package. The trainer package is then
+ launched with Training-aaS, where the package runs on the full dataset and can be scaled to run on
  multiple GPUs and instances if needed.

  ## Getting started: Configuration
@@ -78,6 +81,7 @@ Copy the key and save it for later use.
  Hafnia API Key: # Pass your HAFNIA API key
  Hafnia Platform URL [https://api.mdi.milestonesys.com]: # Press [Enter]
  ```
+
  1. Download `mnist` from terminal to verify that your configuration is working.

  ```bash
@@ -89,7 +93,7 @@ With Hafnia configured on your local machine, it is now possible to download
  and explore the dataset sample with a python script:

  ```python
- from hafnia.data import load_dataset, get_dataset_path
+ from hafnia.data import get_dataset_path
  from hafnia.dataset.hafnia_dataset import HafniaDataset

  # To download the sample dataset use:
@@ -122,19 +126,19 @@ midwest-vehicle-detection
  You can interact with data as you want, but we also provide `HafniaDataset`
  for loading/saving, managing and interacting with the dataset.

- We recommend to visit and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
- to see how to use the `HafniaDataset` class and its methods.
+ We recommend the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
+ for a short introduction on the `HafniaDataset`.

  Below is a short introduction to the `HafniaDataset` class.

  ```python
  from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample

- # Load dataset
+ # Load dataset from path
  dataset = HafniaDataset.read_from_path(path_dataset)

- # Alternatively, you can use the 'load_dataset' function to download and load dataset in one go.
- # dataset = load_dataset("midwest-vehicle-detection")
+ # Or get dataset directly by name
+ dataset = HafniaDataset.from_name("midwest-vehicle-detection")

  # Print dataset information
  dataset.print_stats()
@@ -199,6 +203,8 @@ DatasetInfo(
  'duration_average': 120.0,
  ...
  }
+ "format_version": "0.0.2",
+ "updated_at": "2025-09-24T21:50:20.231263"
  )
  ```

@@ -238,7 +244,7 @@ Sample(
  height=1080,
  width=1920,
  split='train',
- is_sample=True,
+ tags=["sample"],
  collection_index=None,
  collection_id=None,
  remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
@@ -302,10 +308,10 @@ Sample(
  )
  ```

- To learn more, view and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).
+ To learn more, we recommend the `HafniaDataset` example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).

  ### Dataset Locally vs. Training-aaS
- An important feature of `load_dataset` is that it will return the full dataset
+ An important feature of `HafniaDataset.from_name` is that it will return the full dataset
  when loaded with Training-aaS on the Hafnia platform.

  This enables seamlessly switching between running/validating a training script
@@ -316,7 +322,7 @@ Available datasets with corresponding sample datasets can be found in [data libr


  ## Getting started: Experiment Tracking with HafniaLogger
- The `HafniaLogger` is an important part of the recipe script and enables you to track, log and
+ The `HafniaLogger` is an important part of the trainer and enables you to track, log and
  reproduce your experiments.

  When integrated into your training script, the `HafniaLogger` is responsible for collecting:
@@ -356,7 +362,7 @@ logger.log_scalar("validation/loss", value=0.1, step=100)
  logger.log_metric("validation/accuracy", value=0.95, step=100)
  ```

- Similar to `load_dataset`, the tracker behaves differently when running locally or in the cloud.
+ The tracker behaves differently when running locally or in the cloud.
  Locally, experiment data is stored in a local folder `.data/experiments/{DATE_TIME}`.

  In the cloud, the experiment data will be available in the Hafnia platform under
@@ -380,7 +386,7 @@ and datasets available in the data library.

  ```python
  # Load Hugging Face dataset
- dataset_splits = load_dataset("midwest-vehicle-detection")
+ dataset_splits = HafniaDataset.from_name("midwest-vehicle-detection")

  # Define transforms
  train_transforms = v2.Compose(
@@ -422,25 +428,25 @@ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=


  ## Example: Training-aaS
- By combining logging and dataset loading, we can now construct our model training recipe.
+ By combining logging and dataset loading, we can now construct our model trainer package.

- To demonstrate this, we have provided a recipe project that serves as a template for creating and structuring training recipes
- [recipe-classification](https://github.com/milestone-hafnia/recipe-classification)
+ To demonstrate this, we have provided a trainer package project that serves as a template for creating and structuring trainers. The example repo is called
+ [trainer-classification](https://github.com/milestone-hafnia/trainer-classification)

- The project also contains additional information on how to structure your training recipe, use the `HafniaLogger`, the `load_dataset` function and different approach for launching
- the training recipe on the Hafnia platform.
+ The project also contains additional information on how to structure your trainer package, use the `HafniaLogger`, loading a dataset and different approach for launching
+ the trainer on the Hafnia platform.


- ## Create, Build and Run `recipe.zip` locally
- In order to test recipe compatibility with Hafnia cloud use the following command to build and
+ ## Create, Build and Run `trainer.zip` locally
+ In order to test trainer package compatibility with Hafnia cloud use the following command to build and
  start the job locally.

  ```bash
- # Create 'recipe.zip' from source folder '.'
- hafnia recipe create .
-
- # Build the docker image locally from a 'recipe.zip' file
- hafnia runc build-local recipe.zip
+ # Create 'trainer.zip' in the root folder of your training trainer project '../trainer/classification'
+ hafnia trainer create-zip ../trainer-classification
+
+ # Build the docker image locally from a 'trainer.zip' file
+ hafnia runc build-local trainer.zip

  # Execute the docker image locally with a desired dataset
  hafnia runc launch-local --dataset mnist "python scripts/train.py"
hafnia-0.4.0.dist-info/RECORD ADDED
@@ -0,0 +1,56 @@
+ cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cli/__main__.py,sha256=1IOe1Ewd3QMeuzUMfoWv663_jQpd7_pTn_R1sDdEmbI,1747
+ cli/config.py,sha256=xCLdgqBqFN3EInzJ1AO5Nj1e35jOjcc_jaukaodrrMc,7681
+ cli/consts.py,sha256=uCpYX44NCu_Zvte0QwChunxOo-qqhcaJRSYDAIsoJ8A,972
+ cli/dataset_cmds.py,sha256=KOPYdAJ1SyzTMlEi_J-70vSGIJ5acHPGIPOCKVIdlQ4,1418
+ cli/dataset_recipe_cmds.py,sha256=O1Pd-VvFFsmZ-nE1Mh6sCC9x45ztCJEpPCZK93qz_HQ,2887
+ cli/experiment_cmds.py,sha256=vc7J9JJog4EvRdgkpoMvr0kceb0QF_Rk7mn6H2KNvFE,7963
+ cli/keychain.py,sha256=bNyjjULVQu7kV338wUC65UvbCwmSGOmEjKWPLIQjT0k,2555
+ cli/profile_cmds.py,sha256=3OwpomV6Wb21ZG43xrwhvoB5S4IN1IqmVCxs-MItAho,3463
+ cli/runc_cmds.py,sha256=qV-LOwbFlbegH8XSELOo4h_2ajFAzdB5LtuVKKoRq8Y,5009
+ cli/trainer_package_cmds.py,sha256=nL7yTtR41BKzo1DWZdBdpRXGPZZIbmBe0byHAi6_n2c,2312
+ hafnia/__init__.py,sha256=5_DWVYRRh3ZWUo5QSRpl3tKCDDoX7YNiB14o1aaBhfo,179
+ hafnia/http.py,sha256=bjXbV_3uKbBdudqMdYtnsMttUAsNRMsetYZ4F2xXlEs,3635
+ hafnia/log.py,sha256=sWF8tz78yBtwZ9ddzm19L1MBSBJ3L4G704IGeT1_OEU,784
+ hafnia/torch_helpers.py,sha256=ho65B0WIu_SjbaKPRL4wabDNrnVumWH8QSXVH4r7NAY,11605
+ hafnia/utils.py,sha256=ieNNL8IK3IqDsRf7NJnF_NU3eMLi8d3tSQzOgW5sVOw,7319
+ hafnia/data/__init__.py,sha256=o9QjiGbEcNa6r-qDmwwmxPXf-1UitNl5-WxFNcujqsg,111
+ hafnia/data/factory.py,sha256=kHkvOtBUbwaShZBGf1kZzocDJBn_1dHHLrQxnUpJmfY,778
+ hafnia/dataset/dataset_helpers.py,sha256=0GbS6PfaiYBulDKRCbd0miN5LHaUIp-XzGt_wZay8xs,5044
+ hafnia/dataset/dataset_names.py,sha256=ZFdxsc-R6yOusT0kyh0GppjZbrDycp2Pn4WbIR060Ss,2325
+ hafnia/dataset/dataset_upload_helper.py,sha256=v7EwsatnD7HF3MxbGU7q7HNgRISqguKcx7qYZ56Ok-0,29911
+ hafnia/dataset/hafnia_dataset.py,sha256=BvJ63luf06kleR-lkqfVgU5p0ZI_JxHBbBY0FdShYKI,41864
+ hafnia/dataset/license_types.py,sha256=xpanTfui1pGG76mGQ9r6EywcUe1scI_zullEpmCO4GI,2190
+ hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=mwNL7VMhbEFHC2p6L_OO7os7bVVb05YFKZxvzQ_BySk,19059
+ hafnia/dataset/dataset_recipe/recipe_transforms.py,sha256=j3Oiytt3LI2rCaJid7Y44oT9MXvlZVqvZanngMebIWg,3088
+ hafnia/dataset/dataset_recipe/recipe_types.py,sha256=AcrG6gpRt3Igl-CCJ60uyh-WkfI1NCnQ55M8yClSI9Q,5328
+ hafnia/dataset/format_conversions/image_classification_from_directory.py,sha256=PVjvwjxMjvCLXSBzg0W-XRDRsZP4XkazPFK6I-S6CiE,4272
+ hafnia/dataset/format_conversions/torchvision_datasets.py,sha256=akDB8JpeRGJgAVgZNWgg_a4mw3uJTbYNdcYy8eadz9A,11612
+ hafnia/dataset/operations/dataset_stats.py,sha256=AKi17FfcnoSQLZzs64EZlIElUQ16PO2cCICkVHWj1kI,11565
+ hafnia/dataset/operations/dataset_transformations.py,sha256=fHN5-0FPZZ_SeaJKcAHPsQPhqOlEQCX-9B54J8tiBwk,16868
+ hafnia/dataset/operations/table_transformations.py,sha256=BoUmm0TDz7Iuw7942nzYjVLHGQAVDZmzI3CoCV0chR8,9305
+ hafnia/dataset/primitives/__init__.py,sha256=xFLJ3R7gpbuQnNJuFhuu836L3nicwoaY5aHkqk7Bbr8,927
+ hafnia/dataset/primitives/bbox.py,sha256=WMXnU5ISTkqkHapOxQlovU1PD9Ap_2WaSFE6fSEXOGk,6409
+ hafnia/dataset/primitives/bitmask.py,sha256=zldW2SrJE-8I9qEpbUMTua_ARqg5OeUljJDGVbB4QJo,8045
+ hafnia/dataset/primitives/classification.py,sha256=cg_ndGy5Pt1rkqV1oMN7oUZ6Y2shk-vwCod7uDMS0e4,2637
+ hafnia/dataset/primitives/point.py,sha256=VzCNLTQOPA6wyJVVKddZHGhltkep6V_B7pg5pk7rd9Y,879
+ hafnia/dataset/primitives/polygon.py,sha256=Y3c8lc54qEXB9K1netVz3BzbjMDGVukyFfxNb-7LKto,4518
+ hafnia/dataset/primitives/primitive.py,sha256=7jxcyFADVGf95pjeQHEOqAnR9eucLpxA2h8Blz3ppXI,1253
+ hafnia/dataset/primitives/segmentation.py,sha256=3kSEcz56xz149Y7WpJh-rYzdO8Oe-n3JvRIOXRgosgw,1970
+ hafnia/dataset/primitives/utils.py,sha256=3gT1as-xXEj8CamoIuBb9gQwUN9Ae9qnqtqF_uEe0zo,1993
+ hafnia/experiment/__init__.py,sha256=OEFE6HqhO5zcTCLZcPcPVjIg7wMFFnvZ1uOtAVhRz7M,85
+ hafnia/experiment/hafnia_logger.py,sha256=Qia8orPu7PZk12geB97alJaq6AjxYedDmKri_tmyldo,10408
+ hafnia/platform/__init__.py,sha256=L_Q7CNpsJ0HMNPy_rLlLK5RhmuCU7IF4BchxKv6amYc,782
+ hafnia/platform/builder.py,sha256=kUEuj5-qtL1uk5v2tUvOCREn5yV-G4Fr6F31haIAb5E,5808
+ hafnia/platform/dataset_recipe.py,sha256=ybfSSHVPG0eFUbzg_1McezPSOtMoDZEg7l6rFYndtb4,3857
+ hafnia/platform/datasets.py,sha256=8liAntg1GCBqqS2l80_1jaWchjBhc2Y4aVHfEEo4gsU,9036
+ hafnia/platform/download.py,sha256=IpxQNUABHHXTY42ET0K7qyVGlZZddYJRVRImuLWE6Ic,7337
+ hafnia/platform/experiment.py,sha256=SrEH0nuwwBXf1Iu4diB1BEPqL-TxW3aQkZWBbM1-tY0,1846
+ hafnia/platform/trainer_package.py,sha256=w6JC7o-279ujcwtNTbUaQ9AnPcYRPPbD8EACa6XyUHA,2206
+ hafnia/visualizations/colors.py,sha256=003eAJVnBal4abaYIIpsrT7erIOIjTUHHYVJ1Tj1CDc,5226
+ hafnia/visualizations/image_visualizations.py,sha256=wDWtJ7_5ACb83QtyCCslHJGK7NRWxHpRPbdop8ELE4o,7355
+ hafnia-0.4.0.dist-info/METADATA,sha256=fyGToq0jzW7lOf21Cr2o0pyU4eVAimYbvfxzPjB4yj0,19236
+ hafnia-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ hafnia-0.4.0.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
+ hafnia-0.4.0.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
+ hafnia-0.4.0.dist-info/RECORD,,