hafnia 0.2.4-py3-none-any.whl → 0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. cli/__main__.py +13 -2
  2. cli/config.py +2 -1
  3. cli/consts.py +1 -1
  4. cli/dataset_cmds.py +6 -14
  5. cli/dataset_recipe_cmds.py +78 -0
  6. cli/experiment_cmds.py +226 -43
  7. cli/profile_cmds.py +6 -5
  8. cli/runc_cmds.py +5 -5
  9. cli/trainer_package_cmds.py +65 -0
  10. hafnia/__init__.py +2 -0
  11. hafnia/data/factory.py +1 -2
  12. hafnia/dataset/dataset_helpers.py +0 -12
  13. hafnia/dataset/dataset_names.py +8 -4
  14. hafnia/dataset/dataset_recipe/dataset_recipe.py +119 -33
  15. hafnia/dataset/dataset_recipe/recipe_transforms.py +32 -4
  16. hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
  17. hafnia/dataset/dataset_upload_helper.py +206 -53
  18. hafnia/dataset/hafnia_dataset.py +432 -194
  19. hafnia/dataset/license_types.py +63 -0
  20. hafnia/dataset/operations/dataset_stats.py +260 -3
  21. hafnia/dataset/operations/dataset_transformations.py +325 -4
  22. hafnia/dataset/operations/table_transformations.py +39 -2
  23. hafnia/dataset/primitives/__init__.py +8 -0
  24. hafnia/dataset/primitives/classification.py +1 -1
  25. hafnia/experiment/hafnia_logger.py +112 -0
  26. hafnia/http.py +16 -2
  27. hafnia/platform/__init__.py +9 -3
  28. hafnia/platform/builder.py +12 -10
  29. hafnia/platform/dataset_recipe.py +99 -0
  30. hafnia/platform/datasets.py +44 -6
  31. hafnia/platform/download.py +2 -1
  32. hafnia/platform/experiment.py +51 -56
  33. hafnia/platform/trainer_package.py +57 -0
  34. hafnia/utils.py +64 -13
  35. hafnia/visualizations/image_visualizations.py +3 -3
  36. {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/METADATA +34 -30
  37. hafnia-0.3.0.dist-info/RECORD +53 -0
  38. cli/recipe_cmds.py +0 -45
  39. hafnia-0.2.4.dist-info/RECORD +0 -49
  40. {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/WHEEL +0 -0
  41. {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/entry_points.txt +0 -0
  42. {hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/licenses/LICENSE +0 -0
hafnia/platform/experiment.py CHANGED
@@ -1,73 +1,68 @@
- from pathlib import Path
- from typing import Optional
+ from typing import Dict, List

- from hafnia.http import fetch, post
- from hafnia.log import user_logger
- from hafnia.utils import archive_dir, get_recipe_path, timed
+ from hafnia import http
+ from hafnia.utils import pretty_print_list_as_table, timed


- @timed("Fetching dataset info.")
- def get_dataset_id(dataset_name: str, endpoint: str, api_key: str) -> str:
+ @timed("Creating experiment.")
+ def create_experiment(
+     experiment_name: str,
+     dataset_recipe_id: str,
+     trainer_id: str,
+     exec_cmd: str,
+     environment_id: str,
+     endpoint: str,
+     api_key: str,
+ ) -> Dict:
      headers = {"Authorization": api_key}
-     full_url = f"{endpoint}?name__iexact={dataset_name}"
-     dataset_info = fetch(full_url, headers=headers)
-     if not dataset_info:
-         raise ValueError(f"Dataset '{dataset_name}' was not found in the dataset library.")
-     try:
-         return dataset_info[0]["id"]
-     except (IndexError, KeyError) as e:
-         raise ValueError("Dataset information is missing or invalid") from e
+     response = http.post(
+         endpoint,
+         headers=headers,
+         data={
+             "name": experiment_name,
+             "trainer": trainer_id,
+             "dataset_recipe": dataset_recipe_id,
+             "command": exec_cmd,
+             "environment": environment_id,
+         },
+     )
+     return response


- @timed("Uploading recipe.")
- def create_recipe(source_dir: Path, endpoint: str, api_key: str) -> str:
-     source_dir = source_dir.resolve()  # Ensure the path is absolute to handle '.' paths are given an appropriate name.
-     path_recipe = get_recipe_path(recipe_name=source_dir.name)
-     zip_path = archive_dir(source_dir, output_path=path_recipe)
-     user_logger.info(f"Recipe created and stored in '{path_recipe}'")
+ @timed("Fetching environment info.")
+ def get_environments(endpoint: str, api_key: str) -> List[Dict]:
+     headers = {"Authorization": api_key}
+     envs: List[Dict] = http.fetch(endpoint, headers=headers)  # type: ignore[assignment]
+     return envs

-     headers = {"Authorization": api_key, "accept": "application/json"}
-     data = {
-         "name": path_recipe.name,
-         "description": "Recipe created by Hafnia CLI",
-         "file": (zip_path.name, Path(zip_path).read_bytes()),
+
+ def pretty_print_training_environments(envs: List[Dict]) -> None:
+     ENV_FIELDS = {
+         "Name": "name",
+         "Instance": "instance",
+         "GPU": "gpu",
+         "GPU Count": "gpu_count",
+         "GPU RAM": "vram",
+         "CPU": "cpu",
+         "CPU Count": "cpu_count",
+         "RAM": "ram",
      }
-     response = post(endpoint, headers=headers, data=data, multipart=True)
-     return response["id"]
+     pretty_print_list_as_table(
+         table_title="Available Training Environments",
+         dict_items=envs,
+         column_name_to_key_mapping=ENV_FIELDS,
+     )


- @timed("Fetching environment info.")
  def get_exp_environment_id(name: str, endpoint: str, api_key: str) -> str:
-     headers = {"Authorization": api_key}
-     env_info = fetch(endpoint, headers=headers)
+     envs = get_environments(endpoint=endpoint, api_key=api_key)

-     for env in env_info:
+     for env in envs:
          if env["name"] == name:
              return env["id"]

-     raise ValueError(f"Environment '{name}' not found")
+     pretty_print_training_environments(envs)

+     available_envs = [env["name"] for env in envs]

- @timed("Creating exepriment.")
- def create_experiment(
-     exp_name: str,
-     dataset_id: str,
-     recipe_id: str,
-     exec_cmd: str,
-     environment_id: str,
-     endpoint: str,
-     api_key: str,
- ) -> Optional[str]:
-     headers = {"Authorization": api_key}
-     response = post(
-         endpoint,
-         headers=headers,
-         data={
-             "name": exp_name,
-             "recipe": recipe_id,
-             "dataset": dataset_id,
-             "command": exec_cmd,
-             "environment": environment_id,
-         },
-     )
-     return response["id"]
+     raise ValueError(f"Environment '{name}' not found. Available environments are: {available_envs}")
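
The reworked module now takes a dataset recipe and trainer package id, returns the full experiment response, and prints the available environments when a lookup fails. A minimal usage sketch, assuming the hunk above belongs to `hafnia/platform/experiment.py` (as the file list suggests); the endpoint URLs, environment name and API key below are placeholders, not values from the package:

```python
# Sketch only: placeholder endpoints/credentials; function names come from the hunk above.
from hafnia.platform.experiment import create_experiment, get_exp_environment_id

API_KEY = "Api-Key <your-key>"                         # placeholder credential
ENVS_URL = "https://api.example/environments"          # placeholder endpoint
EXPERIMENTS_URL = "https://api.example/experiments"    # placeholder endpoint

# Resolve an environment id by name; on a miss the new code prints a table of environments
environment_id = get_exp_environment_id("<environment-name>", endpoint=ENVS_URL, api_key=API_KEY)

experiment = create_experiment(
    experiment_name="baseline-classifier",
    dataset_recipe_id="<dataset-recipe-id>",
    trainer_id="<trainer-package-id>",
    exec_cmd="python scripts/train.py",
    environment_id=environment_id,
    endpoint=EXPERIMENTS_URL,
    api_key=API_KEY,
)
print(experiment)  # full response dict (0.2.4 returned only the experiment id)
```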
hafnia/platform/trainer_package.py ADDED
@@ -0,0 +1,57 @@
+ from pathlib import Path
+ from typing import Dict, List, Optional
+
+ from hafnia import http
+ from hafnia.log import user_logger
+ from hafnia.utils import archive_dir, get_trainer_package_path, pretty_print_list_as_table, timed
+
+
+ @timed("Uploading trainer package.")
+ def create_trainer_package(source_dir: Path, endpoint: str, api_key: str) -> str:
+     source_dir = source_dir.resolve()  # Ensure the path is absolute to handle '.' paths are given an appropriate name.
+     path_trainer = get_trainer_package_path(trainer_name=source_dir.name)
+     zip_path = archive_dir(source_dir, output_path=path_trainer)
+     user_logger.info(f"Trainer package created and stored in '{path_trainer}'")
+
+     headers = {"Authorization": api_key, "accept": "application/json"}
+     data = {
+         "name": path_trainer.name,
+         "description": "Trainer package created by Hafnia CLI",
+         "file": (zip_path.name, Path(zip_path).read_bytes()),
+     }
+     response = http.post(endpoint, headers=headers, data=data, multipart=True)
+     return response["id"]
+
+
+ @timed("Get trainer package.")
+ def get_trainer_package_by_id(id: str, endpoint: str, api_key: str) -> Dict:
+     full_url = f"{endpoint}/{id}"
+     headers = {"Authorization": api_key}
+     response: Dict = http.fetch(full_url, headers=headers)  # type: ignore[assignment]
+     return response
+
+
+ @timed("Get trainer packages")
+ def get_trainer_packages(endpoint: str, api_key: str) -> List[Dict]:
+     headers = {"Authorization": api_key}
+     trainers: List[Dict] = http.fetch(endpoint, headers=headers)  # type: ignore[assignment]
+     return trainers
+
+
+ def pretty_print_trainer_packages(trainers: List[Dict[str, str]], limit: Optional[int]) -> None:
+     # Sort trainer packages to have the most recent first
+     trainers = sorted(trainers, key=lambda x: x["created_at"], reverse=True)
+     if limit is not None:
+         trainers = trainers[:limit]
+
+     mapping = {
+         "ID": "id",
+         "Name": "name",
+         "Description": "description",
+         "Created At": "created_at",
+     }
+     pretty_print_list_as_table(
+         table_title="Available Trainer Packages (most recent first)",
+         dict_items=trainers,
+         column_name_to_key_mapping=mapping,
+     )
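
A minimal sketch of the new trainer-package workflow, assuming the functions above are importable from `hafnia.platform.trainer_package`; the endpoint URL, API key and source directory are placeholders:

```python
# Sketch only: upload a trainer package, then list the most recent packages.
from pathlib import Path

from hafnia.platform.trainer_package import (
    create_trainer_package,
    get_trainer_packages,
    pretty_print_trainer_packages,
)

API_KEY = "Api-Key <your-key>"                          # placeholder credential
TRAINERS_URL = "https://api.example/trainer-packages"   # placeholder endpoint

# Zips the source directory (respecting .hafniaignore) and uploads it as multipart data
trainer_id = create_trainer_package(Path("../trainer-classification"), endpoint=TRAINERS_URL, api_key=API_KEY)
print(f"Uploaded trainer package: {trainer_id}")

# Show the five most recently created packages in a rich table
trainers = get_trainer_packages(endpoint=TRAINERS_URL, api_key=API_KEY)
pretty_print_trainer_packages(trainers, limit=5)
```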
hafnia/utils.py CHANGED
@@ -5,10 +5,12 @@ import zipfile
  from datetime import datetime
  from functools import wraps
  from pathlib import Path
- from typing import Any, Callable, Iterator, Optional
+ from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional
  from zipfile import ZipFile

+ import more_itertools
  import pathspec
+ import rich
  import seedir
  from rich import print as rprint

@@ -16,19 +18,21 @@ from hafnia.log import sys_logger, user_logger

  PATH_DATA = Path("./.data")
  PATH_DATASETS = PATH_DATA / "datasets"
- PATH_RECIPES = PATH_DATA / "recipes"
+ PATH_DATASET_RECIPES = PATH_DATA / "dataset_recipes"
+ PATH_TRAINER_PACKAGES = PATH_DATA / "trainers"
  FILENAME_HAFNIAIGNORE = ".hafniaignore"
  DEFAULT_IGNORE_SPECIFICATION = [
      "*.jpg",
      "*.png",
      "*.py[cod]",
      "*_cache/",
+     "**.egg-info/",
      ".data",
      ".git",
      ".venv",
      ".vscode",
      "__pycache__",
-     "recipe.zip",
+     "trainer.zip",
      "tests",
      "wandb",
  ]
@@ -49,6 +53,7 @@ def timed(label: str):
                  return func(*args, **kwargs)
              except Exception as e:
                  sys_logger.error(f"{operation_label} failed: {e}")
+                 raise  # Re-raise the exception after logging
              finally:
                  elapsed = time.perf_counter() - tik
                  sys_logger.debug(f"{operation_label} took {elapsed:.2f} seconds.")
@@ -63,14 +68,14 @@ def now_as_str() -> str:
      return datetime.now().strftime("%Y-%m-%dT%H-%M-%S")


- def get_recipe_path(recipe_name: str) -> Path:
+ def get_trainer_package_path(trainer_name: str) -> Path:
      now = now_as_str()
-     path_recipe = PATH_RECIPES / f"{recipe_name}_{now}.zip"
-     return path_recipe
+     path_trainer = PATH_TRAINER_PACKAGES / f"{trainer_name}_{now}.zip"
+     return path_trainer


- def filter_recipe_files(recipe_path: Path, path_ignore_file: Optional[Path] = None) -> Iterator:
-     path_ignore_file = path_ignore_file or recipe_path / FILENAME_HAFNIAIGNORE
+ def filter_trainer_package_files(trainer_path: Path, path_ignore_file: Optional[Path] = None) -> Iterator:
+     path_ignore_file = path_ignore_file or trainer_path / FILENAME_HAFNIAIGNORE
      if not path_ignore_file.exists():
          ignore_specification_lines = DEFAULT_IGNORE_SPECIFICATION
          user_logger.info(
@@ -81,7 +86,7 @@ def filter_recipe_files(recipe_path: Path, path_ignore_file: Optional[Path] = No
      else:
          ignore_specification_lines = Path(path_ignore_file).read_text().splitlines()
      ignore_specification = pathspec.GitIgnoreSpec.from_lines(ignore_specification_lines)
-     include_files = ignore_specification.match_tree(recipe_path, negate=True)
+     include_files = ignore_specification.match_tree(trainer_path, negate=True)
      return include_files


@@ -91,17 +96,17 @@ def archive_dir(
      output_path: Optional[Path] = None,
      path_ignore_file: Optional[Path] = None,
  ) -> Path:
-     recipe_zip_path = output_path or recipe_path / "recipe.zip"
+     recipe_zip_path = output_path or recipe_path / "trainer.zip"
      assert recipe_zip_path.suffix == ".zip", "Output path must be a zip file"
      recipe_zip_path.parent.mkdir(parents=True, exist_ok=True)

      user_logger.info(f" Creating zip archive of '{recipe_path}'")
-     include_files = filter_recipe_files(recipe_path, path_ignore_file)
+     include_files = filter_trainer_package_files(recipe_path, path_ignore_file)
      with ZipFile(recipe_zip_path, "w", compression=zipfile.ZIP_STORED, allowZip64=True) as zip_ref:
          for str_filepath in include_files:
              full_path = recipe_path / str_filepath
              zip_ref.write(full_path, str_filepath)
-     show_recipe_content(recipe_zip_path)
+     show_trainer_package_content(recipe_zip_path)

      return recipe_zip_path

@@ -115,7 +120,7 @@ def size_human_readable(size_bytes: int, suffix="B") -> str:
          return f"{size_value:.1f}Yi{suffix}"


- def show_recipe_content(recipe_path: Path, style: str = "emoji", depth_limit: int = 3) -> None:
+ def show_trainer_package_content(recipe_path: Path, style: str = "emoji", depth_limit: int = 3) -> None:
      def scan(parent: seedir.FakeDir, path: zipfile.Path, depth: int = 0) -> None:
          if depth >= depth_limit:
              return
@@ -133,6 +138,16 @@ def show_recipe_content(recipe_path: Path, style: str = "emoji", depth_limit: in
      user_logger.info(f"Recipe size: {size_human_readable(os.path.getsize(recipe_path))}. Max size 800 MiB")


+ def get_dataset_path_in_hafnia_cloud() -> Path:
+     if not is_hafnia_cloud_job():
+         user_logger.error(
+             f"The function '{get_dataset_path_in_hafnia_cloud.__name__}' should only be called, when "
+             "running in HAFNIA cloud environment (HAFNIA_CLOUD-environment variable have been defined)"
+         )
+
+     return Path(os.getenv("MDI_DATASET_DIR", "/opt/ml/input/data/training"))
+
+
  def is_hafnia_cloud_job() -> bool:
      """Check if the current job is running in HAFNIA cloud environment."""
      return os.getenv("HAFNIA_CLOUD", "false").lower() == "true"
@@ -154,3 +169,39 @@ def snake_to_pascal_case(name: str) -> str:

  def hash_from_string(s: str) -> str:
      return hashlib.md5(s.encode("utf-8")).hexdigest()
+
+
+ def pretty_print_list_as_table(
+     table_title: str,
+     dict_items: List[Dict],
+     column_name_to_key_mapping: Dict,
+ ) -> None:
+     """
+     Pretty print a list of dictionary elements as a table.
+     """
+
+     table = rich.table.Table(title=table_title)
+     for i_dict, dictionary in enumerate(dict_items):
+         if i_dict == 0:
+             for column_name, _ in column_name_to_key_mapping.items():
+                 table.add_column(column_name, justify="left", style="cyan", no_wrap=True)
+         row = [str(dictionary.get(field, "")) for field in column_name_to_key_mapping.values()]
+         table.add_row(*row)
+
+     rich.print(table)
+
+
+ def is_hafnia_configured() -> bool:
+     """
+     Check if Hafnia is configured by verifying if the API key is set.
+     """
+     from cli.config import Config
+
+     return Config().is_configured()
+
+
+ def remove_duplicates_preserve_order(seq: Iterable) -> List:
+     """
+     Remove duplicates from a list while preserving the order of elements.
+     """
+     return list(more_itertools.unique_everseen(seq))
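
A small sketch of the two general-purpose helpers added above, `pretty_print_list_as_table` and `remove_duplicates_preserve_order`; the run dictionaries below are made-up input, not data from the package:

```python
# Sketch only: exercise the new hafnia.utils helpers with dummy data.
from hafnia.utils import pretty_print_list_as_table, remove_duplicates_preserve_order

runs = [
    {"id": "a1", "name": "baseline", "status": "done"},
    {"id": "b2", "name": "augmented", "status": "running"},
]

# Column titles (keys of the mapping) map to dictionary keys (values of the mapping)
pretty_print_list_as_table(
    table_title="Example Runs",
    dict_items=runs,
    column_name_to_key_mapping={"ID": "id", "Name": "name", "Status": "status"},
)

# Order-preserving de-duplication, backed by more_itertools.unique_everseen
assert remove_duplicates_preserve_order(["b", "a", "b", "c", "a"]) == ["b", "a", "c"]
```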
hafnia/visualizations/image_visualizations.py CHANGED
@@ -102,10 +102,10 @@ def resize_width_by_padding(img0: np.ndarray, new_width: int) -> np.ndarray:
      return img0_padded


- def append_text_below_frame(frame: np.ndarray, text: str) -> np.ndarray:
-     font_size_px = int(frame.shape[0] * 0.1)  # 10% of the frame height
+ def append_text_below_frame(frame: np.ndarray, text: str, text_size_ratio: float = 0.1) -> np.ndarray:
+     font_size_px = int(frame.shape[0] * text_size_ratio)  # 10% of the frame height
      font_size_px = max(font_size_px, 7)  # Ensure a minimum font size
-     font_size_px = min(font_size_px, 50)  # Ensure a maximum font size
+     font_size_px = min(font_size_px, 25)  # Ensure a maximum font size

      text_region = create_text_img(text, font_size_px=font_size_px)
      frame_with_text = concatenate_below_resize_by_padding(frame, text_region)
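
A brief sketch of the new `text_size_ratio` parameter, assuming the hunk above belongs to `hafnia/visualizations/image_visualizations.py` as the file list suggests; the frame below is a dummy array:

```python
# Sketch only: tune the caption height relative to the frame height.
import numpy as np

from hafnia.visualizations.image_visualizations import append_text_below_frame

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy black frame
labeled = append_text_below_frame(frame, "camera-01", text_size_ratio=0.05)
print(labeled.shape)  # taller than the input frame because a caption strip is appended below
```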
{hafnia-0.2.4.dist-info → hafnia-0.3.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.2.4
+ Version: 0.3.0
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
@@ -9,6 +9,7 @@ Requires-Dist: boto3>=1.35.91
  Requires-Dist: click>=8.1.8
  Requires-Dist: emoji>=2.14.1
  Requires-Dist: flatten-dict>=0.4.2
+ Requires-Dist: mlflow>=3.2.0
  Requires-Dist: more-itertools>=10.7.0
  Requires-Dist: opencv-python-headless>=4.11.0.86
  Requires-Dist: pathspec>=0.12.1
@@ -19,6 +20,7 @@ Requires-Dist: pycocotools>=2.0.10
  Requires-Dist: pydantic>=2.10.4
  Requires-Dist: rich>=13.9.4
  Requires-Dist: s5cmd>=0.2.0
+ Requires-Dist: sagemaker-mlflow>=0.1.0
  Requires-Dist: seedir>=0.5.0
  Requires-Dist: tqdm>=4.67.1
  Requires-Dist: xxhash>=3.5.0
@@ -26,13 +28,13 @@ Description-Content-Type: text/markdown

  # Hafnia

- The `hafnia` python package is a collection of tools to create and run model training recipes on
+ The `hafnia` python sdk and cli is a collection of tools to create and run model trainer packages on
  the [Hafnia Platform](https://hafnia.milestonesys.com/).

  The package includes the following interfaces:

  - `cli`: A Command Line Interface (CLI) to 1) configure/connect to Hafnia's [Training-aaS](https://hafnia.readme.io/docs/training-as-a-service) and 2) create and
- launch recipe scripts.
+ launch trainer packages.
  - `hafnia`: A python package including `HafniaDataset` to manage datasets and `HafniaLogger` to do
  experiment tracking.

@@ -42,19 +44,19 @@ experiment tracking.
  and *hidden* datasets. Hidden datasets refers to datasets that can be used for
  training, but are not available for download or direct access.

- This is a key feature of the Hafnia platform, as a hidden dataset ensures data
+ This is a key for the Hafnia platform, as a hidden dataset ensures data
  privacy, and allow models to be trained compliantly and ethically by third parties (you).

  The `script2model` approach is a Training-aaS concept, where you package your custom training
- script as a *training recipe* and use the recipe to train models on the hidden datasets.
+ project or script as a *trainer package* and use the package to train models on the hidden datasets.

- To support local development of a training recipe, we have introduced a **sample dataset**
+ To support local development of a trainer package, we have introduced a **sample dataset**
  for each dataset available in the Hafnia [data library](https://hafnia.milestonesys.com/training-aas/datasets). The sample dataset is a small
- and anonymized subset of the full dataset and available for download.
+ and an anonymized subset of the full dataset and available for download.

  With the sample dataset, you can seamlessly switch between local development and Training-aaS.
- Locally, you can create, validate and debug your training recipe. The recipe is then
- launched with Training-aaS, where the recipe runs on the full dataset and can be scaled to run on
+ Locally, you can create, validate and debug your trainer package. The trainer package is then
+ launched with Training-aaS, where the package runs on the full dataset and can be scaled to run on
  multiple GPUs and instances if needed.

  ## Getting started: Configuration
@@ -122,19 +124,19 @@ midwest-vehicle-detection
  You can interact with data as you want, but we also provide `HafniaDataset`
  for loading/saving, managing and interacting with the dataset.

- We recommend to visit and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
- to see how to use the `HafniaDataset` class and its methods.
+ We recommend the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
+ for a short introduction on the `HafniaDataset`.

  Below is a short introduction to the `HafniaDataset` class.

  ```python
  from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample

- # Load dataset
+ # Load dataset from path
  dataset = HafniaDataset.read_from_path(path_dataset)

- # Alternatively, you can use the 'load_dataset' function to download and load dataset in one go.
- # dataset = load_dataset("midwest-vehicle-detection")
+ # Or get dataset directly by name
+ dataset = HafniaDataset.from_name("midwest-vehicle-detection")

  # Print dataset information
  dataset.print_stats()
@@ -199,6 +201,8 @@ DatasetInfo(
      'duration_average': 120.0,
      ...
  }
+ "format_version": "0.0.2",
+ "updated_at": "2025-09-24T21:50:20.231263"
  )
  ```

@@ -238,7 +242,7 @@ Sample(
      height=1080,
      width=1920,
      split='train',
-     is_sample=True,
+     tags=["sample"],
      collection_index=None,
      collection_id=None,
      remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
@@ -302,10 +306,10 @@ Sample(
  )
  ```

- To learn more, view and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).
+ To learn more, we recommend the `HafniaDataset` example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).

  ### Dataset Locally vs. Training-aaS
- An important feature of `load_dataset` is that it will return the full dataset
+ An important feature of `HafniaDataset.from_name` is that it will return the full dataset
  when loaded with Training-aaS on the Hafnia platform.

  This enables seamlessly switching between running/validating a training script
@@ -316,7 +320,7 @@ Available datasets with corresponding sample datasets can be found in [data libr


  ## Getting started: Experiment Tracking with HafniaLogger
- The `HafniaLogger` is an important part of the recipe script and enables you to track, log and
+ The `HafniaLogger` is an important part of the trainer and enables you to track, log and
  reproduce your experiments.

  When integrated into your training script, the `HafniaLogger` is responsible for collecting:
@@ -422,25 +426,25 @@ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=


  ## Example: Training-aaS
- By combining logging and dataset loading, we can now construct our model training recipe.
+ By combining logging and dataset loading, we can now construct our model trainer package.

- To demonstrate this, we have provided a recipe project that serves as a template for creating and structuring training recipes
- [recipe-classification](https://github.com/milestone-hafnia/recipe-classification)
+ To demonstrate this, we have provided a trainer package project that serves as a template for creating and structuring trainers. The example repo is called
+ [trainer-classification](https://github.com/milestone-hafnia/trainer-classification)

- The project also contains additional information on how to structure your training recipe, use the `HafniaLogger`, the `load_dataset` function and different approach for launching
- the training recipe on the Hafnia platform.
+ The project also contains additional information on how to structure your trainer package, use the `HafniaLogger`, loading a dataset and different approach for launching
+ the trainer on the Hafnia platform.


- ## Create, Build and Run `recipe.zip` locally
- In order to test recipe compatibility with Hafnia cloud use the following command to build and
+ ## Create, Build and Run `trainer.zip` locally
+ In order to test trainer package compatibility with Hafnia cloud use the following command to build and
  start the job locally.

  ```bash
- # Create 'recipe.zip' from source folder '.'
- hafnia recipe create .
-
- # Build the docker image locally from a 'recipe.zip' file
- hafnia runc build-local recipe.zip
+ # Create 'trainer.zip' in the root folder of your training trainer project '../trainer/classification'
+ hafnia trainer create-zip ../trainer-classification
+
+ # Build the docker image locally from a 'trainer.zip' file
+ hafnia runc build-local trainer.zip

  # Execute the docker image locally with a desired dataset
  hafnia runc launch-local --dataset mnist "python scripts/train.py"
hafnia-0.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,53 @@
+ cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ cli/__main__.py,sha256=04iqem4UAw6zSVujBt8ER0HQbEhno6rgmARmECUwRPA,1634
+ cli/config.py,sha256=m9jrka0Rnh8_mnxnU4S_JgoYA7l1pvp2o2a7KgkYInY,6115
+ cli/consts.py,sha256=uCpYX44NCu_Zvte0QwChunxOo-qqhcaJRSYDAIsoJ8A,972
+ cli/dataset_cmds.py,sha256=KOPYdAJ1SyzTMlEi_J-70vSGIJ5acHPGIPOCKVIdlQ4,1418
+ cli/dataset_recipe_cmds.py,sha256=O1Pd-VvFFsmZ-nE1Mh6sCC9x45ztCJEpPCZK93qz_HQ,2887
+ cli/experiment_cmds.py,sha256=vc7J9JJog4EvRdgkpoMvr0kceb0QF_Rk7mn6H2KNvFE,7963
+ cli/profile_cmds.py,sha256=eRo4FtPvXPG5LK2fINVFMsBd_HpkNygY468essBb57o,3285
+ cli/runc_cmds.py,sha256=qV-LOwbFlbegH8XSELOo4h_2ajFAzdB5LtuVKKoRq8Y,5009
+ cli/trainer_package_cmds.py,sha256=nL7yTtR41BKzo1DWZdBdpRXGPZZIbmBe0byHAi6_n2c,2312
+ hafnia/__init__.py,sha256=xXUwwQ18P1YqmsZkvlkavaDqq8DbrrHf38pv5_JyV_M,179
+ hafnia/http.py,sha256=bjXbV_3uKbBdudqMdYtnsMttUAsNRMsetYZ4F2xXlEs,3635
+ hafnia/log.py,sha256=sWF8tz78yBtwZ9ddzm19L1MBSBJ3L4G704IGeT1_OEU,784
+ hafnia/torch_helpers.py,sha256=ho65B0WIu_SjbaKPRL4wabDNrnVumWH8QSXVH4r7NAY,11605
+ hafnia/utils.py,sha256=x2dPsiO0EPP6YnpQX4HBtbl29UN9hV4zHvOnDa9xYTg,6850
+ hafnia/data/__init__.py,sha256=o9QjiGbEcNa6r-qDmwwmxPXf-1UitNl5-WxFNcujqsg,111
+ hafnia/data/factory.py,sha256=kHkvOtBUbwaShZBGf1kZzocDJBn_1dHHLrQxnUpJmfY,778
+ hafnia/dataset/dataset_helpers.py,sha256=HHm4KG_-upIEmxHWeSJO4m8RmrCUxKgseRNs4WD6kUQ,4781
+ hafnia/dataset/dataset_names.py,sha256=wdLoH48ph1PjVpUYPEDnAfQYVDCvYGQCyqFmR0-ixDU,2286
+ hafnia/dataset/dataset_upload_helper.py,sha256=QdJl92aKm3czpkgXt3G_AgwBjyOV9R4iKn4bjjlPyXA,28007
+ hafnia/dataset/hafnia_dataset.py,sha256=XiCHv-ZSzjA4CImpyMevJ2qIJlFcKBLzwNB_HMuQGHo,36841
+ hafnia/dataset/license_types.py,sha256=xpanTfui1pGG76mGQ9r6EywcUe1scI_zullEpmCO4GI,2190
+ hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=A54N5qEPcM0Yswg26qaOvnr-uj3xUq-KbOInJayzbEI,19269
+ hafnia/dataset/dataset_recipe/recipe_transforms.py,sha256=6tY4jG1Bzx15-BXp5RavjtVRWcms3o9svzfUO9-m9Ps,2154
+ hafnia/dataset/dataset_recipe/recipe_types.py,sha256=AcrG6gpRt3Igl-CCJ60uyh-WkfI1NCnQ55M8yClSI9Q,5328
+ hafnia/dataset/operations/dataset_stats.py,sha256=V2lCmTfIbJ-LeAyu1fZf0_1jSa4MMfP0psuXi77gOYk,11630
+ hafnia/dataset/operations/dataset_transformations.py,sha256=JVxfw4fV51eGB7ekYfLB5FKQql6l1whTqRY_BwfX0Us,16593
+ hafnia/dataset/operations/table_transformations.py,sha256=6LFQfFRAb1B25cS3QeliRzj26EgVyub5kE-6Sab5Ymo,9250
+ hafnia/dataset/primitives/__init__.py,sha256=xFLJ3R7gpbuQnNJuFhuu836L3nicwoaY5aHkqk7Bbr8,927
+ hafnia/dataset/primitives/bbox.py,sha256=HXYYy5BLNZwh-bO7aiAWg3z0OurUev8ISa-vYey8b8A,6055
+ hafnia/dataset/primitives/bitmask.py,sha256=mq_wchMqGupJDc-a-mJh9uBO_mjHcXpLH49g591doAM,7619
+ hafnia/dataset/primitives/classification.py,sha256=ri4lTtS5gBQX13vF07YbeN11rKl1CJaKeTIzCmoT9Iw,2363
+ hafnia/dataset/primitives/point.py,sha256=JCRwb-E4sDafodkg6wqyuAS1Yj-yaJbwiD8aB69_Ros,635
+ hafnia/dataset/primitives/polygon.py,sha256=vhPrYHv6TqQZMTAyv9r3NV8Hu6YRSSD0srB0wOCIwQ4,4289
+ hafnia/dataset/primitives/primitive.py,sha256=7jxcyFADVGf95pjeQHEOqAnR9eucLpxA2h8Blz3ppXI,1253
+ hafnia/dataset/primitives/segmentation.py,sha256=jUMjOmYr9j4An3YSCw5CJC1W8ihXAbus3CXaTOpc7Xw,1905
+ hafnia/dataset/primitives/utils.py,sha256=3gT1as-xXEj8CamoIuBb9gQwUN9Ae9qnqtqF_uEe0zo,1993
+ hafnia/experiment/__init__.py,sha256=OEFE6HqhO5zcTCLZcPcPVjIg7wMFFnvZ1uOtAVhRz7M,85
+ hafnia/experiment/hafnia_logger.py,sha256=k8UIK3mXNyxpmfFndTD_u8pFA6TKbjmR-9EJ6JjvY-U,10729
+ hafnia/platform/__init__.py,sha256=L_Q7CNpsJ0HMNPy_rLlLK5RhmuCU7IF4BchxKv6amYc,782
+ hafnia/platform/builder.py,sha256=kUEuj5-qtL1uk5v2tUvOCREn5yV-G4Fr6F31haIAb5E,5808
+ hafnia/platform/dataset_recipe.py,sha256=-scelPECr1eLn5tB_jFm5bJdygw_guktrWfoVquSu2A,3790
+ hafnia/platform/datasets.py,sha256=orWdZUwrNk7BTJFJP_8TLhf0CB5PYopZug4u36w08FQ,9018
+ hafnia/platform/download.py,sha256=Tzycmb6I6LmwACDHmJmR1zsrOX6OLgYWRbfkCXKEAdQ,6903
+ hafnia/platform/experiment.py,sha256=SrEH0nuwwBXf1Iu4diB1BEPqL-TxW3aQkZWBbM1-tY0,1846
+ hafnia/platform/trainer_package.py,sha256=w6JC7o-279ujcwtNTbUaQ9AnPcYRPPbD8EACa6XyUHA,2206
+ hafnia/visualizations/colors.py,sha256=003eAJVnBal4abaYIIpsrT7erIOIjTUHHYVJ1Tj1CDc,5226
+ hafnia/visualizations/image_visualizations.py,sha256=6mPnRAc0dMPZCUCTqnHjgtAhQdVL_QrtyToXUuJjwxQ,7355
+ hafnia-0.3.0.dist-info/METADATA,sha256=nP2mgz_AtfOAq2OX5NgLq6lJG3fTLOog8AbKLGWYSyM,19235
+ hafnia-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ hafnia-0.3.0.dist-info/entry_points.txt,sha256=FCJVIQ8GP2VE9I3eeGVF5eLxVDNW_01pOJCpG_CGnMM,45
+ hafnia-0.3.0.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
+ hafnia-0.3.0.dist-info/RECORD,,
cli/recipe_cmds.py DELETED
@@ -1,45 +0,0 @@
1
- from pathlib import Path
2
-
3
- import click
4
-
5
- import cli.consts as consts
6
-
7
-
8
- @click.group(name="recipe")
9
- def recipe() -> None:
10
- """Hafnia Recipe management commands"""
11
- pass
12
-
13
-
14
- @recipe.command(name="create")
15
- @click.argument("source")
16
- @click.option(
17
- "--output", type=click.Path(writable=True), default="./recipe.zip", show_default=True, help="Output recipe path."
18
- )
19
- def create(source: str, output: str) -> None:
20
- """Create HRF from local path"""
21
-
22
- from hafnia.utils import archive_dir
23
-
24
- path_output_zip = Path(output)
25
- if path_output_zip.suffix != ".zip":
26
- raise click.ClickException(consts.ERROR_RECIPE_FILE_FORMAT)
27
-
28
- path_source = Path(source)
29
- path_output_zip = archive_dir(path_source, path_output_zip)
30
-
31
-
32
- @recipe.command(name="view")
33
- @click.option("--path", type=str, default="./recipe.zip", show_default=True, help="Path of recipe.zip.")
34
- @click.option("--depth-limit", type=int, default=3, help="Limit the depth of the tree view.", show_default=True)
35
- def view(path: str, depth_limit: int) -> None:
36
- """View the content of a recipe zip file."""
37
- from hafnia.utils import show_recipe_content
38
-
39
- path_recipe = Path(path)
40
- if not path_recipe.exists():
41
- raise click.ClickException(
42
- f"Recipe file '{path_recipe}' does not exist. Please provide a valid path. "
43
- f"To create a recipe, use the 'hafnia recipe create' command."
44
- )
45
- show_recipe_content(path_recipe, depth_limit=depth_limit)