hafnia-0.1.24-py3-none-any.whl → hafnia-0.1.26-py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
hafnia/http.py CHANGED
@@ -20,7 +20,7 @@ def fetch(endpoint: str, headers: Dict, params: Optional[Dict] = None) -> Dict:
          json.JSONDecodeError: On invalid JSON response
      """
      params = {} if params is None else params
-     http = urllib3.PoolManager(timeout=5.0, retries=urllib3.Retry(3))
+     http = urllib3.PoolManager(retries=urllib3.Retry(3))
      try:
          response = http.request("GET", endpoint, fields=params, headers=headers)
          if response.status != 200:
@@ -48,7 +48,7 @@ def post(endpoint: str, headers: Dict, data: Union[Path, Dict, bytes], multipart
          json.JSONDecodeError: If response isn't valid JSON
          ValueError: If data type is unsupported
      """
-     http = urllib3.PoolManager(timeout=5.0, retries=urllib3.Retry(3))
+     http = urllib3.PoolManager(retries=urllib3.Retry(3))
      try:
          if multipart:
              # Remove content-type header if present as urllib3 will set it
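Note: with timeout=5.0 removed, both pool managers fall back to urllib3's default timeout behaviour, so slow downloads and large multipart uploads are no longer cut off after five seconds. A caller that still wants an upper bound can pass a per-request timeout; a minimal sketch (not part of the package), with placeholder endpoint and key:

import urllib3

# Bound an individual request even though the PoolManager is created without
# a default timeout, mirroring how hafnia.http now builds its pool managers.
http = urllib3.PoolManager(retries=urllib3.Retry(3))
response = http.request(
    "GET",
    "https://api.example.com/datasets",          # placeholder endpoint
    headers={"Authorization": "YOUR_API_KEY"},   # placeholder key
    timeout=urllib3.Timeout(connect=5.0, read=60.0),
)
print(response.status)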
hafnia/log.py CHANGED
@@ -1,32 +1,23 @@
  import logging
+ import os

- from hafnia import __package_name__
-
-
- class CustomFormatter(logging.Formatter):
-     log_format = "%(asctime)s - %(name)s:%(filename)s @ %(lineno)d - %(levelname)s - %(message)s"
+ from rich.logging import RichHandler

-     def format(self, record):
-         formatter = logging.Formatter(self.log_format)
-         return formatter.format(record)
-
-
- def create_logger() -> logging.Logger:
-     root_logger = logging.getLogger(__package_name__)
-     if root_logger.hasHandlers():
-         return root_logger
+ from hafnia import __package_name__

-     ch = logging.StreamHandler()
-     ch.setLevel(logging.INFO)
-     ch.setFormatter(CustomFormatter())
+ system_handler = RichHandler(rich_tracebacks=True, show_path=True, show_level=True)
+ user_handler = RichHandler(rich_tracebacks=False, show_path=False, show_level=False, log_time_format="[%X]")

-     root_logger.propagate = False
-     for handler in root_logger.handlers:
-         root_logger.removeHandler(handler)

-     root_logger.addHandler(ch)
-     root_logger.setLevel(logging.INFO)
-     return root_logger
+ def create_logger(handler: RichHandler, name: str, log_level: str) -> logging.Logger:
+     logger = logging.getLogger(name)
+     if logger.hasHandlers():
+         logger.handlers.clear()
+     logger.addHandler(handler)
+     logger.setLevel(log_level)
+     logger.propagate = False
+     return logger


- logger = create_logger()
+ sys_logger = create_logger(system_handler, f"{__package_name__}.system", os.getenv("HAFNIA_LOG", "INFO").upper())
+ user_logger = create_logger(user_handler, f"{__package_name__}.user", "DEBUG")
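The single package logger is replaced by two Rich-backed loggers: sys_logger for internal diagnostics (level taken from the HAFNIA_LOG environment variable, default INFO) and user_logger for user-facing output (always DEBUG, compact "[%X]" timestamps, no path or level). A minimal usage sketch, assuming hafnia 0.1.26 and rich are installed:

import os

# HAFNIA_LOG only affects sys_logger; set it before hafnia.log is first imported.
os.environ.setdefault("HAFNIA_LOG", "DEBUG")

from hafnia.log import sys_logger, user_logger

sys_logger.debug("internal detail: rich traceback, path and level are shown")
user_logger.info("user-facing message: compact timestamp, no path or level")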
hafnia/platform/__init__.py CHANGED
@@ -1,4 +1,3 @@
- from hafnia.platform.api import get_organization_id
  from hafnia.platform.download import (
      download_resource,
      download_single_object,
@@ -12,7 +11,6 @@ from hafnia.platform.experiment import (
  )

  __all__ = [
-     "get_organization_id",
      "get_dataset_id",
      "create_recipe",
      "get_exp_environment_id",
hafnia/platform/builder.py CHANGED
@@ -1,184 +1,144 @@
  import json
  import os
+ import subprocess
+ import tempfile
+ import zipfile
  from hashlib import sha256
  from pathlib import Path
- from shutil import rmtree
- from typing import Dict, List, Optional
- from zipfile import ZipFile
+ from typing import Dict, Optional

  import boto3
  from botocore.exceptions import ClientError

- from hafnia.log import logger
+ from hafnia.log import sys_logger, user_logger
  from hafnia.platform import download_resource


- def validate_recipe(zip_path: Path, required_paths: Optional[set] = None) -> None:
-     """
-     Validates the structure of a zip archive.
-     Ensures the presence of specific files and directories.
-
-     Args:
-         zip_path (Path): Path to the zip archive.
-         required_paths (set): A set of required paths relative to the archive root.
-
-     Raises:
-         FileNotFoundError: If any required file or directory is missing.
-     """
-     required_paths = {"src/lib/", "src/scripts/", "Dockerfile"} if required_paths is None else required_paths
-     with ZipFile(zip_path, "r") as archive:
-         archive_contents = {Path(file).as_posix() for file in archive.namelist()}
-         missing_paths = {
-             path for path in required_paths if not any(entry.startswith(path) for entry in archive_contents)
-         }
+ def validate_recipe_format(path: Path) -> None:
+     """Validate Hafnia Recipe Format submition"""
+     hrf = zipfile.Path(path) if path.suffix == ".zip" else path
+     required = {"src", "scripts", "Dockerfile"}
+     errors = 0
+     for rp in required:
+         if not (hrf / rp).exists():
+             user_logger.error(f"Required path {rp} not found in recipe.")
+             errors += 1
+     if errors > 0:
+         raise FileNotFoundError("Wrong recipe structure")

-         if missing_paths:
-             raise FileNotFoundError(f"The following required paths are missing in the zip archive: {missing_paths}")
-
-     script_files = [f for f in archive_contents if f.startswith("src/scripts/") and f.endswith(".py")]
-
-     if not script_files:
-         raise ValueError("No Python script files found in the 'src/scripts/' directory.")
-
-
- def clean_up(files: List[Path], dirs: List[Path], prefix: str = "__") -> None:
-     """
-     Clean up a list of files first, and then remove all folders starting with a specific prefix.
-
-     Args:
-         paths (list[Path]): List of file and directory paths to clean up.
-         prefix (str, optional): Prefix to match for folder removal. Defaults to "__".
-     """
-     for path in files:
-         if path.exists() and path.is_file():
-             path.unlink()
-
-     for path in dirs:
-         if path.exists() and path.is_dir():
-             for sub_dir in path.glob(f"**/{prefix}*"):
-                 if sub_dir.is_dir():
-                     rmtree(sub_dir)
-
-
- def get_recipe_content(recipe_url: str, output_dir: Path, state_file: str, api_key: str) -> Dict:
-     """
-     Retrieves and validates the recipe content from an S3 location and extracts it.
-
-     Args:
-         recipe_uuid (str): The unique identifier of the recipe.
-         output_dir (str): Directory to extract the recipe content.
-         state_file (str): File to save the state information.

-     Returns:
-         Dict: Metadata about the recipe for further processing.
-     """
-     result = download_resource(recipe_url, output_dir, api_key)
-     recipe_path = Path(result["downloaded_files"][0])
-
-     validate_recipe(recipe_path)
-
-     with ZipFile(recipe_path, "r") as zip_ref:
+ def prepare_recipe(recipe_url: str, output_dir: Path, api_key: str, state_file: Optional[Path] = None) -> Dict:
+     resource = download_resource(recipe_url, output_dir.as_posix(), api_key)
+     recipe_path = Path(resource["downloaded_files"][0])
+     with zipfile.ZipFile(recipe_path, "r") as zip_ref:
          zip_ref.extractall(output_dir)

-     tag = sha256(recipe_path.read_bytes()).hexdigest()[:8]
+     validate_recipe_format(output_dir)

-     scripts_dir = output_dir / "src/scripts"
-     valid_commands = [str(f.name)[:-3] for f in scripts_dir.iterdir() if f.is_file() and f.suffix.lower() == ".py"]
+     scripts_dir = output_dir / "scripts"
+     if not any(scripts_dir.iterdir()):
+         user_logger.warning("Scripts folder is empty")

-     if not valid_commands:
-         raise ValueError("No valid Python script commands found in the 'src/scripts' directory.")
-
-     state = {
+     metadata = {
          "user_data": (output_dir / "src").as_posix(),
-         "docker_context": output_dir.as_posix(),
          "dockerfile": (output_dir / "Dockerfile").as_posix(),
-         "docker_tag": f"runtime:{tag}",
-         "hash": tag,
-         "valid_commands": valid_commands,
+         "digest": sha256(recipe_path.read_bytes()).hexdigest()[:8],
      }
+     state_file = state_file if state_file else output_dir / "state.json"
+     with open(state_file, "w", encoding="utf-8") as f:
+         json.dump(metadata, f)
+     return metadata

-     try:
-         with open(state_file, "w", encoding="utf-8") as f:
-             json.dump(state, f)
-     except Exception as e:
-         raise RuntimeError(f"Failed to write state file: {e}")

-     clean_up([recipe_path], [output_dir])
-
-     return state
+ def buildx_available() -> bool:
+     try:
+         result = subprocess.run(["docker", "buildx", "version"], capture_output=True, text=True, check=True)
+         return "buildx" in result.stdout.lower()
+     except (subprocess.CalledProcessError, FileNotFoundError):
+         return False


- def build_dockerfile(dockerfile: str, docker_context: str, docker_tag: str, secrets: Optional[Dict] = None) -> None:
+ def build_dockerfile(dockerfile: str, docker_context: str, docker_tag: str, meta_file: str) -> None:
      """
      Build a Docker image using the provided Dockerfile.

      Args:
-         dockerfile (Path): Path to the Dockerfile.
+         dockerfile (str): Path to the Dockerfile.
          docker_context (str): Path to the build context.
          docker_tag (str): Tag for the Docker image.
-         secrets (dict, optional): Dictionary of secrets to pass to docker build.
-             Each key-value pair will be passed as --secret id=key,env=value
+         meta_file (Optional[str]): File to store build metadata.
      """
-
-     import subprocess
-
      if not Path(dockerfile).exists():
          raise FileNotFoundError("Dockerfile not found.")
-     build_cmd = [
-         "docker",
-         "build",
-         "--platform=linux/amd64",
-         "-t",
-         docker_tag,
-         "-f",
-         dockerfile,
-     ]
-     build_cmd.append(docker_context)
-     logger.info(f"Building Docker image: {' '.join(build_cmd)}")
-     subprocess.run(build_cmd, check=True)
-
-
- def check_ecr(repository_name: str, image_tag: str) -> bool:
-     aws_region = os.getenv("AWS_REGION", None)
-     if aws_region is None:
-         logger.warning("ECR registry region is not provided can not look up in the registry.")
-         return False
-     session = boto3.Session(region_name=aws_region)
-     ecr_client = session.client("ecr")
-     try:
-         response = ecr_client.describe_images(repositoryName=repository_name, imageIds=[{"imageTag": image_tag}])
-         if response["imageDetails"]:
-             logger.info(f"Image {image_tag} already exists in ECR.")
-             return True
-         else:
-             return False
-     except ClientError as e:
-         if e.response["Error"]["Code"] == "ImageNotFoundException":
-             logger.info(f"Image {image_tag} does not exist in ECR.")
-             return False
-         else:
-             raise e

+     cmd = ["docker", "build", "--platform", "linux/amd64", "-t", docker_tag, "-f", dockerfile]
+
+     remote_cache = os.getenv("REMOTE_CACHE_REPO")
+     cloud_mode = os.getenv("HAFNIA_CLOUD", "false").lower() in ["true", "1", "yes"]
+
+     if buildx_available():
+         cmd.insert(1, "buildx")
+         cmd += ["--build-arg", "BUILDKIT_INLINE_CACHE=1", "--metadata-file", meta_file]
+         if cloud_mode:
+             cmd += ["--push"]
+         if remote_cache:
+             cmd += [
+                 "--cache-from",
+                 f"type=registry,ref={remote_cache}:buildcache",
+                 "--cache-to",
+                 f"type=registry,ref={remote_cache}:buildcache,mode=max",
+             ]
+     cmd.append(docker_context)
+     sys_logger.debug("Build cmd: `{}`".format(" ".join(cmd)))
+     sys_logger.info(f"Building and pushing Docker image with BuildKit (buildx); cache repo: {remote_cache or 'none'}")
+     try:
+         subprocess.run(cmd, check=True)
+     except subprocess.CalledProcessError as e:
+         sys_logger.error(f"Docker build failed: {e}")
+         raise RuntimeError(f"Docker build failed: {e}")

- def prepare_recipe(recipe_url: str, output_dir: Path, api_key: str) -> Dict:
-     state_file = output_dir / "state.json"
-     get_recipe_content(recipe_url, output_dir, state_file.as_posix(), api_key)
-     with open(state_file.as_posix(), "r") as f:
-         return json.loads(f.read())

+ def check_registry(docker_image: str) -> Optional[str]:
+     """
+     Returns the remote digest for TAG if it exists, otherwise None.
+     """
+     if "localhost" in docker_image:
+         return None
+
+     region = os.getenv("AWS_REGION")
+     if not region:
+         sys_logger.warning("AWS_REGION environment variable not set. Skip image exist check.")
+         return None
+
+     repo_name, image_tag = docker_image.rsplit(":")
+     if "/" in repo_name:
+         repo_name = repo_name.rsplit("/", 1)[-1]
+     ecr = boto3.client("ecr", region_name=region)
+     try:
+         out = ecr.describe_images(repositoryName=repo_name, imageIds=[{"imageTag": image_tag}])
+         return out["imageDetails"][0]["imageDigest"]
+     except ClientError as e:
+         error_code = e.response["Error"]["Code"]
+         sys_logger.error(f"ECR client error: {error_code}")
+         return None

- def build_image(image_info: Dict, ecr_prefix: str, state_file: str = "state.json") -> None:
-     hafnia_tag = f"{ecr_prefix}/{image_info['name']}:{image_info['hash']}"
-     image_exists = False
-     if "localhost" not in ecr_prefix:
-         image_exists = check_ecr(image_info["name"], image_info["hash"])

-     image_info.update({"mdi_tag": hafnia_tag, "image_exists": image_exists})
-     state_path = Path(state_file)
-     state_path.parent.mkdir(parents=True, exist_ok=True)
+ def build_image(metadata: Dict, registry_repo: str, state_file: str = "state.json") -> None:
+     docker_image = f"{registry_repo}:{metadata['digest']}"
+     image_exists = check_registry(docker_image) is not None
      if image_exists:
-         logger.info(f"Image {hafnia_tag} already exists in ECR. Skipping build.")
+         sys_logger.info(f"Tag already in ECR skipping build of {docker_image}.")
      else:
-         build_dockerfile(image_info["dockerfile"], image_info["docker_context"], hafnia_tag)
-     with open(state_path.as_posix(), "w") as f:
-         json.dump(image_info, f, indent=4)
+         with tempfile.NamedTemporaryFile() as meta_tmp:
+             meta_file = meta_tmp.name
+             build_dockerfile(
+                 metadata["dockerfile"], Path(metadata["dockerfile"]).parent.as_posix(), docker_image, meta_file
+             )
+             with open(meta_file) as m:
+                 try:
+                     build_meta = json.load(m)
+                     metadata["local_digest"] = build_meta["containerimage.digest"]
+                 except Exception:
+                     metadata["local_digest"] = ""
+     metadata.update({"image_tag": docker_image, "image_exists": image_exists})
+     Path(state_file).write_text(json.dumps(metadata, indent=2))
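The builder is reorganized around three steps: prepare_recipe downloads and extracts the recipe, validates the src/, scripts/ and Dockerfile layout, and writes a state file containing an 8-character content digest; check_registry asks ECR for the digest of an existing tag; and build_image builds (and, when buildx is available, optionally pushes and caches via the REMOTE_CACHE_REPO and HAFNIA_CLOUD environment variables) only when the tag is not already in the registry. A hypothetical end-to-end sketch; the recipe URL, API key and ECR repository below are placeholders:

from pathlib import Path

from hafnia.platform.builder import build_image, prepare_recipe

workdir = Path("/tmp/recipe")
workdir.mkdir(parents=True, exist_ok=True)

# Downloads the recipe archive, extracts it into workdir, validates the
# layout, and writes workdir/state.json with "user_data", "dockerfile", "digest".
metadata = prepare_recipe(
    recipe_url="s3://example-bucket/recipes/example.zip",  # placeholder
    output_dir=workdir,
    api_key="YOUR_API_KEY",                                # placeholder
)

# The image tag becomes <registry_repo>:<digest>; the build is skipped when
# check_registry() already finds that tag in ECR.
build_image(
    metadata,
    registry_repo="123456789012.dkr.ecr.eu-west-1.amazonaws.com/runtime",  # placeholder
    state_file=str(workdir / "state.json"),
)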
hafnia/platform/download.py CHANGED
@@ -6,7 +6,7 @@ from botocore.exceptions import ClientError
  from tqdm import tqdm

  from hafnia.http import fetch
- from hafnia.log import logger
+ from hafnia.log import sys_logger, user_logger


  def get_resource_creds(endpoint: str, api_key: str) -> Dict[str, Any]:
@@ -30,11 +30,11 @@ def get_resource_creds(endpoint: str, api_key: str) -> Dict[str, Any]:
          RuntimeError: If the call to fetch the credentials fails for any reason.
      """
      try:
-         creds = fetch(endpoint, headers={"X-APIKEY": api_key, "accept": "application/json"})
-         logger.debug("Successfully retrieved credentials from DIP endpoint.")
+         creds = fetch(endpoint, headers={"Authorization": api_key, "accept": "application/json"})
+         sys_logger.debug("Successfully retrieved credentials from DIP endpoint.")
          return creds
      except Exception as e:
-         logger.error(f"Failed to fetch credentials from endpoint: {e}")
+         sys_logger.error(f"Failed to fetch credentials from endpoint: {e}")
          raise RuntimeError(f"Failed to retrieve credentials: {e}") from e


@@ -99,12 +99,12 @@ def download_resource(resource_url: str, destination: str, api_key: str) -> Dict
          s3_client.head_object(Bucket=bucket_name, Key=key)
          local_file = download_single_object(s3_client, bucket_name, key, output_path)
          downloaded_files.append(str(local_file))
-         logger.info(f"Downloaded single file: {local_file}")
+         user_logger.info(f"Downloaded single file: {local_file}")

      except ClientError as e:
          error_code = e.response.get("Error", {}).get("Code")
          if error_code == "404":
-             logger.debug(f"Object '{key}' not found; trying as a prefix.")
+             sys_logger.debug(f"Object '{key}' not found; trying as a prefix.")
              response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=key)
              contents = response.get("Contents", [])

@@ -118,9 +118,9 @@ def download_resource(resource_url: str, destination: str, api_key: str) -> Dict
              local_file = download_single_object(s3_client, bucket_name, sub_key, output_path)
              downloaded_files.append(local_file.as_posix())

-             logger.info(f"Downloaded folder/prefix '{key}' with {len(downloaded_files)} object(s).")
+             user_logger.info(f"Downloaded folder/prefix '{key}' with {len(downloaded_files)} object(s).")
          else:
-             logger.error(f"Error checking object or prefix: {e}")
+             user_logger.error(f"Error checking object or prefix: {e}")
              raise RuntimeError(f"Failed to check or download S3 resource: {e}") from e

      return {"status": "success", "downloaded_files": downloaded_files}
hafnia/platform/experiment.py CHANGED
@@ -2,45 +2,53 @@ from pathlib import Path
  from typing import Optional

  from hafnia.http import fetch, post
- from hafnia.platform.builder import validate_recipe
- from hafnia.utils import archive_dir, get_recipe_path
+ from hafnia.log import user_logger
+ from hafnia.utils import archive_dir, get_recipe_path, timed


- def get_dataset_id(dataset_name: str, endpoint: str, api_key: str) -> Optional[str]:
-     headers = {"X-APIKEY": api_key}
+ @timed("Fetching dataset info.")
+ def get_dataset_id(dataset_name: str, endpoint: str, api_key: str) -> str:
+     headers = {"Authorization": api_key}
      full_url = f"{endpoint}?name__iexact={dataset_name}"
      dataset_info = fetch(full_url, headers=headers)
      if not dataset_info:
          raise ValueError(f"Dataset '{dataset_name}' was not found in the dataset library.")
-     return dataset_info[0]["id"]
+     try:
+         return dataset_info[0]["id"]
+     except (IndexError, KeyError) as e:
+         raise ValueError("Dataset information is missing or invalid") from e


- def create_recipe(source_dir: Path, endpoint: str, api_key: str, organization_id: str) -> Optional[str]:
-     headers = {"X-APIKEY": api_key, "accept": "application/json"}
+ @timed("Uploading recipe.")
+ def create_recipe(source_dir: Path, endpoint: str, api_key: str) -> str:
      source_dir = source_dir.resolve()  # Ensure the path is absolute to handle '.' paths are given an appropriate name.
      path_recipe = get_recipe_path(recipe_name=source_dir.name)
      zip_path = archive_dir(source_dir, output_path=path_recipe)
+     user_logger.info(f"Recipe created and stored in '{path_recipe}'")

-     print(f"Recipe created and stored in '{path_recipe}'")
-
-     validate_recipe(zip_path)
-     with open(zip_path, "rb") as zip_file:
-         fields = {
-             "name": path_recipe.name,
-             "description": "Recipe created by Hafnia CLI",
-             "organization": organization_id,
-             "file": (zip_path.name, zip_file.read()),
-         }
-     response = post(endpoint, headers=headers, data=fields, multipart=True)
-     return response["id"]
+     headers = {"Authorization": api_key, "accept": "application/json"}
+     data = {
+         "name": path_recipe.name,
+         "description": "Recipe created by Hafnia CLI",
+         "file": (zip_path.name, Path(zip_path).read_bytes()),
+     }
+     response = post(endpoint, headers=headers, data=data, multipart=True)
+     return response["id"]


- def get_exp_environment_id(name: str, endpoint: str, api_key: str) -> Optional[str]:
-     headers = {"X-APIKEY": api_key}
+ @timed("Fetching environment info.")
+ def get_exp_environment_id(name: str, endpoint: str, api_key: str) -> str:
+     headers = {"Authorization": api_key}
      env_info = fetch(endpoint, headers=headers)
-     return next((env["id"] for env in env_info if env["name"] == name), None)
+
+     for env in env_info:
+         if env["name"] == name:
+             return env["id"]
+
+     raise ValueError(f"Environment '{name}' not found")


+ @timed("Creating exepriment.")
  def create_experiment(
      exp_name: str,
      dataset_id: str,
@@ -49,14 +57,12 @@ def create_experiment(
      environment_id: str,
      endpoint: str,
      api_key: str,
-     organization_id: str,
  ) -> Optional[str]:
-     headers = {"X-APIKEY": api_key}
+     headers = {"Authorization": api_key}
      response = post(
          endpoint,
          headers=headers,
          data={
-             "organization": organization_id,
              "name": exp_name,
              "recipe": recipe_id,
              "dataset": dataset_id,