hafnia 0.1.25__py3-none-any.whl → 0.1.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__main__.py +4 -9
- cli/config.py +20 -27
- cli/consts.py +3 -1
- cli/experiment_cmds.py +3 -17
- cli/profile_cmds.py +16 -2
- cli/recipe_cmds.py +2 -6
- cli/runc_cmds.py +50 -49
- hafnia/data/factory.py +3 -3
- hafnia/experiment/hafnia_logger.py +5 -5
- hafnia/http.py +2 -2
- hafnia/log.py +15 -24
- hafnia/platform/__init__.py +0 -2
- hafnia/platform/builder.py +110 -144
- hafnia/platform/download.py +8 -8
- hafnia/platform/experiment.py +31 -25
- hafnia/utils.py +67 -65
- {hafnia-0.1.25.dist-info → hafnia-0.1.27.dist-info}/METADATA +157 -12
- hafnia-0.1.27.dist-info/RECORD +27 -0
- hafnia/platform/api.py +0 -12
- hafnia/platform/executor.py +0 -111
- hafnia-0.1.25.dist-info/RECORD +0 -29
- {hafnia-0.1.25.dist-info → hafnia-0.1.27.dist-info}/WHEEL +0 -0
- {hafnia-0.1.25.dist-info → hafnia-0.1.27.dist-info}/entry_points.txt +0 -0
- {hafnia-0.1.25.dist-info → hafnia-0.1.27.dist-info}/licenses/LICENSE +0 -0
hafnia/platform/builder.py
CHANGED
@@ -1,184 +1,150 @@
 import json
 import os
+import re
+import subprocess
+import zipfile
 from hashlib import sha256
 from pathlib import Path
-from
-from typing import Dict, List, Optional
-from zipfile import ZipFile
+from typing import Dict, Optional

 import boto3
 from botocore.exceptions import ClientError

-from hafnia.log import
+from hafnia.log import sys_logger, user_logger
 from hafnia.platform import download_resource


-def
-    """
-
-
-
-
-
-
-
-
-        FileNotFoundError
-    """
-    required_paths = {"src", "scripts", "Dockerfile"} if required_paths is None else required_paths
-    with ZipFile(zip_path, "r") as archive:
-        archive_contents = {Path(file).as_posix() for file in archive.namelist()}
-        missing_paths = {
-            path for path in required_paths if not any(entry.startswith(path) for entry in archive_contents)
-        }
+def validate_recipe_format(path: Path) -> None:
+    """Validate Hafnia Recipe Format submition"""
+    hrf = zipfile.Path(path) if path.suffix == ".zip" else path
+    required = {"src", "scripts", "Dockerfile"}
+    errors = 0
+    for rp in required:
+        if not (hrf / rp).exists():
+            user_logger.error(f"Required path {rp} not found in recipe.")
+            errors += 1
+    if errors > 0:
+        raise FileNotFoundError("Wrong recipe structure")

-    if missing_paths:
-        raise FileNotFoundError(f"The following required paths are missing in the zip archive: {missing_paths}")
-
-    script_files = [f for f in archive_contents if f.startswith("scripts/") and f.endswith(".py")]
-
-    if not script_files:
-        raise ValueError("No Python script files found in the 'scripts' directory.")
-
-
-def clean_up(files: List[Path], dirs: List[Path], prefix: str = "__") -> None:
-    """
-    Clean up a list of files first, and then remove all folders starting with a specific prefix.
-
-    Args:
-        paths (list[Path]): List of file and directory paths to clean up.
-        prefix (str, optional): Prefix to match for folder removal. Defaults to "__".
-    """
-    for path in files:
-        if path.exists() and path.is_file():
-            path.unlink()
-
-    for path in dirs:
-        if path.exists() and path.is_dir():
-            for sub_dir in path.glob(f"**/{prefix}*"):
-                if sub_dir.is_dir():
-                    rmtree(sub_dir)
-
-
-def get_recipe_content(recipe_url: str, output_dir: Path, state_file: str, api_key: str) -> Dict:
-    """
-    Retrieves and validates the recipe content from an S3 location and extracts it.
-
-    Args:
-        recipe_uuid (str): The unique identifier of the recipe.
-        output_dir (str): Directory to extract the recipe content.
-        state_file (str): File to save the state information.

-
-
-    ""
-
-    recipe_path = Path(result["downloaded_files"][0])
-
-    validate_recipe(recipe_path)
-
-    with ZipFile(recipe_path, "r") as zip_ref:
+def prepare_recipe(recipe_url: str, output_dir: Path, api_key: str, state_file: Optional[Path] = None) -> Dict:
+    resource = download_resource(recipe_url, output_dir.as_posix(), api_key)
+    recipe_path = Path(resource["downloaded_files"][0])
+    with zipfile.ZipFile(recipe_path, "r") as zip_ref:
         zip_ref.extractall(output_dir)

-
+    validate_recipe_format(output_dir)

     scripts_dir = output_dir / "scripts"
-
+    if not any(scripts_dir.iterdir()):
+        user_logger.warning("Scripts folder is empty")

-
-        raise ValueError("No valid Python script commands found in the 'scripts' directory.")
-
-    state = {
+    metadata = {
         "user_data": (output_dir / "src").as_posix(),
-        "docker_context": output_dir.as_posix(),
         "dockerfile": (output_dir / "Dockerfile").as_posix(),
-        "
-        "hash": tag,
-        "valid_commands": valid_commands,
+        "digest": sha256(recipe_path.read_bytes()).hexdigest()[:8],
     }
+    state_file = state_file if state_file else output_dir / "state.json"
+    with open(state_file, "w", encoding="utf-8") as f:
+        json.dump(metadata, f)
+    return metadata

-    try:
-        with open(state_file, "w", encoding="utf-8") as f:
-            json.dump(state, f)
-    except Exception as e:
-        raise RuntimeError(f"Failed to write state file: {e}")

-
-
-
+def buildx_available() -> bool:
+    try:
+        result = subprocess.run(["docker", "buildx", "version"], capture_output=True, text=True, check=True)
+        return "buildx" in result.stdout.lower()
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False


-def build_dockerfile(dockerfile: str, docker_context: str, docker_tag: str
+def build_dockerfile(dockerfile: str, docker_context: str, docker_tag: str) -> None:
     """
     Build a Docker image using the provided Dockerfile.

     Args:
-        dockerfile (
+        dockerfile (str): Path to the Dockerfile.
         docker_context (str): Path to the build context.
         docker_tag (str): Tag for the Docker image.
-
-        Each key-value pair will be passed as --secret id=key,env=value
+        meta_file (Optional[str]): File to store build metadata.
     """
-
-    import subprocess
-
     if not Path(dockerfile).exists():
         raise FileNotFoundError("Dockerfile not found.")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    cmd = ["docker", "build", "--platform", "linux/amd64", "-t", docker_tag, "-f", dockerfile]
+
+    remote_cache = os.getenv("EXPERIMENT_CACHE_ECR")
+    cloud_mode = os.getenv("HAFNIA_CLOUD", "false").lower() in ["true", "1", "yes"]
+
+    if buildx_available():
+        cmd.insert(1, "buildx")
+        cmd += ["--build-arg", "BUILDKIT_INLINE_CACHE=1"]
+        if cloud_mode:
+            cmd += ["--push"]
+        if remote_cache:
+            cmd += [
+                "--cache-from",
+                f"type=registry,ref={remote_cache}:buildcache",
+                "--cache-to",
+                f"type=registry,ref={remote_cache}:buildcache,mode=max",
+            ]
+    cmd.append(docker_context)
+    sys_logger.debug("Build cmd: `{}`".format(" ".join(cmd)))
+    sys_logger.info(f"Building and pushing Docker image with BuildKit (buildx); cache repo: {remote_cache or 'none'}")
+    result = None
+    output = ""
+    errors = []
     try:
-
-
-
-
-
-
+        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+        output = (result.stdout or "") + (result.stderr or "")
+    except subprocess.CalledProcessError as e:
+        output = (e.stdout or "") + (e.stderr or "")
+        error_pattern = r"ERROR: (.+?)(?:\n|$)"
+        errors = re.findall(error_pattern, output)
+        if not errors:
+            raise RuntimeError(f"Docker build failed: {output}")
+        if re.search(r"image tag '([^']+)' already exists", errors[-1]):
+            sys_logger.warning("Image {} already exists in the registry.".format(docker_tag.rsplit("/")[-1]))
+            return
+        raise RuntimeError(f"Docker build failed: {output}")
+    finally:
+        stage_pattern = r"^.*\[\d+/\d+\][^\n]*"
+        stages = re.findall(stage_pattern, output, re.MULTILINE)
+        user_logger.info("\n".join(stages))
+        sys_logger.debug(output)
+
+
+def check_registry(docker_image: str) -> Optional[str]:
+    """
+    Returns the remote digest for TAG if it exists, otherwise None.
+    """
+    if "localhost" in docker_image:
+        return None
+
+    region = os.getenv("AWS_REGION")
+    if not region:
+        sys_logger.warning("AWS_REGION environment variable not set. Skip image exist check.")
+        return None
+
+    repo_name, image_tag = docker_image.rsplit(":")
+    if "/" in repo_name:
+        repo_name = repo_name.rsplit("/", 1)[-1]
+    ecr = boto3.client("ecr", region_name=region)
+    try:
+        out = ecr.describe_images(repositoryName=repo_name, imageIds=[{"imageTag": image_tag}])
+        return out["imageDetails"][0]["imageDigest"]
     except ClientError as e:
-
-
-
-        else:
-            raise e
-
-
-def prepare_recipe(recipe_url: str, output_dir: Path, api_key: str) -> Dict:
-    state_file = output_dir / "state.json"
-    get_recipe_content(recipe_url, output_dir, state_file.as_posix(), api_key)
-    with open(state_file.as_posix(), "r") as f:
-        return json.loads(f.read())
-
+        error_code = e.response["Error"]["Code"]
+        sys_logger.error(f"ECR client error: {error_code}")
+        return None

-def build_image(image_info: Dict, ecr_prefix: str, state_file: str = "state.json") -> None:
-    hafnia_tag = f"{ecr_prefix}/{image_info['name']}:{image_info['hash']}"
-    image_exists = False
-    if "localhost" not in ecr_prefix:
-        image_exists = check_ecr(image_info["name"], image_info["hash"])

-
-
-
+def build_image(metadata: Dict, registry_repo: str, state_file: str = "state.json") -> None:
+    docker_image = f"{registry_repo}:{metadata['digest']}"
+    image_exists = check_registry(docker_image) is not None
     if image_exists:
-
+        sys_logger.info("Image {} already exists in the registry.".format(docker_image.rsplit("/")[-1]))
     else:
-        build_dockerfile(
-
-
+        build_dockerfile(metadata["dockerfile"], Path(metadata["dockerfile"]).parent.as_posix(), docker_image)
+    metadata.update({"image_tag": docker_image, "image_exists": image_exists})
+    Path(state_file).write_text(json.dumps(metadata, indent=2))
hafnia/platform/download.py
CHANGED
@@ -6,7 +6,7 @@ from botocore.exceptions import ClientError
 from tqdm import tqdm

 from hafnia.http import fetch
-from hafnia.log import
+from hafnia.log import sys_logger, user_logger


 def get_resource_creds(endpoint: str, api_key: str) -> Dict[str, Any]:
@@ -30,11 +30,11 @@ def get_resource_creds(endpoint: str, api_key: str) -> Dict[str, Any]:
         RuntimeError: If the call to fetch the credentials fails for any reason.
     """
     try:
-        creds = fetch(endpoint, headers={"
-
+        creds = fetch(endpoint, headers={"Authorization": api_key, "accept": "application/json"})
+        sys_logger.debug("Successfully retrieved credentials from DIP endpoint.")
         return creds
     except Exception as e:
-
+        sys_logger.error(f"Failed to fetch credentials from endpoint: {e}")
         raise RuntimeError(f"Failed to retrieve credentials: {e}") from e


@@ -99,12 +99,12 @@ def download_resource(resource_url: str, destination: str, api_key: str) -> Dict
         s3_client.head_object(Bucket=bucket_name, Key=key)
         local_file = download_single_object(s3_client, bucket_name, key, output_path)
         downloaded_files.append(str(local_file))
-
+        user_logger.info(f"Downloaded single file: {local_file}")

     except ClientError as e:
         error_code = e.response.get("Error", {}).get("Code")
         if error_code == "404":
-
+            sys_logger.debug(f"Object '{key}' not found; trying as a prefix.")
             response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=key)
             contents = response.get("Contents", [])

@@ -118,9 +118,9 @@ def download_resource(resource_url: str, destination: str, api_key: str) -> Dict
                 local_file = download_single_object(s3_client, bucket_name, sub_key, output_path)
                 downloaded_files.append(local_file.as_posix())

-
+            user_logger.info(f"Downloaded folder/prefix '{key}' with {len(downloaded_files)} object(s).")
         else:
-
+            user_logger.error(f"Error checking object or prefix: {e}")
             raise RuntimeError(f"Failed to check or download S3 resource: {e}") from e

     return {"status": "success", "downloaded_files": downloaded_files}
hafnia/platform/experiment.py
CHANGED
@@ -2,45 +2,53 @@ from pathlib import Path
 from typing import Optional

 from hafnia.http import fetch, post
-from hafnia.
-from hafnia.utils import archive_dir, get_recipe_path
+from hafnia.log import user_logger
+from hafnia.utils import archive_dir, get_recipe_path, timed


-
-
+@timed("Fetching dataset info.")
+def get_dataset_id(dataset_name: str, endpoint: str, api_key: str) -> str:
+    headers = {"Authorization": api_key}
     full_url = f"{endpoint}?name__iexact={dataset_name}"
     dataset_info = fetch(full_url, headers=headers)
     if not dataset_info:
         raise ValueError(f"Dataset '{dataset_name}' was not found in the dataset library.")
-
+    try:
+        return dataset_info[0]["id"]
+    except (IndexError, KeyError) as e:
+        raise ValueError("Dataset information is missing or invalid") from e


-
-
+@timed("Uploading recipe.")
+def create_recipe(source_dir: Path, endpoint: str, api_key: str) -> str:
     source_dir = source_dir.resolve()  # Ensure the path is absolute to handle '.' paths are given an appropriate name.
     path_recipe = get_recipe_path(recipe_name=source_dir.name)
     zip_path = archive_dir(source_dir, output_path=path_recipe)
+    user_logger.info(f"Recipe created and stored in '{path_recipe}'")

-
-
-
-
-
-
-
-
-        "file": (zip_path.name, zip_file.read()),
-    }
-    response = post(endpoint, headers=headers, data=fields, multipart=True)
-    return response["id"]
+    headers = {"Authorization": api_key, "accept": "application/json"}
+    data = {
+        "name": path_recipe.name,
+        "description": "Recipe created by Hafnia CLI",
+        "file": (zip_path.name, Path(zip_path).read_bytes()),
+    }
+    response = post(endpoint, headers=headers, data=data, multipart=True)
+    return response["id"]


-
-
+@timed("Fetching environment info.")
+def get_exp_environment_id(name: str, endpoint: str, api_key: str) -> str:
+    headers = {"Authorization": api_key}
     env_info = fetch(endpoint, headers=headers)
-
+
+    for env in env_info:
+        if env["name"] == name:
+            return env["id"]
+
+    raise ValueError(f"Environment '{name}' not found")


+@timed("Creating exepriment.")
 def create_experiment(
     exp_name: str,
     dataset_id: str,
@@ -49,14 +57,12 @@ def create_experiment(
     environment_id: str,
     endpoint: str,
     api_key: str,
-    organization_id: str,
 ) -> Optional[str]:
-    headers = {"
+    headers = {"Authorization": api_key}
     response = post(
         endpoint,
         headers=headers,
         data={
-            "organization": organization_id,
             "name": exp_name,
             "recipe": recipe_id,
             "dataset": dataset_id,
hafnia/utils.py
CHANGED
@@ -1,18 +1,17 @@
-import functools
 import os
-import
-import tempfile
+import time
 import zipfile
 from datetime import datetime
+from functools import wraps
 from pathlib import Path
-from typing import Any, Callable, Optional
+from typing import Any, Callable, Iterator, Optional
 from zipfile import ZipFile

-import click
 import pathspec
 import seedir
+from rich import print as rprint

-from hafnia.log import
+from hafnia.log import sys_logger, user_logger

 PATH_DATA = Path("./.data")
 PATH_DATASET = PATH_DATA / "datasets"
@@ -34,6 +33,30 @@ DEFAULT_IGNORE_SPECIFICATION = [
 ]


+def timed(label: str):
+    """
+    Decorator factory that allows custom labels for timing.
+    Usage: @timed("Custom Operation")
+    """
+
+    def decorator(func: Callable) -> Callable:
+        @wraps(func)
+        def wrapper(*args, **kwargs) -> Any:
+            operation_label = label or func.__name__
+            tik = time.perf_counter()
+            try:
+                return func(*args, **kwargs)
+            except Exception as e:
+                sys_logger.error(f"{operation_label} failed: {e}")
+            finally:
+                elapsed = time.perf_counter() - tik
+                sys_logger.debug(f"{operation_label} took {elapsed:.2f} seconds.")
+
+        return wrapper
+
+    return decorator
+
+
 def now_as_str() -> str:
     """Get the current date and time as a string."""
     return datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
@@ -45,19 +68,11 @@ def get_recipe_path(recipe_name: str) -> Path:
     return path_recipe


-def
-    recipe_path: Path,
-    output_path: Optional[Path] = None,
-    path_ignore_file: Optional[Path] = None,
-) -> Path:
-    recipe_zip_path = output_path or recipe_path / "recipe.zip"
-    assert recipe_zip_path.suffix == ".zip", "Output path must be a zip file"
-    recipe_zip_path.parent.mkdir(parents=True, exist_ok=True)
-
+def filter_recipe_files(recipe_path: Path, path_ignore_file: Optional[Path] = None) -> Iterator:
     path_ignore_file = path_ignore_file or recipe_path / FILENAME_HAFNIAIGNORE
     if not path_ignore_file.exists():
         ignore_specification_lines = DEFAULT_IGNORE_SPECIFICATION
-
+        user_logger.info(
             f"No '{FILENAME_HAFNIAIGNORE}' was file found. Files are excluded using the default ignore patterns.\n"
             f"\tDefault ignore patterns: {DEFAULT_IGNORE_SPECIFICATION}\n"
             f"Add a '{FILENAME_HAFNIAIGNORE}' file to the root folder to make custom ignore patterns."
@@ -65,71 +80,58 @@ def archive_dir(
     else:
         ignore_specification_lines = Path(path_ignore_file).read_text().splitlines()
     ignore_specification = pathspec.GitIgnoreSpec.from_lines(ignore_specification_lines)
+    include_files = ignore_specification.match_tree(recipe_path, negate=True)
+    return include_files

-    include_files = sorted(ignore_specification.match_tree(recipe_path, negate=True))
-    click.echo(f"Creating zip archive of '{recipe_path}'")
-    with ZipFile(recipe_zip_path, "w") as zip_ref:
-        for str_filepath in include_files:
-            path_file = recipe_path / str_filepath
-            if not path_file.is_file():
-                continue
-
-            relative_path = path_file.relative_to(recipe_path)
-            zip_ref.write(path_file, relative_path)

-
-
-
-
-
-
-    ""
-
-
-    Args:
-        func: The function to decorate
-
-    Returns:
-        Wrapped function that handles exceptions
-    """
+@timed("Wrapping recipe.")
+def archive_dir(
+    recipe_path: Path,
+    output_path: Optional[Path] = None,
+    path_ignore_file: Optional[Path] = None,
+) -> Path:
+    recipe_zip_path = output_path or recipe_path / "recipe.zip"
+    assert recipe_zip_path.suffix == ".zip", "Output path must be a zip file"
+    recipe_zip_path.parent.mkdir(parents=True, exist_ok=True)

-
-
-
-
-
-
-
+    user_logger.info(f" Creating zip archive of '{recipe_path}'")
+    include_files = filter_recipe_files(recipe_path, path_ignore_file)
+    with ZipFile(recipe_zip_path, "w", compression=zipfile.ZIP_STORED, allowZip64=True) as zip_ref:
+        for str_filepath in include_files:
+            full_path = recipe_path / str_filepath
+            zip_ref.write(full_path, str_filepath)
+    show_recipe_content(recipe_zip_path)

-    return
+    return recipe_zip_path


 def size_human_readable(size_bytes: int, suffix="B") -> str:
-    # From: https://stackoverflow.com/a/1094933
     size_value = float(size_bytes)
     for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"):
         if abs(size_value) < 1024.0:
-            return f"{size_value:3.1f}{unit}{suffix}"
+            return f"{size_value:3.1f} {unit}{suffix}"
         size_value /= 1024.0
     return f"{size_value:.1f}Yi{suffix}"


-def
-
-
-
-
-
-
-
-
+def show_recipe_content(recipe_path: Path, style: str = "emoji", depth_limit: int = 3) -> None:
+    def scan(parent: seedir.FakeDir, path: zipfile.Path, depth: int = 0) -> None:
+        if depth >= depth_limit:
+            return
+        for child in path.iterdir():
+            if child.is_dir():
+                folder = seedir.FakeDir(child.name)
+                scan(folder, child, depth + 1)
+                folder.parent = parent
+            else:
+                parent.create_file(child.name)

-
-
+    recipe = seedir.FakeDir("recipe")
+    scan(recipe, zipfile.Path(recipe_path))
+    rprint(recipe.seedir(sort=True, first="folders", style=style, printout=False))
+    user_logger.info(f"Recipe size: {size_human_readable(os.path.getsize(recipe_path))}. Max size 800 MiB")


 def is_remote_job() -> bool:
     """Check if the current job is running in HAFNIA cloud environment."""
-
-    return is_remote
+    return os.getenv("HAFNIA_CLOUD", "false").lower() == "true"