buildfunctions 0.2.0-py3-none-any.whl → 0.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- buildfunctions/__init__.py +139 -1
- buildfunctions/client.py +282 -0
- buildfunctions/cpu_function.py +167 -0
- buildfunctions/cpu_sandbox.py +393 -0
- buildfunctions/dotdict.py +39 -0
- buildfunctions/errors.py +90 -0
- buildfunctions/framework.py +22 -0
- buildfunctions/gpu_function.py +241 -0
- buildfunctions/gpu_sandbox.py +443 -0
- buildfunctions/http_client.py +97 -0
- buildfunctions/memory.py +28 -0
- buildfunctions/py.typed +0 -0
- buildfunctions/resolve_code.py +109 -0
- buildfunctions/types.py +227 -0
- buildfunctions/uploader.py +198 -0
- buildfunctions-0.2.1.dist-info/METADATA +176 -0
- buildfunctions-0.2.1.dist-info/RECORD +18 -0
- {buildfunctions-0.2.0.dist-info → buildfunctions-0.2.1.dist-info}/WHEEL +1 -2
- buildfunctions/api.py +0 -2
- buildfunctions-0.2.0.dist-info/METADATA +0 -6
- buildfunctions-0.2.0.dist-info/RECORD +0 -6
- buildfunctions-0.2.0.dist-info/top_level.txt +0 -1
buildfunctions/http_client.py
ADDED

@@ -0,0 +1,97 @@
+"""HTTP Client for Buildfunctions API."""
+
+from __future__ import annotations
+
+from typing import Any
+from urllib.parse import urlencode, urljoin
+
+import httpx
+
+from buildfunctions.errors import AuthenticationError, BuildfunctionsError, error_from_response
+
+
+def create_http_client(base_url: str, api_token: str, timeout: float = 600.0) -> dict[str, Any]:
+    """Create an HTTP client for the Buildfunctions API.
+
+    Returns a dict with request/get/post/put/delete/set_token functions.
+    """
+    if not api_token:
+        raise AuthenticationError("API token is required")
+
+    resolved_base_url = base_url.rstrip("/")
+    state = {"token": api_token}
+
+    def _build_url(path: str, params: dict[str, str | int] | None = None) -> str:
+        url = f"{resolved_base_url}{path}"
+        if params:
+            query = urlencode({k: str(v) for k, v in params.items()})
+            url = f"{url}?{query}"
+        return url
+
+    async def _parse_response(response: httpx.Response) -> Any:
+        content_type = response.headers.get("content-type", "")
+        if "application/json" in content_type:
+            return response.json()
+        text = response.text
+        return {"message": text}
+
+    async def request(
+        method: str,
+        path: str,
+        body: dict[str, Any] | None = None,
+        params: dict[str, str | int] | None = None,
+    ) -> Any:
+        url = _build_url(path, params)
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {state['token']}",
+        }
+
+        try:
+            async with httpx.AsyncClient(timeout=httpx.Timeout(timeout)) as client:
+                response = await client.request(
+                    method=method,
+                    url=url,
+                    headers=headers,
+                    json=body,
+                )
+
+            data = await _parse_response(response)
+
+            if not response.is_success:
+                raise error_from_response(response.status_code, data if isinstance(data, dict) else {})
+
+            return data
+
+        except BuildfunctionsError:
+            raise
+        except httpx.TimeoutException:
+            raise BuildfunctionsError("Request timed out", "NETWORK_ERROR")
+        except httpx.ConnectError:
+            raise BuildfunctionsError("Unable to connect to server", "NETWORK_ERROR")
+        except Exception:
+            raise BuildfunctionsError("Request failed", "UNKNOWN_ERROR")
+
+    async def get(path: str, params: dict[str, str | int] | None = None) -> Any:
+        return await request("GET", path, params=params)
+
+    async def post(path: str, body: dict[str, Any] | None = None) -> Any:
+        return await request("POST", path, body=body)
+
+    async def put(path: str, body: dict[str, Any] | None = None) -> Any:
+        return await request("PUT", path, body=body)
+
+    async def delete(path: str, body: dict[str, Any] | None = None) -> Any:
+        return await request("DELETE", path, body=body)
+
+    def set_token(token: str) -> None:
+        state["token"] = token
+
+    return {
+        "request": request,
+        "get": get,
+        "post": post,
+        "put": put,
+        "delete": delete,
+        "set_token": set_token,
+    }
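create_http_client returns a dict of async closures rather than a class instance, so call sites index by method name, and the token can be rotated mid-session via set_token. A minimal usage sketch, assuming a valid token; the base URL and the /v1/functions path are illustrative placeholders, not documented routes:

import asyncio

from buildfunctions.http_client import create_http_client


async def main() -> None:
    client = create_http_client("https://api.example.com", api_token="my-token")
    data = await client["get"]("/v1/functions", params={"page": 1})  # hypothetical route
    print(data)
    client["set_token"]("rotated-token")  # later requests pick up the new token


asyncio.run(main())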
buildfunctions/memory.py
ADDED

@@ -0,0 +1,28 @@
+"""Memory string parsing utility."""
+
+from __future__ import annotations
+
+import re
+
+
+def parse_memory(memory: str | int) -> int:
+    """Parse memory string to megabytes.
+
+    Accepts: "2GB", "1024MB", or raw int (treated as MB).
+    Returns value in MB.
+    """
+    if isinstance(memory, int):
+        return memory
+
+    text = memory.strip().upper()
+    match = re.match(r"^(\d+)\s*(GB|MB)$", text)
+
+    if not match:
+        raise ValueError(f'Invalid memory format: "{memory}". Use "2GB" or "1024MB".')
+
+    value = int(match.group(1))
+    unit = match.group(2)
+
+    if unit == "GB":
+        return value * 1024
+    return value  # MB
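The parsing rules are small enough to pin down with a worked example; matching is effectively case-insensitive because the input is upper-cased first, and the \s* in the regex permits whitespace between value and unit:

from buildfunctions.memory import parse_memory

assert parse_memory("2GB") == 2048     # GB converts at 1 GB = 1024 MB
assert parse_memory("2 gb") == 2048    # upper-cased first; optional whitespace
assert parse_memory("1024MB") == 1024
assert parse_memory(512) == 512        # raw ints pass through unchanged, as MB
# parse_memory("2TB") raises ValueError: only GB and MB units are accepted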
buildfunctions/py.typed
ADDED

File without changes

buildfunctions/resolve_code.py
ADDED

@@ -0,0 +1,109 @@
+"""Resolve code parameter - supports both inline code and file paths.
+
+Relative paths (./foo.py, ../bar.py) are resolved relative to the caller's
+file location, not the current working directory.
+"""
+
+from __future__ import annotations
+
+import inspect
+from pathlib import Path
+
+from buildfunctions.errors import ValidationError
+
+# SDK directory - used to skip SDK frames when finding caller
+_SDK_DIR = Path(__file__).parent.resolve()
+
+CODE_EXTENSIONS = frozenset({
+    ".js", ".mjs", ".cjs", ".ts", ".tsx", ".jsx",  # JavaScript & TypeScript
+    ".py", ".pyw", ".pyi",  # Python
+})
+
+
+def get_caller_file() -> Path | None:
+    """Get the file path of the caller (the file that called the SDK).
+
+    Used to resolve relative paths against the caller's location.
+    Skips frames that are inside the SDK itself.
+    """
+    for frame_info in inspect.stack():
+        frame_path = Path(frame_info.filename).resolve()
+
+        # Skip frames inside the SDK directory
+        try:
+            frame_path.relative_to(_SDK_DIR)
+            continue  # Frame is inside SDK, skip it
+        except ValueError:
+            pass  # Frame is outside SDK
+
+        # Skip frames that don't have a real file
+        if not frame_path.exists():
+            continue
+
+        return frame_path
+
+    return None
+
+
+def _looks_like_file_path(value: str) -> bool:
+    """Check if a string looks like a file path."""
+    if value.startswith(("/", "./", "../", "~")):
+        return True
+    # Windows drive letter
+    if len(value) >= 3 and value[1] == ":" and value[2] in ("/", "\\"):
+        return True
+    # Ends with known code file extension
+    dot_index = value.rfind(".")
+    if dot_index > 0:
+        ext = value[dot_index:].lower()
+        if ext in CODE_EXTENSIONS:
+            return True
+    return False
+
+
+async def resolve_code(code: str, base_path: Path | None = None) -> str:
+    """Resolve code string - reads from file if it's a path, returns as-is if inline.
+
+    Args:
+        code: Either inline code or a file path
+        base_path: Base directory for resolving relative paths (e.g., caller's directory).
+            If not provided, automatically detects caller's file location.
+
+    Detection heuristic:
+    1. If the string contains a newline, treat as inline code.
+    2. If the resolved path exists on disk, read and return the file contents.
+    3. If it looks like a path but does not exist, raise ValidationError.
+    4. Otherwise treat as single-line inline code.
+    """
+    if "\n" in code:
+        return code
+
+    # Expand ~ to home directory
+    path_to_check = Path(code).expanduser()
+
+    # Resolve the path:
+    # - Absolute paths stay absolute
+    # - Relative paths resolve against base_path or caller's directory
+    if path_to_check.is_absolute():
+        resolved = path_to_check.resolve()
+    elif base_path:
+        resolved = (base_path / path_to_check).resolve()
+    else:
+        # Auto-detect caller's directory for relative paths
+        caller_file = get_caller_file()
+        if caller_file:
+            resolved = (caller_file.parent / path_to_check).resolve()
+        else:
+            resolved = path_to_check.resolve()
+
+    if resolved.exists() and resolved.is_file():
+        return resolved.read_text(encoding="utf-8")
+
+    if _looks_like_file_path(code):
+        raise ValidationError(
+            f'Code file not found: "{code}" (resolved to "{resolved}"). '
+            f"If this is meant to be inline code, ensure it is a valid code string."
+        )
+
+    # Single-line inline code
+    return code
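A sketch of the four-step detection heuristic in use, assuming a hypothetical handler.py sitting next to the calling script:

import asyncio

from buildfunctions.resolve_code import resolve_code


async def main() -> None:
    # Rule 1: an embedded newline short-circuits path detection.
    inline = await resolve_code("import os\nprint(os.getcwd())")

    # Rule 2: a relative path resolves against the caller's directory (this
    # file's location), not the CWD; the file's contents are returned.
    from_file = await resolve_code("./handler.py")  # hypothetical sibling file

    # Rule 3 would raise ValidationError for a path-looking string that does
    # not exist; rule 4 returns any other single-line string unchanged.
    one_liner = await resolve_code("print('hi')")


asyncio.run(main())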
buildfunctions/types.py
ADDED

@@ -0,0 +1,227 @@
+"""Buildfunctions SDK Type Definitions."""
+
+from __future__ import annotations
+
+from typing import Any, Awaitable, Callable, Literal, TypedDict
+
+
+# Scalar types
+Language = Literal["javascript", "typescript", "python", "go", "shell"]
+Runtime = Literal["node", "deno", "python", "go", "shell"]
+GPUType = Literal["T4"]
+Framework = Literal["pytorch"]
+Memory = Literal["128Mi", "256Mi", "512Mi", "1Gi", "2Gi", "4Gi", "8Gi", "16Gi", "32Gi", "64Gi"]
+ErrorCode = Literal[
+    "UNAUTHORIZED",
+    "NOT_FOUND",
+    "INVALID_REQUEST",
+    "MAX_CAPACITY",
+    "SIZE_LIMIT_EXCEEDED",
+    "VALIDATION_ERROR",
+    "NETWORK_ERROR",
+    "UNKNOWN_ERROR",
+]
+
+
+# Client configuration
+class BuildfunctionsConfig(TypedDict, total=False):
+    api_token: str
+    base_url: str
+    gpu_build_url: str
+
+
+class _BuildfunctionsConfigRequired(TypedDict):
+    api_token: str
+
+
+# Authenticated user
+class AuthenticatedUser(TypedDict, total=False):
+    id: str
+    username: str | None
+    email: str | None
+    compute_tier: str | None
+
+
+# Auth response
+class AuthResponse(TypedDict):
+    authenticated: bool
+    user: AuthenticatedUser
+    sessionToken: str
+    expiresAt: str
+    authenticatedAt: str
+
+
+# Function configuration
+class FunctionConfig(TypedDict, total=False):
+    memory: str | int
+    timeout: int
+    cpu_cores: int
+
+
+# CPU function options
+class CPUFunctionOptions(TypedDict, total=False):
+    name: str
+    language: Language
+    runtime: Runtime
+    code: str  # Inline code string or path to file (absolute, relative, or ~/path)
+    config: FunctionConfig
+    env_variables: dict[str, str]
+    dependencies: str
+    cron_schedule: str
+
+
+# GPU function options (extends CPU)
+class GPUFunctionOptions(TypedDict, total=False):
+    name: str
+    language: Language
+    runtime: Runtime
+    code: str  # Inline code string or path to file (absolute, relative, or ~/path)
+    config: FunctionConfig
+    env_variables: dict[str, str]
+    dependencies: str
+    cron_schedule: str
+    gpu: GPUType
+    cpu_cores: int  # vCPUs for the GPU function VM (hotplugged at runtime, default 10, max 50)
+    framework: Framework
+    model_path: str
+    model_name: str
+
+
+# Create function options (for SDK deploy via client)
+class CreateFunctionOptions(TypedDict, total=False):
+    name: str
+    code: str  # Inline code string or path to file (absolute, relative, or ~/path)
+    language: Language
+    runtime: Runtime
+    memory: str | int
+    timeout: int
+    env_variables: list[dict[str, str]]
+    requirements: str | list[str]
+    cron_schedule: str
+    processor_type: Literal["CPU", "GPU"]
+    framework: Framework
+    gpu: GPUType
+    model_name: str
+    model_path: str
+
+
+# Deployed function
+class DeployedFunction(TypedDict, total=False):
+    id: str
+    name: str
+    subdomain: str
+    endpoint: str
+    lambdaUrl: str
+    language: str
+    runtime: str
+    lambdaMemoryAllocated: int
+    timeoutSeconds: int
+    cpuCores: str
+    isGPUF: bool
+    framework: str
+    createdAt: str
+    updatedAt: str
+    delete: Callable[[], Awaitable[None]]
+
+
+# Sandbox configuration
+class CPUSandboxConfig(TypedDict, total=False):
+    name: str
+    language: Language
+    runtime: Runtime
+    code: str  # Inline code string or path to file (absolute, relative, or ~/path)
+    memory: str | int
+    timeout: int
+    env_variables: list[dict[str, str]]
+    requirements: str | list[str]
+
+
+class GPUSandboxConfig(TypedDict, total=False):
+    name: str
+    language: Language
+    runtime: Runtime
+    code: str  # Inline code string or path to file (absolute, relative, or ~/path)
+    memory: str | int
+    timeout: int
+    env_variables: list[dict[str, str]]
+    requirements: str | list[str]
+    gpu: GPUType
+    cpu_cores: int  # vCPUs for the GPU sandbox VM (hotplugged at runtime, default 10, max 50)
+    model: str | dict[str, str]
+
+
+# Run result
+class RunResult(TypedDict):
+    response: Any  # The response (parsed JSON object, or raw string if not JSON)
+    status: int  # HTTP status code
+
+
+# Upload options
+class UploadOptions(TypedDict, total=False):
+    local_path: str
+    file_path: str
+
+
+# Sandbox instances
+class SandboxInstance(TypedDict, total=False):
+    id: str
+    name: str
+    runtime: str
+    endpoint: str
+    run: Callable[..., Awaitable[RunResult]]
+    upload: Callable[[UploadOptions], Awaitable[None]]
+    delete: Callable[[], Awaitable[None]]
+
+
+class CPUSandboxInstance(TypedDict, total=False):
+    id: str
+    name: str
+    runtime: str
+    endpoint: str
+    type: Literal["cpu"]
+    run: Callable[..., Awaitable[RunResult]]
+    upload: Callable[[UploadOptions], Awaitable[None]]
+    delete: Callable[[], Awaitable[None]]
+
+
+class GPUSandboxInstance(TypedDict, total=False):
+    id: str
+    name: str
+    runtime: str
+    endpoint: str
+    type: Literal["gpu"]
+    gpu: GPUType
+    run: Callable[..., Awaitable[RunResult]]
+    upload: Callable[[UploadOptions], Awaitable[None]]
+    delete: Callable[[], Awaitable[None]]
+
+
+# Find options
+class FindUniqueWhere(TypedDict, total=False):
+    name: str
+    id: str
+
+
+class FindUniqueOptions(TypedDict):
+    where: FindUniqueWhere
+
+
+class ListOptions(TypedDict, total=False):
+    page: int
+
+
+# File metadata (for uploads)
+class FileMetadata(TypedDict):
+    name: str
+    size: int
+    type: str
+    webkit_relative_path: str
+    local_path: str
+
+
+# Presigned URL info
+class PresignedUrlInfo(TypedDict, total=False):
+    signedUrl: list[str]
+    uploadId: str | None
+    numberOfParts: int
+    s3FilePath: str
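Since the option types are TypedDicts, mostly declared with total=False, plain dict literals type-check against them with every key optional. A minimal sketch with illustrative values (not documented defaults):

from buildfunctions.types import CreateFunctionOptions

options: CreateFunctionOptions = {
    "name": "hello-fn",       # illustrative name
    "code": "./handler.py",   # inline code string or a file path
    "language": "python",
    "runtime": "python",
    "memory": "2GB",          # str, or a raw int treated as MB
    "timeout": 60,
    "processor_type": "CPU",
}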
buildfunctions/uploader.py
ADDED

@@ -0,0 +1,198 @@
+"""File upload utilities for GPU Sandbox."""
+
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+from typing import Any
+
+import httpx
+
+from buildfunctions.types import FileMetadata, PresignedUrlInfo
+
+CHUNK_SIZE = 9 * 1024 * 1024  # 9MB
+MAX_PARALLEL_UPLOADS = 5
+
+
+async def upload_file(content: bytes, presigned_url: str) -> None:
+    """Upload a single file to a presigned URL."""
+    async with httpx.AsyncClient(timeout=httpx.Timeout(600.0)) as client:
+        response = await client.put(
+            presigned_url,
+            content=content,
+            headers={"Content-Type": "application/octet-stream"},
+        )
+        if not response.is_success:
+            raise RuntimeError(f"Failed to upload file: {response.reason_phrase}")
+
+
+async def upload_part(content: bytes, presigned_url: str, part_number: int) -> dict[str, Any]:
+    """Upload a single part of a multipart upload."""
+    async with httpx.AsyncClient(timeout=httpx.Timeout(600.0)) as client:
+        response = await client.put(
+            presigned_url,
+            content=content,
+            headers={"Content-Type": "application/octet-stream"},
+        )
+
+        if not response.is_success:
+            raise RuntimeError(f"Failed to upload part {part_number}: {response.reason_phrase}")
+
+        etag = response.headers.get("ETag")
+        if not etag:
+            raise RuntimeError(f"Failed to retrieve ETag for part {part_number}")
+
+        clean_etag = etag.strip('"')
+        return {"PartNumber": part_number, "ETag": clean_etag}
+
+
+async def upload_multipart_file(
+    content: bytes,
+    signed_urls: list[str],
+    upload_id: str,
+    number_of_parts: int,
+    bucket_name: str,
+    s3_file_path: str,
+    base_url: str,
+) -> None:
+    """Orchestrate a multipart upload with parallel chunk uploads."""
+    parts: list[dict[str, Any]] = []
+    semaphore = asyncio.Semaphore(MAX_PARALLEL_UPLOADS)
+
+    async def _upload_chunk(index: int) -> None:
+        async with semaphore:
+            part_number = index + 1
+            start = index * CHUNK_SIZE
+            end = min(start + CHUNK_SIZE, len(content))
+            chunk = content[start:end]
+            url = signed_urls[index]
+            if not url:
+                raise RuntimeError(f"Missing upload URL for part {part_number}")
+            part = await upload_part(chunk, url, part_number)
+            parts.append(part)
+
+    tasks = [_upload_chunk(i) for i in range(number_of_parts)]
+    await asyncio.gather(*tasks)
+
+    sorted_parts = sorted(parts, key=lambda p: p["PartNumber"])
+
+    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
+        response = await client.post(
+            f"{base_url}/api/functions/gpu/transfer-and-mount/complete-multipart-upload",
+            json={
+                "bucketName": bucket_name,
+                "uploadId": upload_id,
+                "parts": sorted_parts,
+                "s3FilePath": s3_file_path,
+                "fileName": s3_file_path.split("/")[-1] if "/" in s3_file_path else s3_file_path,
+            },
+        )
+
+        if not response.is_success:
+            error_text = response.text
+            raise RuntimeError(f"Failed to complete upload: {response.reason_phrase} - {error_text}")
+
+
+def get_files_in_directory(dir_path: str) -> list[FileMetadata]:
+    """Recursively walk a directory and collect file metadata."""
+    root = Path(dir_path)
+    root_dir_name = root.name
+    files: list[FileMetadata] = []
+
+    for file_path in root.rglob("*"):
+        if file_path.is_file():
+            relative = file_path.relative_to(root)
+            files.append(
+                FileMetadata(
+                    name=file_path.name,
+                    size=file_path.stat().st_size,
+                    type="application/octet-stream",
+                    webkit_relative_path=f"{root_dir_name}/{relative}",
+                    local_path=str(file_path),
+                )
+            )
+
+    return files
+
+
+async def upload_model_files(
+    files: list[FileMetadata],
+    presigned_urls: dict[str, PresignedUrlInfo],
+    bucket_name: str,
+    base_url: str,
+) -> None:
+    """Upload all model files using presigned URLs."""
+    upload_tasks: list[asyncio.Task[None]] = []
+
+    for file in files:
+        url_info = presigned_urls.get(file["webkit_relative_path"])
+        if not url_info:
+            print(f"No upload URL found for {file['webkit_relative_path']}")
+            continue
+
+        content = Path(file["local_path"]).read_bytes()
+        signed_urls = url_info["signedUrl"]
+
+        if len(signed_urls) > 1 and url_info.get("uploadId"):
+            upload_tasks.append(
+                asyncio.ensure_future(
+                    upload_multipart_file(
+                        content,
+                        signed_urls,
+                        url_info["uploadId"],  # type: ignore[arg-type]
+                        url_info.get("numberOfParts", len(signed_urls)),
+                        bucket_name,
+                        url_info.get("s3FilePath", ""),
+                        base_url,
+                    )
+                )
+            )
+        elif len(signed_urls) == 1 and signed_urls[0]:
+            upload_tasks.append(asyncio.ensure_future(upload_file(content, signed_urls[0])))
+
+    if upload_tasks:
+        await asyncio.gather(*upload_tasks)
+
+
+async def transfer_files_to_efs(
+    files: list[FileMetadata],
+    sanitized_model_name: str,
+    base_url: str,
+    session_token: str,
+) -> None:
+    """Transfer files to EFS storage."""
+    async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
+        details_response = await client.post(
+            f"{base_url}/api/sdk/sandbox/gpu/get-transfer-details",
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {session_token}",
+            },
+            json={
+                "shouldVerifyContents": False,
+                "filesToTransfer": [f["webkit_relative_path"] for f in files],
+                "sanitizedModelName": sanitized_model_name,
+                "fileNamesWithinModelFolder": [f["name"] for f in files],
+            },
+        )
+
+    if not details_response.is_success:
+        error_data = details_response.json()
+        raise RuntimeError(error_data.get("error", "Failed to prepare file transfer"))
+
+    transfer_data = details_response.json()
+    transfer_details = transfer_data["transferDetails"]
+    storage_api_url = transfer_data["storageApiUrl"]
+    storage_api_path = transfer_data["storageApiPath"]
+
+    valid_details = [d for d in transfer_details if d.get("fileName")]
+
+    async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
+        for file_detail in valid_details:
+            response = await client.post(
+                f"{storage_api_url}{storage_api_path}",
+                json=file_detail,
+            )
+            if not response.is_success:
+                error_text = response.text
+                raise RuntimeError(f"Failed to transfer {file_detail['fileName']}: {error_text}")
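With CHUNK_SIZE at 9 MiB and at most five parts in flight, the multipart path expects the server to return ceil(size / CHUNK_SIZE) presigned part URLs. A small arithmetic sketch, assuming a hypothetical 100 MiB payload:

import math

CHUNK_SIZE = 9 * 1024 * 1024  # mirrors uploader.CHUNK_SIZE

size = 100 * 1024 * 1024  # hypothetical 100 MiB file
print(math.ceil(size / CHUNK_SIZE))  # 12: parts 1-11 are 9 MiB each, part 12 is the 1 MiB remainder

Appending to the shared parts list from concurrent tasks is safe here because asyncio tasks interleave cooperatively, and the list is re-sorted by PartNumber before the completion request is sent.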