icefold-runner 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icefold_runner/__init__.py +26 -0
- icefold_runner/__main__.py +75 -0
- icefold_runner/client.py +306 -0
- icefold_runner/runner.py +282 -0
- icefold_runner-0.1.0.dist-info/METADATA +166 -0
- icefold_runner-0.1.0.dist-info/RECORD +10 -0
- icefold_runner-0.1.0.dist-info/WHEEL +5 -0
- icefold_runner-0.1.0.dist-info/entry_points.txt +2 -0
- icefold_runner-0.1.0.dist-info/licenses/LICENSE +21 -0
- icefold_runner-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""icefold-runner — a self-hosted execution runner for IceFold nodes.
|
|
2
|
+
|
|
3
|
+
Like a GitHub self-hosted CI runner: you start it on your own machine, it
|
|
4
|
+
reverse-connects to an IceFold server, receives node-execution jobs, and runs
|
|
5
|
+
them locally — pulling input media over HTTP and pushing products back.
|
|
6
|
+
|
|
7
|
+
The runner is a *generic execution framework*: it ships no node implementations
|
|
8
|
+
of its own. The server renders each node into a self-contained ``.py`` bundle;
|
|
9
|
+
the runner fetches the bundle on demand, pre-flights its declared dependencies,
|
|
10
|
+
and runs it. So upgrading or adding nodes on the server never requires updating
|
|
11
|
+
the runner.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
__version__ = "0.1.0"
|
|
15
|
+
|
|
16
|
+
__all__ = ["__version__", "WorkerClient", "NodeRunner"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def __getattr__(name):
    """Resolve the package's public classes lazily (PEP 562).

    ``WorkerClient`` and ``NodeRunner`` are imported only on first attribute
    access, so a plain ``import icefold_runner`` does not pull in their
    modules.

    Raises:
        AttributeError: for any other name, using the interpreter's usual
            "module ... has no attribute ..." wording so the failure is
            self-explanatory in tracebacks.
    """
    if name == "WorkerClient":
        from icefold_runner.client import WorkerClient

        return WorkerClient
    if name == "NodeRunner":
        from icefold_runner.runner import NodeRunner

        return NodeRunner
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""CLI entrypoint: icefold-runner --token <token>
|
|
2
|
+
|
|
3
|
+
Run IceFold nodes on this machine. The runner reverse-connects to IceFold and
|
|
4
|
+
serves the account the token belongs to — the token (generated in the IceFold
|
|
5
|
+
app, Nodes ▸ Connect a runner) encodes + signs your user id, so there's no
|
|
6
|
+
server URL or user id to pass.
|
|
7
|
+
|
|
8
|
+
Bootstrap order matters: we point ``ICEFOLD_PROJECT_ROOT`` at the runner's
|
|
9
|
+
``--work-dir`` *before* importing ``icefold``, so the SDK's ``DATA_DIR``
|
|
10
|
+
(hence where ffmpeg writes products) resolves under this runner's own dir.
|
|
11
|
+
``icefold`` itself is an installed dependency (``pip install icefold-sdk``).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import asyncio
|
|
18
|
+
import os
|
|
19
|
+
import socket
|
|
20
|
+
|
|
21
|
+
# Built-in server. Self-hosters / dev can override via the ICEFOLD_RUNNER_SERVER
|
|
22
|
+
# env var (intentionally not a CLI flag — the normal user never sets it).
|
|
23
|
+
DEFAULT_SERVER = "wss://api.icefold.com"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _parse_args(argv):
    """Parse the runner's command line and return the argparse namespace.

    Each option falls back to an environment variable; ``--runner-id`` then
    falls back to the hostname and ``--work-dir`` to ``./icefold-runner-data``
    (made absolute). An empty token is reported through ``parser.error``,
    which exits the process.
    """
    env = os.environ
    token_default = env.get("ICEFOLD_RUNNER_TOKEN", "")
    # ``or`` keeps the hostname lookup lazy: it only runs when the env var is unset/empty.
    runner_id_default = env.get("ICEFOLD_RUNNER_ID", "") or socket.gethostname()
    work_dir_default = (
        env.get("ICEFOLD_RUNNER_DIR", "") or os.path.abspath("./icefold-runner-data")
    )

    parser = argparse.ArgumentParser(
        prog="icefold-runner",
        description="Run IceFold nodes on this machine. "
                    "Get a token from the IceFold app (Nodes ▸ Connect a runner).",
    )
    parser.add_argument(
        "--token",
        default=token_default,
        help="Runner token from the IceFold app. env: ICEFOLD_RUNNER_TOKEN",
    )
    parser.add_argument(
        "--runner-id",
        default=runner_id_default,
        help="Stable id for this runner (default: hostname). env: ICEFOLD_RUNNER_ID",
    )
    parser.add_argument(
        "--work-dir",
        default=work_dir_default,
        help="Scratch dir for staged inputs + ffmpeg products. env: ICEFOLD_RUNNER_DIR",
    )

    parsed = parser.parse_args(argv)
    if parsed.token:
        return parsed
    # parser.error() prints usage and raises SystemExit, so nothing follows it.
    parser.error("missing required argument: --token "
                 "(generate one in the IceFold app: Nodes ▸ Connect a runner)")
    return parsed
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def main(argv=None) -> int:
    """CLI entry point: prepare the work dir, then serve jobs until stopped.

    Returns 0 once the client loop ends or the user interrupts it.
    """
    options = _parse_args(argv)

    # Built-in server unless ICEFOLD_RUNNER_SERVER (self-host / dev) is set.
    override = os.environ.get("ICEFOLD_RUNNER_SERVER", "").strip()
    server = override if override else DEFAULT_SERVER

    work_dir = os.path.abspath(options.work_dir)
    for leaf in ("download", "upload"):
        os.makedirs(os.path.join(work_dir, "data", leaf), exist_ok=True)

    # Must be exported before anything imports ``icefold`` so that the SDK's
    # DATA_DIR resolves under this runner's work dir.
    os.environ["ICEFOLD_PROJECT_ROOT"] = work_dir

    # Deliberately imported late: this import pulls in ``icefold``.
    from icefold_runner.client import WorkerClient

    client = WorkerClient(server=server, token=options.token, worker_id=options.runner_id)
    try:
        asyncio.run(client.run_forever())
    except KeyboardInterrupt:
        print("\nicefold-runner stopped")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
icefold_runner/client.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""Reverse WebSocket client for icefold-runner.
|
|
2
|
+
|
|
3
|
+
Dials out to ``<server>/v1/ws/worker``, authenticates with the shared token
|
|
4
|
+
(also the XOR keystream), then serves leaf ``node_exec`` jobs concurrently.
|
|
5
|
+
Reconnects with jittered exponential backoff; an auth rejection is fatal.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import json
|
|
12
|
+
import random
|
|
13
|
+
from typing import Dict, Optional
|
|
14
|
+
from urllib.parse import urlencode, urlsplit, urlunsplit
|
|
15
|
+
|
|
16
|
+
import websockets
|
|
17
|
+
|
|
18
|
+
import uuid
|
|
19
|
+
|
|
20
|
+
from icefold.crypto import xor_bytes
|
|
21
|
+
from icefold.exceptions import MissingDependencyError
|
|
22
|
+
from icefold.wire import (
|
|
23
|
+
SRV_CANCEL,
|
|
24
|
+
SRV_NODE_CALLBACK_RESULT,
|
|
25
|
+
SRV_NODE_EXEC,
|
|
26
|
+
SRV_PING,
|
|
27
|
+
WKR_HELLO,
|
|
28
|
+
WKR_NODE_DONE,
|
|
29
|
+
WKR_PONG,
|
|
30
|
+
WKR_PING,
|
|
31
|
+
make_missing_dep,
|
|
32
|
+
make_node_callback,
|
|
33
|
+
)
|
|
34
|
+
from icefold_runner import __version__ as VERSION
|
|
35
|
+
from icefold_runner.runner import NodeRunner
|
|
36
|
+
from icefold import log_error, log_info, log_warning
|
|
37
|
+
|
|
38
|
+
_BACKOFF_MIN = 1.0
|
|
39
|
+
_BACKOFF_MAX = 30.0
|
|
40
|
+
_KEEPALIVE_S = 20.0
|
|
41
|
+
_MAX_FRAME = 8 * 1024 * 1024
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class AuthError(Exception):
    """Raised when the server refuses this runner's credentials.

    Treated as fatal by the reconnect loop: retrying with the same token
    would not succeed.
    """
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _log(level: str, msg: str, **kw) -> None:
    """Forward *msg* to the SDK logger for *level* under the "worker" tag.

    ``"warn"`` and ``"error"`` select the warning / error loggers; any other
    level is logged as info.
    """
    if level == "warn":
        emit = log_warning
    elif level == "error":
        emit = log_error
    else:
        emit = log_info
    emit("worker", msg, **kw)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class WorkerClient:
    """Reverse WebSocket client that receives ``node_exec`` jobs and runs them.

    The client dials ``<server>/v1/ws/worker``, announces itself, and then
    dispatches each incoming job to its own asyncio task so several nodes can
    run concurrently. Bundle code reaches back into the server through
    ``node_callback`` frames; each callback is tracked by ``req_id`` together
    with the ``call_id`` of the node that issued it.
    """

    def __init__(
        self,
        *,
        server: str,
        token: str,
        worker_id: str,
        http_base: Optional[str] = None,
    ) -> None:
        """Store connection settings and build the shared ``NodeRunner``.

        Args:
            server: WebSocket (or HTTP) base URL of the server.
            token: Runner token; also used as the XOR key for binary frames.
            worker_id: Identifier this runner reports to the server.
            http_base: HTTP base URL for media/bundle transfer; derived from
                ``server`` when omitted.
        """
        self.server = server
        self.token = token
        self.worker_id = worker_id
        # An empty token disables XOR: frames are then sent as plain JSON text.
        self.xor_key = token.encode("utf-8") if token else b""
        self.http_base = (http_base or self._derive_http_base(server)).rstrip("/")
        self.runner = NodeRunner(self.http_base, token, _log)
        # Running node tasks keyed by call_id (used for cancel + teardown).
        self._tasks: Dict[str, asyncio.Task] = {}
        # Outstanding bundle->server callbacks keyed by req_id; resolved when
        # the matching ``node_callback_result`` frame arrives.
        self._pending_callbacks: Dict[str, "asyncio.Future[dict]"] = {}
        # req_id -> call_id of the node that issued the callback, so that a
        # finishing node only fails *its own* outstanding callbacks.
        self._callback_owner: Dict[str, str] = {}

    # ── URL helpers ──

    @staticmethod
    def _derive_http_base(server: str) -> str:
        """Map a ws(s) server URL to its http(s) origin (path/query dropped)."""
        parts = urlsplit(server)
        scheme = {"ws": "http", "wss": "https"}.get(parts.scheme, parts.scheme or "http")
        return urlunsplit((scheme, parts.netloc, "", "", ""))

    def _ws_url(self) -> str:
        """Build the worker WebSocket URL, including token and worker id."""
        parts = urlsplit(self.server)
        scheme = {"http": "ws", "https": "wss"}.get(parts.scheme, parts.scheme or "ws")
        path = parts.path.rstrip("/")
        if not path.endswith("/v1/ws/worker"):
            path = path + "/v1/ws/worker"
        # No user_id: the token encodes + signs it; the server derives the
        # identity from the token so this runner can't claim another account.
        query = urlencode({
            "token": self.token,
            "worker_id": self.worker_id,
        })
        return urlunsplit((scheme, parts.netloc, path, query, ""))

    # ── main loop ──

    async def run_forever(self) -> None:
        """Connect, serve, and reconnect until authentication is rejected.

        Failures back off exponentially (with jitter) up to ``_BACKOFF_MAX``.
        A clean close resets the backoff but still waits the minimum delay
        before redialing, so a server that accepts and immediately closes
        cannot drive a tight reconnect loop.
        """
        _log("info", f"icefold-runner {VERSION} starting",
             server=self.server, worker_id=self.worker_id)
        backoff = _BACKOFF_MIN
        while True:
            try:
                await self._run_once()
            except AuthError as e:
                _log("error", f"authentication failed; exiting: {e}")
                return
            except Exception as e:  # noqa: BLE001 - any failure means "retry later"
                _log("error", f"connection failed: {e}", next_retry=round(backoff, 1))
                delay = backoff
                backoff = min(backoff * 2, _BACKOFF_MAX)
            else:
                _log("info", "connection closed; will reconnect")
                backoff = _BACKOFF_MIN
                delay = _BACKOFF_MIN
            await asyncio.sleep(delay + random.uniform(0, delay / 4))

    async def _run_once(self) -> None:
        """Run one connection: dial, say hello, dispatch frames until close.

        Raises:
            AuthError: when the handshake is rejected with HTTP 401/403.
            Exception: any other connection error, left for the caller's
                retry logic.
        """
        url = self._ws_url()
        _log("info", "dialing", url=self._redact(url))
        try:
            ws = await websockets.connect(
                url, max_size=_MAX_FRAME, open_timeout=15,
                ping_interval=_KEEPALIVE_S, ping_timeout=_KEEPALIVE_S,
            )
        except Exception as e:  # noqa: BLE001
            if self._is_auth_rejection(e):
                raise AuthError(str(e)) from e
            raise
        async with ws:
            await self._send(ws, {
                "type": WKR_HELLO,
                "worker_id": self.worker_id,
                "version": VERSION,
                "capabilities": ["builtin"],
            })
            _log("info", "connected", worker_id=self.worker_id)
            keepalive = asyncio.create_task(self._keepalive(ws))
            try:
                async for raw in ws:
                    msg = self._decode(raw)
                    if msg is not None:
                        await self._handle(ws, msg)
            finally:
                # The connection is gone: stop pinging and cancel every job
                # that was bound to this socket.
                keepalive.cancel()
                for t in list(self._tasks.values()):
                    t.cancel()
                self._tasks.clear()

    async def _keepalive(self, ws) -> None:
        """Send an application-level ping every ``_KEEPALIVE_S`` seconds.

        Best effort by design: a failed send ends this task quietly; the
        receive loop in ``_run_once`` is what notices a dead connection.
        """
        try:
            while True:
                await asyncio.sleep(_KEEPALIVE_S)
                await self._send(ws, {"type": WKR_PING})
        except asyncio.CancelledError:
            pass
        except Exception:  # noqa: BLE001
            pass

    # ── frame codec ──

    def _decode(self, raw) -> Optional[dict]:
        """Decode one incoming frame into a dict, or ``None`` if unusable.

        Binary frames are XOR-decoded with the token key (when a key is set)
        and parsed as UTF-8 JSON; text frames are parsed as JSON directly.
        Frames that fail to parse, or whose JSON is not an object, are logged
        and dropped so one bad frame cannot break the dispatch loop.
        """
        try:
            if isinstance(raw, (bytes, bytearray)):
                data = xor_bytes(bytes(raw), self.xor_key) if self.xor_key else bytes(raw)
                msg = json.loads(data.decode("utf-8"))
            else:
                msg = json.loads(raw)
        except Exception as e:  # noqa: BLE001
            _log("warn", f"bad frame from server: {e}")
            return None
        if not isinstance(msg, dict):
            # _handle() relies on dict access; reject scalars/arrays here.
            _log("warn", f"bad frame from server: expected a JSON object, got {type(msg).__name__}")
            return None
        return msg

    async def _send(self, ws, msg: dict) -> None:
        """Serialize *msg* as JSON and send it (XOR-ed binary if a key is set)."""
        payload = json.dumps(msg).encode("utf-8")
        if self.xor_key:
            await ws.send(xor_bytes(payload, self.xor_key))
        else:
            await ws.send(payload.decode("utf-8"))

    # ── dispatch ──

    async def _handle(self, ws, msg: dict) -> None:
        """Route one decoded server frame by its ``type`` field.

        Unknown types, and ``node_exec`` frames without a ``call_id``, are
        ignored.
        """
        mtype = msg.get("type", "")
        if mtype == SRV_NODE_EXEC:
            call_id = msg.get("call_id", "")
            if not call_id:
                return
            self._tasks[call_id] = asyncio.create_task(self._run_node(ws, msg))
        elif mtype == SRV_CANCEL:
            task = self._tasks.get(msg.get("call_id", ""))
            if task is not None:
                task.cancel()
        elif mtype == SRV_NODE_CALLBACK_RESULT:
            # Reply to a callback a bundle issued; wake the awaiting coroutine.
            req_id = msg.get("req_id", "")
            self._callback_owner.pop(req_id, None)
            fut = self._pending_callbacks.pop(req_id, None)
            if fut is not None and not fut.done():
                fut.set_result(msg)
        elif mtype == SRV_PING:
            await self._send(ws, {"type": WKR_PONG})

    async def _run_node(self, ws, msg: dict) -> None:
        """Run one node job and report its outcome to the server.

        Success and ordinary failures are reported as ``node_done``; a missing
        dependency is reported with the dedicated missing-dep frame; a timeout
        is reported as ``node_done`` with ``killed=True``. Cancellation is
        re-raised without a reply.
        """
        call_id = msg["call_id"]
        node_type = msg.get("node_type", "")
        try:
            _log("info", f"running node {node_type}", call_id=call_id)
            send_callback = self._make_send_callback(ws, call_id)
            output = await self.runner.run(msg, send_callback=send_callback)
            await self._send(ws, {
                "type": WKR_NODE_DONE, "call_id": call_id,
                "output": output, "err": "", "killed": False,
            })
            _log("info", f"node done {node_type}", call_id=call_id)
        except MissingDependencyError as dep:
            # Typed reply (not node_done) so the server can surface a
            # user-actionable "install X via …" notification.
            _log(
                "warn",
                f"node {node_type} skipped: missing deps "
                f"binaries={list(dep.missing_binaries)} python={list(dep.missing_python)}",
                call_id=call_id,
            )
            await self._safe_send(ws, make_missing_dep(
                call_id=call_id,
                missing_binaries=dep.missing_binaries,
                missing_python=dep.missing_python,
                install_hint=dep.install_hint,
            ))
        except asyncio.TimeoutError:
            await self._safe_send(ws, {
                "type": WKR_NODE_DONE, "call_id": call_id,
                "output": None, "err": "remote node timed out", "killed": True,
            })
        except asyncio.CancelledError:
            # Server asked us to cancel (or we're tearing down). The server's
            # awaiting future is already cancelled, so no node_done is needed.
            raise
        except Exception as e:  # noqa: BLE001
            _log("error", f"node failed {node_type}: {e}", call_id=call_id)
            await self._safe_send(ws, {
                "type": WKR_NODE_DONE, "call_id": call_id,
                "output": None, "err": str(e), "killed": False,
            })
        finally:
            # Only drop the registry entry if it is still *this* task; a newer
            # job that reused the call_id must stay cancellable.
            if self._tasks.get(call_id) is asyncio.current_task():
                self._tasks.pop(call_id, None)
            # Fail this node's still-pending callbacks (e.g. it was cancelled
            # mid-LLM-call). Callbacks of other running nodes are left alone.
            self._fail_pending_callbacks(call_id)

    def _pending_req_ids(self, call_id: str) -> list:
        """Return the ``req_id``s of outstanding callbacks issued by *call_id*."""
        return [rid for rid, owner in self._callback_owner.items() if owner == call_id]

    def _fail_pending_callbacks(self, call_id: str) -> None:
        """Resolve every outstanding callback of *call_id* with a failure reply."""
        for req_id in self._pending_req_ids(call_id):
            self._callback_owner.pop(req_id, None)
            fut = self._pending_callbacks.pop(req_id, None)
            if fut is not None and not fut.done():
                fut.set_result({
                    "type": SRV_NODE_CALLBACK_RESULT,
                    "call_id": call_id, "req_id": req_id,
                    "ok": False, "result": None,
                    "error": "node ended before callback resolved",
                })

    def _make_send_callback(self, ws, call_id: str):
        """Return the bundle-host callback sender bound to one node_exec.

        The returned coroutine function allocates a ``req_id``, sends a
        ``node_callback`` frame, and awaits the matching
        ``node_callback_result``. A reply with a falsy ``ok`` is raised as
        ``RuntimeError``; otherwise the reply's ``result`` is returned.
        """

        async def _send(kind: str, payload: dict):
            req_id = uuid.uuid4().hex
            # Created on the loop that is actually running this coroutine.
            fut: "asyncio.Future[dict]" = asyncio.get_running_loop().create_future()
            self._pending_callbacks[req_id] = fut
            self._callback_owner[req_id] = call_id
            try:
                await self._send(ws, make_node_callback(
                    call_id=call_id, req_id=req_id, kind=kind, payload=payload,
                ))
                reply = await fut
            finally:
                # Always unregister, whether we got a reply, failed to send,
                # or were cancelled while waiting.
                self._pending_callbacks.pop(req_id, None)
                self._callback_owner.pop(req_id, None)
            if not reply.get("ok"):
                raise RuntimeError(reply.get("error") or f"callback {kind!r} failed")
            return reply.get("result")

        return _send

    async def _safe_send(self, ws, msg: dict) -> None:
        """Send *msg*, ignoring failures (used on error paths only)."""
        try:
            await self._send(ws, msg)
        except Exception:  # noqa: BLE001 - the socket may already be gone
            pass

    # ── error classification ──

    @staticmethod
    def _is_auth_rejection(e: Exception) -> bool:
        """Return True when *e* carries an HTTP 401/403 status.

        The status is read from ``e.response.status_code`` or, failing that,
        ``e.status_code``.
        """
        resp = getattr(e, "response", None)
        status = getattr(resp, "status_code", None) or getattr(e, "status_code", None)
        return status in (401, 403)

    @staticmethod
    def _redact(url: str) -> str:
        """Return *url* with its whole query replaced by ``token=<redacted>``."""
        parts = urlsplit(url)
        return urlunsplit((parts.scheme, parts.netloc, parts.path, "token=<redacted>", ""))
|
icefold_runner/runner.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"""Run one leaf node-exec job on this machine.
|
|
2
|
+
|
|
3
|
+
Each ``node_exec`` frame is a single, already-sliced variant (the server did
|
|
4
|
+
all variant planning) and carries a **bundle hash** — the server has already
|
|
5
|
+
rendered the node into a self-contained ``.py``. The runner ships no node
|
|
6
|
+
implementations of its own and never compiles user source.
|
|
7
|
+
|
|
8
|
+
Per call:
|
|
9
|
+
|
|
10
|
+
1. fetch (cache-aware) ``/v1/bundles/<hash>`` into ``runner_work_dir/bundles/``
|
|
11
|
+
2. exec the bundle in a fresh module namespace — it self-declares
|
|
12
|
+
``__icefold_python_deps__`` / ``__icefold_binary_deps__`` plus the
|
|
13
|
+
``async def __icefold_run__(inputs, ctx_dict) -> Any`` entry point
|
|
14
|
+
3. pre-flight the declared deps (``shutil.which`` + ``import_module``);
|
|
15
|
+
surface ``MissingDependencyError`` so the client wraps a structured
|
|
16
|
+
``missing_dep`` reply instead of ``node_done``
|
|
17
|
+
4. download ``/upload/`` & ``/download/`` input refs to a staging dir and
|
|
18
|
+
rewrite them to local paths
|
|
19
|
+
5. await ``__icefold_run__(local_inputs, ctx_dict)``
|
|
20
|
+
6. upload product files back to the server and rewrite the output to the
|
|
21
|
+
server-canonical paths it hands back
|
|
22
|
+
|
|
23
|
+
Output that isn't a file (text, numbers, None) passes through untouched.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import asyncio
|
|
29
|
+
import hashlib
|
|
30
|
+
import importlib.util
|
|
31
|
+
import os
|
|
32
|
+
import shutil
|
|
33
|
+
import sys
|
|
34
|
+
from types import ModuleType
|
|
35
|
+
from typing import Any, Dict, List, Tuple
|
|
36
|
+
|
|
37
|
+
import httpx
|
|
38
|
+
|
|
39
|
+
# Node SDK (``icefold``, installed via the ``icefold-sdk`` dependency). DATA_DIR
|
|
40
|
+
# reflects this runner's --work-dir because the CLI sets ICEFOLD_PROJECT_ROOT
|
|
41
|
+
# before these imports, so executors write products under our download dir.
|
|
42
|
+
from icefold.config import DATA_DIR
|
|
43
|
+
from icefold.exceptions import MissingDependencyError
|
|
44
|
+
from icefold.wire import OUTPUT_UPLOAD_PATH, binary_install_hint
|
|
45
|
+
|
|
46
|
+
_STAGED_DIR = os.path.join(DATA_DIR, "staged")
|
|
47
|
+
_BUNDLES_DIR = os.path.join(DATA_DIR, "bundles")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _is_server_ref(value: Any) -> bool:
    """Return True when *value* is a string starting with ``/upload/`` or ``/download/``."""
    if not isinstance(value, str):
        return False
    return value.startswith(("/upload/", "/download/"))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _ext_from_ref(ref: str) -> str:
    """Return the lower-cased file extension of *ref*, or ``""``.

    Anything from the first ``?`` onward is ignored. The extension (dot
    included) is rejected when it is longer than 12 characters or when the
    part after the dot is not purely alphanumeric.
    """
    path_part = ref.partition("?")[0]
    _, suffix = os.path.splitext(path_part)
    suffix = suffix.lower()
    if not suffix:
        return ""
    if len(suffix) > 12:
        return ""
    if not suffix[1:].isalnum():
        return ""
    return suffix
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class NodeRunner:
    """Execute server-rendered node bundles; one instance is shared across jobs.

    The only state kept between jobs is the in-memory cache of bundle modules
    that have already been imported, keyed by bundle hash.
    """

    def __init__(self, http_base: str, token: str, log) -> None:
        """Remember transfer settings and make sure the scratch dirs exist.

        Args:
            http_base: HTTP base URL of the server (trailing ``/`` stripped).
            token: Sent as ``X-Worker-Token`` on bundle fetches and uploads.
            log: Callable ``log(level, message)`` used for progress messages.
        """
        self._http_base = http_base.rstrip("/")
        self._token = token
        self._log = log
        # Imported bundle modules, kept for the lifetime of this process.
        self._bundles: Dict[str, ModuleType] = {}
        os.makedirs(_STAGED_DIR, exist_ok=True)
        os.makedirs(_BUNDLES_DIR, exist_ok=True)

    async def run(self, msg: dict, *, send_callback=None) -> Any:
        """Execute one ``node_exec`` frame and return its uploaded output.

        Input refs are downloaded first, then the bundle is fetched, checked
        and run under the frame's timeout, and finally product files in the
        output are uploaded and replaced by the paths the server returns.

        Args:
            msg: The decoded ``node_exec`` frame.
            send_callback: Optional ``send_callback(kind, payload)`` awaitable
                handed to the bundle (via ``ctx_dict["_send_callback"]``) so
                it can call back into the server.

        Raises:
            RuntimeError: when the frame carries no ``bundle_hash``.
            asyncio.TimeoutError: when the bundle exceeds ``timeout_ms``.
            MissingDependencyError: when a declared dependency is absent.
        """
        bundle_hash = msg.get("bundle_hash") or ""
        if not bundle_hash:
            node_type = msg.get("node_type") or msg.get("node_id", "")
            raise RuntimeError(
                f"node_exec for {node_type!r} arrived without bundle_hash; "
                "the server must render a bundle via codegen before dispatch"
            )
        # An explicit JSON null must fall back to the default rather than
        # crash the division below.
        timeout_ms = msg.get("timeout_ms")
        if timeout_ms is None:
            timeout_ms = 1800_000
        timeout = max(1.0, timeout_ms / 1000.0)

        async with httpx.AsyncClient(timeout=httpx.Timeout(600.0)) as http:
            local_inputs = await self._download_inputs(http, msg.get("inputs") or {})
            output = await asyncio.wait_for(
                self._run_bundle(http, bundle_hash, msg, local_inputs, send_callback),
                timeout=timeout,
            )
            return await self._upload_outputs(http, output, msg.get("session_id", ""))

    # ── bundle path ──

    async def _run_bundle(
        self,
        http: "httpx.AsyncClient",
        bundle_hash: str,
        msg: dict,
        local_inputs: Any,
        send_callback,
    ) -> Any:
        """Fetch (or reuse), pre-flight and run the bundle for one job.

        Raises:
            MissingDependencyError: from the dependency pre-flight.
            RuntimeError: when the bundle has no ``__icefold_run__``.
        """
        mod = self._bundles.get(bundle_hash)
        if mod is None:
            bundle_path = await self._fetch_bundle(http, bundle_hash, msg.get("bundle_url") or "")
            mod = self._import_bundle(bundle_hash, bundle_path)
            self._bundles[bundle_hash] = mod

        # Pre-flight declared deps before running anything, so the client can
        # send a ``missing_dep`` reply instead of ``node_done``.
        self._preflight_deps(
            tuple(getattr(mod, "__icefold_binary_deps__", ()) or ()),
            tuple(getattr(mod, "__icefold_python_deps__", ()) or ()),
        )

        inputs = local_inputs if isinstance(local_inputs, dict) else {}
        ctx_dict = {
            "node_id": msg.get("node_id", msg.get("node_type", "")),
            "node_config": msg.get("node_config") or {},
            "user_id": msg.get("user_id", ""),
            "session_id": msg.get("session_id") or None,
            "space_name": msg.get("space_name") or None,
            "variant": msg.get("variant") or {},
            "raw_inputs": inputs,
            "provider": msg.get("provider") or {},
            "model": msg.get("model", ""),
        }
        # Only present when a host is wired; the bundle uses it to reach the
        # server and the client correlates replies via ``req_id``.
        if send_callback is not None:
            ctx_dict["_send_callback"] = send_callback

        entry = getattr(mod, "__icefold_run__", None)
        if entry is None:
            raise RuntimeError(
                f"bundle {bundle_hash[:8]} is missing __icefold_run__ entry point"
            )
        return await entry(inputs, ctx_dict)

    @staticmethod
    def _is_valid_bundle_hash(bundle_hash: Any) -> bool:
        """Return True when *bundle_hash* looks like a SHA-256 hex digest.

        A downloaded bundle is only accepted when its lowercase
        ``sha256().hexdigest()`` equals the hash, so anything else could never
        verify; rejecting it up front also keeps path separators out of the
        cache file name.
        """
        return (
            isinstance(bundle_hash, str)
            and len(bundle_hash) == 64
            and all(ch in "0123456789abcdef" for ch in bundle_hash)
        )

    async def _fetch_bundle(
        self, http: "httpx.AsyncClient", bundle_hash: str, bundle_url: str,
    ) -> str:
        """Cache-aware bundle fetch. Returns the on-disk path.

        The download is streamed to a unique temporary file while being
        hashed, and is moved to its cache path only after the digest matches
        *bundle_hash*. An unverified file therefore never sits at the cache
        path, even if the process dies mid-fetch.

        Raises:
            RuntimeError: for a malformed hash or a digest mismatch.
        """
        if not self._is_valid_bundle_hash(bundle_hash):
            raise RuntimeError(
                f"invalid bundle hash {repr(bundle_hash)[:40]}; "
                "expected 64 lowercase hex characters"
            )
        path = os.path.join(_BUNDLES_DIR, f"{bundle_hash}.py")
        if os.path.isfile(path):
            return path
        url = bundle_url or f"{self._http_base}/v1/bundles/{bundle_hash}"
        self._log("info", f"pulling bundle {bundle_hash[:8]}")
        headers = {"X-Worker-Token": self._token} if self._token else {}
        # Unique name: concurrent jobs may fetch the same bundle at once.
        tmp = f"{path}.{os.urandom(4).hex()}.part"
        digest = hashlib.sha256()
        try:
            async with http.stream("GET", url, headers=headers) as resp:
                resp.raise_for_status()
                with open(tmp, "wb") as fh:
                    async for chunk in resp.aiter_bytes(64 * 1024):
                        digest.update(chunk)
                        fh.write(chunk)
            got = digest.hexdigest()
            if got != bundle_hash:
                raise RuntimeError(
                    f"bundle hash mismatch: expected {bundle_hash[:8]}, got {got[:8]}"
                )
            os.replace(tmp, path)
        finally:
            # After a successful replace the temp name no longer exists.
            try:
                os.unlink(tmp)
            except FileNotFoundError:
                pass
        return path

    @staticmethod
    def _import_bundle(bundle_hash: str, path: str) -> ModuleType:
        """Load the bundle at *path* as a module named after its hash.

        The module is registered in ``sys.modules`` under
        ``_icefold_bundle_<hash[:16]>`` before execution and removed again if
        executing it raises.

        Raises:
            RuntimeError: when no import spec/loader can be created.
        """
        mod_name = f"_icefold_bundle_{bundle_hash[:16]}"
        spec = importlib.util.spec_from_file_location(mod_name, path)
        if spec is None or spec.loader is None:
            raise RuntimeError(f"failed to create import spec for bundle {bundle_hash[:8]}")
        mod = importlib.util.module_from_spec(spec)
        sys.modules[mod_name] = mod
        try:
            spec.loader.exec_module(mod)
        except Exception:
            sys.modules.pop(mod_name, None)
            raise
        return mod

    def _preflight_deps(
        self, binary_deps: Tuple[str, ...], python_deps: Tuple[str, ...],
    ) -> None:
        """Raise ``MissingDependencyError`` when any declared dep is absent.

        Binaries are looked up with ``shutil.which``; Python packages are
        checked by importing their top-level name. Empty entries are skipped.
        """
        missing_bin: List[str] = [b for b in binary_deps if b and shutil.which(b) is None]
        missing_py: List[str] = []
        for pkg in python_deps:
            if not pkg:
                continue
            try:
                __import__(pkg.split(".")[0])
            except ImportError:
                missing_py.append(pkg)
        if not (missing_bin or missing_py):
            return
        # Build a platform-aware install hint covering both categories.
        plat = sys.platform if sys.platform in ("linux", "darwin", "win32") else "linux"
        lines: List[str] = []
        for b in missing_bin:
            lines.append(f"  · {b} (binary) → {binary_install_hint(b, plat)}")
        for p in missing_py:
            lines.append(f"  · {p} (python) → pip install {p}")
        hint = "Install the following on this runner host:\n" + "\n".join(lines)
        raise MissingDependencyError(
            missing_binaries=tuple(missing_bin),
            missing_python=tuple(missing_py),
            install_hint=hint,
        )

    # ── input staging (download) ──

    async def _download_inputs(self, http: "httpx.AsyncClient", inputs: Any) -> Any:
        """Recursively replace server refs in *inputs* with local file paths.

        Dicts and lists/tuples are walked (tuples come back as lists); any
        other value is returned unchanged.
        """
        if isinstance(inputs, str):
            if _is_server_ref(inputs):
                return await self._download_one(http, inputs)
            return inputs
        if isinstance(inputs, dict):
            return {k: await self._download_inputs(http, v) for k, v in inputs.items()}
        if isinstance(inputs, (list, tuple)):
            return [await self._download_inputs(http, v) for v in inputs]
        return inputs

    async def _download_one(self, http: "httpx.AsyncClient", ref: str) -> str:
        """Download one server ref into the staging dir and return its path.

        A partially written file is removed if the download fails.
        """
        url = self._http_base + ref
        dest = os.path.join(_STAGED_DIR, f"{os.urandom(8).hex()}{_ext_from_ref(ref)}")
        self._log("info", f"pulling input {ref}")
        try:
            async with http.stream("GET", url) as resp:
                resp.raise_for_status()
                with open(dest, "wb") as fh:
                    async for chunk in resp.aiter_bytes(1024 * 1024):
                        fh.write(chunk)
        except BaseException:
            # Includes cancellation: never leave a truncated input behind.
            try:
                os.unlink(dest)
            except FileNotFoundError:
                pass
            raise
        return dest

    # ── output staging (upload) ──

    @staticmethod
    def _is_within(path: str, root: str) -> bool:
        """Return True when *path* is *root* itself or lies beneath it.

        Compares whole path components, so ``/data-evil/x`` is not treated as
        being inside ``/data`` the way a string-prefix test would.
        """
        root_abs = os.path.normcase(os.path.abspath(root))
        path_abs = os.path.normcase(os.path.abspath(path))
        try:
            return os.path.commonpath([root_abs, path_abs]) == root_abs
        except ValueError:
            # e.g. paths on different drives have no common path.
            return False

    async def _upload_outputs(self, http: "httpx.AsyncClient", output: Any, session_id: str) -> Any:
        """Recursively upload product files found in *output*.

        A string is uploaded only when it names an existing file inside
        ``DATA_DIR``; it is then replaced by the server's path. Everything
        else passes through (tuples come back as lists).
        """
        if isinstance(output, str):
            if output and os.path.isfile(output) and self._is_within(output, DATA_DIR):
                return await self._upload_one(http, output, session_id)
            return output
        if isinstance(output, dict):
            return {k: await self._upload_outputs(http, v, session_id) for k, v in output.items()}
        if isinstance(output, (list, tuple)):
            return [await self._upload_outputs(http, v, session_id) for v in output]
        return output

    async def _upload_one(self, http: "httpx.AsyncClient", path: str, session_id: str) -> str:
        """POST one product file to the server and return its stored path.

        Raises:
            RuntimeError: when the response JSON has no ``path``.
        """
        url = self._http_base + OUTPUT_UPLOAD_PATH
        self._log("info", f"pushing product {os.path.basename(path)}")
        headers = {"X-Worker-Token": self._token} if self._token else {}
        with open(path, "rb") as fh:
            resp = await http.post(
                url,
                headers=headers,
                data={"session_id": session_id or ""},
                files={"file": (os.path.basename(path), fh, "application/octet-stream")},
            )
        resp.raise_for_status()
        server_path = resp.json().get("path")
        if not server_path:
            raise RuntimeError("server did not return a stored path for output")
        return server_path
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: icefold-runner
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Self-hosted execution runner for IceFold nodes (reverse-connects to an IceFold server, like a self-hosted CI runner).
|
|
5
|
+
Author: IceFold
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/IceFold/icefold-runner
|
|
8
|
+
Project-URL: Repository, https://github.com/IceFold/icefold-runner
|
|
9
|
+
Keywords: icefold,runner,self-hosted,node-execution
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: icefold-sdk>=0.1.0
|
|
19
|
+
Requires-Dist: websockets>=12
|
|
20
|
+
Requires-Dist: httpx>=0.27
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# icefold-runner
|
|
24
|
+
|
|
25
|
+
A self-hosted execution runner for [IceFold](https://icefold.com) nodes — like
|
|
26
|
+
a GitHub self-hosted CI runner. You start it on your own machine; it
|
|
27
|
+
**reverse-connects** to an IceFold server (so it works behind NAT with no
|
|
28
|
+
inbound ports, no public IP, no tunnel), receives node-execution jobs, runs them
|
|
29
|
+
locally, and streams results back.
|
|
30
|
+
|
|
31
|
+
It is the place where **your uploaded node code runs** — on your hardware, with
|
|
32
|
+
full `subprocess` / `ffmpeg` / GPU / any-dependency access — instead of inside
|
|
33
|
+
the server's restricted sandbox.
|
|
34
|
+
|
|
35
|
+
## How it works
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
your machine (private, behind NAT) IceFold server (public)
|
|
39
|
+
┌──────────────────────────────────┐ reverse WSS ┌───────────────────────────┐
|
|
40
|
+
│ icefold-runner │ ───────────► │ /v1/ws/worker?token │
|
|
41
|
+
│ • dials out, token auth │ node_exec ◄─│ routes node runs (per user) │
|
|
42
|
+
│ • reconnect + keepalive │ node_done ─►│ │
|
|
43
|
+
│ • bundle runner: │ │ │
|
|
44
|
+
│ GET /v1/bundles/<hash> │ HTTP pull │ /upload /download │
|
|
45
|
+
│ import bundle + preflight deps │ ◄──────────► │ /v1/workers/output │
|
|
46
|
+
│ await __icefold_run__ │ │ │
|
|
47
|
+
└──────────────────────────────────┘ HTTP push └───────────────────────────┘
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
- **Control plane** rides the reverse WebSocket (`node_exec` / `cancel` →
|
|
51
|
+
`node_status` / `node_done` / `missing_dep`), JSON frames XOR-obfuscated with
|
|
52
|
+
the token (TLS still does the real protection). Each `node_exec` frame only
|
|
53
|
+
carries a `bundle_hash` and a single already-sliced variant — no source.
|
|
54
|
+
- **Bulk media + bundles** ride plain HTTP: the runner GETs inputs from the
|
|
55
|
+
server's `/upload` & `/download` and node bundles from `/v1/bundles/<hash>`
|
|
56
|
+
(sha256-addressed, cached locally as `runner_work_dir/bundles/<hash>.py`,
|
|
57
|
+
re-hashed on every download), runs the bundle, POSTs products back to
|
|
58
|
+
`/v1/workers/output` (which returns server-canonical paths).
|
|
59
|
+
- **The runner ships no node implementations and never compiles user source.**
|
|
60
|
+
The IceFold server renders every node (your custom ones *and* the platform's
|
|
61
|
+
built-in ones) into a self-contained `.py` bundle, with `python_deps` /
|
|
62
|
+
`binary_deps` declared in the bundle header. The runner imports the bundle,
|
|
63
|
+
pre-flights the deps (sending back a structured `missing_dep` reply with
|
|
64
|
+
platform-aware install hints if anything is absent), and awaits
|
|
65
|
+
`__icefold_run__(inputs, ctx_dict)`. So when the server adds or upgrades
|
|
66
|
+
nodes, **you never have to upgrade the runner.**
|
|
67
|
+
- Variant planning / dimension & provider resolution all stay on the server;
|
|
68
|
+
each job is a single already-sliced leaf call.
|
|
69
|
+
|
|
70
|
+
## Install
|
|
71
|
+
|
|
72
|
+
Requires **Python ≥ 3.11**, **ffmpeg/ffprobe** on `PATH` (for media nodes), and
|
|
73
|
+
whatever third-party packages your custom nodes import.
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install icefold-runner # pulls in icefold-sdk
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
From source:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
git clone <this-repo> icefold-runner
|
|
83
|
+
cd icefold-runner
|
|
84
|
+
python -m venv .venv && . .venv/bin/activate
|
|
85
|
+
pip install -e .
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Run
|
|
89
|
+
|
|
90
|
+
Generate a token in the IceFold app (**Nodes ▸ Connect a runner**), then:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
icefold-runner --token <your-token>
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
That's it — the token (GitHub-CI style) encodes + signs your IceFold user id, so
|
|
97
|
+
there's no server URL or user id to pass. The default server address is built in.
|
|
98
|
+
|
|
99
|
+
Every flag also reads an env var (see [`.env.example`](.env.example)):
|
|
100
|
+
|
|
101
|
+
| flag | env | meaning |
|
|
102
|
+
|---|---|---|
|
|
103
|
+
| `--token` | `ICEFOLD_RUNNER_TOKEN` | runner token from the IceFold app |
|
|
104
|
+
| `--runner-id` | `ICEFOLD_RUNNER_ID` | stable id (default: hostname) |
|
|
105
|
+
| `--work-dir` | `ICEFOLD_RUNNER_DIR` | scratch for staged inputs + products |
|
|
106
|
+
|
|
107
|
+
The runner honors standard proxy env vars (`HTTPS_PROXY`, …) for reaching the
|
|
108
|
+
server. It reconnects automatically with backoff; an auth rejection is fatal.
|
|
109
|
+
|
|
110
|
+
> Self-hosting / dev: point the runner at a different server with the
|
|
111
|
+
> `ICEFOLD_RUNNER_SERVER` env var (e.g. `ws://127.0.0.1:7000`).
|
|
112
|
+
|
|
113
|
+
### Run as a service (systemd)
|
|
114
|
+
|
|
115
|
+
```ini
|
|
116
|
+
# /etc/systemd/system/icefold-runner.service
|
|
117
|
+
[Unit]
|
|
118
|
+
Description=IceFold runner
|
|
119
|
+
After=network-online.target
|
|
120
|
+
|
|
121
|
+
[Service]
|
|
122
|
+
EnvironmentFile=/etc/icefold-runner.env
|
|
123
|
+
ExecStart=/opt/icefold-runner/.venv/bin/icefold-runner
|
|
124
|
+
Restart=always
|
|
125
|
+
RestartSec=5
|
|
126
|
+
|
|
127
|
+
[Install]
|
|
128
|
+
WantedBy=multi-user.target
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Layout
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
icefold_runner/ the runner agent (connection, file staging, bundle exec)
|
|
135
|
+
client.py reverse-WS client: dial / auth / reconnect / keepalive
|
|
136
|
+
runner.py fetch /v1/bundles/<hash>, preflight deps, await __icefold_run__
|
|
137
|
+
__main__.py CLI entrypoint (icefold-runner)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
The runner imports the bundle on demand; the bundle is **self-contained** and
|
|
141
|
+
already inlines whatever it needs (the author's function body, the
|
|
142
|
+
`Inputs` / `Output` dataclasses, and a minimal `NodeContext` shim). The only
|
|
143
|
+
runtime dependency on `icefold-sdk` is the wire protocol + a small helper kit
|
|
144
|
+
(`get_file_id` / `run_blocking` / `write_text`), used by the runner agent
|
|
145
|
+
itself, not by node code.
|
|
146
|
+
|
|
147
|
+
## Security model
|
|
148
|
+
|
|
149
|
+
- Node code runs **unsandboxed** here — it's your machine, your risk. That's the
|
|
150
|
+
point: code the server sandbox forbids (subprocess/ffmpeg/native deps) runs on
|
|
151
|
+
the runner instead. The runner downloads each bundle from the server and
|
|
152
|
+
executes it; it verifies the bundle's sha256 matches the requested hash, but
|
|
153
|
+
the bundle itself is whatever the server you authenticated to sends. Only
|
|
154
|
+
point a runner at a server you trust.
|
|
155
|
+
- The runner only talks to the one server you point it at, authenticated by the
|
|
156
|
+
shared token; it pulls input files and pushes products over HTTP to that host.
|
|
157
|
+
|
|
158
|
+
## Self-check
|
|
159
|
+
|
|
160
|
+
A no-network sanity check that `icefold` is importable and the bundle execution
|
|
161
|
+
path (fetch + import + run `__icefold_run__`) works against a locally-rendered
|
|
162
|
+
bundle:
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
python selfcheck.py
|
|
166
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
icefold_runner/__init__.py,sha256=E5P4VMUKq7Kva5iRBnTsvZkM_nCsWUfCUB8dmmicyys,1035
|
|
2
|
+
icefold_runner/__main__.py,sha256=BEjQKyeniGOTaai0r5QuiKDV6CVPwCVu7_zkxVd10Jw,2881
|
|
3
|
+
icefold_runner/client.py,sha256=mEnjkzMRlVVcuO8KG0_3TjhNAFhu4paToGt_wcIMlE0,11842
|
|
4
|
+
icefold_runner/runner.py,sha256=BWVc7M7RnyO2eqmA0M6Su_0eWUc-9RTiBSBR0zU0Xoc,12280
|
|
5
|
+
icefold_runner-0.1.0.dist-info/licenses/LICENSE,sha256=GkZpO-PWJeVUDysnFXlj-lDZPx0qQWrtOe1Cuzk4phA,1064
|
|
6
|
+
icefold_runner-0.1.0.dist-info/METADATA,sha256=alMtzr2kXRm2dfGLh-Y00M-hlCUXMAYf8HekpcJRjpI,7171
|
|
7
|
+
icefold_runner-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
8
|
+
icefold_runner-0.1.0.dist-info/entry_points.txt,sha256=U63xdZSCxgXJhs8JT-F1FHHr_aH_Ov97yUrbJB5lEs4,64
|
|
9
|
+
icefold_runner-0.1.0.dist-info/top_level.txt,sha256=o5xHC1eeRUfy6gGuAUA-tTXazxLy0zpDR9C8IKgaf68,15
|
|
10
|
+
icefold_runner-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 IceFold
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
icefold_runner
|