nvdc 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nvdc/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """nvdc: bring your GPU onto the network as a verifiable, OpenAI-compatible inference node."""
2
+
3
+ __version__ = "0.1.0"
nvdc/agent.py ADDED
@@ -0,0 +1,329 @@
1
+ """Node agent: the thing `nvdc serve` runs.
2
+
3
+ Opens ONE outbound WebSocket to the coordinator (so the node never needs an
4
+ inbound public port and its IP stays private), registers its GPU + attestation
5
+ profile, then services inference requests over that tunnel.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import hashlib
12
+ import json as _json
13
+ import logging
14
+ from typing import Any, Dict, Optional
15
+
16
+ import websockets
17
+
18
+ from . import __version__, keys, protocol
19
+ from .attestation import attest
20
+ from .gpu import detect_gpu, detect_gpus, detect_interconnect
21
+ from .hardware import detect_hardware, machine_id as hw_machine_id
22
+ from .inference import Backend, make_backend
23
+ from .keys import Identity
24
+
25
+ log = logging.getLogger("nvdc.agent")
26
+
27
+
28
+ def _extract_content(sse_line: str) -> str:
29
+ """Pull the delta content out of one OpenAI SSE 'data: {...}' line."""
30
+ line = sse_line.strip()
31
+ if not line.startswith("data:"):
32
+ return ""
33
+ try:
34
+ obj = _json.loads(line[len("data:"):].strip())
35
+ return obj["choices"][0]["delta"].get("content") or ""
36
+ except Exception:
37
+ return ""
38
+
39
+
40
class NodeAgent:
    """Keeps one node connected to the coordinator and serves inference on it.

    The agent opens an outbound WebSocket to ``coordinator_url``, sends a
    registration profile describing this machine, and then answers the
    messages the coordinator pushes over that socket. It advertises and
    serves exactly one model.
    """

    def __init__(
        self,
        coordinator_url: str,
        name: str,
        backend: Backend,
        model: str,
        token: str = "",
        require_attestation: bool = False,
        status_cb=None,
        drain_timeout: float = 120.0,
        price_per_mtok: float = 0.0,
        account_id: str = "",
        identity: Optional[Identity] = None,
        owner_account: str = "",
        machine_id: str = "",
        cluster: str = "",
    ):
        """Store the node configuration and initialise drain bookkeeping.

        Args:
            coordinator_url: WebSocket URL of the coordinator.
            name: Node name sent in the registration profile.
            backend: Inference backend providing ``chat_stream`` / ``chat_once``.
            model: The single model this node serves.
            token: Optional bearer token sent in the ``Authorization`` header.
            require_attestation: When true, registration fails unless the
                attestation result is verified.
            status_cb: Optional callable invoked as ``status_cb(status, info)``
                with a status string and a dict of details.
            drain_timeout: Seconds ``drain`` waits for in-flight requests.
            price_per_mtok: Price advertised in the registration profile.
            account_id: Account id; defaults to the identity's account id.
            identity: Signing identity; a fresh ``Identity()`` when omitted.
            owner_account: Owner account; defaults to ``account_id``.
            machine_id: Machine id; defaults to ``hw_machine_id()``.
            cluster: Cluster label sent in the registration profile.
        """
        # A node commits to exactly ONE hot-loaded model at a time — the
        # "mining algorithm" it has chosen. It advertises and serves only this
        # model; requests for anything else are rejected at the node boundary.
        self.coordinator_url = coordinator_url
        self.name = name
        self.backend = backend
        self.model = model
        self.price_per_mtok = price_per_mtok
        self.identity = identity or Identity()
        self.account_id = account_id or self.identity.account_id
        # Earnings credit the owner account; a single machine owns itself.
        self.owner_account = owner_account or self.account_id
        self.machine_id = machine_id or hw_machine_id()
        self.cluster = cluster
        self.token = token
        self.require_attestation = require_attestation
        self.status_cb = status_cb
        self.drain_timeout = drain_timeout
        self._ws = None
        self._send_lock = asyncio.Lock()
        # graceful drain bookkeeping
        self._stopped = False
        self._draining = False
        self._inflight_ids = set()  # request ids currently being served
        self._inflight_zero = asyncio.Event()
        self._inflight_zero.set()  # starts idle
        # Strong references to running inference tasks. The event loop only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._infer_tasks = set()

    def _emit(self, status: str, **info):
        """Report a status change to ``status_cb``; callback errors are logged, not raised."""
        if self.status_cb:
            try:
                self.status_cb(status, info)
            except Exception:
                log.debug("status_cb error", exc_info=True)

    async def run_forever(self):
        """Connect and serve until the agent is stopped, reconnecting on failure.

        Connection errors (``OSError`` / ``websockets.WebSocketException``) are
        retried with exponential backoff capped at 30 seconds. Other exceptions
        (for example the ``RuntimeError`` raised by ``_register``) propagate.
        """
        backoff = 1
        while not self._stopped:
            try:
                await self._connect_and_serve()
            except (OSError, websockets.WebSocketException) as e:
                if self._stopped:
                    break
                log.warning("connection lost (%s); reconnecting in %ss", e, backoff)
                self._emit("connecting", detail=str(e))
                await asyncio.sleep(backoff)
                backoff = min(backoff * 2, 30)
            else:
                # The session ended without an exception (clean close).
                backoff = 1
                if not self._stopped:
                    # Pause before reconnecting: a coordinator that accepts and
                    # then cleanly closes would otherwise be hit in a tight loop.
                    log.info("connection closed; reconnecting in %ss", backoff)
                    await asyncio.sleep(backoff)

    async def drain(self):
        """Gracefully leave: tell the coordinator to stop routing new work,
        let in-flight requests finish, then disconnect. In-flight responses are
        never interrupted, so the node's delivery/completion score is preserved.

        If the in-flight requests do not finish within ``drain_timeout``
        seconds, an error frame is sent for each request id still outstanding
        before disconnecting. Calling ``drain`` again while draining is a no-op.
        """
        if self._draining:
            return
        self._draining = True
        log.info("draining: %d request(s) in flight", len(self._inflight_ids))
        self._emit("draining", inflight=len(self._inflight_ids))
        try:
            await self._send(protocol.MSG_DRAIN)  # coordinator stops routing now
        except Exception:
            # Best effort: a dead socket must not prevent the local drain.
            log.debug("could not send drain notice", exc_info=True)
        try:
            # block until all in-flight complete, but not forever
            await asyncio.wait_for(self._inflight_zero.wait(), timeout=self.drain_timeout)
            log.info("drain complete; disconnecting")
        except asyncio.TimeoutError:
            stuck = list(self._inflight_ids)
            log.warning(
                "drain timeout after %ss; force-failing %d stuck request(s): %s",
                self.drain_timeout, len(stuck), stuck,
            )
            # Fail only the stuck requests as node_failed; completed ones are
            # already done and unaffected.
            for rid in stuck:
                try:
                    await self._send(
                        protocol.MSG_ERROR, id=rid,
                        error=f"node_failed: drain timeout after {self.drain_timeout}s",
                    )
                except Exception:
                    log.debug("could not fail stuck request %s", rid, exc_info=True)
        self._stopped = True
        if self._ws is not None:
            try:
                await self._ws.close()
            except Exception:
                log.debug("error while closing websocket", exc_info=True)

    async def _connect_and_serve(self):
        """Run one WebSocket session: connect, register, dispatch until it ends.

        ``self._ws`` is cleared and an ``"offline"`` status is emitted when the
        receive loop exits, whether normally or through an exception.
        """
        headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        log.info("connecting to coordinator %s", self.coordinator_url)
        async with websockets.connect(
            self.coordinator_url,
            additional_headers=headers,
            max_size=32 * 1024 * 1024,
            ping_interval=20,
        ) as ws:
            self._ws = ws
            await self._register()
            try:
                async for raw in ws:
                    msg = protocol.decode(raw)
                    await self._dispatch(msg)
            finally:
                self._ws = None
                self._emit("offline")

    async def _register(self):
        """Detect hardware, attest, and send the registration profile.

        Raises:
            RuntimeError: If attestation is required but the result is not
                verified.
        """
        gpus = detect_gpus()
        gpu = gpus[0] if gpus else detect_gpu()
        # Interconnect is only probed when more than one GPU was detected.
        interconnect = detect_interconnect() if len(gpus) > 1 else ""
        hw = detect_hardware()
        att = attest(require=self.require_attestation)
        if self.require_attestation and not att.verified:
            raise RuntimeError(
                f"attestation required but not verified: {att.reason or att.mode}"
            )
        profile = protocol.NodeProfile(
            name=self.name,
            models=[self.model],
            gpu=gpu,
            attestation=att,
            gpus=gpus,
            gpu_count=len(gpus),
            interconnect=interconnect,
            ram_mb=hw.ram_mb,
            memory_budget_mb=hw.memory_budget_mb,
            accelerator=hw.accelerator.type,
            price_per_mtok=self.price_per_mtok,
            account_id=self.account_id,
            owner_account=self.owner_account,
            machine_id=self.machine_id,
            cluster=self.cluster,
            agent_version=__version__,
        )
        await self._send(protocol.MSG_REGISTER, profile=protocol.node_profile_to_dict(profile))
        log.info(
            "registered '%s' | gpu=%s | serving=%s | attestation=%s(verified=%s)",
            self.name, gpu.name, self.model, att.mode, att.verified,
        )

    async def _dispatch(self, msg: Dict[str, Any]):
        """Route one decoded coordinator message by its ``"t"`` field.

        Inference requests run as separate tasks so the receive loop keeps
        reading; unknown message types are ignored.
        """
        t = msg.get("t")
        if t == protocol.MSG_INFER:
            task = asyncio.create_task(self._handle_infer(msg))
            # Hold the task until it completes so it cannot be collected
            # mid-request; the callback releases it afterwards.
            self._infer_tasks.add(task)
            task.add_done_callback(self._infer_tasks.discard)
        elif t == protocol.MSG_PING:
            await self._send(protocol.MSG_PONG)
        elif t == protocol.MSG_REGISTERED:
            log.info("coordinator assigned node_id=%s", msg.get("node_id"))
            self._emit("live", node_id=msg.get("node_id"))
        else:
            log.debug("ignoring message type %s", t)

    async def _handle_infer(self, msg: Dict[str, Any]):
        """Serve one inference request and send its result, chunks, or error.

        The request is refused while draining or when it names a model other
        than ``self.model``. Otherwise the body is forwarded to the backend
        (streaming when ``body["stream"]`` is truthy) and the final frame
        carries a token count, a SHA-256 of the response text, and a signature
        from ``_sign_work``.
        """
        req_id = msg.get("id")
        body = dict(msg.get("body", {}))
        requested = body.get("model", "")

        # Once draining, refuse new work so it can be routed elsewhere. This
        # closes the race between the operator leaving and the coordinator
        # marking us un-routable; in-flight requests (already past this point)
        # are unaffected and run to completion.
        if self._draining:
            await self._send(
                protocol.MSG_ERROR, id=req_id,
                error="node is draining; request not accepted",
            )
            return

        # Enforce the single committed model at the node boundary. A node only
        # serves the model it has hot-loaded; anything else is refused so it
        # can never be coerced into running a cold/different model.
        if requested and requested != self.model:
            await self._send(
                protocol.MSG_ERROR, id=req_id,
                error=f"this node only serves '{self.model}', not '{requested}'",
            )
            return
        body["model"] = self.model  # pin, in case the request omitted it
        prompt_commit = hashlib.sha256(
            _json.dumps(body.get("messages", []), sort_keys=True).encode()).hexdigest()

        self._inflight_ids.add(req_id)
        self._inflight_zero.clear()
        stream = bool(body.get("stream", False))
        try:
            if stream:
                acc, tokens = [], 0
                async for line in self.backend.chat_stream(body):
                    await self._send(protocol.MSG_CHUNK, id=req_id, data=line)
                    # Each streamed line mentioning "content" counts as one token.
                    if '"content"' in line:
                        tokens += 1
                        c = _extract_content(line)
                        if c:
                            acc.append(c)
                response_commit = hashlib.sha256("".join(acc).encode()).hexdigest()
                sig = self._sign_work(req_id, prompt_commit, tokens, response_commit, "complete")
                await self._send(protocol.MSG_END, id=req_id, tokens=tokens,
                                 response_commit=response_commit, sig=sig)
            else:
                result = await self.backend.chat_once(body)
                content = ""
                try:
                    content = result["choices"][0]["message"].get("content") or ""
                except Exception:
                    # A result without the expected shape commits to "".
                    pass
                tokens = (result.get("usage", {}) or {}).get("completion_tokens", 0)
                response_commit = hashlib.sha256(content.encode()).hexdigest()
                sig = self._sign_work(req_id, prompt_commit, tokens, response_commit, "complete")
                await self._send(protocol.MSG_RESULT, id=req_id, body=result,
                                 tokens=tokens, response_commit=response_commit, sig=sig)
        except Exception as e:
            log.exception("inference failed for %s", req_id)
            try:
                await self._send(protocol.MSG_ERROR, id=req_id, error=str(e))
            except Exception:
                # The socket may be the thing that failed; do not let the
                # report itself escape as an unretrieved task exception.
                log.debug("could not report failure for %s", req_id, exc_info=True)
        finally:
            self._inflight_ids.discard(req_id)
            if not self._inflight_ids:
                self._inflight_zero.set()

    def _sign_work(self, req_id, prompt_commit, tokens, response_commit, delivery) -> str:
        """Sign the work payload for one request with this node's identity."""
        payload = keys.work_payload(req_id, self.model, prompt_commit, tokens,
                                    response_commit, delivery)
        return self.identity.sign(payload)

    async def _send(self, msg_type: str, **fields: Any):
        """Encode and send one frame; silently does nothing when disconnected.

        Sends are serialised through ``_send_lock``.
        """
        if self._ws is None:
            return
        frame = protocol.encode(msg_type, **fields)
        async with self._send_lock:
            # Re-read the socket after waiting for the lock: the session may
            # have ended (``_ws`` reset to None) while this send was queued.
            ws = self._ws
            if ws is None:
                return
            await ws.send(frame)
289
+
290
+
291
async def serve(
    coordinator_url: str,
    name: str,
    backend_kind: str,
    model: str,
    ollama_url: str,
    token: str = "",
    require_attestation: bool = False,
    warm: bool = True,
    drain_timeout: float = 120.0,
    owner_account: str = "",
    cluster: str = "",
):
    """Build a backend and a ``NodeAgent`` for one model and run the agent.

    Args:
        coordinator_url: WebSocket URL handed to the agent.
        name: Node name handed to the agent.
        backend_kind: Backend selector passed to ``make_backend``.
        model: The single model to serve; must be non-empty.
        ollama_url: Backend URL passed to ``make_backend``.
        token: Optional coordinator token.
        require_attestation: Forwarded to the agent.
        warm: When true, ``backend.warm(model)`` is awaited before the agent
            starts; a warm-up failure is logged and serving continues.
        drain_timeout: Forwarded to the agent.
        owner_account: Forwarded to the agent.
        cluster: Forwarded to the agent.

    Raises:
        ValueError: If ``model`` is empty.
    """
    if not model:
        raise ValueError("a single --model must be specified; a node serves exactly one model")

    engine = make_backend(backend_kind, model=model, ollama_url=ollama_url)

    # Hot-load the committed model before advertising it to the network, so the
    # node is never live with a cold model (low TTFT guarantee).
    if warm:
        log.info("hot-loading '%s' into memory ...", model)
        try:
            await engine.warm(model)
        except Exception as exc:
            log.warning("warm-up failed for '%s' (%s); serving anyway", model, exc)
        else:
            log.info("'%s' is hot", model)

    agent_options = dict(
        coordinator_url=coordinator_url,
        name=name,
        backend=engine,
        model=model,
        token=token,
        require_attestation=require_attestation,
        drain_timeout=drain_timeout,
        owner_account=owner_account,
        cluster=cluster,
    )
    node = NodeAgent(**agent_options)
    await node.run_forever()
nvdc/app.py ADDED
@@ -0,0 +1,306 @@
1
+ """Local visual client: a small web app the node operator opens in a browser.
2
+
3
+ `nvdc app` starts this server and opens the page. It shows hardware + inference
4
+ mechanism, a catalog of popular models with fit indicators, lets the operator
5
+ load one into memory (hot), and only then enables "Go Live" on the network.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import json as _json
12
+ import logging
13
+ import secrets
14
+ import time
15
+ from pathlib import Path
16
+
17
+ import httpx
18
+ from fastapi import FastAPI, Request, Response
19
+ from fastapi.middleware.cors import CORSMiddleware
20
+ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
21
+
22
+ from . import keys
23
+ from .runtime import NodeRuntime
24
+
25
+ log = logging.getLogger("nvdc.app")
26
+
27
+ WEB_DIR = Path(__file__).parent / "web"
28
+
29
+
30
def create_app(runtime: NodeRuntime) -> FastAPI:
    """Build the FastAPI application served by the local node client.

    The app exposes three groups of routes:

    * node control (`/`, `/api/state`, `/api/load`, `/api/network/*`,
      `/api/price`), which call straight into ``runtime``;
    * read-only and verify proxies (`/api/net/*`, `/api/payments/config`)
      that forward to the coordinator at ``runtime.coordinator_http``;
    * wallet / payout routes that forward to the coordinator with the
      ``X-NVDC-Account`` header set to ``runtime.wallet.account_id``.

    Request bodies that are not valid JSON objects are answered with HTTP 400,
    and non-finite numbers (NaN / Infinity) are rejected for price and deposit
    amounts.

    Args:
        runtime: The node runtime the routes operate on; also stored on
            ``app.state.runtime``.

    Returns:
        The configured ``FastAPI`` instance.
    """
    import math  # function-scope: only the numeric validator below needs it

    app = FastAPI(title="nvdc node client")
    app.state.runtime = runtime

    # Let the hosted Vercel site detect and read THIS locally-running client.
    # NOTE(review): with allow_origins=["*"] every web page the operator visits
    # can also call the state-changing routes below (wallet buy, payout
    # withdraw, go live). Consider restricting origins or requiring a local
    # secret — left unchanged here because the allowed origin is not known.
    app.add_middleware(
        CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
    )

    async def _json_object(request: Request):
        """Return the request body as a dict, or None if it is not a JSON object."""
        try:
            payload = await request.json()
        except ValueError:  # includes json.JSONDecodeError and bad encodings
            return None
        return payload if isinstance(payload, dict) else None

    def _finite_float(value):
        """Convert ``value`` to a finite float, or return None if that fails."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        return number if math.isfinite(number) else None

    @app.middleware("http")
    async def private_network_access(request: Request, call_next):
        # A public (https) page fetching localhost triggers Chrome's Private
        # Network Access preflight, which needs this header to be allowed.
        if request.method == "OPTIONS" and request.headers.get(
                "access-control-request-private-network") == "true":
            resp = Response(status_code=204)
            resp.headers["Access-Control-Allow-Origin"] = request.headers.get("origin", "*")
            resp.headers["Access-Control-Allow-Methods"] = "*"
            resp.headers["Access-Control-Allow-Headers"] = "*"
            resp.headers["Access-Control-Allow-Private-Network"] = "true"
            return resp
        resp = await call_next(request)
        resp.headers["Access-Control-Allow-Private-Network"] = "true"
        return resp

    @app.get("/", response_class=HTMLResponse)
    async def index():
        return (WEB_DIR / "index.html").read_text(encoding="utf-8")

    @app.get("/api/state")
    async def state():
        await runtime.refresh_installed()
        return runtime.snapshot()

    @app.post("/api/load")
    async def load(request: Request):
        body = await _json_object(request)
        if body is None:
            return JSONResponse({"ok": False, "error": "invalid JSON body"}, status_code=400)
        model_id = body.get("model")
        if not model_id:
            return JSONResponse({"ok": False, "error": "model required"}, status_code=400)
        ok, msg = runtime.start_load(model_id)
        return {"ok": ok, "message": msg}

    @app.post("/api/network/live")
    async def live(request: Request):
        body = await _json_object(request)
        if body is None:
            return JSONResponse({"ok": False, "error": "invalid JSON body"}, status_code=400)
        ok, msg = runtime.go_live(
            coordinator=body.get("coordinator", ""),
            token=body.get("token", ""),
        )
        return {"ok": ok, "message": msg}

    @app.post("/api/network/offline")
    async def offline():
        ok, msg = runtime.go_offline()
        return {"ok": ok, "message": msg}

    # ---- consumer proxies to the coordinator --------------------------------
    # Keep the UI same-origin (no CORS) by proxying the coordinator's public API
    # through this local app. These power the Home / Chat / Network tabs.
    @app.get("/api/coordinator")
    async def coordinator_info():
        base = runtime.coordinator_http
        return {"http": base, "openai_base": (base + "/v1") if base else ""}

    async def _proxy_get(path: str):
        """GET ``path`` on the coordinator and relay its JSON body and status."""
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"error": "no coordinator configured"}, status_code=503)
        try:
            async with httpx.AsyncClient(timeout=10) as c:
                r = await c.get(base + path)
                return JSONResponse(r.json(), status_code=r.status_code)
        except Exception as e:
            return JSONResponse({"error": f"coordinator unreachable: {e}"}, status_code=502)

    @app.get("/api/net/status")
    async def net_status():
        return await _proxy_get("/api/network")

    @app.get("/api/net/models")
    async def net_models():
        return await _proxy_get("/v1/models")

    @app.get("/api/net/miners")
    async def net_miners():
        return await _proxy_get("/nodes")

    @app.get("/api/net/market")
    async def net_market():
        return await _proxy_get("/api/market")

    @app.get("/api/net/marketplace")
    async def net_marketplace():
        return await _proxy_get("/v1/marketplace")

    @app.get("/api/net/ledger")
    async def net_ledger():
        return await _proxy_get("/api/ledger?limit=40")

    @app.post("/api/net/verify")
    async def net_verify(request: Request):
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"error": "no coordinator"}, status_code=503)
        try:
            # The receipt is forwarded as-is, so any JSON value is accepted.
            receipt = await request.json()
        except ValueError:
            return JSONResponse({"error": "invalid JSON body"}, status_code=400)
        try:
            async with httpx.AsyncClient(timeout=8) as c:
                r = await c.post(base + "/api/verify", json=receipt)
                return JSONResponse(r.json(), status_code=r.status_code)
        except Exception as e:
            return JSONResponse({"error": str(e)}, status_code=502)

    # ---- wallet (backed by the coordinator account) -------------------------
    def _acct_headers():
        return {"X-NVDC-Account": runtime.wallet.account_id}

    @app.get("/api/wallet")
    async def wallet():
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"account_id": runtime.wallet.account_id,
                                 "balance_usd": 0, "total_earned": 0, "total_spent": 0,
                                 "receipts": [], "error": "no coordinator"}, status_code=200)
        try:
            async with httpx.AsyncClient(timeout=8) as c:
                r = await c.get(base + "/api/account", headers=_acct_headers())
                return JSONResponse(r.json(), status_code=r.status_code)
        except Exception as e:
            # Answered with 200 and a zero balance plus the error text.
            return JSONResponse({"account_id": runtime.wallet.account_id,
                                 "balance_usd": 0, "error": str(e)}, status_code=200)

    @app.post("/api/wallet/buy")
    async def wallet_buy(request: Request):
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"ok": False, "error": "no coordinator configured"}, status_code=503)
        body = await _json_object(request)
        if body is None:
            return JSONResponse({"ok": False, "error": "invalid JSON body"}, status_code=400)
        # NaN / Infinity are not valid amounts and cannot be encoded as JSON.
        amount = _finite_float(body.get("amount_usd", 0))
        if amount is None:
            return JSONResponse({"ok": False, "error": "invalid amount"}, status_code=400)
        try:
            async with httpx.AsyncClient(timeout=8) as c:
                r = await c.post(base + "/api/account/deposit",
                                 json={"amount_usd": amount}, headers=_acct_headers())
                j = r.json()
                return {"ok": r.status_code == 200, "bought_usd": amount, **j}
        except Exception as e:
            return JSONResponse({"ok": False, "error": str(e)}, status_code=502)

    async def _coordinator_post(path: str, json_body=None):
        """POST to the coordinator as this wallet's account and relay the reply."""
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"error": "no coordinator configured"}, status_code=503)
        try:
            async with httpx.AsyncClient(timeout=20) as c:
                r = await c.post(base + path, json=json_body or {}, headers=_acct_headers())
                return JSONResponse(r.json(), status_code=r.status_code)
        except Exception as e:
            return JSONResponse({"error": f"coordinator unreachable: {e}"}, status_code=502)

    @app.get("/api/payments/config")
    async def payments_config():
        return await _proxy_get("/api/payments/config")

    @app.post("/api/wallet/checkout")
    async def wallet_checkout(request: Request):
        body = await _json_object(request)
        if body is None:
            return JSONResponse({"error": "invalid JSON body"}, status_code=400)
        return await _coordinator_post("/api/account/checkout",
                                       {"amount_usd": body.get("amount_usd", 0)})

    @app.post("/api/payout/onboard")
    async def payout_onboard():
        return await _coordinator_post("/api/payout/onboard")

    @app.post("/api/payout/withdraw")
    async def payout_withdraw():
        return await _coordinator_post("/api/payout/withdraw")

    @app.post("/api/price")
    async def set_price(request: Request):
        body = await _json_object(request)
        if body is None:
            return JSONResponse({"ok": False, "error": "invalid JSON body"}, status_code=400)
        # Reject NaN / Infinity: they would be stored as the node's price and
        # cannot be serialised in the JSON response below.
        price = _finite_float(body.get("price_per_mtok", 0))
        if price is None:
            return JSONResponse({"ok": False, "error": "invalid price"}, status_code=400)
        runtime.set_price(price)
        return {"ok": True, "price_per_mtok": runtime.price_per_mtok}

    @app.post("/api/net/chat")
    async def net_chat(request: Request):
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"error": "no coordinator configured"}, status_code=503)
        body = await _json_object(request)
        if body is None:
            return JSONResponse({"error": "invalid JSON body"}, status_code=400)
        # The app's own consumer account funds these requests.
        account = runtime.wallet.account_id
        headers = {"Content-Type": "application/json", "X-NVDC-Account": account}
        # Forward the caller's routing preferences when present.
        for h in ("x-nvdc-pin", "x-nvdc-min-rating", "x-nvdc-max-price", "x-nvdc-models"):
            if request.headers.get(h):
                headers[h] = request.headers[h]

        # Sign the request authorization so the receipt is dual-signed.
        prompt_commit = hashlib.sha256(
            _json.dumps(body.get("messages", []), sort_keys=True).encode()).hexdigest()
        nonce = secrets.token_hex(8)
        ts = int(time.time())
        try:
            max_price = float(headers.get("x-nvdc-max-price", -1))
        except ValueError:
            max_price = -1
        auth = keys.auth_payload(account, body.get("model", ""), prompt_commit,
                                 max_price, nonce, ts)
        headers["X-NVDC-Consumer-Sig"] = runtime.wallet.sign(auth)
        headers["X-NVDC-Nonce"] = nonce
        headers["X-NVDC-Ts"] = str(ts)

        async def _balance():
            """Fetch the account balance; None when it cannot be read."""
            try:
                async with httpx.AsyncClient(timeout=5) as c:
                    r = await c.get(base + "/api/account", headers=_acct_headers())
                    return float(r.json().get("balance_usd", 0))
            except Exception:
                return None

        if body.get("stream"):
            async def gen():
                tokens, price, node, status = 0, 0.0, "", 200
                try:
                    async with httpx.AsyncClient(timeout=None) as c:
                        async with c.stream("POST", base + "/v1/chat/completions",
                                            json=body, headers=headers) as r:
                            status = r.status_code
                            price = float(r.headers.get("x-nvdc-price-per-mtok", 0) or 0)
                            node = r.headers.get("x-nvdc-node", "")
                            if status != 200:
                                txt = (await r.aread()).decode("utf-8", "replace")
                                yield "data: " + _json.dumps({"error": {"message": txt}}) + "\n\n"
                                return
                            async for line in r.aiter_lines():
                                if not line:
                                    continue
                                # Hold back the upstream terminator so the
                                # billing event can be appended before it.
                                if line.strip() == "data: [DONE]":
                                    break
                                if '"content"' in line:
                                    tokens += 1
                                yield line + "\n\n"
                    cost = tokens * price / 1_000_000.0
                    bal = await _balance()
                    yield "data: " + _json.dumps({"nvdc_billing": {
                        "node": node, "price_per_mtok": price, "tokens": tokens,
                        "cost_usd": round(cost, 8),
                        "balance_usd": round(bal, 6) if bal is not None else None}}) + "\n\n"
                    yield "data: [DONE]\n\n"
                except Exception as e:
                    yield "data: " + _json.dumps({"error": {"message": str(e)}}) + "\n\n"
            return StreamingResponse(gen(), media_type="text/event-stream")

        try:
            async with httpx.AsyncClient(timeout=None) as c:
                r = await c.post(base + "/v1/chat/completions", json=body, headers=headers)
                j = r.json()
                if r.status_code == 200:
                    bal = await _balance()
                    j["nvdc_billing"] = {
                        "node": r.headers.get("x-nvdc-node", ""),
                        "price_per_mtok": float(r.headers.get("x-nvdc-price-per-mtok", 0) or 0),
                        "tokens": int(r.headers.get("x-nvdc-tokens", 0) or 0),
                        "cost_usd": float(r.headers.get("x-nvdc-cost", 0) or 0),
                        "receipt": r.headers.get("x-nvdc-receipt", ""),
                        "balance_usd": round(bal, 6) if bal is not None else None}
                return JSONResponse(j, status_code=r.status_code)
        except Exception as e:
            return JSONResponse({"error": f"coordinator unreachable: {e}"}, status_code=502)

    return app