nvdc 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nvdc/__init__.py +3 -0
- nvdc/agent.py +329 -0
- nvdc/app.py +306 -0
- nvdc/attestation.py +122 -0
- nvdc/catalog.py +104 -0
- nvdc/cli.py +217 -0
- nvdc/config.py +17 -0
- nvdc/coordinator.py +869 -0
- nvdc/gpu.py +167 -0
- nvdc/hardware.py +157 -0
- nvdc/inference.py +133 -0
- nvdc/keys.py +114 -0
- nvdc/payments.py +125 -0
- nvdc/protocol.py +91 -0
- nvdc/runtime.py +317 -0
- nvdc/storage.py +207 -0
- nvdc/wallet.py +26 -0
- nvdc/web/index.html +605 -0
- nvdc-0.1.0.dist-info/METADATA +169 -0
- nvdc-0.1.0.dist-info/RECORD +23 -0
- nvdc-0.1.0.dist-info/WHEEL +5 -0
- nvdc-0.1.0.dist-info/entry_points.txt +2 -0
- nvdc-0.1.0.dist-info/top_level.txt +1 -0
nvdc/__init__.py
ADDED
nvdc/agent.py
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""Node agent: the thing `nvdc serve` runs.
|
|
2
|
+
|
|
3
|
+
Opens ONE outbound WebSocket to the coordinator (so the node never needs an
|
|
4
|
+
inbound public port and its IP stays private), registers its GPU + attestation
|
|
5
|
+
profile, then services inference requests over that tunnel.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import hashlib
|
|
12
|
+
import json as _json
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any, Dict, Optional
|
|
15
|
+
|
|
16
|
+
import websockets
|
|
17
|
+
|
|
18
|
+
from . import __version__, keys, protocol
|
|
19
|
+
from .attestation import attest
|
|
20
|
+
from .gpu import detect_gpu, detect_gpus, detect_interconnect
|
|
21
|
+
from .hardware import detect_hardware, machine_id as hw_machine_id
|
|
22
|
+
from .inference import Backend, make_backend
|
|
23
|
+
from .keys import Identity
|
|
24
|
+
|
|
25
|
+
log = logging.getLogger("nvdc.agent")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _extract_content(sse_line: str) -> str:
|
|
29
|
+
"""Pull the delta content out of one OpenAI SSE 'data: {...}' line."""
|
|
30
|
+
line = sse_line.strip()
|
|
31
|
+
if not line.startswith("data:"):
|
|
32
|
+
return ""
|
|
33
|
+
try:
|
|
34
|
+
obj = _json.loads(line[len("data:"):].strip())
|
|
35
|
+
return obj["choices"][0]["delta"].get("content") or ""
|
|
36
|
+
except Exception:
|
|
37
|
+
return ""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class NodeAgent:
    """Long-running client that serves one model over a coordinator WebSocket.

    The agent dials out to ``coordinator_url``, sends a registration profile,
    then answers the messages the coordinator pushes down that connection
    (inference requests, pings, registration acknowledgements).
    """

    def __init__(
        self,
        coordinator_url: str,
        name: str,
        backend: Backend,
        model: str,
        token: str = "",
        require_attestation: bool = False,
        status_cb=None,
        drain_timeout: float = 120.0,
        price_per_mtok: float = 0.0,
        account_id: str = "",
        identity: Optional[Identity] = None,
        owner_account: str = "",
        machine_id: str = "",
        cluster: str = "",
    ):
        """Store configuration and initialise connection/drain bookkeeping.

        Args:
            coordinator_url: WebSocket URL of the coordinator.
            name: Name advertised in the registration profile.
            backend: Inference backend used for ``chat_stream`` / ``chat_once``.
            model: The single model this node advertises and serves.
            token: Optional bearer token sent in the ``Authorization`` header.
            require_attestation: When true, registration fails unless the
                attestation result is verified.
            status_cb: Optional callable ``(status, info_dict)`` notified of
                state changes; errors it raises are logged and ignored.
            drain_timeout: Seconds ``drain`` waits for in-flight requests.
            price_per_mtok: Price advertised in the registration profile.
            account_id: Account id; defaults to the identity's account id.
            identity: Signing identity; a fresh ``Identity()`` when omitted.
            owner_account: Account credited with earnings; defaults to
                ``account_id``.
            machine_id: Machine id; defaults to the detected hardware id.
            cluster: Cluster label sent in the registration profile.
        """
        # A node commits to exactly ONE hot-loaded model at a time — the
        # "mining algorithm" it has chosen. It advertises and serves only this
        # model; requests for anything else are rejected at the node boundary.
        self.coordinator_url = coordinator_url
        self.name = name
        self.backend = backend
        self.model = model
        self.price_per_mtok = price_per_mtok
        self.identity = identity or Identity()
        self.account_id = account_id or self.identity.account_id
        # Earnings credit the owner account; a single machine owns itself.
        self.owner_account = owner_account or self.account_id
        self.machine_id = machine_id or hw_machine_id()
        self.cluster = cluster
        self.token = token
        self.require_attestation = require_attestation
        self.status_cb = status_cb
        self.drain_timeout = drain_timeout
        self._ws = None
        self._send_lock = asyncio.Lock()
        # graceful drain bookkeeping
        self._stopped = False
        self._draining = False
        self._inflight_ids = set()  # request ids currently being served
        self._inflight_zero = asyncio.Event()
        self._inflight_zero.set()  # starts idle
        # Strong references to running inference tasks. The event loop only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._infer_tasks = set()

    def _emit(self, status: str, **info):
        """Report ``status`` and its ``info`` dict to ``status_cb``, if set."""
        if self.status_cb:
            try:
                self.status_cb(status, info)
            except Exception:
                # A faulty observer must never take the agent down.
                log.debug("status_cb error", exc_info=True)

    async def run_forever(self):
        """Connect and serve until stopped, reconnecting after every drop.

        Connection errors back off exponentially (1s doubling up to 30s). A
        session that ends without an exception resets the back-off but still
        waits before redialling, so a coordinator that keeps closing the
        socket cleanly is not hammered with back-to-back reconnects.
        """
        backoff = 1
        while not self._stopped:
            try:
                await self._connect_and_serve()
            except (OSError, websockets.WebSocketException) as e:
                if self._stopped:
                    break
                log.warning("connection lost (%s); reconnecting in %ss", e, backoff)
                self._emit("connecting", detail=str(e))
                await asyncio.sleep(backoff)
                backoff = min(backoff * 2, 30)
            else:
                # The message loop finished without raising: the link was
                # closed cleanly (by the coordinator, or by drain()).
                backoff = 1
                if self._stopped:
                    break
                log.info("connection closed; reconnecting in %ss", backoff)
                self._emit("connecting", detail="connection closed")
                await asyncio.sleep(backoff)

    async def drain(self):
        """Gracefully leave: tell the coordinator to stop routing new work,
        let in-flight requests finish, then disconnect.

        Requests still running when ``drain_timeout`` expires are reported to
        the coordinator as ``node_failed``; requests that completed in time
        are unaffected. Calling this again while a drain is in progress is a
        no-op.
        """
        if self._draining:
            return
        self._draining = True
        log.info("draining: %d request(s) in flight", len(self._inflight_ids))
        self._emit("draining", inflight=len(self._inflight_ids))
        try:
            await self._send(protocol.MSG_DRAIN)  # coordinator stops routing now
        except Exception:
            # Best effort: the link may already be gone.
            log.debug("could not send drain notice", exc_info=True)
        try:
            # block until all in-flight complete, but not forever
            await asyncio.wait_for(self._inflight_zero.wait(), timeout=self.drain_timeout)
            log.info("drain complete; disconnecting")
        except asyncio.TimeoutError:
            stuck = list(self._inflight_ids)
            log.warning(
                "drain timeout after %ss; force-failing %d stuck request(s): %s",
                self.drain_timeout, len(stuck), stuck,
            )
            # Fail only the stuck requests as node_failed; completed ones are
            # already done and unaffected.
            for rid in stuck:
                try:
                    await self._send(
                        protocol.MSG_ERROR, id=rid,
                        error=f"node_failed: drain timeout after {self.drain_timeout}s",
                    )
                except Exception:
                    log.debug("could not force-fail request %s", rid, exc_info=True)
        self._stopped = True
        if self._ws is not None:
            try:
                await self._ws.close()
            except Exception:
                log.debug("error while closing websocket", exc_info=True)

    async def _connect_and_serve(self):
        """Open one coordinator connection, register, and dispatch messages
        until the connection ends. Always emits ``offline`` on the way out."""
        headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        log.info("connecting to coordinator %s", self.coordinator_url)
        async with websockets.connect(
            self.coordinator_url,
            additional_headers=headers,
            max_size=32 * 1024 * 1024,
            ping_interval=20,
        ) as ws:
            self._ws = ws
            await self._register()
            try:
                async for raw in ws:
                    msg = protocol.decode(raw)
                    await self._dispatch(msg)
            finally:
                self._ws = None
                self._emit("offline")

    async def _register(self):
        """Detect hardware, attest, and send the registration profile.

        Raises:
            RuntimeError: If attestation is required but was not verified.
        """
        gpus = detect_gpus()
        gpu = gpus[0] if gpus else detect_gpu()
        # Interconnect detection is only attempted with more than one GPU.
        interconnect = detect_interconnect() if len(gpus) > 1 else ""
        hw = detect_hardware()
        att = attest(require=self.require_attestation)
        if self.require_attestation and not att.verified:
            raise RuntimeError(
                f"attestation required but not verified: {att.reason or att.mode}"
            )
        profile = protocol.NodeProfile(
            name=self.name,
            models=[self.model],
            gpu=gpu,
            attestation=att,
            gpus=gpus,
            gpu_count=len(gpus),
            interconnect=interconnect,
            ram_mb=hw.ram_mb,
            memory_budget_mb=hw.memory_budget_mb,
            accelerator=hw.accelerator.type,
            price_per_mtok=self.price_per_mtok,
            account_id=self.account_id,
            owner_account=self.owner_account,
            machine_id=self.machine_id,
            cluster=self.cluster,
            agent_version=__version__,
        )
        await self._send(protocol.MSG_REGISTER, profile=protocol.node_profile_to_dict(profile))
        log.info(
            "registered '%s' | gpu=%s | serving=%s | attestation=%s(verified=%s)",
            self.name, gpu.name, self.model, att.mode, att.verified,
        )

    async def _dispatch(self, msg: Dict[str, Any]):
        """Route one decoded coordinator message by its ``t`` (type) field."""
        t = msg.get("t")
        if t == protocol.MSG_INFER:
            # Run inference concurrently so the receive loop keeps reading.
            # Hold a reference until the task is done so it cannot be
            # garbage-collected mid-flight.
            task = asyncio.create_task(self._handle_infer(msg))
            self._infer_tasks.add(task)
            task.add_done_callback(self._infer_tasks.discard)
        elif t == protocol.MSG_PING:
            await self._send(protocol.MSG_PONG)
        elif t == protocol.MSG_REGISTERED:
            log.info("coordinator assigned node_id=%s", msg.get("node_id"))
            self._emit("live", node_id=msg.get("node_id"))
        else:
            log.debug("ignoring message type %s", t)

    async def _handle_infer(self, msg: Dict[str, Any]):
        """Serve one inference request and send back the signed outcome.

        Streaming requests forward every backend line as a chunk and finish
        with an end message; non-streaming requests send a single result.
        Either way the reply carries a token count, a SHA-256 commitment of
        the response text, and a signature over the work. Backend failures
        are reported to the coordinator as an error message.
        """
        req_id = msg.get("id")
        # `or {}` also covers an explicit null body.
        body = dict(msg.get("body") or {})
        requested = body.get("model", "")

        # Once draining, refuse new work so it can be routed elsewhere. This
        # closes the race between the operator leaving and the coordinator
        # marking us un-routable; in-flight requests (already past this point)
        # are unaffected and run to completion.
        if self._draining:
            await self._send(
                protocol.MSG_ERROR, id=req_id,
                error="node is draining; request not accepted",
            )
            return

        # Enforce the single committed model at the node boundary. A node only
        # serves the model it has hot-loaded; anything else is refused so it
        # can never be coerced into running a cold/different model.
        if requested and requested != self.model:
            await self._send(
                protocol.MSG_ERROR, id=req_id,
                error=f"this node only serves '{self.model}', not '{requested}'",
            )
            return
        body["model"] = self.model  # pin, in case the request omitted it
        prompt_commit = hashlib.sha256(
            _json.dumps(body.get("messages", []), sort_keys=True).encode()).hexdigest()

        self._inflight_ids.add(req_id)
        self._inflight_zero.clear()
        stream = bool(body.get("stream", False))
        try:
            if stream:
                acc, tokens = [], 0
                async for line in self.backend.chat_stream(body):
                    await self._send(protocol.MSG_CHUNK, id=req_id, data=line)
                    # Each streamed line that carries a "content" key is
                    # counted as one token.
                    if '"content"' in line:
                        tokens += 1
                        c = _extract_content(line)
                        if c:
                            acc.append(c)
                response_commit = hashlib.sha256("".join(acc).encode()).hexdigest()
                sig = self._sign_work(req_id, prompt_commit, tokens, response_commit, "complete")
                await self._send(protocol.MSG_END, id=req_id, tokens=tokens,
                                 response_commit=response_commit, sig=sig)
            else:
                result = await self.backend.chat_once(body)
                content = ""
                try:
                    content = result["choices"][0]["message"].get("content") or ""
                except Exception:
                    # Unexpected result shape: commit to the empty string.
                    pass
                tokens = (result.get("usage", {}) or {}).get("completion_tokens", 0)
                response_commit = hashlib.sha256(content.encode()).hexdigest()
                sig = self._sign_work(req_id, prompt_commit, tokens, response_commit, "complete")
                await self._send(protocol.MSG_RESULT, id=req_id, body=result,
                                 tokens=tokens, response_commit=response_commit, sig=sig)
        except Exception as e:
            log.exception("inference failed for %s", req_id)
            try:
                await self._send(protocol.MSG_ERROR, id=req_id, error=str(e))
            except Exception:
                # The failure may be the link itself; nothing left to tell.
                log.debug("could not report failure for %s", req_id, exc_info=True)
        finally:
            self._inflight_ids.discard(req_id)
            if not self._inflight_ids:
                self._inflight_zero.set()

    def _sign_work(self, req_id, prompt_commit, tokens, response_commit, delivery) -> str:
        """Sign the work payload for one request with this node's identity."""
        payload = keys.work_payload(req_id, self.model, prompt_commit, tokens,
                                    response_commit, delivery)
        return self.identity.sign(payload)

    async def _send(self, msg_type: str, **fields: Any):
        """Encode and send one frame; silently does nothing when offline.

        Sends are serialised through ``_send_lock`` because concurrent
        inference tasks share the same socket.
        """
        if self._ws is None:
            return
        frame = protocol.encode(msg_type, **fields)
        async with self._send_lock:
            # Re-read after acquiring the lock: the connection may have been
            # torn down (``_ws`` reset to None) while this call was waiting.
            ws = self._ws
            if ws is None:
                return
            await ws.send(frame)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
async def serve(
    coordinator_url: str,
    name: str,
    backend_kind: str,
    model: str,
    ollama_url: str,
    token: str = "",
    require_attestation: bool = False,
    warm: bool = True,
    drain_timeout: float = 120.0,
    owner_account: str = "",
    cluster: str = "",
):
    """Build a backend and a ``NodeAgent`` for ``model`` and run it forever.

    Args:
        coordinator_url: Coordinator URL handed to the agent.
        name: Node name handed to the agent.
        backend_kind: Backend selector passed to ``make_backend``.
        model: The one model this node serves; must be non-empty.
        ollama_url: Backend URL passed to ``make_backend``.
        token: Optional coordinator token.
        require_attestation: Forwarded to the agent.
        warm: When true, ask the backend to warm the model before going live.
        drain_timeout: Forwarded to the agent.
        owner_account: Forwarded to the agent.
        cluster: Forwarded to the agent.

    Raises:
        ValueError: If ``model`` is empty.
    """
    if not model:
        raise ValueError("a single --model must be specified; a node serves exactly one model")

    engine = make_backend(backend_kind, model=model, ollama_url=ollama_url)

    # Warm the committed model before it is advertised to the network. A
    # failed warm-up is only logged; the node still goes on to serve.
    if warm:
        log.info("hot-loading '%s' into memory ...", model)
        try:
            await engine.warm(model)
            log.info("'%s' is hot", model)
        except Exception as warm_error:
            log.warning("warm-up failed for '%s' (%s); serving anyway", model, warm_error)

    agent_options = dict(
        coordinator_url=coordinator_url,
        name=name,
        backend=engine,
        model=model,
        token=token,
        require_attestation=require_attestation,
        drain_timeout=drain_timeout,
        owner_account=owner_account,
        cluster=cluster,
    )
    node = NodeAgent(**agent_options)
    await node.run_forever()
|
nvdc/app.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""Local visual client: a small web app the node operator opens in a browser.
|
|
2
|
+
|
|
3
|
+
`nvdc app` starts this server and opens the page. It shows hardware + inference
|
|
4
|
+
mechanism, a catalog of popular models with fit indicators, lets the operator
|
|
5
|
+
load one into memory (hot), and only then enables "Go Live" on the network.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import json as _json
|
|
12
|
+
import logging
|
|
13
|
+
import secrets
|
|
14
|
+
import time
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import httpx
|
|
18
|
+
from fastapi import FastAPI, Request, Response
|
|
19
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
20
|
+
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
|
|
21
|
+
|
|
22
|
+
from . import keys
|
|
23
|
+
from .runtime import NodeRuntime
|
|
24
|
+
|
|
25
|
+
log = logging.getLogger("nvdc.app")
|
|
26
|
+
|
|
27
|
+
WEB_DIR = Path(__file__).parent / "web"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def create_app(runtime: NodeRuntime) -> FastAPI:
    """Build the local FastAPI app that fronts ``runtime`` for the browser UI.

    The app serves the bundled ``index.html``, exposes node controls (state,
    load, go live/offline, price), and proxies consumer, wallet, payment and
    chat calls to the coordinator at ``runtime.coordinator_http``.

    Args:
        runtime: The node runtime the routes read from and act on.

    Returns:
        The configured FastAPI application.
    """
    import math  # function-scope: only the numeric validator below needs it

    app = FastAPI(title="nvdc node client")
    app.state.runtime = runtime

    # Let the hosted Vercel site detect and read THIS locally-running client.
    # NOTE(review): a wildcard origin combined with the Private Network Access
    # header below lets ANY web page the operator visits call every route
    # here, including the wallet/payout POSTs. Consider restricting
    # allow_origins to the hosted site's origin.
    app.add_middleware(
        CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
    )

    def _bad_request(message: str) -> JSONResponse:
        """Uniform 400 reply for unusable client input."""
        return JSONResponse({"ok": False, "error": message}, status_code=400)

    async def _json_object(request: Request):
        """Return the request body as a dict, or None when the body is not
        valid JSON or is not a JSON object."""
        try:
            payload = await request.json()
        except ValueError:  # JSONDecodeError / UnicodeDecodeError
            return None
        return payload if isinstance(payload, dict) else None

    def _non_negative_float(value) -> float:
        """Convert ``value`` to a finite float >= 0.

        Raises TypeError/ValueError/OverflowError for anything else, so
        callers can map all three to a 400.
        """
        number = float(value)
        if not math.isfinite(number) or number < 0:
            raise ValueError("expected a finite, non-negative number")
        return number

    body_error = "request body must be a JSON object"

    @app.middleware("http")
    async def private_network_access(request: Request, call_next):
        # A public (https) page fetching localhost triggers Chrome's Private
        # Network Access preflight, which needs this header to be allowed.
        if request.method == "OPTIONS" and request.headers.get(
                "access-control-request-private-network") == "true":
            resp = Response(status_code=204)
            resp.headers["Access-Control-Allow-Origin"] = request.headers.get("origin", "*")
            resp.headers["Access-Control-Allow-Methods"] = "*"
            resp.headers["Access-Control-Allow-Headers"] = "*"
            resp.headers["Access-Control-Allow-Private-Network"] = "true"
            return resp
        resp = await call_next(request)
        resp.headers["Access-Control-Allow-Private-Network"] = "true"
        return resp

    @app.get("/", response_class=HTMLResponse)
    async def index():
        return (WEB_DIR / "index.html").read_text(encoding="utf-8")

    @app.get("/api/state")
    async def state():
        await runtime.refresh_installed()
        return runtime.snapshot()

    @app.post("/api/load")
    async def load(request: Request):
        body = await _json_object(request)
        if body is None:
            return _bad_request(body_error)
        model_id = body.get("model")
        if not model_id:
            return JSONResponse({"ok": False, "error": "model required"}, status_code=400)
        ok, msg = runtime.start_load(model_id)
        return {"ok": ok, "message": msg}

    @app.post("/api/network/live")
    async def live(request: Request):
        body = await _json_object(request)
        if body is None:
            return _bad_request(body_error)
        ok, msg = runtime.go_live(
            coordinator=body.get("coordinator", ""),
            token=body.get("token", ""),
        )
        return {"ok": ok, "message": msg}

    @app.post("/api/network/offline")
    async def offline():
        ok, msg = runtime.go_offline()
        return {"ok": ok, "message": msg}

    # ---- consumer proxies to the coordinator --------------------------------
    # Keep the UI same-origin (no CORS) by proxying the coordinator's public API
    # through this local app. These power the Home / Chat / Network tabs.
    @app.get("/api/coordinator")
    async def coordinator_info():
        base = runtime.coordinator_http
        return {"http": base, "openai_base": (base + "/v1") if base else ""}

    async def _proxy_get(path: str):
        """GET ``path`` on the coordinator and relay its JSON and status."""
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"error": "no coordinator configured"}, status_code=503)
        try:
            async with httpx.AsyncClient(timeout=10) as c:
                r = await c.get(base + path)
            return JSONResponse(r.json(), status_code=r.status_code)
        except Exception as e:
            return JSONResponse({"error": f"coordinator unreachable: {e}"}, status_code=502)

    @app.get("/api/net/status")
    async def net_status():
        return await _proxy_get("/api/network")

    @app.get("/api/net/models")
    async def net_models():
        return await _proxy_get("/v1/models")

    @app.get("/api/net/miners")
    async def net_miners():
        return await _proxy_get("/nodes")

    @app.get("/api/net/market")
    async def net_market():
        return await _proxy_get("/api/market")

    @app.get("/api/net/marketplace")
    async def net_marketplace():
        return await _proxy_get("/v1/marketplace")

    @app.get("/api/net/ledger")
    async def net_ledger():
        return await _proxy_get("/api/ledger?limit=40")

    @app.post("/api/net/verify")
    async def net_verify(request: Request):
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"error": "no coordinator"}, status_code=503)
        try:
            # The receipt is relayed as-is, so any JSON value is accepted.
            receipt = await request.json()
        except ValueError:
            return _bad_request("request body must be valid JSON")
        try:
            async with httpx.AsyncClient(timeout=8) as c:
                r = await c.post(base + "/api/verify", json=receipt)
            return JSONResponse(r.json(), status_code=r.status_code)
        except Exception as e:
            return JSONResponse({"error": str(e)}, status_code=502)

    # ---- wallet (backed by the coordinator account) -------------------------
    def _acct_headers():
        return {"X-NVDC-Account": runtime.wallet.account_id}

    @app.get("/api/wallet")
    async def wallet():
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"account_id": runtime.wallet.account_id,
                                 "balance_usd": 0, "total_earned": 0, "total_spent": 0,
                                 "receipts": [], "error": "no coordinator"}, status_code=200)
        try:
            async with httpx.AsyncClient(timeout=8) as c:
                r = await c.get(base + "/api/account", headers=_acct_headers())
            return JSONResponse(r.json(), status_code=r.status_code)
        except Exception as e:
            # Deliberately 200: the response still carries the local account id.
            return JSONResponse({"account_id": runtime.wallet.account_id,
                                 "balance_usd": 0, "error": str(e)}, status_code=200)

    @app.post("/api/wallet/buy")
    async def wallet_buy(request: Request):
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"ok": False, "error": "no coordinator configured"}, status_code=503)
        body = await _json_object(request)
        if body is None:
            return _bad_request(body_error)
        try:
            # NaN, infinity and negative amounts are rejected here rather than
            # being forwarded to the coordinator.
            amount = _non_negative_float(body.get("amount_usd", 0))
        except (TypeError, ValueError, OverflowError):
            return JSONResponse({"ok": False, "error": "invalid amount"}, status_code=400)
        try:
            async with httpx.AsyncClient(timeout=8) as c:
                r = await c.post(base + "/api/account/deposit",
                                 json={"amount_usd": amount}, headers=_acct_headers())
            j = r.json()
            return {"ok": r.status_code == 200, "bought_usd": amount, **j}
        except Exception as e:
            return JSONResponse({"ok": False, "error": str(e)}, status_code=502)

    async def _coordinator_post(path: str, json_body=None):
        """POST ``json_body`` to the coordinator with the account header and
        relay its JSON and status."""
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"error": "no coordinator configured"}, status_code=503)
        try:
            async with httpx.AsyncClient(timeout=20) as c:
                r = await c.post(base + path, json=json_body or {}, headers=_acct_headers())
            return JSONResponse(r.json(), status_code=r.status_code)
        except Exception as e:
            return JSONResponse({"error": f"coordinator unreachable: {e}"}, status_code=502)

    @app.get("/api/payments/config")
    async def payments_config():
        return await _proxy_get("/api/payments/config")

    @app.post("/api/wallet/checkout")
    async def wallet_checkout(request: Request):
        body = await _json_object(request)
        if body is None:
            return _bad_request(body_error)
        return await _coordinator_post("/api/account/checkout",
                                       {"amount_usd": body.get("amount_usd", 0)})

    @app.post("/api/payout/onboard")
    async def payout_onboard():
        return await _coordinator_post("/api/payout/onboard")

    @app.post("/api/payout/withdraw")
    async def payout_withdraw():
        return await _coordinator_post("/api/payout/withdraw")

    @app.post("/api/price")
    async def set_price(request: Request):
        body = await _json_object(request)
        if body is None:
            return _bad_request(body_error)
        try:
            price = _non_negative_float(body.get("price_per_mtok", 0))
        except (TypeError, ValueError, OverflowError):
            return JSONResponse({"ok": False, "error": "invalid price"}, status_code=400)
        runtime.set_price(price)
        return {"ok": True, "price_per_mtok": runtime.price_per_mtok}

    @app.post("/api/net/chat")
    async def net_chat(request: Request):
        base = runtime.coordinator_http
        if not base:
            return JSONResponse({"error": "no coordinator configured"}, status_code=503)
        body = await _json_object(request)
        if body is None:
            return _bad_request(body_error)
        # The app's own consumer account funds these requests.
        account = runtime.wallet.account_id
        headers = {"Content-Type": "application/json", "X-NVDC-Account": account}
        # Routing preferences are passed through only when the caller set them.
        for h in ("x-nvdc-pin", "x-nvdc-min-rating", "x-nvdc-max-price", "x-nvdc-models"):
            if request.headers.get(h):
                headers[h] = request.headers[h]

        # Sign the request authorization so the receipt is dual-signed.
        prompt_commit = hashlib.sha256(
            _json.dumps(body.get("messages", []), sort_keys=True).encode()).hexdigest()
        nonce = secrets.token_hex(8)
        ts = int(time.time())
        try:
            max_price = float(headers.get("x-nvdc-max-price", -1))
        except ValueError:
            max_price = -1  # unparseable header: same value as when it is absent
        auth = keys.auth_payload(account, body.get("model", ""), prompt_commit,
                                 max_price, nonce, ts)
        headers["X-NVDC-Consumer-Sig"] = runtime.wallet.sign(auth)
        headers["X-NVDC-Nonce"] = nonce
        headers["X-NVDC-Ts"] = str(ts)

        async def _balance():
            """Fetch the account balance; None when it cannot be read."""
            try:
                async with httpx.AsyncClient(timeout=5) as c:
                    r = await c.get(base + "/api/account", headers={"X-NVDC-Account": runtime.wallet.account_id})
                return float(r.json().get("balance_usd", 0))
            except Exception:
                return None

        if body.get("stream"):
            async def gen():
                # Relay the coordinator's SSE lines, then append one billing
                # event before the terminating [DONE].
                tokens, price, node, status = 0, 0.0, "", 200
                try:
                    async with httpx.AsyncClient(timeout=None) as c:
                        async with c.stream("POST", base + "/v1/chat/completions",
                                            json=body, headers=headers) as r:
                            status = r.status_code
                            price = float(r.headers.get("x-nvdc-price-per-mtok", 0) or 0)
                            node = r.headers.get("x-nvdc-node", "")
                            if status != 200:
                                txt = (await r.aread()).decode("utf-8", "replace")
                                yield "data: " + _json.dumps({"error": {"message": txt}}) + "\n\n"
                                return
                            async for line in r.aiter_lines():
                                if not line:
                                    continue
                                if line.strip() == "data: [DONE]":
                                    break
                                # One token is counted per line carrying a
                                # "content" key.
                                if '"content"' in line:
                                    tokens += 1
                                yield line + "\n\n"
                    cost = tokens * price / 1_000_000.0
                    bal = await _balance()
                    yield "data: " + _json.dumps({"nvdc_billing": {
                        "node": node, "price_per_mtok": price, "tokens": tokens,
                        "cost_usd": round(cost, 8),
                        "balance_usd": round(bal, 6) if bal is not None else None}}) + "\n\n"
                    yield "data: [DONE]\n\n"
                except Exception as e:
                    yield "data: " + _json.dumps({"error": {"message": str(e)}}) + "\n\n"
            return StreamingResponse(gen(), media_type="text/event-stream")

        try:
            async with httpx.AsyncClient(timeout=None) as c:
                r = await c.post(base + "/v1/chat/completions", json=body, headers=headers)
            j = r.json()
            if r.status_code == 200:
                bal = await _balance()
                j["nvdc_billing"] = {
                    "node": r.headers.get("x-nvdc-node", ""),
                    "price_per_mtok": float(r.headers.get("x-nvdc-price-per-mtok", 0) or 0),
                    "tokens": int(r.headers.get("x-nvdc-tokens", 0) or 0),
                    "cost_usd": float(r.headers.get("x-nvdc-cost", 0) or 0),
                    "receipt": r.headers.get("x-nvdc-receipt", ""),
                    "balance_usd": round(bal, 6) if bal is not None else None}
            return JSONResponse(j, status_code=r.status_code)
        except Exception as e:
            return JSONResponse({"error": f"coordinator unreachable: {e}"}, status_code=502)

    return app
|