agent-relay-runner 0.22.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,383 @@
1
+ import { errMessage, isRecord } from "agent-relay-sdk";
2
+ import { logger } from "./logger";
3
+
4
+ // Loose fetch signature so tests can inject a plain async stub without Bun's `preconnect`
5
+ // member; the real global `fetch` satisfies it.
6
+ export type FetchLike = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
7
+
8
+ // Stage 2 of #213/#215 — the Runner becomes the MCP endpoint the agent connects to,
9
+ // fronting the relay. The agent's MCP client points at this localhost server instead of
10
+ // directly at the relay, so the Runner owns the relay connection, reconnect/backoff, and a
11
+ // durable buffer. A relay restart/crash becomes invisible to the agent.
12
+ //
13
+ // This is a TRANSPARENT JSON-RPC pass-through to the relay's `/api/mcp` (which stays the
14
+ // sole enforcement authority). It intervenes in exactly four places:
15
+ //
16
+ // 1. `initialize` — forward, then advertise `capabilities.tools.listChanged: true`,
17
+ // the live-tool-set capability the relay's static endpoint can't advertise.
18
+ // 2. `tools/list` — forward (the relay already scope-filters per token), then context-
19
+ // NARROW: hide tools that don't apply to this agent's workspace mode/
20
+ // state (e.g. `relay_workspace_*` for a non-worktree agent). Strictly
21
+ // a subset — never widens. Served from a last-known cache during a blip.
22
+ // 3. `tools/call` — forward with the runner's LIVE token; for bufferable writes during a
23
+ // relay outage, enqueue durably + return a synthetic "queued" ok so the
24
+ // call is never lost. Mutating spawn/shutdown are forwarded, never local.
25
+ // 4. `GET` (SSE) — the server→client notification channel; emits
26
+ // `notifications/tools/list_changed` when a transition changes the
27
+ // narrowed set, so the agent's tool menu updates mid-session.
28
+ //
29
+ // Narrow-never-widen is the safety invariant: filtering `tools/list` is UX/token-efficiency,
30
+ // NEVER enforcement. The proxy can only ever surface a subset of what the relay's scope filter
31
+ // already returned, so a proxy bug can never become an auth bypass — the relay owns the lock at
32
+ // `tools/call`, the proxy owns the menu.
33
+
34
+ const PROXY_PATH = "/mcp";
35
+ // Relay-side failures we treat as "relay down" (buffer/serve-cache), as opposed to a real 4xx
36
+ // rejection that must be surfaced to the agent verbatim.
37
+ const GATEWAY_STATUSES = new Set([502, 503, 504]);
38
+ const SSE_KEEPALIVE_MS = 25_000;
39
+
40
+ // The write tools whose loss during a relay outage is unacceptable and whose result the agent
41
+ // does not need synchronously — safe to queue durably and replay on reconnect. Reads, claims
42
+ // (409 contention), spawn/shutdown (need a real ack) are deliberately NOT bufferable.
43
+ export const DEFAULT_BUFFERABLE_TOOLS = new Set<string>([
44
+ "relay_send_message",
45
+ "relay_reply",
46
+ "relay_workspace_ready",
47
+ ]);
48
+
49
+ // Tools surfaced ONLY to an agent that owns a live isolated worktree. For any other agent the
50
+ // proxy narrows them out of `tools/list` even though the token scope permits them (the relay
51
+ // would return them). This is the context the coarse token scope can't express (#214/#215).
52
+ const WORKTREE_ONLY_TOOLS = new Set<string>([
53
+ "relay_workspace_status",
54
+ "relay_workspace_ready",
55
+ "relay_workspace_deps",
56
+ "relay_workspace_list",
57
+ "relay_workspace_claim",
58
+ "relay_workspace_release",
59
+ "relay_workspace_land",
60
+ ]);
61
+
62
+ export interface ProxyContext {
63
+ // The agent owns a live (non-terminal) isolated git worktree → workspace tools apply.
64
+ isolatedWorktree: boolean;
65
+ }
66
+
67
+ export interface BufferedToolCall {
68
+ tool: string;
69
+ arguments: Record<string, unknown>;
70
+ idempotencyKey: string;
71
+ }
72
+
73
+ export interface RelayMcpProxyOptions {
74
+ // The relay's MCP endpoint, e.g. http://localhost:4850/api/mcp.
75
+ relayMcpEndpoint: string;
76
+ // The runner's LIVE relay token (read on every forward so rotation is invisible to the agent).
77
+ getToken(): string | undefined;
78
+ // The bearer the agent must present to this localhost proxy (a per-session secret the runner
79
+ // mints and injects into the agent env). Decouples the agent from the rotating relay token.
80
+ authSecret: string;
81
+ // Persist a bufferable write durably for replay on reconnect (wired to the runner outbox).
82
+ enqueueBuffered(call: BufferedToolCall): void;
83
+ initialContext?: ProxyContext;
84
+ bufferableTools?: Set<string>;
85
+ // Test seam.
86
+ fetchImpl?: FetchLike;
87
+ }
88
+
89
+ interface JsonRpcMessage {
90
+ jsonrpc?: string;
91
+ id?: string | number | null;
92
+ method?: string;
93
+ params?: unknown;
94
+ }
95
+
96
+ interface SseClient {
97
+ controller: ReadableStreamDefaultController<Uint8Array>;
98
+ keepalive: ReturnType<typeof setInterval>;
99
+ }
100
+
101
+ export class RelayMcpProxy {
102
+ private readonly relayMcpEndpoint: string;
103
+ private readonly getToken: () => string | undefined;
104
+ private readonly authSecret: string;
105
+ private readonly enqueueBuffered: (call: BufferedToolCall) => void;
106
+ private readonly bufferableTools: Set<string>;
107
+ private readonly fetchImpl: FetchLike;
108
+ private readonly encoder = new TextEncoder();
109
+
110
+ private context: ProxyContext;
111
+ private server?: ReturnType<typeof Bun.serve>;
112
+ private readonly sseClients = new Set<SseClient>();
113
+ // Last successful relay tools/list — narrowed and served when the relay is briefly down so a
114
+ // read still works (reads serve from last-known where safe).
115
+ private lastRelayTools: Array<Record<string, unknown>> = [];
116
+ private lastNarrowedNames = "";
117
+
118
+ constructor(options: RelayMcpProxyOptions) {
119
+ this.relayMcpEndpoint = options.relayMcpEndpoint;
120
+ this.getToken = options.getToken;
121
+ this.authSecret = options.authSecret;
122
+ this.enqueueBuffered = options.enqueueBuffered;
123
+ this.bufferableTools = options.bufferableTools ?? DEFAULT_BUFFERABLE_TOOLS;
124
+ this.fetchImpl = options.fetchImpl ?? fetch;
125
+ this.context = options.initialContext ?? { isolatedWorktree: false };
126
+ }
127
+
128
+ start(): { url: string; port: number } {
129
+ const self = this;
130
+ this.server = Bun.serve({
131
+ hostname: "127.0.0.1",
132
+ port: 0,
133
+ // SSE streams are long-lived; disable Bun's idle timeout and keep them alive with pings.
134
+ idleTimeout: 0,
135
+ fetch(req) {
136
+ return self.handle(req);
137
+ },
138
+ });
139
+ const port = this.server.port;
140
+ if (port === undefined) throw new Error("relay MCP proxy did not bind a port");
141
+ return { url: `http://127.0.0.1:${port}${PROXY_PATH}`, port };
142
+ }
143
+
144
+ stop(): void {
145
+ for (const client of this.sseClients) {
146
+ clearInterval(client.keepalive);
147
+ try { client.controller.close(); } catch { /* already closed */ }
148
+ }
149
+ this.sseClients.clear();
150
+ this.server?.stop(true);
151
+ this.server = undefined;
152
+ }
153
+
154
+ // The runner calls this on a workspace mode/state transition (active→ready→merged→terminal,
155
+ // or shared↔worktree). If it changes which tools the agent can see, emit list_changed so the
156
+ // agent's menu updates mid-session instead of staying frozen until reconnect.
157
+ setContext(context: ProxyContext): void {
158
+ this.context = context;
159
+ this.maybeEmitListChanged();
160
+ }
161
+
162
+ // The runner calls this after re-minting its runtime token (scope may have changed — e.g. a
163
+ // profile change grants/revokes command:spawn). Re-fetch the relay's now-differently-scoped
164
+ // tool list with the live token and emit list_changed if the visible set changed. This is the
165
+ // "token scope transition" path; setContext covers the workspace mode/state path. Best-effort —
166
+ // a failed refresh keeps the last-known list (the next tools/list refreshes it anyway).
167
+ async refreshTools(): Promise<void> {
168
+ const relay = await this.forward({ method: "tools/list", id: 0 }).catch(() => null);
169
+ const tools = relay && isRecord(relay.body?.result) && Array.isArray((relay.body!.result as Record<string, unknown>).tools)
170
+ ? ((relay.body!.result as Record<string, unknown>).tools as Array<Record<string, unknown>>)
171
+ : null;
172
+ if (!tools) return;
173
+ this.lastRelayTools = tools;
174
+ this.maybeEmitListChanged();
175
+ }
176
+
177
+ private async handle(req: Request): Promise<Response> {
178
+ const url = new URL(req.url);
179
+ if (url.pathname !== PROXY_PATH) return new Response("not found", { status: 404 });
180
+ if (!this.authorized(req)) {
181
+ return Response.json(jsonRpcError(null, -32001, "proxy auth required"), { status: 401 });
182
+ }
183
+ // GET → open the server→client SSE notification channel (streamable-HTTP transport).
184
+ if (req.method === "GET") return this.openSse();
185
+ if (req.method === "DELETE") return new Response(null, { status: 204 });
186
+ if (req.method !== "POST") return new Response("method not allowed", { status: 405 });
187
+ return this.handleRpc(req);
188
+ }
189
+
190
+ private authorized(req: Request): boolean {
191
+ const header = req.headers.get("authorization") ?? "";
192
+ const bearer = header.startsWith("Bearer ") ? header.slice(7) : "";
193
+ return bearer === this.authSecret;
194
+ }
195
+
196
+ private async handleRpc(req: Request): Promise<Response> {
197
+ let message: JsonRpcMessage;
198
+ try {
199
+ const body = await req.json();
200
+ if (!body || typeof body !== "object" || Array.isArray(body)) {
201
+ return Response.json(jsonRpcError(null, -32600, "JSON-RPC body must be an object"));
202
+ }
203
+ message = body as JsonRpcMessage;
204
+ } catch {
205
+ return Response.json(jsonRpcError(null, -32700, "invalid JSON-RPC body"));
206
+ }
207
+ const id = message.id ?? null;
208
+ const method = message.method;
209
+
210
+ if (method === "initialize") return this.handleInitialize(id, message);
211
+ if (method === "tools/list") return this.handleToolsList(id, message);
212
+ if (method === "tools/call") return this.handleToolsCall(id, message);
213
+ // Everything else (notifications/initialized, ping, …) forwards verbatim.
214
+ return this.forwardRaw(message);
215
+ }
216
+
217
+ private async handleInitialize(id: string | number | null, message: JsonRpcMessage): Promise<Response> {
218
+ const relay = await this.forward({ ...message, method: "initialize", id }).catch((error) => {
219
+ logger.warn("mcp-proxy", `initialize forward failed: ${errMessage(error)}`);
220
+ return null;
221
+ });
222
+ // Degrade gracefully if the relay is momentarily down at connect: still hand the agent a
223
+ // usable initialize result advertising the proxy's capabilities.
224
+ const result: Record<string, unknown> = isRecord(relay?.body?.result) ? { ...(relay!.body!.result as Record<string, unknown>) } : {
225
+ protocolVersion: "2024-11-05",
226
+ serverInfo: { name: "agent-relay", title: "Agent Relay (via runner)", version: "proxy" },
227
+ };
228
+ const caps = isRecord(result.capabilities) ? { ...result.capabilities } : {};
229
+ // The capability the relay's static endpoint doesn't advertise: live tool sets.
230
+ caps.tools = { ...(isRecord(caps.tools) ? caps.tools : {}), listChanged: true };
231
+ result.capabilities = caps;
232
+ return Response.json(jsonRpcResult(id, result));
233
+ }
234
+
235
+ private async handleToolsList(id: string | number | null, message: JsonRpcMessage): Promise<Response> {
236
+ const relay = await this.forward({ ...message, method: "tools/list", id }).catch(() => null);
237
+ const tools = relay && isRecord(relay.body?.result) && Array.isArray((relay.body!.result as Record<string, unknown>).tools)
238
+ ? ((relay.body!.result as Record<string, unknown>).tools as Array<Record<string, unknown>>)
239
+ : null;
240
+ if (tools) this.lastRelayTools = tools; // refresh the last-known base list
241
+ // If the relay is down and we have no cache yet, surface an empty list rather than erroring —
242
+ // the agent can still operate (writes buffer; tools/list refreshes on the next call).
243
+ const base = tools ?? this.lastRelayTools;
244
+ const narrowed = this.narrow(base);
245
+ this.lastNarrowedNames = toolNames(narrowed);
246
+ return Response.json(jsonRpcResult(id, { tools: narrowed }));
247
+ }
248
+
249
+ // Strict subset of the relay's already-scope-filtered list. Only removes — never adds. The one
250
+ // narrowing rule today: workspace tools apply only to an agent that owns a live worktree.
251
+ private narrow(tools: Array<Record<string, unknown>>): Array<Record<string, unknown>> {
252
+ return tools.filter((tool) => {
253
+ const name = typeof tool.name === "string" ? tool.name : "";
254
+ if (WORKTREE_ONLY_TOOLS.has(name) && !this.context.isolatedWorktree) return false;
255
+ return true;
256
+ });
257
+ }
258
+
259
+ private async handleToolsCall(id: string | number | null, message: JsonRpcMessage): Promise<Response> {
260
+ const params = isRecord(message.params) ? message.params : {};
261
+ const toolName = typeof params.name === "string" ? params.name : "";
262
+ const args = isRecord(params.arguments) ? params.arguments : {};
263
+
264
+ const relay = await this.forward(message).catch((error) => {
265
+ // A thrown fetch = transport failure (relay unreachable / DNS / connection refused).
266
+ return { ok: false, status: 0, body: null, transportError: errMessage(error) } as ForwardResult;
267
+ });
268
+
269
+ const relayDown = !relay.ok && (relay.status === 0 || GATEWAY_STATUSES.has(relay.status));
270
+ if (relayDown && this.bufferableTools.has(toolName)) {
271
+ // Durably queue the write and tell the agent it's safely accepted. It replays on reconnect.
272
+ const idempotencyKey = typeof args.idempotencyKey === "string" && args.idempotencyKey
273
+ ? args.idempotencyKey
274
+ : `mcp-${toolName}-${crypto.randomUUID()}`;
275
+ this.enqueueBuffered({ tool: toolName, arguments: { ...args, idempotencyKey }, idempotencyKey });
276
+ logger.info("mcp-proxy", `relay unreachable — buffered ${toolName} (idempotencyKey=${idempotencyKey})`);
277
+ return Response.json(jsonRpcResult(id, toolResult({
278
+ queued: true,
279
+ tool: toolName,
280
+ idempotencyKey,
281
+ note: "Relay was unreachable; your call was queued durably and will be delivered automatically when the relay comes back. It is not lost.",
282
+ })));
283
+ }
284
+
285
+ if (relay.body) return Response.json(relay.body);
286
+ // Relay down and not bufferable: a real error the agent must see (and can retry).
287
+ return Response.json(jsonRpcError(id, -32002, `relay unreachable: ${relay.transportError ?? `status ${relay.status}`}`));
288
+ }
289
+
290
+ // Verbatim forward for methods with no proxy-specific handling. Returns the relay's response
291
+ // body+status unchanged (or a JSON-RPC error if the relay is down).
292
+ private async forwardRaw(message: JsonRpcMessage): Promise<Response> {
293
+ const relay = await this.forward(message).catch((error) => {
294
+ return { ok: false, status: 0, body: null, transportError: errMessage(error) } as ForwardResult;
295
+ });
296
+ if (relay.body) return Response.json(relay.body, { status: relay.status || 200 });
297
+ // Notifications (no id) tolerate a down relay — ack locally; requests get an error.
298
+ if (message.id === undefined || message.id === null) return new Response(null, { status: 202 });
299
+ return Response.json(jsonRpcError(message.id, -32002, `relay unreachable: ${relay.transportError ?? `status ${relay.status}`}`));
300
+ }
301
+
302
+ // POST the JSON-RPC message to the relay with the runner's LIVE token. The agent's incoming
303
+ // bearer is the proxy secret; we substitute the real relay credential here so the agent never
304
+ // holds it and token rotation is invisible. The body is re-serialized from the already-parsed
305
+ // message (the request stream was consumed during dispatch and can't be re-read).
306
+ private async forward(message: JsonRpcMessage): Promise<ForwardResult> {
307
+ const token = this.getToken();
308
+ const headers: Record<string, string> = { "content-type": "application/json" };
309
+ if (token) headers.authorization = `Bearer ${token}`;
310
+ const payload: Record<string, unknown> = { jsonrpc: "2.0", method: message.method };
311
+ if (message.id !== undefined) payload.id = message.id;
312
+ if (message.params !== undefined) payload.params = message.params;
313
+ const response = await this.fetchImpl(this.relayMcpEndpoint, { method: "POST", headers, body: JSON.stringify(payload) });
314
+ const text = await response.text();
315
+ let parsed: Record<string, unknown> | null = null;
316
+ if (text) { try { parsed = JSON.parse(text); } catch { parsed = null; } }
317
+ return { ok: response.ok, status: response.status, body: parsed };
318
+ }
319
+
320
+ private openSse(): Response {
321
+ const self = this;
322
+ let client: SseClient;
323
+ const stream = new ReadableStream<Uint8Array>({
324
+ start(controller) {
325
+ controller.enqueue(self.encoder.encode(": connected\n\n"));
326
+ const keepalive = setInterval(() => {
327
+ try { controller.enqueue(self.encoder.encode(": keepalive\n\n")); } catch { /* closed */ }
328
+ }, SSE_KEEPALIVE_MS);
329
+ keepalive.unref?.();
330
+ client = { controller, keepalive };
331
+ self.sseClients.add(client);
332
+ },
333
+ cancel() {
334
+ if (client) {
335
+ clearInterval(client.keepalive);
336
+ self.sseClients.delete(client);
337
+ }
338
+ },
339
+ });
340
+ return new Response(stream, {
341
+ headers: { "content-type": "text/event-stream", "cache-control": "no-cache", connection: "keep-alive" },
342
+ });
343
+ }
344
+
345
+ private maybeEmitListChanged(): void {
346
+ const narrowed = this.narrow(this.lastRelayTools);
347
+ const names = toolNames(narrowed);
348
+ if (names === this.lastNarrowedNames) return; // no visible change → no notification
349
+ this.lastNarrowedNames = names;
350
+ const frame = this.encoder.encode(`event: message\ndata: ${JSON.stringify({ jsonrpc: "2.0", method: "notifications/tools/list_changed" })}\n\n`);
351
+ for (const client of this.sseClients) {
352
+ try { client.controller.enqueue(frame); } catch { /* dropped client; cancel() cleans up */ }
353
+ }
354
+ logger.debug("mcp-proxy", `tools/list_changed emitted to ${this.sseClients.size} client(s)`);
355
+ }
356
+
357
+ // Test/observability hooks.
358
+ sseClientCount(): number { return this.sseClients.size; }
359
+ narrowedToolNames(): string[] { return this.narrow(this.lastRelayTools).map((t) => String(t.name)); }
360
+ }
361
+
362
+ interface ForwardResult {
363
+ ok: boolean;
364
+ status: number;
365
+ body: Record<string, unknown> | null;
366
+ transportError?: string;
367
+ }
368
+
369
+ function toolNames(tools: Array<Record<string, unknown>>): string {
370
+ return tools.map((t) => String(t.name)).sort().join(",");
371
+ }
372
+
373
+ function jsonRpcResult(id: string | number | null, result: unknown): Record<string, unknown> {
374
+ return { jsonrpc: "2.0", id, result };
375
+ }
376
+
377
+ function jsonRpcError(id: string | number | null, code: number, message: string): Record<string, unknown> {
378
+ return { jsonrpc: "2.0", id, error: { code, message } };
379
+ }
380
+
381
+ function toolResult(result: unknown): Record<string, unknown> {
382
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }], structuredContent: result };
383
+ }
package/src/relay-mcp.ts CHANGED
@@ -26,14 +26,17 @@ export function relayMcpEndpoint(relayUrl: string): string {
26
26
  // Claude: additive `--mcp-config` JSON (NOT --strict-mcp-config, which would clobber
27
27
  // the user's own servers). HTTP transport, token via env-var expansion so it never
28
28
  // hits argv. Returns the full ["--mcp-config", "<json>"] arg pair.
29
- export function relayMcpClaudeConfigArg(relayUrl: string): string[] {
29
+ //
30
+ // `endpoint` overrides the target URL: the runner passes its local MCP proxy URL (Stage 2,
31
+ // #215) so the agent connects to the Runner, not the relay. Omitted → the direct relay endpoint.
32
+ export function relayMcpClaudeConfigArg(relayUrl: string, endpoint?: string): string[] {
30
33
  return [
31
34
  "--mcp-config",
32
35
  JSON.stringify({
33
36
  mcpServers: {
34
37
  [RELAY_MCP_SERVER_NAME]: {
35
38
  type: "http",
36
- url: relayMcpEndpoint(relayUrl),
39
+ url: endpoint ?? relayMcpEndpoint(relayUrl),
37
40
  headers: { Authorization: `Bearer \${${RELAY_MCP_TOKEN_ENV}}` },
38
41
  },
39
42
  },
@@ -43,11 +46,12 @@ export function relayMcpClaudeConfigArg(relayUrl: string): string[] {
43
46
 
44
47
  // Codex: `-c mcp_servers.<name>.*` overrides. `bearer_token_env_var` tells Codex to
45
48
  // read the token from the env var itself → transport resolves to streamable_http.
46
- export function relayMcpCodexConfigArgs(relayUrl: string): string[] {
49
+ // `endpoint` overrides the target URL (runner-local proxy, Stage 2 #215) — see above.
50
+ export function relayMcpCodexConfigArgs(relayUrl: string, endpoint?: string): string[] {
47
51
  const key = `mcp_servers.${RELAY_MCP_SERVER_NAME}`;
48
52
  return [
49
53
  "-c",
50
- `${key}.url=${tomlString(relayMcpEndpoint(relayUrl))}`,
54
+ `${key}.url=${tomlString(endpoint ?? relayMcpEndpoint(relayUrl))}`,
51
55
  "-c",
52
56
  `${key}.bearer_token_env_var=${tomlString(RELAY_MCP_TOKEN_ENV)}`,
53
57
  ];