agent-relay-runner 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.22.0",
3
+ "version": "0.23.0",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.22.0",
4
+ "version": "0.23.0",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
package/src/adapter.ts CHANGED
@@ -84,6 +84,9 @@ export interface RunnerSpawnConfig {
84
84
  providerConfig: ProviderConfig;
85
85
  env: Record<string, string>;
86
86
  controlPort: number;
87
+ // Stage 2 (#215): the MCP endpoint the agent connects to — the runner-local proxy URL when the
88
+ // proxy is active. Undefined → the adapter targets the relay's MCP endpoint directly (Stage 1).
89
+ relayMcpEndpoint?: string;
87
90
  monitor?: {
88
91
  deliver(messages: Message[]): Promise<number[]>;
89
92
  };
@@ -204,7 +204,7 @@ export class ClaudeAdapter implements ProviderAdapter {
204
204
  const args = [
205
205
  ...rigPrefix,
206
206
  ...pluginDirs.flatMap((dir) => ["--plugin-dir", dir]),
207
- ...(profileAllowsRelayFeature(config, "mcp") ? relayMcpClaudeConfigArg(config.relayUrl) : []),
207
+ ...(profileAllowsRelayFeature(config, "mcp") ? relayMcpClaudeConfigArg(config.relayUrl, config.relayMcpEndpoint) : []),
208
208
  ...(profileAllowsRelayFeature(config, "statusLine") ? sessionStatusLineSettingsArgs(defaultArgs, config.providerArgs) : []),
209
209
  ...(config.systemPromptAppend ? ["--append-system-prompt", config.systemPromptAppend] : []),
210
210
  ...providerArgs,
@@ -261,7 +261,7 @@ export class CodexAdapter implements ProviderAdapter {
261
261
  ...codexModelConfigArgs(config.model, config.effort),
262
262
  ...codexApprovalConfigArgs(config.approvalMode),
263
263
  ...(profileAllowsRelayFeature(config, "skills") ? bundledSkillConfigArgs() : []),
264
- ...(profileAllowsRelayFeature(config, "mcp") ? relayMcpCodexConfigArgs(config.relayUrl) : []),
264
+ ...(profileAllowsRelayFeature(config, "mcp") ? relayMcpCodexConfigArgs(config.relayUrl, config.relayMcpEndpoint) : []),
265
265
  ...codexToolOutputTokenLimitConfigArgs(config),
266
266
  ...codexManagedConfigArgs(),
267
267
  "--listen",
@@ -0,0 +1,383 @@
1
+ import { errMessage, isRecord } from "agent-relay-sdk";
2
+ import { logger } from "./logger";
3
+
4
/**
 * Deliberately loose `fetch` signature: tests can inject a plain async stub without
 * providing Bun's `preconnect` member, and the real global `fetch` still satisfies it.
 */
export type FetchLike = (
  input: string | URL | Request,
  init?: RequestInit,
) => Promise<Response>;
7
+
8
+ // Stage 2 of #213/#215 — the Runner becomes the MCP endpoint the agent connects to,
9
+ // fronting the relay. The agent's MCP client points at this localhost server instead of
10
+ // directly at the relay, so the Runner owns the relay connection, reconnect/backoff, and a
11
+ // durable buffer. A relay restart/crash becomes invisible to the agent.
12
+ //
13
+ // This is a TRANSPARENT JSON-RPC pass-through to the relay's `/api/mcp` (which stays the
14
+ // sole enforcement authority). It intervenes in exactly four places:
15
+ //
16
+ // 1. `initialize` — forward, then advertise `capabilities.tools.listChanged: true`,
17
+ // the live-tool-set capability the relay's static endpoint can't.
18
+ // 2. `tools/list` — forward (the relay already scope-filters per token), then context-
19
+ // NARROW: hide tools that don't apply to this agent's workspace mode/
20
+ // state (e.g. `relay_workspace_*` for a non-worktree agent). Strictly
21
+ // a subset — never widens. Served from a last-known cache during a blip.
22
+ // 3. `tools/call` — forward with the runner's LIVE token; for bufferable writes during a
23
+ // relay outage, enqueue durably + return a synthetic "queued" ok so the
24
+ // call is never lost. Mutating spawn/shutdown are forwarded, never local.
25
+ // 4. `GET` (SSE) — the server→client notification channel; emits
26
+ // `notifications/tools/list_changed` when a transition changes the
27
+ // narrowed set, so the agent's tool menu updates mid-session.
28
+ //
29
+ // Narrow-never-widen is the safety invariant: filtering `tools/list` is UX/token-efficiency,
30
+ // NEVER enforcement. The proxy can only ever surface a subset of what the relay's scope filter
31
+ // already returned, so a proxy bug can never become an auth bypass — the relay owns the lock at
32
+ // `tools/call`, the proxy owns the menu.
33
+
34
/** The only path the local proxy serves. */
const PROXY_PATH = "/mcp";

/**
 * HTTP statuses from the relay side that count as "relay down" (buffer the write / serve the
 * cache), as opposed to a real 4xx rejection that must reach the agent verbatim.
 */
const GATEWAY_STATUSES = new Set<number>([502, 503, 504]);

/** Interval between SSE keepalive comment frames, in milliseconds. */
const SSE_KEEPALIVE_MS = 25_000;
39
+
40
/**
 * Write tools whose loss during a relay outage is unacceptable and whose result the agent does
 * not need synchronously, so they are safe to queue durably and replay on reconnect.
 * Reads, claims (409 contention) and spawn/shutdown (which need a real ack) are deliberately
 * NOT in this set.
 */
export const DEFAULT_BUFFERABLE_TOOLS = new Set<string>(["relay_send_message", "relay_reply", "relay_workspace_ready"]);
48
+
49
/**
 * Tools shown ONLY to an agent that owns a live isolated worktree. For every other agent the
 * proxy removes them from `tools/list` even when the token scope permits them (the relay would
 * return them). This is the context the coarse token scope can't express (#214/#215).
 */
const WORKTREE_ONLY_TOOLS = new Set<string>([
  "relay_workspace_status", "relay_workspace_ready", "relay_workspace_deps",
  "relay_workspace_list", "relay_workspace_claim", "relay_workspace_release",
  "relay_workspace_land",
]);
61
+
62
/** Per-agent context the proxy uses to narrow the tool list. */
export interface ProxyContext {
  /** True when the agent owns a live (non-terminal) isolated git worktree, so workspace tools apply. */
  isolatedWorktree: boolean;
}
66
+
67
/** A tool call queued by the proxy for later replay against the relay. */
export interface BufferedToolCall {
  /** Name of the MCP tool that was called. */
  tool: string;
  /** Arguments of the call, as handed to the enqueue callback. */
  arguments: Record<string, unknown>;
  /** Key identifying this queued call. */
  idempotencyKey: string;
}
72
+
73
/** Construction options for {@link RelayMcpProxy}. */
export interface RelayMcpProxyOptions {
  /** The relay's MCP endpoint, e.g. http://localhost:4850/api/mcp. */
  relayMcpEndpoint: string;
  /** The runner's LIVE relay token; read on every forward so rotation is invisible to the agent. */
  getToken(): string | undefined;
  /**
   * The bearer the agent must present to this localhost proxy: a per-session secret the runner
   * mints and injects into the agent env. Decouples the agent from the rotating relay token.
   */
  authSecret: string;
  /** Persist a bufferable write durably for replay on reconnect (wired to the runner outbox). */
  enqueueBuffered(call: BufferedToolCall): void;
  /** Starting narrowing context; the proxy defaults to `{ isolatedWorktree: false }`. */
  initialContext?: ProxyContext;
  /** Overrides the set of tool names eligible for buffering. */
  bufferableTools?: Set<string>;
  /** Test seam: replaces the global `fetch`. */
  fetchImpl?: FetchLike;
}
88
+
89
/** Loosely typed JSON-RPC envelope as received from the agent; every field is optional. */
interface JsonRpcMessage {
  jsonrpc?: string;
  /** Absent or null for notifications. */
  id?: string | number | null;
  method?: string;
  params?: unknown;
}
95
+
96
/** One open SSE stream: its stream controller plus the keepalive timer that feeds it. */
interface SseClient {
  controller: ReadableStreamDefaultController<Uint8Array>;
  keepalive: ReturnType<typeof setInterval>;
}
100
+
101
/**
 * Runner-local MCP endpoint that fronts the relay's MCP endpoint.
 *
 * JSON-RPC requests are forwarded to the relay with the runner's live token. The proxy
 * intervenes only for `initialize` (advertises `tools.listChanged`), `tools/list` (narrows the
 * relay's list by {@link ProxyContext}), `tools/call` (queues bufferable writes while the relay
 * is down) and `GET` (SSE channel used for `notifications/tools/list_changed`).
 *
 * Narrowing only ever removes entries from what the relay returned; it is not enforcement.
 */
export class RelayMcpProxy {
  private readonly relayMcpEndpoint: string;
  private readonly getToken: () => string | undefined;
  private readonly authSecret: string;
  private readonly enqueueBuffered: (call: BufferedToolCall) => void;
  private readonly bufferableTools: Set<string>;
  private readonly fetchImpl: FetchLike;
  private readonly encoder = new TextEncoder();

  private context: ProxyContext;
  private server?: ReturnType<typeof Bun.serve>;
  private readonly sseClients = new Set<SseClient>();
  // Last successful (first-page) relay tools/list. Narrowed and served when the relay is
  // briefly down so a read still works.
  private lastRelayTools: Array<Record<string, unknown>> = [];
  // Sorted, comma-joined names of the last narrowed list the agent was told about.
  private lastNarrowedNames = "";

  constructor(options: RelayMcpProxyOptions) {
    this.relayMcpEndpoint = options.relayMcpEndpoint;
    this.getToken = options.getToken;
    this.authSecret = options.authSecret;
    this.enqueueBuffered = options.enqueueBuffered;
    this.bufferableTools = options.bufferableTools ?? DEFAULT_BUFFERABLE_TOOLS;
    this.fetchImpl = options.fetchImpl ?? fetch;
    this.context = options.initialContext ?? { isolatedWorktree: false };
  }

  /**
   * Bind the proxy to an ephemeral port on 127.0.0.1 and return its URL.
   * Calling it again while running returns the existing binding instead of leaking a listener.
   *
   * @throws Error when the server did not report a bound port.
   */
  start(): { url: string; port: number } {
    if (!this.server) {
      this.server = Bun.serve({
        hostname: "127.0.0.1",
        port: 0,
        // SSE streams are long-lived; disable Bun's idle timeout and keep them alive with pings.
        idleTimeout: 0,
        fetch: (req) => this.handle(req),
      });
    }
    const port = this.server.port;
    if (port === undefined) {
      this.stop(); // don't keep a listener nobody can address
      throw new Error("relay MCP proxy did not bind a port");
    }
    return { url: `http://127.0.0.1:${port}${PROXY_PATH}`, port };
  }

  /** Close every SSE stream, clear their keepalive timers and stop the HTTP server. */
  stop(): void {
    for (const client of this.sseClients) {
      clearInterval(client.keepalive);
      try { client.controller.close(); } catch { /* already closed */ }
    }
    this.sseClients.clear();
    this.server?.stop(true);
    this.server = undefined;
  }

  /**
   * Replace the narrowing context (the runner calls this on a workspace mode/state transition).
   * Emits `tools/list_changed` when the visible tool set changes as a result.
   */
  setContext(context: ProxyContext): void {
    this.context = context;
    this.maybeEmitListChanged();
  }

  /**
   * Re-fetch the relay's tool list with the live token (the runner calls this after a token
   * re-mint, which may have changed scope) and emit `tools/list_changed` if the visible set
   * changed. Best-effort: a failed refresh keeps the last-known list.
   */
  async refreshTools(): Promise<void> {
    const relay = await this.forward({ method: "tools/list", id: 0 }).catch(() => null);
    const tools = RelayMcpProxy.toolsOf(relay);
    if (!tools) return;
    this.lastRelayTools = tools;
    this.maybeEmitListChanged();
  }

  /** Route one HTTP request: path check, proxy auth, then dispatch by HTTP method. */
  private async handle(req: Request): Promise<Response> {
    const url = new URL(req.url);
    if (url.pathname !== PROXY_PATH) return new Response("not found", { status: 404 });
    if (!this.authorized(req)) {
      return Response.json(jsonRpcError(null, -32001, "proxy auth required"), { status: 401 });
    }
    // GET → open the server→client SSE notification channel (streamable-HTTP transport).
    if (req.method === "GET") return this.openSse();
    if (req.method === "DELETE") return new Response(null, { status: 204 });
    if (req.method !== "POST") return new Response("method not allowed", { status: 405 });
    return this.handleRpc(req);
  }

  /**
   * True when the request carries `Authorization: Bearer <authSecret>`.
   * Fails closed when the configured secret is empty or the header has no Bearer prefix;
   * previously an empty secret matched any request without a bearer.
   */
  private authorized(req: Request): boolean {
    const header = req.headers.get("authorization") ?? "";
    if (!this.authSecret || !header.startsWith("Bearer ")) return false;
    const presented = header.slice(7);
    if (presented.length !== this.authSecret.length) return false;
    // Compare every character rather than bailing at the first mismatch.
    let diff = 0;
    for (let i = 0; i < presented.length; i++) {
      diff |= presented.charCodeAt(i) ^ this.authSecret.charCodeAt(i);
    }
    return diff === 0;
  }

  /** Parse the POST body as a single JSON-RPC object and dispatch on `method`. */
  private async handleRpc(req: Request): Promise<Response> {
    let message: JsonRpcMessage;
    try {
      const body = await req.json();
      if (!body || typeof body !== "object" || Array.isArray(body)) {
        return Response.json(jsonRpcError(null, -32600, "JSON-RPC body must be an object"));
      }
      message = body as JsonRpcMessage;
    } catch {
      return Response.json(jsonRpcError(null, -32700, "invalid JSON-RPC body"));
    }
    const id = message.id ?? null;
    const method = message.method;

    if (method === "initialize") return this.handleInitialize(id, message);
    if (method === "tools/list") return this.handleToolsList(id, message);
    if (method === "tools/call") return this.handleToolsCall(id, message);
    // Everything else (notifications/initialized, ping, …) forwards verbatim.
    return this.forwardRaw(message);
  }

  /**
   * Forward `initialize`, then force `capabilities.tools.listChanged: true` on the result.
   * If the relay gave no usable result, answer with a minimal local initialize result instead.
   */
  private async handleInitialize(id: string | number | null, message: JsonRpcMessage): Promise<Response> {
    const relay = await this.forward({ ...message, method: "initialize", id }).catch((error) => {
      logger.warn("mcp-proxy", `initialize forward failed: ${errMessage(error)}`);
      return null;
    });
    const relayResult = relay?.body?.result;
    const result: Record<string, unknown> = isRecord(relayResult) ? { ...relayResult } : {
      protocolVersion: "2024-11-05",
      serverInfo: { name: "agent-relay", title: "Agent Relay (via runner)", version: "proxy" },
    };
    const caps = isRecord(result.capabilities) ? { ...result.capabilities } : {};
    // The capability the relay's static endpoint doesn't advertise: live tool sets.
    caps.tools = { ...(isRecord(caps.tools) ? caps.tools : {}), listChanged: true };
    result.capabilities = caps;
    return Response.json(jsonRpcResult(id, result));
  }

  /**
   * Forward `tools/list` and return the narrowed list.
   *
   * - A genuine relay rejection (JSON-RPC `error` with the relay reachable) is returned
   *   verbatim rather than hidden behind the cache.
   * - With the relay down, the last-known first-page list is narrowed and served; a
   *   cursor request gets an empty page, since the cache only holds the first page.
   * - Other result fields from the relay (e.g. `nextCursor`) are passed through.
   */
  private async handleToolsList(id: string | number | null, message: JsonRpcMessage): Promise<Response> {
    const relay = await this.forward({ ...message, method: "tools/list", id }).catch(() => null);
    if (relay && !RelayMcpProxy.isRelayDown(relay) && relay.body && relay.body.error !== undefined) {
      return Response.json(relay.body);
    }
    const tools = RelayMcpProxy.toolsOf(relay);
    // A request carrying a cursor is a later page; it must not replace the cached base list.
    const paged = isRecord(message.params) && message.params.cursor !== undefined;
    if (tools && !paged) this.lastRelayTools = tools;
    const base = tools ?? (paged ? [] : this.lastRelayTools);
    const narrowed = this.narrow(base);
    if (!paged) this.lastNarrowedNames = toolNames(narrowed);
    const relayResult = relay?.body?.result;
    const extras = tools && isRecord(relayResult) ? relayResult : {};
    return Response.json(jsonRpcResult(id, { ...extras, tools: narrowed }));
  }

  /**
   * Strict subset of the given list: only removes, never adds. The one rule: tools in
   * WORKTREE_ONLY_TOOLS are dropped unless the context says the agent owns an isolated worktree.
   */
  private narrow(tools: Array<Record<string, unknown>>): Array<Record<string, unknown>> {
    return tools.filter((tool) => {
      const name = typeof tool.name === "string" ? tool.name : "";
      return !(WORKTREE_ONLY_TOOLS.has(name) && !this.context.isolatedWorktree);
    });
  }

  /**
   * Forward `tools/call`. When the relay is down and the tool is bufferable, enqueue the call
   * and answer with a synthetic "queued" result; otherwise return the relay's body, or a
   * JSON-RPC error when there is none.
   */
  private async handleToolsCall(id: string | number | null, message: JsonRpcMessage): Promise<Response> {
    const params = isRecord(message.params) ? message.params : {};
    const toolName = typeof params.name === "string" ? params.name : "";
    const args = isRecord(params.arguments) ? params.arguments : {};

    const relay = await this.forward(message).catch((error) => {
      // A thrown fetch = transport failure (relay unreachable / DNS / connection refused).
      return { ok: false, status: 0, body: null, transportError: errMessage(error) } as ForwardResult;
    });

    if (RelayMcpProxy.isRelayDown(relay) && this.bufferableTools.has(toolName)) {
      const idempotencyKey = typeof args.idempotencyKey === "string" && args.idempotencyKey
        ? args.idempotencyKey
        : `mcp-${toolName}-${crypto.randomUUID()}`;
      try {
        this.enqueueBuffered({ tool: toolName, arguments: { ...args, idempotencyKey }, idempotencyKey });
      } catch (error) {
        // Never claim "queued" for a write that was not persisted: tell the agent so it can retry.
        logger.warn("mcp-proxy", `relay unreachable and buffering ${toolName} failed: ${errMessage(error)}`);
        return Response.json(jsonRpcError(id, -32002, `relay unreachable and ${toolName} could not be queued: ${errMessage(error)}`));
      }
      logger.info("mcp-proxy", `relay unreachable — buffered ${toolName} (idempotencyKey=${idempotencyKey})`);
      return Response.json(jsonRpcResult(id, toolResult({
        queued: true,
        tool: toolName,
        idempotencyKey,
        note: "Relay was unreachable; your call was queued durably and will be delivered automatically when the relay comes back. It is not lost.",
      })));
    }

    if (relay.body) return Response.json(relay.body);
    // Relay down and not bufferable: a real error the agent must see (and can retry).
    return Response.json(jsonRpcError(id, -32002, `relay unreachable: ${relay.transportError ?? `status ${relay.status}`}`));
  }

  /**
   * Verbatim forward for methods with no proxy-specific handling. Returns the relay's body and
   * status unchanged; with no body, notifications are acked locally (202) and requests get a
   * JSON-RPC error.
   */
  private async forwardRaw(message: JsonRpcMessage): Promise<Response> {
    const relay = await this.forward(message).catch((error) => {
      return { ok: false, status: 0, body: null, transportError: errMessage(error) } as ForwardResult;
    });
    if (relay.body) return Response.json(relay.body, { status: relay.status || 200 });
    if (message.id === undefined || message.id === null) return new Response(null, { status: 202 });
    return Response.json(jsonRpcError(message.id, -32002, `relay unreachable: ${relay.transportError ?? `status ${relay.status}`}`));
  }

  /**
   * POST the JSON-RPC message to the relay with the runner's live token (the agent's incoming
   * bearer is the proxy secret and is never forwarded). The body is re-serialized from the
   * parsed message. Only a JSON *object* response is accepted as `body`; anything else
   * (empty, non-JSON, or a JSON string/array/number) is reported as `body: null`.
   *
   * @throws whatever `fetchImpl` throws on a transport failure.
   */
  private async forward(message: JsonRpcMessage): Promise<ForwardResult> {
    const token = this.getToken();
    const headers: Record<string, string> = { "content-type": "application/json" };
    if (token) headers.authorization = `Bearer ${token}`;
    const payload: Record<string, unknown> = { jsonrpc: "2.0", method: message.method };
    if (message.id !== undefined) payload.id = message.id;
    if (message.params !== undefined) payload.params = message.params;
    const response = await this.fetchImpl(this.relayMcpEndpoint, { method: "POST", headers, body: JSON.stringify(payload) });
    const text = await response.text();
    let parsed: Record<string, unknown> | null = null;
    if (text) {
      try {
        const value: unknown = JSON.parse(text);
        if (isRecord(value)) parsed = value;
      } catch { /* non-JSON body → treated as no body */ }
    }
    return { ok: response.ok, status: response.status, body: parsed };
  }

  /** Open an SSE stream, register it as a client and keep it alive with periodic comment frames. */
  private openSse(): Response {
    let client: SseClient | undefined;
    const stream = new ReadableStream<Uint8Array>({
      start: (controller) => {
        controller.enqueue(this.encoder.encode(": connected\n\n"));
        const keepalive = setInterval(() => {
          try {
            controller.enqueue(this.encoder.encode(": keepalive\n\n"));
          } catch {
            // The stream no longer accepts data: stop pinging it and forget the client.
            if (client) this.dropSseClient(client);
          }
        }, SSE_KEEPALIVE_MS);
        keepalive.unref?.();
        client = { controller, keepalive };
        this.sseClients.add(client);
      },
      cancel: () => {
        if (client) this.dropSseClient(client);
      },
    });
    return new Response(stream, {
      headers: { "content-type": "text/event-stream", "cache-control": "no-cache", connection: "keep-alive" },
    });
  }

  /** Emit `notifications/tools/list_changed` to every SSE client if the narrowed set changed. */
  private maybeEmitListChanged(): void {
    const narrowed = this.narrow(this.lastRelayTools);
    const names = toolNames(narrowed);
    if (names === this.lastNarrowedNames) return; // no visible change → no notification
    this.lastNarrowedNames = names;
    const frame = this.encoder.encode(`event: message\ndata: ${JSON.stringify({ jsonrpc: "2.0", method: "notifications/tools/list_changed" })}\n\n`);
    for (const client of this.sseClients) {
      try {
        client.controller.enqueue(frame);
      } catch {
        this.dropSseClient(client); // stream is gone; deleting during Set iteration is safe
      }
    }
    logger.debug("mcp-proxy", `tools/list_changed emitted to ${this.sseClients.size} client(s)`);
  }

  /** Stop a client's keepalive timer and remove it from the client set. */
  private dropSseClient(client: SseClient): void {
    clearInterval(client.keepalive);
    this.sseClients.delete(client);
  }

  /** Relay-side failure classified as "relay down": a transport failure or a gateway status. */
  private static isRelayDown(relay: ForwardResult): boolean {
    return !relay.ok && (relay.status === 0 || GATEWAY_STATUSES.has(relay.status));
  }

  /** Extract `result.tools` from a forward result, keeping only object entries; null if absent. */
  private static toolsOf(relay: ForwardResult | null): Array<Record<string, unknown>> | null {
    const result = relay?.body?.result;
    if (!isRecord(result) || !Array.isArray(result.tools)) return null;
    return result.tools.filter((tool): tool is Record<string, unknown> => isRecord(tool));
  }

  // Test/observability hooks.
  sseClientCount(): number { return this.sseClients.size; }
  narrowedToolNames(): string[] { return this.narrow(this.lastRelayTools).map((t) => String(t.name)); }
}
361
+
362
/** Outcome of one forward to the relay. */
interface ForwardResult {
  /** Whether the relay responded with a success status. */
  ok: boolean;
  /** HTTP status of the relay response; callers use 0 for a transport failure. */
  status: number;
  /** Parsed JSON response body, or null when there was none. */
  body: Record<string, unknown> | null;
  /** Message of the transport error, when the fetch itself failed. */
  transportError?: string;
}
368
+
369
/** Canonical fingerprint of a tool list: the stringified `name` of each entry, sorted and comma-joined. */
function toolNames(tools: Array<Record<string, unknown>>): string {
  const names: string[] = [];
  for (const tool of tools) {
    names.push(String(tool.name));
  }
  names.sort();
  return names.join(",");
}
372
+
373
/** Build a JSON-RPC 2.0 success envelope for the given request id. */
function jsonRpcResult(id: string | number | null, result: unknown): Record<string, unknown> {
  const envelope: Record<string, unknown> = { jsonrpc: "2.0" };
  envelope.id = id;
  envelope.result = result;
  return envelope;
}
376
+
377
/** Build a JSON-RPC 2.0 error envelope carrying `code` and `message` for the given request id. */
function jsonRpcError(id: string | number | null, code: number, message: string): Record<string, unknown> {
  const error = { code, message };
  const envelope: Record<string, unknown> = { jsonrpc: "2.0" };
  envelope.id = id;
  envelope.error = error;
  return envelope;
}
380
+
381
/**
 * Wrap a value as an MCP tool result: one text content item holding the pretty-printed JSON,
 * plus the raw value as `structuredContent`.
 */
function toolResult(result: unknown): Record<string, unknown> {
  const text = JSON.stringify(result, null, 2);
  const content = [{ type: "text", text }];
  return { content, structuredContent: result };
}
package/src/relay-mcp.ts CHANGED
@@ -26,14 +26,17 @@ export function relayMcpEndpoint(relayUrl: string): string {
26
26
  // Claude: additive `--mcp-config` JSON (NOT --strict-mcp-config, which would clobber
27
27
  // the user's own servers). HTTP transport, token via env-var expansion so it never
28
28
  // hits argv. Returns the full ["--mcp-config", "<json>"] arg pair.
29
- export function relayMcpClaudeConfigArg(relayUrl: string): string[] {
29
+ //
30
+ // `endpoint` overrides the target URL: the runner passes its local MCP proxy URL (Stage 2,
31
+ // #215) so the agent connects to the Runner, not the relay. Omitted → the direct relay endpoint.
32
+ export function relayMcpClaudeConfigArg(relayUrl: string, endpoint?: string): string[] {
30
33
  return [
31
34
  "--mcp-config",
32
35
  JSON.stringify({
33
36
  mcpServers: {
34
37
  [RELAY_MCP_SERVER_NAME]: {
35
38
  type: "http",
36
- url: relayMcpEndpoint(relayUrl),
39
+ url: endpoint ?? relayMcpEndpoint(relayUrl),
37
40
  headers: { Authorization: `Bearer \${${RELAY_MCP_TOKEN_ENV}}` },
38
41
  },
39
42
  },
@@ -43,11 +46,12 @@ export function relayMcpClaudeConfigArg(relayUrl: string): string[] {
43
46
 
44
47
  // Codex: `-c mcp_servers.<name>.*` overrides. `bearer_token_env_var` tells Codex to
45
48
  // read the token from the env var itself → transport resolves to streamable_http.
46
- export function relayMcpCodexConfigArgs(relayUrl: string): string[] {
49
+ // `endpoint` overrides the target URL (runner-local proxy, Stage 2 #215); see the Claude variant above.
50
+ export function relayMcpCodexConfigArgs(relayUrl: string, endpoint?: string): string[] {
47
51
  const key = `mcp_servers.${RELAY_MCP_SERVER_NAME}`;
48
52
  return [
49
53
  "-c",
50
- `${key}.url=${tomlString(relayMcpEndpoint(relayUrl))}`,
54
+ `${key}.url=${tomlString(endpoint ?? relayMcpEndpoint(relayUrl))}`,
51
55
  "-c",
52
56
  `${key}.bearer_token_env_var=${tomlString(RELAY_MCP_TOKEN_ENV)}`,
53
57
  ];
package/src/runner.ts CHANGED
@@ -14,7 +14,8 @@ import { Outbox, type OutboxRecord } from "./outbox";
14
14
  import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
15
15
  import { agentProfileProjectionReport } from "./profile-projection";
16
16
  import { profileUsesHostProviderGlobals } from "./profile-home";
17
- import { RELAY_MCP_TOKEN_ENV } from "./relay-mcp";
17
+ import { RELAY_MCP_TOKEN_ENV, relayMcpEndpoint } from "./relay-mcp";
18
+ import { RelayMcpProxy } from "./relay-mcp-proxy";
18
19
  import { runtimeMetadata } from "./version";
19
20
  import { logger, parseLogLevel } from "./logger";
20
21
  import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type SessionScratchLayout } from "./session-scratch";
@@ -131,6 +132,15 @@ export class AgentRunner {
131
132
  private currentTokenProfileId?: string;
132
133
  private currentTokenExpiresAt?: number;
133
134
  private control?: ControlServer;
135
+ // Stage 2 (#215): the local MCP endpoint the agent connects to, fronting the relay so the
136
+ // Runner owns reconnect/backoff + a durable buffer. Disabled via AGENT_RELAY_MCP_PROXY=0
137
+ // (then the agent connects to the relay directly, Stage-1 behaviour). The proxy secret is the
138
+ // bearer the agent presents to the localhost proxy — it decouples the agent from the rotating
139
+ // relay token (the proxy injects the live token relay-side).
140
+ private proxy?: RelayMcpProxy;
141
+ private mcpProxyEndpoint?: string;
142
+ private readonly mcpProxyEnabled: boolean;
143
+ private readonly mcpProxySecret: string;
134
144
  private process?: ManagedProcess;
135
145
  private stopped = false;
136
146
  private exitCommandInProgress = false;
@@ -200,6 +210,8 @@ export class AgentRunner {
200
210
  this.currentTokenJti = options.tokenJti;
201
211
  this.currentTokenProfileId = options.tokenProfileId;
202
212
  this.currentTokenExpiresAt = options.tokenExpiresAt;
213
+ this.mcpProxyEnabled = !["0", "false", "off"].includes((process.env.AGENT_RELAY_MCP_PROXY ?? "").trim().toLowerCase());
214
+ this.mcpProxySecret = crypto.randomUUID();
203
215
  const runtime = runtimeMetadata(options.provider);
204
216
  this.http = new RelayHttpClient({ baseUrl: options.relayUrl, token: this.currentToken });
205
217
  this.obligationCache = new ReplyObligationCache({ fetch: () => this.http.listReplyObligations(this.agentId) });
@@ -284,6 +296,7 @@ export class AgentRunner {
284
296
  onSessionEnd: (input) => this.handleSessionEnd(input),
285
297
  onHookFatal: (report) => this.reportHookFatal(report),
286
298
  });
299
+ this.startMcpProxy();
287
300
  this.writeRunnerInfoFile();
288
301
  this.options.adapter.onStatusChange((status) => {
289
302
  if (this.restartInProgress || this.restartPending) return;
@@ -350,10 +363,45 @@ export class AgentRunner {
350
363
  this.stopReasoningTail();
351
364
  this.obligationCache.stop();
352
365
  this.outbox.close();
366
+ this.proxy?.stop();
353
367
  this.control?.stop();
354
368
  await this.bus.close();
355
369
  }
356
370
 
371
+ // Start the local MCP proxy the agent connects to (Stage 2, #215). Forwards tool calls to the
372
+ // relay with the runner's LIVE token, buffers bufferable writes durably during a relay outage,
373
+ // and narrows the tool list to this agent's workspace context. Best-effort: if it can't bind,
374
+ // we fall back to a direct relay MCP connection (the agent env still works, no resilience).
375
/**
 * Start the runner-local MCP proxy unless it is disabled. On success `this.proxy` and
 * `this.mcpProxyEndpoint` are set; on any failure both are cleared and a warning is logged,
 * so the agent is configured to connect to the relay directly.
 */
private startMcpProxy(): void {
  if (!this.mcpProxyEnabled) return;
  try {
    // Evaluate once so the proxy's initial context and the log line cannot disagree.
    const isolatedWorktree = this.ownsIsolatedWorktree();
    this.proxy = new RelayMcpProxy({
      relayMcpEndpoint: relayMcpEndpoint(this.options.relayUrl),
      getToken: () => this.currentToken,
      authSecret: this.mcpProxySecret,
      enqueueBuffered: (call) => {
        this.outbox.enqueue({
          kind: "mcp-tool-call",
          payload: { tool: call.tool, arguments: call.arguments },
          idempotencyKey: call.idempotencyKey,
        });
      },
      initialContext: { isolatedWorktree },
    });
    this.mcpProxyEndpoint = this.proxy.start().url;
    logger.info("mcp-proxy", `runner MCP proxy listening at ${this.mcpProxyEndpoint} (worktree=${isolatedWorktree})`);
  } catch (error) {
    // Release anything a half-started proxy may still hold before discarding it.
    try { this.proxy?.stop(); } catch { /* best-effort cleanup */ }
    this.proxy = undefined;
    this.mcpProxyEndpoint = undefined;
    logger.warn("mcp-proxy", `failed to start MCP proxy; agent will connect to the relay directly: ${errMessage(error)}`);
  }
}
399
+
400
/**
 * True when the effective workspace mode is "isolated". The mode is taken from the workspace
 * option's `requestedMode`, then its `mode`, then the AGENT_RELAY_WORKSPACE_MODE env var.
 */
private ownsIsolatedWorktree(): boolean {
  const workspace = this.options.workspace;
  const effectiveMode = workspace?.requestedMode ?? workspace?.mode ?? process.env.AGENT_RELAY_WORKSPACE_MODE;
  return effectiveMode === "isolated";
}
404
+
357
405
  private async spawnProvider(): Promise<ManagedProcess> {
358
406
  this.providerSessionId = crypto.randomUUID();
359
407
  this.lastTranscriptPath = undefined;
@@ -369,11 +417,18 @@ export class AgentRunner {
369
417
  AGENT_RELAY_URL: this.options.relayUrl,
370
418
  AGENT_RELAY_APPROVAL: this.options.approvalMode,
371
419
  ...(this.currentToken ? { AGENT_RELAY_TOKEN: this.currentToken } : {}),
372
- // Dedicated, un-clobberable credential for the injected relay MCP endpoint. A rig's
420
+ // Dedicated, un-clobberable credential for the injected MCP endpoint. A rig's
373
421
  // settings.json `env.AGENT_RELAY_TOKEN` would override the scoped token above at
374
422
  // MCP-parse time → server-actor auth, no identity (#233). The MCP config references
375
423
  // ${AGENT_RELAY_SESSION_TOKEN}, which rigs never set. See runner/src/relay-mcp.ts.
376
- ...(this.currentToken ? { [RELAY_MCP_TOKEN_ENV]: this.currentToken } : {}),
424
+ //
425
+ // Stage 2 (#215): when the proxy is active the agent connects to the LOCAL proxy, so this
426
+ // holds the per-session PROXY SECRET (not the relay token). The proxy injects the live
427
+ // relay token itself — the agent never holds it, and token rotation is invisible. With the
428
+ // proxy disabled this stays the scoped relay token (Stage-1 direct connection).
429
+ ...(this.proxy
430
+ ? { [RELAY_MCP_TOKEN_ENV]: this.mcpProxySecret }
431
+ : (this.currentToken ? { [RELAY_MCP_TOKEN_ENV]: this.currentToken } : {})),
377
432
  ...(this.currentTokenJti ? { AGENT_RELAY_TOKEN_JTI: this.currentTokenJti } : {}),
378
433
  ...(this.currentTokenProfileId ? { AGENT_RELAY_TOKEN_PROFILE: this.currentTokenProfileId } : {}),
379
434
  ...(this.currentTokenExpiresAt ? { AGENT_RELAY_TOKEN_EXPIRES_AT: String(this.currentTokenExpiresAt) } : {}),
@@ -400,6 +455,9 @@ export class AgentRunner {
400
455
  providerConfig: this.options.providerConfig,
401
456
  env,
402
457
  controlPort: this.control!.port,
458
+ // Stage 2 (#215): the MCP endpoint the agent's client should target — the runner-local
459
+ // proxy when active, undefined when disabled (adapters fall back to the direct relay URL).
460
+ ...(this.mcpProxyEndpoint ? { relayMcpEndpoint: this.mcpProxyEndpoint } : {}),
403
461
  monitor: {
404
462
  deliver: (messages) => this.control!.deliverToMonitor(messages),
405
463
  },
@@ -1057,6 +1115,10 @@ export class AgentRunner {
1057
1115
  });
1058
1116
  return;
1059
1117
  }
1118
+ if (record.kind === "mcp-tool-call") {
1119
+ await this.deliverBufferedMcpCall(record);
1120
+ return;
1121
+ }
1060
1122
  logger.warn("outbox", `dropping event with unknown kind: ${record.kind}`);
1061
1123
  } catch (error) {
1062
1124
  // 409 = the server intentionally rejected it (e.g. Insights/feature toggled off). That
@@ -1067,6 +1129,40 @@ export class AgentRunner {
1067
1129
  }
1068
1130
  }
1069
1131
 
1132
+ // Replay a buffered MCP tool call (Stage 2, #215) that the proxy queued while the relay was
1133
+ // unreachable. POST it to the relay MCP endpoint with the LIVE token — same path the live call
1134
+ // would have taken. Throw to retry (transient), return to ack (delivered or permanently
1135
+ // rejected). The proxy stamped an idempotencyKey into the arguments so a retry that already
1136
+ // landed server-side is deduped, not double-sent.
1137
/**
 * Replay one buffered MCP tool call against the relay's MCP endpoint with the current token.
 *
 * Returning acks the record (delivered, or permanently rejected and logged).
 * Throwing leaves it for retry.
 *
 * @param record Outbox record whose payload is `{ tool, arguments }`.
 * @throws Error on 401/403 (after triggering token recovery), on any 5xx, and on 408/429,
 *         which are transient and must not drop a durably queued write.
 */
private async deliverBufferedMcpCall(record: OutboxRecord): Promise<void> {
  const payload = record.payload as { tool: string; arguments: Record<string, unknown> };
  const headers: Record<string, string> = { "content-type": "application/json" };
  if (this.currentToken) headers.authorization = `Bearer ${this.currentToken}`;
  const response = await fetch(relayMcpEndpoint(this.options.relayUrl), {
    method: "POST",
    headers,
    body: JSON.stringify({ jsonrpc: "2.0", id: 1, method: "tools/call", params: { name: payload.tool, arguments: payload.arguments } }),
  });
  if (response.status === 401 || response.status === 403) {
    this.recoverRuntimeTokenAfterAuthFailure("mcp-outbox");
    throw new Error(`relay rejected buffered ${payload.tool} with ${response.status}`);
  }
  // 5xx plus request-timeout / rate-limit are transient: retry instead of dropping the write.
  if (response.status >= 500 || response.status === 408 || response.status === 429) {
    throw new Error(`relay ${response.status} on buffered ${payload.tool}`);
  }
  if (!response.ok) {
    // Any other 4xx (e.g. target gone, validation) is a permanent rejection; retrying won't help.
    // Ack so it doesn't block the queue, but log loudly: a queued write did not land.
    const body = await response.text().catch(() => "");
    logger.warn("mcp-outbox", `buffered ${payload.tool} permanently rejected (${response.status}); dropping: ${body.slice(0, 200)}`);
    return;
  }
  // HTTP 200 can still carry a failure: a JSON-RPC `error`, or an MCP tool result flagged
  // `isError`. Both are treated as permanent: ack and log, don't loop.
  const json = await response.json().catch(() => null) as
    | { error?: { message?: string }; result?: { isError?: boolean } }
    | null;
  if (json?.error) {
    logger.warn("mcp-outbox", `buffered ${payload.tool} returned a tool error; dropping: ${json.error.message ?? "(no detail)"}`);
    return;
  }
  if (json?.result?.isError === true) {
    logger.warn("mcp-outbox", `buffered ${payload.tool} returned an isError tool result; dropping`);
  }
}
1165
+
1070
1166
  // A hook reported an unhandled failure (#198 seam). Already logged FATAL by the control
1071
1167
  // server; here we additionally surface it durably to the server as a generic insight so
1072
1168
  // it shows up in observability rather than only in the per-agent log (#196).
@@ -1607,6 +1703,10 @@ export class AgentRunner {
1607
1703
  this.options.tokenExpiresAt = this.currentTokenExpiresAt;
1608
1704
  this.http.setToken(token);
1609
1705
  this.bus.setToken(token);
1706
+ // The proxy reads the token live via getToken(), so forwarding already uses the new one.
1707
+ // A re-mint can change scope (e.g. a profile change), so refresh the relay tool list and
1708
+ // emit tools/list_changed if the visible set changed (#215 — token-scope transition).
1709
+ void this.proxy?.refreshTools().catch(() => {});
1610
1710
  this.httpLivenessAuthFailed = false;
1611
1711
  this.reactiveTokenRecoveryAt = undefined;
1612
1712
  // An earlier auth failure may have stopped the liveness loop; restart it so the