agent-relay-runner 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/plugins/claude/.claude-plugin/plugin.json +1 -1
- package/plugins/claude/hooks/pre-compact.sh +6 -0
- package/plugins/claude/hooks/relay-status.sh +6 -5
- package/plugins/claude/hooks/session-end.sh +5 -2
- package/src/adapter.ts +13 -0
- package/src/adapters/claude-transcript.ts +21 -75
- package/src/adapters/claude.ts +17 -1
- package/src/adapters/codex.ts +65 -4
- package/src/claim-tracker.ts +0 -12
- package/src/control-server.ts +12 -11
- package/src/relay-mcp-proxy.ts +383 -0
- package/src/relay-mcp.ts +8 -4
- package/src/runner.ts +224 -34
- package/src/session-insights.ts +118 -0
package/src/runner.ts
CHANGED
|
@@ -11,14 +11,45 @@ import { ClaimTracker } from "./claim-tracker";
|
|
|
11
11
|
import { startControlServer, type ControlServer } from "./control-server";
|
|
12
12
|
import { ReplyObligationCache } from "./reply-obligation-cache";
|
|
13
13
|
import { Outbox, type OutboxRecord } from "./outbox";
|
|
14
|
-
import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete
|
|
14
|
+
import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
|
|
15
|
+
import { computeContextRatio } from "./session-insights";
|
|
15
16
|
import { agentProfileProjectionReport } from "./profile-projection";
|
|
16
17
|
import { profileUsesHostProviderGlobals } from "./profile-home";
|
|
17
|
-
import { RELAY_MCP_TOKEN_ENV } from "./relay-mcp";
|
|
18
|
+
import { RELAY_MCP_TOKEN_ENV, relayMcpEndpoint } from "./relay-mcp";
|
|
19
|
+
import { RelayMcpProxy } from "./relay-mcp-proxy";
|
|
18
20
|
import { runtimeMetadata } from "./version";
|
|
19
21
|
import { logger, parseLogLevel } from "./logger";
|
|
20
22
|
import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type SessionScratchLayout } from "./session-scratch";
|
|
21
23
|
|
|
24
|
+
// A destructive session transition. The runner runs end-of-session work (Insights
|
|
25
|
+
// capture, #183/#184) before the invasive operation and, during that window, presents a
|
|
26
|
+
// distinct non-addressable lifecycle state. Bus commands (via boundaryReasonForCommand)
|
|
27
|
+
// and provider hooks (Claude PreCompact / SessionEnd, via handleSessionBoundary) both normalize to one of these.
|
|
28
|
+
type SessionDestroyReason = "compact" | "clear" | "restart" | "shutdown" | "kill";
|
|
29
|
+
|
|
30
|
+
// `finalizing-<reason>` is the transient pre-destroy window (set by publishFinalizing); the
|
|
31
|
+
// others are the executing teardown states the dashboard already renders (set by the bus-command handler).
|
|
32
|
+
type LifecycleAction =
|
|
33
|
+
| "shutting-down" | "killing" | "restarting"
|
|
34
|
+
| `finalizing-${SessionDestroyReason}`;
|
|
35
|
+
|
|
36
|
+
// Pre-destroy work is best-effort and must never hang teardown. Capping it keeps a slow
|
|
37
|
+
// transcript read or a wedged provider from stalling a shutdown the operator asked for.
// Milliseconds; runPreSessionDestroy races the capture against a timer of this length.
|
|
38
|
+
const PRE_DESTROY_TIMEOUT_MS = 4_000;
|
|
39
|
+
|
|
40
|
+
// Lifecycle bus command type → the destructive boundary it triggers. Commands that leave
// the session intact (interrupt, inject, reconnect, permission decisions) have no entry.
const DESTROY_REASON_BY_COMMAND: ReadonlyMap<string, SessionDestroyReason> = new Map<string, SessionDestroyReason>([
  ["agent.compact", "compact"],
  ["agent.clearContext", "clear"],
  ["agent.restart", "restart"],
  ["agent.shutdown", "shutdown"],
  ["agent.kill", "kill"],
]);

// Resolve a bus command type to its SessionDestroyReason, or undefined when the command
// is not a destructive transition.
function boundaryReasonForCommand(type: string): SessionDestroyReason | undefined {
  return DESTROY_REASON_BY_COMMAND.get(type);
}
|
|
52
|
+
|
|
22
53
|
interface RunnerOptions {
|
|
23
54
|
provider: string;
|
|
24
55
|
model?: string;
|
|
@@ -131,6 +162,15 @@ export class AgentRunner {
|
|
|
131
162
|
private currentTokenProfileId?: string;
|
|
132
163
|
private currentTokenExpiresAt?: number;
|
|
133
164
|
private control?: ControlServer;
|
|
165
|
+
// Stage 2 (#215): the local MCP endpoint the agent connects to, fronting the relay so the
|
|
166
|
+
// Runner owns reconnect/backoff + a durable buffer. Disabled via AGENT_RELAY_MCP_PROXY=0
|
|
167
|
+
// (then the agent connects to the relay directly, Stage-1 behaviour). The proxy secret is the
|
|
168
|
+
// bearer the agent presents to the localhost proxy — it decouples the agent from the rotating
|
|
169
|
+
// relay token (the proxy injects the live token relay-side).
|
|
170
|
+
private proxy?: RelayMcpProxy;
|
|
171
|
+
private mcpProxyEndpoint?: string;
|
|
172
|
+
private readonly mcpProxyEnabled: boolean;
|
|
173
|
+
private readonly mcpProxySecret: string;
|
|
134
174
|
private process?: ManagedProcess;
|
|
135
175
|
private stopped = false;
|
|
136
176
|
private exitCommandInProgress = false;
|
|
@@ -154,7 +194,17 @@ export class AgentRunner {
|
|
|
154
194
|
// Last transcript path seen this session — used by end-of-session Insights (#184)
|
|
155
195
|
// when the SessionEnd hook payload omits it.
|
|
156
196
|
private lastTranscriptPath?: string;
|
|
157
|
-
private lifecycleAction?:
|
|
197
|
+
private lifecycleAction?: LifecycleAction;
|
|
198
|
+
// #183/#184 per-segment cursor: how many of the current session's normalized events
|
|
199
|
+
// have already been folded into an observation, and the key (transcript path / Codex
|
|
200
|
+
// session) that count belongs to. A boundary captures only events since the last one,
|
|
201
|
+
// so each datapoint is one work chunk between context resets; a key change or a shrink
|
|
202
|
+
// (transcript rotated, Codex buffer trimmed) resets the cursor.
|
|
203
|
+
private insightsObserved = 0;
|
|
204
|
+
private insightsCursorKey = "";
|
|
205
|
+
// Coalesces concurrent pre-session-destroy runs (e.g. the shutdown bus command and the
|
|
206
|
+
// SessionEnd hook both fire for the same teardown) so the cursor isn't raced.
|
|
207
|
+
private preDestroyPromise?: Promise<void>;
|
|
158
208
|
private readonly unexpectedExitTimes: number[] = [];
|
|
159
209
|
private readonly pendingMessages = new Map<number, Message>();
|
|
160
210
|
private readonly activeTaskClaims = new Map<number, ActiveTaskClaim>();
|
|
@@ -200,6 +250,8 @@ export class AgentRunner {
|
|
|
200
250
|
this.currentTokenJti = options.tokenJti;
|
|
201
251
|
this.currentTokenProfileId = options.tokenProfileId;
|
|
202
252
|
this.currentTokenExpiresAt = options.tokenExpiresAt;
|
|
253
|
+
this.mcpProxyEnabled = !["0", "false", "off"].includes((process.env.AGENT_RELAY_MCP_PROXY ?? "").trim().toLowerCase());
|
|
254
|
+
this.mcpProxySecret = crypto.randomUUID();
|
|
203
255
|
const runtime = runtimeMetadata(options.provider);
|
|
204
256
|
this.http = new RelayHttpClient({ baseUrl: options.relayUrl, token: this.currentToken });
|
|
205
257
|
this.obligationCache = new ReplyObligationCache({ fetch: () => this.http.listReplyObligations(this.agentId) });
|
|
@@ -281,9 +333,10 @@ export class AgentRunner {
|
|
|
281
333
|
onReplyObligations: () => Promise.resolve(this.obligationCache.get()),
|
|
282
334
|
onSessionTurn: (input) => this.publishSessionTurn(input),
|
|
283
335
|
onUserPrompt: (input) => this.handleUserPrompt(input),
|
|
284
|
-
|
|
336
|
+
onSessionBoundary: (input) => this.handleSessionBoundary(input),
|
|
285
337
|
onHookFatal: (report) => this.reportHookFatal(report),
|
|
286
338
|
});
|
|
339
|
+
this.startMcpProxy();
|
|
287
340
|
this.writeRunnerInfoFile();
|
|
288
341
|
this.options.adapter.onStatusChange((status) => {
|
|
289
342
|
if (this.restartInProgress || this.restartPending) return;
|
|
@@ -350,10 +403,45 @@ export class AgentRunner {
|
|
|
350
403
|
this.stopReasoningTail();
|
|
351
404
|
this.obligationCache.stop();
|
|
352
405
|
this.outbox.close();
|
|
406
|
+
this.proxy?.stop();
|
|
353
407
|
this.control?.stop();
|
|
354
408
|
await this.bus.close();
|
|
355
409
|
}
|
|
356
410
|
|
|
411
|
+
// Stage 2 (#215): bring up the runner-local MCP proxy the agent connects to in place of the
// relay. The proxy is handed the relay MCP endpoint, a getter for the runner's LIVE token,
// the per-session auth secret, a hook that parks buffered tool calls in the durable outbox,
// and whether this runner owns an isolated worktree. Best-effort: if construction or start()
// throws, both proxy fields are cleared and the agent connects to the relay directly.
private startMcpProxy(): void {
  if (!this.mcpProxyEnabled) return;
  try {
    const proxy = new RelayMcpProxy({
      relayMcpEndpoint: relayMcpEndpoint(this.options.relayUrl),
      getToken: () => this.currentToken,
      authSecret: this.mcpProxySecret,
      enqueueBuffered: ({ tool, arguments: toolArguments, idempotencyKey }) => {
        this.outbox.enqueue({
          kind: "mcp-tool-call",
          payload: { tool, arguments: toolArguments },
          idempotencyKey,
        });
      },
      initialContext: { isolatedWorktree: this.ownsIsolatedWorktree() },
    });
    // Assign before start() so the field is populated for the whole lifetime of the listener.
    this.proxy = proxy;
    const { url } = proxy.start();
    this.mcpProxyEndpoint = url;
    logger.info("mcp-proxy", `runner MCP proxy listening at ${url} (worktree=${this.ownsIsolatedWorktree()})`);
  } catch (error) {
    this.proxy = undefined;
    this.mcpProxyEndpoint = undefined;
    logger.warn("mcp-proxy", `failed to start MCP proxy; agent will connect to the relay directly: ${errMessage(error)}`);
  }
}
|
|
439
|
+
|
|
440
|
+
// True when the workspace mode resolves to "isolated". Resolution order:
// `workspace.requestedMode`, then `workspace.mode`, then the AGENT_RELAY_WORKSPACE_MODE env var.
private ownsIsolatedWorktree(): boolean {
  const workspace = this.options.workspace;
  const resolvedMode = workspace?.requestedMode ?? workspace?.mode ?? process.env.AGENT_RELAY_WORKSPACE_MODE;
  return resolvedMode === "isolated";
}
|
|
444
|
+
|
|
357
445
|
private async spawnProvider(): Promise<ManagedProcess> {
|
|
358
446
|
this.providerSessionId = crypto.randomUUID();
|
|
359
447
|
this.lastTranscriptPath = undefined;
|
|
@@ -369,11 +457,18 @@ export class AgentRunner {
|
|
|
369
457
|
AGENT_RELAY_URL: this.options.relayUrl,
|
|
370
458
|
AGENT_RELAY_APPROVAL: this.options.approvalMode,
|
|
371
459
|
...(this.currentToken ? { AGENT_RELAY_TOKEN: this.currentToken } : {}),
|
|
372
|
-
// Dedicated, un-clobberable credential for the injected
|
|
460
|
+
// Dedicated, un-clobberable credential for the injected MCP endpoint. A rig's
|
|
373
461
|
// settings.json `env.AGENT_RELAY_TOKEN` would override the scoped token above at
|
|
374
462
|
// MCP-parse time → server-actor auth, no identity (#233). The MCP config references
|
|
375
463
|
// ${AGENT_RELAY_SESSION_TOKEN}, which rigs never set. See runner/src/relay-mcp.ts.
|
|
376
|
-
|
|
464
|
+
//
|
|
465
|
+
// Stage 2 (#215): when the proxy is active the agent connects to the LOCAL proxy, so this
|
|
466
|
+
// holds the per-session PROXY SECRET (not the relay token). The proxy injects the live
|
|
467
|
+
// relay token itself — the agent never holds it, and token rotation is invisible. With the
|
|
468
|
+
// proxy disabled this stays the scoped relay token (Stage-1 direct connection).
|
|
469
|
+
...(this.proxy
|
|
470
|
+
? { [RELAY_MCP_TOKEN_ENV]: this.mcpProxySecret }
|
|
471
|
+
: (this.currentToken ? { [RELAY_MCP_TOKEN_ENV]: this.currentToken } : {})),
|
|
377
472
|
...(this.currentTokenJti ? { AGENT_RELAY_TOKEN_JTI: this.currentTokenJti } : {}),
|
|
378
473
|
...(this.currentTokenProfileId ? { AGENT_RELAY_TOKEN_PROFILE: this.currentTokenProfileId } : {}),
|
|
379
474
|
...(this.currentTokenExpiresAt ? { AGENT_RELAY_TOKEN_EXPIRES_AT: String(this.currentTokenExpiresAt) } : {}),
|
|
@@ -400,6 +495,9 @@ export class AgentRunner {
|
|
|
400
495
|
providerConfig: this.options.providerConfig,
|
|
401
496
|
env,
|
|
402
497
|
controlPort: this.control!.port,
|
|
498
|
+
// Stage 2 (#215): the MCP endpoint the agent's client should target — the runner-local
|
|
499
|
+
// proxy when active, undefined when disabled (adapters fall back to the direct relay URL).
|
|
500
|
+
...(this.mcpProxyEndpoint ? { relayMcpEndpoint: this.mcpProxyEndpoint } : {}),
|
|
403
501
|
monitor: {
|
|
404
502
|
deliver: (messages) => this.control!.deliverToMonitor(messages),
|
|
405
503
|
},
|
|
@@ -537,17 +635,22 @@ export class AgentRunner {
|
|
|
537
635
|
if (type !== "agent.shutdown" && type !== "agent.restart" && type !== "agent.reconnect" && type !== "agent.kill" && type !== "agent.compact" && type !== "agent.clearContext" && type !== "agent.injectContext" && type !== "agent.permissionDecision" && type !== "agent.interrupt" && type !== "prompt.inject") return;
|
|
538
636
|
|
|
539
637
|
const exitAfterCommand = type === "agent.shutdown" || type === "agent.kill";
|
|
540
|
-
if (exitAfterCommand)
|
|
541
|
-
this.exitCommandInProgress = true;
|
|
542
|
-
this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
|
|
543
|
-
} else if (type === "agent.restart") {
|
|
544
|
-
this.lifecycleAction = "restarting";
|
|
545
|
-
}
|
|
638
|
+
if (exitAfterCommand) this.exitCommandInProgress = true;
|
|
546
639
|
this.claims.startClaim("command", commandId);
|
|
547
|
-
this.publishStatus();
|
|
548
640
|
try {
|
|
549
641
|
await this.updateCommand(commandId, "accepted");
|
|
550
642
|
await this.updateCommand(commandId, "running");
|
|
643
|
+
// Pre-session-destroy seam (#183): for destructive transitions, run end-of-session
|
|
644
|
+
// work (Insights capture, #184) BEFORE the invasive operation, surfaced as a
|
|
645
|
+
// non-addressable "finalizing" state so the agent isn't mistaken for merely busy.
|
|
646
|
+
const destroyReason = boundaryReasonForCommand(type);
|
|
647
|
+
if (destroyReason) await this.runPreSessionDestroy(destroyReason);
|
|
648
|
+
// Move from the transient finalizing window to the executing teardown state (or drop
|
|
649
|
+
// it entirely for compact/clear, which complete promptly once capture is done).
|
|
650
|
+
if (exitAfterCommand) this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
|
|
651
|
+
else if (type === "agent.restart") this.lifecycleAction = "restarting";
|
|
652
|
+
else this.lifecycleAction = undefined;
|
|
653
|
+
this.publishStatus();
|
|
551
654
|
let providerResult: Record<string, unknown> | void = undefined;
|
|
552
655
|
if (type === "agent.restart") await this.restartProvider();
|
|
553
656
|
else if (type === "agent.reconnect") this.publishStatus();
|
|
@@ -1057,6 +1160,10 @@ export class AgentRunner {
|
|
|
1057
1160
|
});
|
|
1058
1161
|
return;
|
|
1059
1162
|
}
|
|
1163
|
+
if (record.kind === "mcp-tool-call") {
|
|
1164
|
+
await this.deliverBufferedMcpCall(record);
|
|
1165
|
+
return;
|
|
1166
|
+
}
|
|
1060
1167
|
logger.warn("outbox", `dropping event with unknown kind: ${record.kind}`);
|
|
1061
1168
|
} catch (error) {
|
|
1062
1169
|
// 409 = the server intentionally rejected it (e.g. Insights/feature toggled off). That
|
|
@@ -1067,6 +1174,40 @@ export class AgentRunner {
|
|
|
1067
1174
|
}
|
|
1068
1175
|
}
|
|
1069
1176
|
|
|
1177
|
+
// Replay one buffered MCP tool call (Stage 2, #215) that the proxy queued while the relay was
// unreachable. The call is POSTed to the relay MCP endpoint as a JSON-RPC `tools/call`,
// authenticated with the runner's LIVE token when one is set.
//
// Contract with the outbox: throwing keeps the record queued for a retry; returning acks it.
//   - malformed record (no tool name): can never be replayed — log and ack.
//   - 401/403: start runtime-token recovery, then throw so the call is retried.
//   - 408/429/5xx: transient relay conditions — throw so the call is retried.
//   - any other non-2xx: permanent rejection — log loudly and ack.
//   - 2xx: ack; a JSON-RPC `error` or an MCP `result.isError` is logged because the
//     queued write did not take effect.
// Per the original design note, the proxy stamps an idempotencyKey into the arguments so a
// retry that already landed server-side is deduped rather than double-sent.
// NOTE(review): the MCP Streamable HTTP transport asks clients to send
// `Accept: application/json, text/event-stream`; confirm the relay endpoint does not need it.
private async deliverBufferedMcpCall(record: OutboxRecord): Promise<void> {
  const payload = record.payload as { tool?: unknown; arguments?: unknown } | null | undefined;
  const tool = payload?.tool;
  if (typeof tool !== "string" || tool === "") {
    // Without a tool name the request cannot be built; retrying cannot succeed.
    logger.warn("mcp-outbox", "buffered MCP call has no tool name; dropping");
    return;
  }

  const headers: Record<string, string> = { "content-type": "application/json" };
  if (this.currentToken) headers.authorization = `Bearer ${this.currentToken}`;
  const response = await fetch(relayMcpEndpoint(this.options.relayUrl), {
    method: "POST",
    headers,
    body: JSON.stringify({ jsonrpc: "2.0", id: 1, method: "tools/call", params: { name: tool, arguments: payload?.arguments } }),
  });

  const { status } = response;
  if (status === 401 || status === 403) {
    this.recoverRuntimeTokenAfterAuthFailure("mcp-outbox");
    throw new Error(`relay rejected buffered ${tool} with ${status}`);
  }
  // 408 (request timeout) and 429 (rate limited) are retryable just like a 5xx: the request
  // itself was fine, the relay could not serve it right now.
  if (status >= 500 || status === 408 || status === 429) {
    throw new Error(`relay ${status} on buffered ${tool}`);
  }
  if (!response.ok) {
    // A remaining 4xx (e.g. target gone, validation) is a permanent rejection — retrying won't
    // help. Ack so it doesn't block the queue, but log loudly: a queued write did not land.
    const body = await response.text().catch(() => "");
    logger.warn("mcp-outbox", `buffered ${tool} permanently rejected (${status}); dropping: ${body.slice(0, 200)}`);
    return;
  }

  // HTTP 2xx, but the body may still report a failure: a JSON-RPC protocol `error`, or an MCP
  // tool result flagged `isError`. Both are treated as permanent — ack and log, don't loop.
  const json = await response.json().catch(() => null) as
    | { error?: { message?: string }; result?: { isError?: boolean } }
    | null;
  if (json?.error) {
    logger.warn("mcp-outbox", `buffered ${tool} returned a tool error; dropping: ${json.error.message ?? "(no detail)"}`);
  } else if (json?.result?.isError === true) {
    logger.warn("mcp-outbox", `buffered ${tool} returned an error result (isError); dropping`);
  }
}
|
|
1210
|
+
|
|
1070
1211
|
// A hook reported an unhandled failure (#198 seam). Already logged FATAL by the control
|
|
1071
1212
|
// server; here we additionally surface it durably to the server as a generic insight so
|
|
1072
1213
|
// it shows up in observability rather than only in the per-agent log (#196).
|
|
@@ -1110,26 +1251,71 @@ export class AgentRunner {
|
|
|
1110
1251
|
if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
|
|
1111
1252
|
}
|
|
1112
1253
|
|
|
1113
|
-
//
|
|
1114
|
-
//
|
|
1115
|
-
//
|
|
1116
|
-
//
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1254
|
+
// A provider lifecycle hook reported a session boundary (Claude PreCompact / SessionEnd
// → control server). The raw provider reason is normalized to a SessionDestroyReason —
// "compact" and "clear" map to themselves, anything else (logout, prompt_input_exit, other)
// is treated as "shutdown" — and the same pre-destroy seam the bus commands use is run.
//
// `compact` / `clear` continue the session, so once the capture is done the transient
// `finalizing-<reason>` state set by publishFinalizing is dropped and the status is
// republished, mirroring what the bus-command path does for those two commands. Without
// this the hook path would leave the agent published as finalizing / not ready.
// NOTE(review): assumes nothing outside this block already clears the finalizing state
// after a hook-driven compact/clear — confirm.
private async handleSessionBoundary(input: { reason?: string; transcriptPath?: string }): Promise<void> {
  const reason: SessionDestroyReason = input.reason === "compact" ? "compact"
    : input.reason === "clear" ? "clear"
    : "shutdown";
  await this.runPreSessionDestroy(reason, { transcriptPath: input.transcriptPath });
  // Only undo the state this boundary itself would have set: if the call coalesced into a
  // concurrent teardown (e.g. finalizing-shutdown) or a command handler already moved on,
  // lifecycleAction no longer matches and is left alone.
  if (reason !== "shutdown" && this.lifecycleAction === `finalizing-${reason}`) {
    this.lifecycleAction = undefined;
    this.publishStatus();
  }
}
|
|
1264
|
+
|
|
1265
|
+
// The pre-session-destroy seam (#183): the single place end-of-session work runs before
// an invasive transition (compact/clear/restart/shutdown/kill).
//
//   - Publishes the `finalizing-<reason>` state first, so the agent is shown as wrapping up.
//   - Runs the Insights capture raced against PRE_DESTROY_TIMEOUT_MS so it can never hang
//     teardown. The timer is cleared as soon as the race settles, so a fast capture does not
//     leave a pending timer holding the event loop open during shutdown.
//   - Best-effort: a capture failure or timeout is logged to the session log, never thrown.
//   - Concurrent calls coalesce: while a run is in flight, later callers get the same promise
//     regardless of the reason they passed (a shutdown bus command and the SessionEnd hook
//     can both fire for one teardown).
private runPreSessionDestroy(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
  if (this.preDestroyPromise) return this.preDestroyPromise;
  const run = (async () => {
    this.publishFinalizing(reason);
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      const timedOut = await Promise.race([
        this.captureContextRatio(reason, opts).then(() => false),
        new Promise<boolean>((resolve) => {
          timer = setTimeout(() => resolve(true), PRE_DESTROY_TIMEOUT_MS);
        }),
      ]);
      if (timedOut) {
        this.sessionLog(`insights: pre-destroy capture timed out after ${PRE_DESTROY_TIMEOUT_MS}ms; continuing`);
      }
    } catch (error) {
      this.sessionLog(`insights: pre-destroy capture failed: ${errMessage(error)}`);
    } finally {
      // No-op when the timer already fired; otherwise releases it immediately.
      clearTimeout(timer);
    }
  })();
  this.preDestroyPromise = run;
  // Release the coalescing slot once this run settles so the next boundary starts fresh.
  void run.finally(() => { this.preDestroyPromise = undefined; });
  return run;
}
|
|
1287
|
+
|
|
1288
|
+
// Publish the transient pre-destroy state: a non-offline status with ready:false (so the
// agent drops out of isAgentOnline fan-out targeting without going "offline") plus a
// finalizing-<reason> lifecycleAction the dashboard renders as "wrapping up" with the
// composer disabled. Also records the action on `this.lifecycleAction`.
//
// The publish is fire-and-forget; a rejection is logged instead of being left unhandled,
// so a flaky bus during teardown cannot surface as an unhandled promise rejection.
// NOTE(review): assumes bus.statusAsync returns a promise — confirm.
private publishFinalizing(reason: SessionDestroyReason): void {
  const action: LifecycleAction = `finalizing-${reason}`;
  this.lifecycleAction = action;
  void this.bus
    .statusAsync({ agentStatus: "busy", ready: false, meta: { lifecycleAction: action, lifecycleActionAt: Date.now() } })
    .catch((error: unknown) => {
      this.sessionLog(`lifecycle: failed to publish ${action}: ${errMessage(error)}`);
    });
}
|
|
1296
|
+
|
|
1297
|
+
// Compute the #184 context-gathering ratio for the segment since the last boundary and
|
|
1298
|
+
// queue it (durable outbox, #196). Provider-agnostic: the adapter normalizes its session
|
|
1299
|
+
// into the shared SessionEvent stream; the math + classifier live in session-insights.ts.
|
|
1300
|
+
// Per-segment via a runner-side cursor, so each datapoint is one work chunk between
|
|
1301
|
+
// context resets. Mechanical, model-free → zero agent tokens, un-gameable.
|
|
1302
|
+
private async captureContextRatio(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
|
|
1303
|
+
const adapter = this.options.adapter;
|
|
1304
|
+
if (!adapter.collectSessionEvents || !this.process) return;
|
|
1305
|
+
const transcriptPath = opts?.transcriptPath ?? this.lastTranscriptPath;
|
|
1306
|
+
const events = await adapter.collectSessionEvents(this.process, { transcriptPath });
|
|
1307
|
+
if (!events) return;
|
|
1308
|
+
// Reset the cursor when the underlying log changed identity (transcript rotated on
|
|
1309
|
+
// resume) or shrank (Codex buffer trimmed) — otherwise the slice would be wrong.
|
|
1310
|
+
const key = transcriptPath ?? `session:${this.providerSessionId}`;
|
|
1311
|
+
if (key !== this.insightsCursorKey || events.length < this.insightsObserved) {
|
|
1312
|
+
this.insightsCursorKey = key;
|
|
1313
|
+
this.insightsObserved = 0;
|
|
1128
1314
|
}
|
|
1129
|
-
const
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1315
|
+
const segment = events.slice(this.insightsObserved);
|
|
1316
|
+
this.insightsObserved = events.length;
|
|
1317
|
+
const analysis = computeContextRatio(segment);
|
|
1318
|
+
if (!analysis) return; // no tool calls this segment = nothing substantive to measure
|
|
1133
1319
|
this.outbox.enqueue({
|
|
1134
1320
|
kind: "insight",
|
|
1135
1321
|
payload: {
|
|
@@ -1137,12 +1323,12 @@ export class AgentRunner {
|
|
|
1137
1323
|
project: this.options.cwd,
|
|
1138
1324
|
agentId: this.agentId,
|
|
1139
1325
|
signal: "context_ratio",
|
|
1140
|
-
value: { ...analysis.metric,
|
|
1326
|
+
value: { ...analysis.metric, endReason: reason },
|
|
1141
1327
|
outcome: { ...analysis.outcome },
|
|
1142
1328
|
source: "server",
|
|
1143
1329
|
},
|
|
1144
1330
|
});
|
|
1145
|
-
this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering) queued`);
|
|
1331
|
+
this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering, ${reason}) queued`);
|
|
1146
1332
|
}
|
|
1147
1333
|
|
|
1148
1334
|
// Route a provider-emitted session event (Codex app-server) into the chat mirror.
|
|
@@ -1607,6 +1793,10 @@ export class AgentRunner {
|
|
|
1607
1793
|
this.options.tokenExpiresAt = this.currentTokenExpiresAt;
|
|
1608
1794
|
this.http.setToken(token);
|
|
1609
1795
|
this.bus.setToken(token);
|
|
1796
|
+
// The proxy reads the token live via getToken(), so forwarding already uses the new one.
|
|
1797
|
+
// A re-mint can change scope (e.g. a profile change), so refresh the relay tool list and
|
|
1798
|
+
// emit tools/list_changed if the visible set changed (#215 — token-scope transition).
|
|
1799
|
+
void this.proxy?.refreshTools().catch(() => {});
|
|
1610
1800
|
this.httpLivenessAuthFailed = false;
|
|
1611
1801
|
this.reactiveTokenRecoveryAt = undefined;
|
|
1612
1802
|
// An earlier auth failure may have stopped the liveness loop; restart it so the
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// Provider-agnostic core for the #184 context-gathering signal (epic #183).
|
|
2
|
+
//
|
|
3
|
+
// The transcript *format* is provider-specific (Claude JSONL, Codex app-server items,
|
|
4
|
+
// future providers), so each adapter normalizes its session into the same `SessionEvent`
|
|
5
|
+
// stream via `collectSessionEvents`. Everything downstream — the gathering/action
|
|
6
|
+
// classifier and the ratio math — lives here once and is shared, so a tool reclassified
|
|
7
|
+
// for one provider is reclassified for all, and a new provider only implements the
|
|
8
|
+
// normalization.
|
|
9
|
+
//
|
|
10
|
+
// The classifier is model-free and runs in the runner, so it costs zero agent tokens and
|
|
11
|
+
// the agent can't game it.
|
|
12
|
+
|
|
13
|
+
// A normalized, ordered session event, consumed by `computeContextRatio`. Order is
|
|
14
|
+
// significant: `leadingGather` counts the run of gathering tools before the first action.
|
|
15
|
+
export type SessionEvent =
|
|
16
|
+
// A tool invocation. Gathering-vs-action is decided in this module by `isGatheringTool(name)`.
|
|
17
|
+
| { type: "tool"; name: string }
|
|
18
|
+
// A failed tool result, tallied as `toolErrors` (paired outcome proxy — failures/workarounds the agent hit).
|
|
19
|
+
| { type: "tool_error" }
|
|
20
|
+
// A real user prompt, tallied as `userPrompts` (paired outcome proxy — more back-and-forth ~ clarification/correction).
|
|
21
|
+
| { type: "user_prompt" }
|
|
22
|
+
// A substantive assistant turn (one that produced text or a tool call), tallied as `turns`.
|
|
23
|
+
| { type: "turn" };
|
|
24
|
+
|
|
25
|
+
// Built-in tools that only acquire context and change nothing, matched by exact name.
// Every tool that matches neither this set nor the name pattern below counts as an action
// (mutation, execution, or a delegation/direction decision). Bash is deliberately an action
// because it executes — a conservative v0 choice that misclassifies `cat`/`ls` run via Bash.
const GATHERING_TOOLS: ReadonlySet<string> = new Set<string>([
  "Read",
  "Grep",
  "Glob",
  "LS",
  "NotebookRead",
  "WebFetch",
  "WebSearch",
]);

// Name-shape heuristic for MCP / custom tools: a read-style verb at the start of the name or
// directly after a `.`, `_` or `-` separator, case-insensitive.
const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;

/**
 * Classify a tool name as context-gathering (true) or action (false).
 *
 * Exact built-in names are checked first; anything else is classified by name shape
 * (e.g. `mcp__callmux__searxng_web_search` matches on `_search`).
 */
export function isGatheringTool(name: string): boolean {
  return GATHERING_TOOLS.has(name) || GATHERING_NAME.test(name);
}
|
|
39
|
+
|
|
40
|
+
export interface ContextRatioMetric {
|
|
41
|
+
/** Gathering fraction for the measured event segment: gatheringCalls / totalToolCalls. The headline metric. */
|
|
42
|
+
ratio: number;
|
|
43
|
+
gatheringCalls: number;
|
|
44
|
+
actionCalls: number;
|
|
45
|
+
totalToolCalls: number;
|
|
46
|
+
/** Gathering calls seen before the first action call — the "read N files before moving" signal. */
|
|
47
|
+
leadingGather: number;
|
|
48
|
+
/** Substantive assistant turns (turns that produced text or a tool call). */
|
|
49
|
+
turns: number;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export interface SessionOutcomeProxy {
|
|
53
|
+
/** Count of `user_prompt` events in the measured segment — more back-and-forth ~ more clarification/correction. */
|
|
54
|
+
userPrompts: number;
|
|
55
|
+
/** Count of `tool_error` events (failed tool results) — failures/workarounds the agent hit. */
|
|
56
|
+
toolErrors: number;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/** Result of `computeContextRatio`: the context-gathering metric plus its paired outcome proxies. */
export interface SessionAnalysis {
|
|
60
|
+
metric: ContextRatioMetric;
|
|
61
|
+
outcome: SessionOutcomeProxy;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
 * Fold a normalized event stream into the context-gathering ratio and its paired outcome
 * proxies.
 *
 * Tool events are split into gathering vs. action via `isGatheringTool`; `leadingGather`
 * counts only the gathering calls seen before the first action. `user_prompt`,
 * `tool_error` and `turn` events are simple tallies.
 *
 * Callers pass only the events since the last capture boundary
 * (compact/clear/restart/shutdown), so each result describes one work chunk.
 *
 * @param events Ordered events for the segment being measured.
 * @returns The analysis, or null when the segment contains no tool calls — trivial
 *   segments have nothing to learn from and shouldn't pollute the baselines.
 */
export function computeContextRatio(events: SessionEvent[]): SessionAnalysis | null {
  const outcome: SessionOutcomeProxy = { userPrompts: 0, toolErrors: 0 };
  let gathering = 0;
  let actions = 0;
  let gatherBeforeFirstAction = 0;
  let substantiveTurns = 0;

  for (const ev of events) {
    if (ev.type === "tool") {
      if (!isGatheringTool(ev.name)) {
        actions += 1;
        continue;
      }
      gathering += 1;
      // Still in the leading run as long as no action has been recorded yet.
      if (actions === 0) gatherBeforeFirstAction += 1;
    } else if (ev.type === "turn") {
      substantiveTurns += 1;
    } else if (ev.type === "tool_error") {
      outcome.toolErrors += 1;
    } else if (ev.type === "user_prompt") {
      outcome.userPrompts += 1;
    }
  }

  const total = gathering + actions;
  if (total === 0) return null;

  const metric: ContextRatioMetric = {
    ratio: gathering / total,
    gatheringCalls: gathering,
    actionCalls: actions,
    totalToolCalls: total,
    leadingGather: gatherBeforeFirstAction,
    turns: substantiveTurns,
  };
  return { metric, outcome };
}
|