agent-relay-runner 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.23.0",
3
+ "version": "0.24.0",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.23.0",
4
+ "version": "0.24.0",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
@@ -3,4 +3,10 @@ set -euo pipefail
3
3
  source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
4
  relay_install_hook_guard pre-compact
5
5
 
6
+ payload="$(cat || true)"
7
+ transcript_path="$(relay_json_string_field transcript_path "$payload")"
8
+
6
9
  relay_post_timeline_status busy provider-turn "" compacting
10
+ # Pre-destroy seam (#183): compaction is about to discard context — capture this segment's
11
+ # Insights (#184) from the full transcript first. The most common boundary in practice.
12
+ relay_post_session_boundary "$transcript_path" compact
@@ -72,10 +72,11 @@ relay_post_user_prompt() {
72
72
  -d "$body" >/dev/null 2>&1 || true
73
73
  }
74
74
 
75
- relay_post_session_end() {
76
- # Insights #184: tell the runner the session ended so it can compute the
77
- # end-of-session context-gathering ratio from the full transcript. Fire-and-forget;
78
- # the transcript path is optional (the runner falls back to the last path it saw).
75
+ relay_post_session_boundary() {
76
+ # Pre-destroy seam (#183): tell the runner a context reset or termination is imminent
77
+ # (compact/clear/logout/…) so it can run end-of-session work — the #184 context-gathering
78
+ # ratio capture from the full transcript before the invasive op. Fire-and-forget; the
79
+ # transcript path is optional (the runner falls back to the last path it saw).
79
80
  local transcript_path="${1:-}"
80
81
  local reason="${2:-}"
81
82
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
@@ -87,7 +88,7 @@ relay_post_session_end() {
87
88
  body="${body}\"reason\":\"$(relay_json_escape "$reason")\""
88
89
  fi
89
90
  body="${body}}"
90
- curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-end" \
91
+ curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-boundary" \
91
92
  -H 'Content-Type: application/json' \
92
93
  -d "$body" >/dev/null 2>&1 || true
93
94
  }
@@ -10,14 +10,17 @@ transcript_path="$(relay_json_string_field transcript_path "$payload")"
10
10
  case "$reason" in
11
11
  clear)
12
12
  relay_post_timeline_status idle provider-turn subagent clearing-context
13
+ # /clear wipes the context — the most common work boundary for a long-lived agent.
14
+ # Capture this segment's Insights (#183/#184) before it's gone.
15
+ relay_post_session_boundary "$transcript_path" clear
13
16
  ;;
14
17
  resume)
15
18
  relay_post_status_clearing_subagents idle
16
19
  ;;
17
20
  logout|prompt_input_exit|bypass_permissions_disabled|other|*)
18
21
  relay_post_status_clearing_subagents offline
19
- # Real session termination: capture end-of-session Insights (#184). Order after the
22
+ # Real session termination: capture end-of-session Insights (#183/#184). Order after the
20
23
  # status post is arbitrary — the runner reads the transcript file regardless.
21
- relay_post_session_end "$transcript_path" "$reason"
24
+ relay_post_session_boundary "$transcript_path" "$reason"
22
25
  ;;
23
26
  esac
package/src/adapter.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import type { AgentProfile, Message } from "agent-relay-sdk";
2
2
  import { isRecord } from "agent-relay-sdk";
3
+ import type { SessionEvent } from "./session-insights";
3
4
 
4
5
  export type SemanticStatus = "idle" | "busy" | "offline" | "error";
5
6
  type ProviderWorkKind = "provider-turn" | "subagent";
@@ -133,6 +134,15 @@ export interface ProviderAdapter {
133
134
  shutdown(process: ManagedProcess, opts: { graceful: boolean; timeoutMs: number }): Promise<void>;
134
135
  compact?(process: ManagedProcess): Promise<Record<string, unknown> | void>;
135
136
  clearContext?(process: ManagedProcess): Promise<Record<string, unknown> | void>;
137
+ // Normalize the session so far into the provider-agnostic SessionEvent stream the
138
+ // Insights context-ratio signal (#183/#184) reduces. Called by the runner's
139
+ // pre-session-destroy seam before any compact/clear/restart/shutdown. The runner owns
140
+ // the per-segment cursor (it slices events since the last capture), so this returns the
141
+ // full ordered event list for the current process lifetime. `ctx.transcriptPath` is
142
+ // supplied for transcript-backed providers (Claude); event-stream providers (Codex)
143
+ // ignore it and return their accumulated log. Return null when there is nothing to
144
+ // measure. Best-effort: may be omitted by providers without a session view yet.
145
+ collectSessionEvents?(process: ManagedProcess, ctx: { transcriptPath?: string }): Promise<SessionEvent[] | null>;
136
146
  // Interrupt the in-flight turn without ending the session (ESC for Claude's
137
147
  // tmux pane, turn/interrupt for the Codex app-server). Provider-independent at
138
148
  // the runner boundary; each adapter does what its provider actually supports.
@@ -10,6 +10,8 @@
10
10
  // entry carrying text, not just tool_result blocks). We collect the assistant
11
11
  // `text` blocks from that turn — thinking and tool_use are dropped.
12
12
 
13
+ import { computeContextRatio, type SessionAnalysis, type SessionEvent } from "../session-insights";
14
+
13
15
  interface TranscriptBlock {
14
16
  type?: string;
15
17
  text?: string;
@@ -194,59 +196,14 @@ export function summarizeToolUse(name: string, input: Record<string, unknown> |
194
196
  // paired with cheap outcome proxies (user re-prompts, tool errors) so it's never read
195
197
  // alone — see the anti-Goodhart constraint in the epic.
196
198
 
197
- // Tools that acquire context without changing anything. Anything not matched here is
198
- // treated as an action (mutation, execution, or a delegation/direction decision) —
199
- // Bash counts as an action because it executes (a conservative, documented choice for
200
- // v0; `cat`/`ls` via Bash are misclassified, refine later if the data warrants it).
201
- const GATHERING_TOOLS = new Set([
202
- "Read", "Grep", "Glob", "LS", "NotebookRead", "WebFetch", "WebSearch",
203
- ]);
204
- const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;
205
-
206
- function isGatheringTool(name: string): boolean {
207
- if (GATHERING_TOOLS.has(name)) return true;
208
- // MCP / custom tools: classify by name shape (e.g. mcp__callmux__searxng_web_search).
209
- return GATHERING_NAME.test(name);
210
- }
211
-
212
- export interface ContextRatioMetric {
213
- /** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
214
- ratio: number;
215
- gatheringCalls: number;
216
- actionCalls: number;
217
- totalToolCalls: number;
218
- /** Consecutive gathering calls before the first action — the "read N files before moving" signal. */
219
- leadingGather: number;
220
- /** Substantive assistant turns (turns that produced text or a tool call). */
221
- turns: number;
222
- }
223
-
224
- export interface SessionOutcomeProxy {
225
- /** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
226
- userPrompts: number;
227
- /** tool_result blocks flagged is_error — failures/workarounds the agent hit. */
228
- toolErrors: number;
229
- }
230
-
231
- export interface SessionAnalysis {
232
- metric: ContextRatioMetric;
233
- outcome: SessionOutcomeProxy;
234
- }
235
-
236
199
  /**
237
- * Walk the full transcript and compute the context-gathering ratio plus paired outcome
238
- * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
239
- * trivial sessions have nothing to learn from and shouldn't pollute the baselines.
200
+ * Normalize a full Claude transcript into the provider-agnostic `SessionEvent` stream
201
+ * (#183/#184). The classifier and ratio math live in `session-insights.ts` and are shared
202
+ * across providers; this only knows the Claude JSONL shape. Events are emitted in
203
+ * transcript order so `leadingGather` is meaningful.
240
204
  */
241
- export function analyzeSession(jsonl: string): SessionAnalysis | null {
242
- let gatheringCalls = 0;
243
- let actionCalls = 0;
244
- let leadingGather = 0;
245
- let sawAction = false;
246
- let userPrompts = 0;
247
- let toolErrors = 0;
248
- let turns = 0;
249
-
205
+ export function collectClaudeSessionEvents(jsonl: string): SessionEvent[] {
206
+ const events: SessionEvent[] = [];
250
207
  for (const line of jsonl.split("\n")) {
251
208
  const trimmed = line.trim();
252
209
  if (!trimmed) continue;
@@ -256,10 +213,10 @@ export function analyzeSession(jsonl: string): SessionAnalysis | null {
256
213
  } catch {
257
214
  continue;
258
215
  }
259
- if (isRealUserPrompt(entry)) userPrompts++;
216
+ if (isRealUserPrompt(entry)) events.push({ type: "user_prompt" });
260
217
  if (entry.type === "user") {
261
218
  for (const b of blocks(entry.message)) {
262
- if (b.type === "tool_result" && b.is_error === true) toolErrors++;
219
+ if (b.type === "tool_result" && b.is_error === true) events.push({ type: "tool_error" });
263
220
  }
264
221
  continue;
265
222
  }
@@ -269,31 +226,20 @@ export function analyzeSession(jsonl: string): SessionAnalysis | null {
269
226
  if (b.type === "text" && b.text?.trim()) producedSomething = true;
270
227
  if (b.type !== "tool_use" || typeof b.name !== "string" || !b.name) continue;
271
228
  producedSomething = true;
272
- if (isGatheringTool(b.name)) {
273
- gatheringCalls++;
274
- if (!sawAction) leadingGather++;
275
- } else {
276
- actionCalls++;
277
- sawAction = true;
278
- }
229
+ events.push({ type: "tool", name: b.name });
279
230
  }
280
- if (producedSomething) turns++;
231
+ if (producedSomething) events.push({ type: "turn" });
281
232
  }
233
+ return events;
234
+ }
282
235
 
283
- const totalToolCalls = gatheringCalls + actionCalls;
284
- if (totalToolCalls === 0) return null;
285
-
286
- return {
287
- metric: {
288
- ratio: gatheringCalls / totalToolCalls,
289
- gatheringCalls,
290
- actionCalls,
291
- totalToolCalls,
292
- leadingGather,
293
- turns,
294
- },
295
- outcome: { userPrompts, toolErrors },
296
- };
236
+ /**
237
+ * Walk the full transcript and compute the context-gathering ratio plus paired outcome
238
+ * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
239
+ * trivial sessions have nothing to learn from and shouldn't pollute the baselines.
240
+ */
241
+ export function analyzeSession(jsonl: string): SessionAnalysis | null {
242
+ return computeContextRatio(collectClaudeSessionEvents(jsonl));
297
243
  }
298
244
 
299
245
  /** Count substantive assistant turns — used by the #185 introspection gate. */
@@ -1,4 +1,5 @@
1
1
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
2
+ import { readFile } from "node:fs/promises";
2
3
  import { homedir, tmpdir } from "node:os";
3
4
  import { join, resolve } from "node:path";
4
5
  import type { Message } from "agent-relay-sdk";
@@ -6,6 +7,8 @@ import { shellEscape as shellQuote } from "agent-relay-sdk/shell-utils";
6
7
  import { tmuxCommand, tmuxHasSession } from "agent-relay-sdk/tmux-utils";
7
8
  import { sanitizeFsName } from "agent-relay-sdk/fs-name";
8
9
  import { profileAllowsRelayFeature, type ManagedProcess, type ProviderAdapter, type ProviderConfig, type ProviderStatusUpdate, type RunnerSpawnConfig, type SemanticStatus, type SpawnArgs } from "../adapter";
10
+ import { collectClaudeSessionEvents } from "./claude-transcript";
11
+ import type { SessionEvent } from "../session-insights";
9
12
  import { prepareClaudeProfileHome, profileUsesHostProviderGlobals } from "../profile-home";
10
13
  import { relayMcpClaudeConfigArg } from "../relay-mcp";
11
14
  import { claudeProviderMessageText } from "./claude-delivery";
@@ -64,6 +67,19 @@ export class ClaudeAdapter implements ProviderAdapter {
64
67
  return { method: "tmux-inject", command: "/clear" };
65
68
  }
66
69
 
70
+ // #183/#184: parse the full Claude transcript into the shared SessionEvent stream. The
71
+ // runner slices per-segment, so we return the whole transcript's events each call.
72
+ async collectSessionEvents(_process: ManagedProcess, ctx: { transcriptPath?: string }): Promise<SessionEvent[] | null> {
73
+ if (!ctx.transcriptPath) return null;
74
+ let jsonl: string;
75
+ try {
76
+ jsonl = await readFile(ctx.transcriptPath, "utf8");
77
+ } catch {
78
+ return null;
79
+ }
80
+ return collectClaudeSessionEvents(jsonl);
81
+ }
82
+
67
83
  async interrupt(process: ManagedProcess): Promise<Record<string, unknown>> {
68
84
  const session = process.meta?.tmuxSession as string | undefined;
69
85
  const socket = process.meta?.tmuxSocket as string | undefined;
@@ -8,6 +8,7 @@ import { profileAllowsRelayFeature, providerMessageText, RELAY_CONTEXT, type Man
8
8
  import { workspaceDepsNoteFromEnv } from "../relay-instructions";
9
9
  import { relayMcpCodexConfigArgs, tomlString } from "../relay-mcp";
10
10
  import { logger } from "../logger";
11
+ import type { SessionEvent } from "../session-insights";
11
12
 
12
13
  /** Relay context prepended to a Codex agent's first turn: the standard relay
13
14
  * blurb plus, when running in an isolated workspace, the deps caveat (#159). */
@@ -41,6 +42,13 @@ export class CodexAdapter implements ProviderAdapter {
41
42
  private turnMessages: string[] = [];
42
43
  private readonly itemTextBuffers = new Map<string, string>();
43
44
  private captureMode: "final" | "full" = "final";
45
+ // #183/#184: the normalized session-event log for the current process lifetime, fed
46
+ // from the same completed-item stream that drives the chat mirror. The runner slices
47
+ // this per-segment (since the last compact/clear/restart) via its own cursor, so we
48
+ // accumulate and never clear mid-session; spawn() resets it for a fresh process. Soft
49
+ // cap below keeps a runaway session from growing memory unbounded.
50
+ private sessionEvents: SessionEvent[] = [];
51
+ private static readonly SESSION_EVENTS_CAP = 50_000;
44
52
 
45
53
  onStatusChange(cb: (status: ProviderStatusUpdate) => void): void {
46
54
  this.statusCb = cb;
@@ -75,6 +83,7 @@ export class CodexAdapter implements ProviderAdapter {
75
83
 
76
84
  async spawn(config: RunnerSpawnConfig): Promise<ManagedProcess> {
77
85
  this.captureMode = (config.providerConfig as ProviderConfig).chatCaptureMode ?? "final";
86
+ this.sessionEvents = []; // fresh process → fresh segment cursor (#184)
78
87
  const args = this.buildSpawnArgs(config, config.providerConfig as ProviderConfig);
79
88
  const appServer = Bun.spawn([args.command, ...args.args], {
80
89
  cwd: args.cwd,
@@ -377,13 +386,19 @@ export class CodexAdapter implements ProviderAdapter {
377
386
  const itemId = codexItemId(item);
378
387
  if (type === "agentMessage") {
379
388
  const text = (stringValue(item.text) ?? (itemId ? this.itemTextBuffers.get(itemId) : undefined))?.trim();
380
- if (text) this.turnMessages.push(text);
389
+ if (text) {
390
+ this.turnMessages.push(text);
391
+ this.recordInsightEvent({ type: "turn" }); // a substantive assistant turn
392
+ }
381
393
  if (itemId) this.itemTextBuffers.delete(itemId);
382
394
  return;
383
395
  }
384
396
  if (type === "userMessage") {
385
397
  const text = codexUserMessageText(item.content);
386
- if (text) this.sessionEventCb({ type: "prompt", origin: "terminal", body: text, ...(turnId ? { turnId } : {}) });
398
+ if (text) {
399
+ this.recordInsightEvent({ type: "user_prompt" });
400
+ this.sessionEventCb({ type: "prompt", origin: "terminal", body: text, ...(turnId ? { turnId } : {}) });
401
+ }
387
402
  return;
388
403
  }
389
404
  if (type === "reasoning") {
@@ -394,10 +409,31 @@ export class CodexAdapter implements ProviderAdapter {
394
409
  return;
395
410
  }
396
411
  const tool = codexToolSummary(type, item);
397
- if (tool) this.sessionEventCb({ type: "tool", origin: "provider", body: tool.body, label: tool.label, status: "completed", ...(turnId ? { turnId } : {}) });
412
+ if (tool) {
413
+ this.recordInsightEvent({ type: "tool", name: codexInsightToolName(type, item) });
414
+ if (codexItemFailed(item)) this.recordInsightEvent({ type: "tool_error" });
415
+ this.sessionEventCb({ type: "tool", origin: "provider", body: tool.body, label: tool.label, status: "completed", ...(turnId ? { turnId } : {}) });
416
+ }
398
417
  if (itemId) this.itemTextBuffers.delete(itemId);
399
418
  }
400
419
 
420
+ // #183/#184: append to the session-event log with a soft cap. On overflow we drop the
421
+ // oldest half; the runner detects the resulting length shrink and resets its segment
422
+ // cursor (worst case: one slightly-truncated datapoint on a pathologically long session).
423
+ private recordInsightEvent(event: SessionEvent): void {
424
+ this.sessionEvents.push(event);
425
+ if (this.sessionEvents.length > CodexAdapter.SESSION_EVENTS_CAP) {
426
+ this.sessionEvents = this.sessionEvents.slice(this.sessionEvents.length >> 1);
427
+ }
428
+ }
429
+
430
+ // Whole-session event stream for the Insights context-ratio signal (#184). Codex learns
431
+ // about activity through app-server item events, not a transcript, so we replay the log
432
+ // accumulated since this process started. The runner owns per-segment slicing.
433
+ async collectSessionEvents(): Promise<SessionEvent[] | null> {
434
+ return [...this.sessionEvents];
435
+ }
436
+
401
437
  private handleCodexItemDelta(method: string, params: Record<string, unknown> | undefined): void {
402
438
  if (!method.includes("item/") && !method.includes("item.")) return;
403
439
  const item = isRecord(params?.item) ? params.item : undefined;
@@ -492,6 +528,31 @@ export function codexReasoningText(item: Record<string, unknown>): string {
492
528
  }
493
529
 
494
530
  /** Build a compact { label, body } activity summary for a Codex tool item. */
531
+ // Canonical tool name for the #184 gathering/action classifier. Codex item types map to
532
+ // names the shared classifier already understands: commandExecution executes (→ Bash, an
533
+ // action), fileChange mutates (→ Edit), webSearch gathers (→ WebSearch, in the set), and
534
+ // MCP/dynamic calls carry their real tool name so name-shape classification applies.
535
+ export function codexInsightToolName(type: string | undefined, item: Record<string, unknown>): string {
536
+ switch (type) {
537
+ case "commandExecution": return "Bash";
538
+ case "fileChange": return "Edit";
539
+ case "webSearch": return "WebSearch";
540
+ case "mcpToolCall":
541
+ case "dynamicToolCall":
542
+ case "collabAgentToolCall":
543
+ return stringValue(item.tool) ?? type ?? "tool";
544
+ default: return type ?? "tool";
545
+ }
546
+ }
547
+
548
+ // Did a completed tool item fail? Mirrors Claude's tool_result is_error outcome proxy.
549
+ export function codexItemFailed(item: Record<string, unknown>): boolean {
550
+ if (stringValue(item.status) === "failed") return true;
551
+ if (item.error != null && item.error !== false) return true;
552
+ const exitCode = item.exitCode ?? item.exit_code;
553
+ return typeof exitCode === "number" && exitCode !== 0;
554
+ }
555
+
495
556
  export function codexToolSummary(type: string | undefined, item: Record<string, unknown>): { label: string; body: string } | null {
496
557
  const oneLine = (value: unknown): string => (typeof value === "string" ? value.replace(/\s+/g, " ").trim() : "");
497
558
  const clip = (text: string): string => (text.length > 200 ? `${text.slice(0, 197)}…` : text);
@@ -83,14 +83,6 @@ export class ClaimTracker {
83
83
  return before !== this.currentStatus();
84
84
  }
85
85
 
86
- clearKind(kind: ClaimKind): boolean {
87
- const before = this.currentStatus();
88
- for (const key of [...this.claims.keys()]) {
89
- if (key.startsWith(`${kind}:`)) this.claims.delete(key);
90
- }
91
- return before !== this.currentStatus();
92
- }
93
-
94
86
  expire(now = Date.now()): boolean {
95
87
  const before = this.currentStatus();
96
88
  for (const [key, claim] of this.claims) {
@@ -111,10 +103,6 @@ export class ClaimTracker {
111
103
  return [...reasons];
112
104
  }
113
105
 
114
- activeClaims(): ClaimRecord[] {
115
- return [...this.claims.values()];
116
- }
117
-
118
106
  activeWork(): WorkRecord[] {
119
107
  return [...this.work.values()];
120
108
  }
@@ -39,11 +39,12 @@ interface ControlServerOptions {
39
39
  // directly into the session (web terminal / TUI) so the runner can mirror it
40
40
  // into the dashboard chat and start tailing the turn transcript for reasoning.
41
41
  onUserPrompt?(input: { prompt: string; transcriptPath?: string }): Promise<void>;
42
- // A provider SessionEnd hook signals the session is over so the runner can
43
- // compute end-of-session Insights signals (#184 context ratio) from the full
44
- // transcript. transcriptPath is optional — the runner falls back to the last
45
- // path it saw during the session.
46
- onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
42
+ // A provider session-boundary hook (Claude PreCompact / SessionEnd) signals an imminent
43
+ // context reset or termination so the runner can run end-of-session work (#183 pre-destroy
44
+ // seam: #184 context-ratio capture) before the invasive operation. `reason` is the raw
45
+ // provider reason (compact, clear, logout, …); transcriptPath is optional — the runner
46
+ // falls back to the last path it saw during the session.
47
+ onSessionBoundary?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
47
48
  // Phase 1 observability (#198): a hook reporting an unhandled failure. The
48
49
  // control server already logs it FATAL; this is the seam for Phase 2 to also
49
50
  // surface it to the server via the runner outbox.
@@ -93,8 +94,8 @@ export function startControlServer(options: ControlServerOptions): ControlServer
93
94
  if (url.pathname === "/user-prompt" && req.method === "POST") {
94
95
  return handleUserPrompt(req, options);
95
96
  }
96
- if (url.pathname === "/session-end" && req.method === "POST") {
97
- return handleSessionEnd(req, options);
97
+ if (url.pathname === "/session-boundary" && req.method === "POST") {
98
+ return handleSessionBoundary(req, options);
98
99
  }
99
100
  if (url.pathname === "/log-level" && req.method === "GET") {
100
101
  return Response.json({ level: logger.getLevel(), levels: LOG_LEVELS });
@@ -375,13 +376,13 @@ async function handleUserPrompt(req: Request, options: ControlServerOptions): Pr
375
376
  return Response.json({ ok: true });
376
377
  }
377
378
 
378
- async function handleSessionEnd(req: Request, options: ControlServerOptions): Promise<Response> {
379
- if (!options.onSessionEnd) return Response.json({ ok: false, reason: "session-end capture unavailable" });
379
+ async function handleSessionBoundary(req: Request, options: ControlServerOptions): Promise<Response> {
380
+ if (!options.onSessionBoundary) return Response.json({ ok: false, reason: "session-boundary capture unavailable" });
380
381
  const body = await req.json().catch(() => null);
381
382
  const reason = isRecord(body) && typeof body.reason === "string" ? body.reason : undefined;
382
383
  const transcriptPath = isRecord(body) && typeof body.transcriptPath === "string" ? body.transcriptPath : undefined;
383
- // Fire-and-forget: the SessionEnd hook must not block Claude shutting down.
384
- void Promise.resolve(options.onSessionEnd({ reason, transcriptPath })).catch(() => {});
384
+ // Fire-and-forget: a PreCompact/SessionEnd hook must not block Claude compacting or exiting.
385
+ void Promise.resolve(options.onSessionBoundary({ reason, transcriptPath })).catch(() => {});
385
386
  return Response.json({ ok: true });
386
387
  }
387
388
 
package/src/runner.ts CHANGED
@@ -11,7 +11,8 @@ import { ClaimTracker } from "./claim-tracker";
11
11
  import { startControlServer, type ControlServer } from "./control-server";
12
12
  import { ReplyObligationCache } from "./reply-obligation-cache";
13
13
  import { Outbox, type OutboxRecord } from "./outbox";
14
- import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
14
+ import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
15
+ import { computeContextRatio } from "./session-insights";
15
16
  import { agentProfileProjectionReport } from "./profile-projection";
16
17
  import { profileUsesHostProviderGlobals } from "./profile-home";
17
18
  import { RELAY_MCP_TOKEN_ENV, relayMcpEndpoint } from "./relay-mcp";
@@ -20,6 +21,35 @@ import { runtimeMetadata } from "./version";
20
21
  import { logger, parseLogLevel } from "./logger";
21
22
  import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type SessionScratchLayout } from "./session-scratch";
22
23
 
24
+ // A destructive session transition. The runner runs end-of-session work (Insights
25
+ // capture, #183/#184) before the invasive operation and, during that window, presents a
26
+ // distinct non-addressable lifecycle state. Bus commands and provider hooks (Claude
27
+ // PreCompact / SessionEnd) both normalize to one of these.
28
+ type SessionDestroyReason = "compact" | "clear" | "restart" | "shutdown" | "kill";
29
+
30
+ // `finalizing-<reason>` is the transient pre-destroy window; the others are the executing
31
+ // teardown states the dashboard already renders.
32
+ type LifecycleAction =
33
+ | "shutting-down" | "killing" | "restarting"
34
+ | `finalizing-${SessionDestroyReason}`;
35
+
36
+ // Pre-destroy work is best-effort and must never hang teardown. Capping it keeps a slow
37
+ // transcript read or a wedged provider from stalling a shutdown the operator asked for.
38
+ const PRE_DESTROY_TIMEOUT_MS = 4_000;
39
+
40
+ // Map a lifecycle bus command to its destructive boundary reason, or undefined for
41
+ // non-destructive commands (interrupt, inject, reconnect, permission decisions).
42
+ function boundaryReasonForCommand(type: string): SessionDestroyReason | undefined {
43
+ switch (type) {
44
+ case "agent.compact": return "compact";
45
+ case "agent.clearContext": return "clear";
46
+ case "agent.restart": return "restart";
47
+ case "agent.shutdown": return "shutdown";
48
+ case "agent.kill": return "kill";
49
+ default: return undefined;
50
+ }
51
+ }
52
+
23
53
  interface RunnerOptions {
24
54
  provider: string;
25
55
  model?: string;
@@ -164,7 +194,17 @@ export class AgentRunner {
164
194
  // Last transcript path seen this session — used by end-of-session Insights (#184)
165
195
  // when the SessionEnd hook payload omits it.
166
196
  private lastTranscriptPath?: string;
167
- private lifecycleAction?: "shutting-down" | "killing" | "restarting";
197
+ private lifecycleAction?: LifecycleAction;
198
+ // #183/#184 per-segment cursor: how many of the current session's normalized events
199
+ // have already been folded into an observation, and the key (transcript path / Codex
200
+ // session) that count belongs to. A boundary captures only events since the last one,
201
+ // so each datapoint is one work chunk between context resets; a key change or a shrink
202
+ // (transcript rotated, Codex buffer trimmed) resets the cursor.
203
+ private insightsObserved = 0;
204
+ private insightsCursorKey = "";
205
+ // Coalesces concurrent pre-session-destroy runs (e.g. the shutdown bus command and the
206
+ // SessionEnd hook both fire for the same teardown) so the cursor isn't raced.
207
+ private preDestroyPromise?: Promise<void>;
168
208
  private readonly unexpectedExitTimes: number[] = [];
169
209
  private readonly pendingMessages = new Map<number, Message>();
170
210
  private readonly activeTaskClaims = new Map<number, ActiveTaskClaim>();
@@ -293,7 +333,7 @@ export class AgentRunner {
293
333
  onReplyObligations: () => Promise.resolve(this.obligationCache.get()),
294
334
  onSessionTurn: (input) => this.publishSessionTurn(input),
295
335
  onUserPrompt: (input) => this.handleUserPrompt(input),
296
- onSessionEnd: (input) => this.handleSessionEnd(input),
336
+ onSessionBoundary: (input) => this.handleSessionBoundary(input),
297
337
  onHookFatal: (report) => this.reportHookFatal(report),
298
338
  });
299
339
  this.startMcpProxy();
@@ -595,17 +635,22 @@ export class AgentRunner {
595
635
  if (type !== "agent.shutdown" && type !== "agent.restart" && type !== "agent.reconnect" && type !== "agent.kill" && type !== "agent.compact" && type !== "agent.clearContext" && type !== "agent.injectContext" && type !== "agent.permissionDecision" && type !== "agent.interrupt" && type !== "prompt.inject") return;
596
636
 
597
637
  const exitAfterCommand = type === "agent.shutdown" || type === "agent.kill";
598
- if (exitAfterCommand) {
599
- this.exitCommandInProgress = true;
600
- this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
601
- } else if (type === "agent.restart") {
602
- this.lifecycleAction = "restarting";
603
- }
638
+ if (exitAfterCommand) this.exitCommandInProgress = true;
604
639
  this.claims.startClaim("command", commandId);
605
- this.publishStatus();
606
640
  try {
607
641
  await this.updateCommand(commandId, "accepted");
608
642
  await this.updateCommand(commandId, "running");
643
+ // Pre-session-destroy seam (#183): for destructive transitions, run end-of-session
644
+ // work (Insights capture, #184) BEFORE the invasive operation, surfaced as a
645
+ // non-addressable "finalizing" state so the agent isn't mistaken for merely busy.
646
+ const destroyReason = boundaryReasonForCommand(type);
647
+ if (destroyReason) await this.runPreSessionDestroy(destroyReason);
648
+ // Move from the transient finalizing window to the executing teardown state (or drop
649
+ // it entirely for compact/clear, which complete promptly once capture is done).
650
+ if (exitAfterCommand) this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
651
+ else if (type === "agent.restart") this.lifecycleAction = "restarting";
652
+ else this.lifecycleAction = undefined;
653
+ this.publishStatus();
609
654
  let providerResult: Record<string, unknown> | void = undefined;
610
655
  if (type === "agent.restart") await this.restartProvider();
611
656
  else if (type === "agent.reconnect") this.publishStatus();
@@ -1206,26 +1251,71 @@ export class AgentRunner {
1206
1251
  if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
1207
1252
  }
1208
1253
 
1209
- // SessionEnd: compute end-of-session Insights signals (#184 context-gathering
1210
- // ratio) from the full transcript and record them with the relay. Mechanical and
1211
- // model-free costs zero agent tokens and the agent can't game it. The relay drops
1212
- // the observation if Insights or this signal is toggled off. Best-effort: never
1213
- // blocks or fails provider shutdown.
1214
- private async handleSessionEnd(input: { reason?: string; transcriptPath?: string }): Promise<void> {
1215
- // Only Claude transcripts have this shape; Codex sessions are skipped for now.
1216
- if (this.options.provider !== "claude") return;
1217
- const transcriptPath = input.transcriptPath || this.lastTranscriptPath;
1218
- if (!transcriptPath) return;
1219
- let jsonl: string;
1220
- try {
1221
- jsonl = await readFile(transcriptPath, "utf8");
1222
- } catch {
1223
- return;
1254
// Handles a session boundary reported by a provider lifecycle hook. The raw provider
// reason is normalized to a SessionDestroyReason: "compact" and "clear" map to
// themselves, and every other value (including a missing reason) is treated as
// "shutdown". The normalized reason and the optional transcript path are then handed to
// the same pre-destroy seam the bus commands use.
private async handleSessionBoundary(input: { reason?: string; transcriptPath?: string }): Promise<void> {
  let reason: SessionDestroyReason;
  switch (input.reason) {
    case "compact":
      reason = "compact";
      break;
    case "clear":
      reason = "clear";
      break;
    default:
      // Anything that is not a compact/clear continuation counts as a termination.
      reason = "shutdown";
      break;
  }
  await this.runPreSessionDestroy(reason, { transcriptPath: input.transcriptPath });
}
1264
+
1265
// The pre-session-destroy seam (#183): the single place end-of-session work runs before
// an invasive transition (compact/clear/restart/shutdown/kill).
//
// Behavior:
//  - Coalescing: while a run is in flight, every caller gets that same promise back; the
//    `reason`/`opts` of later callers are ignored for that run.
//  - The finalizing status is published first, then the capture is raced against a
//    PRE_DESTROY_TIMEOUT_MS timer so a slow capture cannot hang teardown. The capture is
//    not cancelled on timeout; it simply stops being awaited.
//  - Capture failures are logged and swallowed (best-effort).
//  - The timeout timer is always cleared once the race settles. Otherwise a still-pending
//    timer would keep the Node event loop alive for up to PRE_DESTROY_TIMEOUT_MS after
//    the capture had already finished, which is exactly when the process wants to exit.
private runPreSessionDestroy(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
  if (this.preDestroyPromise) return this.preDestroyPromise;
  const run = (async () => {
    this.publishFinalizing(reason);
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      await Promise.race([
        this.captureContextRatio(reason, opts),
        new Promise<void>((resolve) => {
          timer = setTimeout(resolve, PRE_DESTROY_TIMEOUT_MS);
        }),
      ]);
    } catch (error) {
      this.sessionLog(`insights: pre-destroy capture failed: ${errMessage(error)}`);
    } finally {
      // Release the timer whether the capture won, lost, or threw.
      if (timer !== undefined) clearTimeout(timer);
    }
  })();
  this.preDestroyPromise = run;
  // Drop the coalescing handle once this run settles so the next boundary starts fresh.
  void run.finally(() => { this.preDestroyPromise = undefined; });
  return run;
}
1287
+
1288
// Publishes the transient pre-destroy state. Records `finalizing-<reason>` as the
// current lifecycleAction, then sends a fire-and-forget status update reporting the
// agent as "busy" with ready:false, carrying the lifecycleAction and the time it was set
// in `meta`.
private publishFinalizing(reason: SessionDestroyReason): void {
  this.lifecycleAction = `finalizing-${reason}`;
  const stampedAt = Date.now();
  void this.bus.statusAsync({
    agentStatus: "busy",
    ready: false,
    meta: {
      lifecycleAction: this.lifecycleAction,
      lifecycleActionAt: stampedAt,
    },
  });
}
1296
+
1297
+ // Compute the #184 context-gathering ratio for the segment since the last boundary and
1298
+ // queue it (durable outbox, #196). Provider-agnostic: the adapter normalizes its session
1299
+ // into the shared SessionEvent stream; the math + classifier live in session-insights.ts.
1300
+ // Per-segment via a runner-side cursor, so each datapoint is one work chunk between
1301
+ // context resets. Mechanical, model-free → zero agent tokens, un-gameable.
1302
+ private async captureContextRatio(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
1303
+ const adapter = this.options.adapter;
1304
+ if (!adapter.collectSessionEvents || !this.process) return;
1305
+ const transcriptPath = opts?.transcriptPath ?? this.lastTranscriptPath;
1306
+ const events = await adapter.collectSessionEvents(this.process, { transcriptPath });
1307
+ if (!events) return;
1308
+ // Reset the cursor when the underlying log changed identity (transcript rotated on
1309
+ // resume) or shrank (Codex buffer trimmed) — otherwise the slice would be wrong.
1310
+ const key = transcriptPath ?? `session:${this.providerSessionId}`;
1311
+ if (key !== this.insightsCursorKey || events.length < this.insightsObserved) {
1312
+ this.insightsCursorKey = key;
1313
+ this.insightsObserved = 0;
1224
1314
  }
1225
- const analysis = analyzeSession(jsonl);
1226
- if (!analysis) return; // no tool calls = nothing substantive to measure
1227
- // Durable + non-blocking (#196): queue it. SessionEnd can race provider shutdown, so a
1228
- // direct POST risked being dropped if the server hiccuped; the outbox survives that.
1315
+ const segment = events.slice(this.insightsObserved);
1316
+ this.insightsObserved = events.length;
1317
+ const analysis = computeContextRatio(segment);
1318
+ if (!analysis) return; // no tool calls this segment = nothing substantive to measure
1229
1319
  this.outbox.enqueue({
1230
1320
  kind: "insight",
1231
1321
  payload: {
@@ -1233,12 +1323,12 @@ export class AgentRunner {
1233
1323
  project: this.options.cwd,
1234
1324
  agentId: this.agentId,
1235
1325
  signal: "context_ratio",
1236
- value: { ...analysis.metric, ...(input.reason ? { endReason: input.reason } : {}) },
1326
+ value: { ...analysis.metric, endReason: reason },
1237
1327
  outcome: { ...analysis.outcome },
1238
1328
  source: "server",
1239
1329
  },
1240
1330
  });
1241
- this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering) queued`);
1331
+ this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering, ${reason}) queued`);
1242
1332
  }
1243
1333
 
1244
1334
  // Route a provider-emitted session event (Codex app-server) into the chat mirror.
@@ -0,0 +1,118 @@
1
+ // Provider-agnostic core for the #184 context-gathering signal (epic #183).
2
+ //
3
+ // The transcript *format* is provider-specific (Claude JSONL, Codex app-server items,
4
+ // future providers), so each adapter normalizes its session into the same `SessionEvent`
5
+ // stream via `collectSessionEvents`. Everything downstream — the gathering/action
6
+ // classifier and the ratio math — lives here once and is shared, so a tool reclassified
7
+ // for one provider is reclassified for all, and a new provider only implements the
8
+ // normalization.
9
+ //
10
+ // The classifier is model-free and runs in the runner, so it costs zero agent tokens and
11
+ // the agent can't game it.
12
+
13
/**
 * One entry in the normalized, ordered session stream that adapters produce.
 *
 * Ordering matters to consumers: `leadingGather` is the run of gathering tools seen
 * before the first action, so events must be supplied in the order they occurred.
 */
export type SessionEvent =
  /** A tool invocation; `isGatheringTool(name)` decides gathering vs. action. */
  | { type: "tool"; name: string }
  /** A failed tool result (outcome proxy: failures/workarounds the agent hit). */
  | { type: "tool_error" }
  /** A real user prompt (outcome proxy: more back-and-forth ~ clarification/correction). */
  | { type: "user_prompt" }
  /** A substantive assistant turn, i.e. one that produced text or a tool call. */
  | { type: "turn" };
24
+
25
// Exact names of built-in tools that only acquire context and change nothing. Any tool
// matched neither here nor by GATHERING_NAME is treated as an action (mutation,
// execution, or a delegation/direction decision). Bash is deliberately absent: it
// executes, so it counts as an action (a conservative v0 choice; `cat`/`ls` run through
// Bash are therefore misclassified).
const GATHERING_TOOLS = new Set([
  "Read",
  "Grep",
  "Glob",
  "LS",
  "NotebookRead",
  "WebFetch",
  "WebSearch",
]);

// Name-shape heuristic for MCP / custom tools: a read-like verb at the very start of the
// name or right after a `.`, `_` or `-` separator, matched case-insensitively. Only the
// start of the verb is anchored, so longer words beginning with a verb also match.
const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;

/**
 * Classifies a tool name as context-gathering (true) or action (false).
 *
 * @param name Tool name as reported by the provider, e.g. `Read` or
 *   `mcp__callmux__searxng_web_search`.
 * @returns true when the name is a known gathering tool or matches the gathering
 *   name shape; false otherwise.
 */
export function isGatheringTool(name: string): boolean {
  return GATHERING_TOOLS.has(name) || GATHERING_NAME.test(name);
}
39
+
40
/** Context-gathering measurements for one analyzed span of session events. */
export interface ContextRatioMetric {
  /** Headline metric: the gathering fraction, gatheringCalls / totalToolCalls. */
  ratio: number;
  /** Tool calls classified as gathering. */
  gatheringCalls: number;
  /** Tool calls classified as actions (everything that is not gathering). */
  actionCalls: number;
  /** gatheringCalls + actionCalls. */
  totalToolCalls: number;
  /** Run of gathering calls before the first action: the "read N files before moving" signal. */
  leadingGather: number;
  /** Count of substantive assistant turns (those that produced text or a tool call). */
  turns: number;
}
51
+
52
/** Outcome proxies reported alongside the context-gathering metric. */
export interface SessionOutcomeProxy {
  /** Count of real user prompts; more back-and-forth ~ more clarification/correction. */
  userPrompts: number;
  /** Count of failed tool results (tool_result blocks flagged is_error): failures/workarounds the agent hit. */
  toolErrors: number;
}
58
+
59
/** Result of analyzing an event stream: the ratio metric plus its paired outcome proxies. */
export interface SessionAnalysis {
  /** Gathering-vs-action measurements. */
  metric: ContextRatioMetric;
  /** User-prompt and tool-error counts for the same events. */
  outcome: SessionOutcomeProxy;
}
63
+
64
/**
 * Reduce a normalized event stream to the context-gathering ratio plus paired outcome
 * proxies.
 *
 * Per-segment by construction: callers pass only the events since the last capture
 * boundary (compact/clear/restart/shutdown), so each result describes one work chunk.
 *
 * The input is only read, never mutated, so it is accepted as a readonly array; both
 * mutable and readonly/frozen event lists can be passed without a cast.
 *
 * @param events Ordered session events. Order matters: `leadingGather` counts gathering
 *   tool calls seen before the first action tool call.
 * @returns The metric and outcome proxies, or null when the events contain no tool calls
 *   at all (nothing substantive to measure, so trivial segments don't pollute baselines).
 */
export function computeContextRatio(events: readonly SessionEvent[]): SessionAnalysis | null {
  let gatheringCalls = 0;
  let actionCalls = 0;
  let leadingGather = 0;
  let sawAction = false;
  let userPrompts = 0;
  let toolErrors = 0;
  let turns = 0;

  for (const event of events) {
    switch (event.type) {
      case "user_prompt":
        userPrompts++;
        break;
      case "tool_error":
        toolErrors++;
        break;
      case "turn":
        turns++;
        break;
      case "tool":
        if (isGatheringTool(event.name)) {
          gatheringCalls++;
          // Only gathering calls that precede the first action count as "leading".
          if (!sawAction) leadingGather++;
        } else {
          actionCalls++;
          sawAction = true;
        }
        break;
    }
  }

  const totalToolCalls = gatheringCalls + actionCalls;
  // No tool calls: return null rather than a 0/0 ratio.
  if (totalToolCalls === 0) return null;

  return {
    metric: {
      ratio: gatheringCalls / totalToolCalls,
      gatheringCalls,
      actionCalls,
      totalToolCalls,
      leadingGather,
      turns,
    },
    outcome: { userPrompts, toolErrors },
  };
}