npm - agent-relay-runner - Versions diffs - 0.23.0 → 0.24.0 - Mend

agent-relay-runner 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/package.json +1 -1
package/plugins/claude/.claude-plugin/plugin.json +1 -1
package/plugins/claude/hooks/pre-compact.sh +6 -0
package/plugins/claude/hooks/relay-status.sh +6 -5
package/plugins/claude/hooks/session-end.sh +5 -2
package/src/adapter.ts +10 -0
package/src/adapters/claude-transcript.ts +21 -75
package/src/adapters/claude.ts +16 -0
package/src/adapters/codex.ts +64 -3
package/src/claim-tracker.ts +0 -12
package/src/control-server.ts +12 -11
package/src/runner.ts +121 -31
package/src/session-insights.ts +118 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-relay-runner",
-  "version": "0.23.0",
+  "version": "0.24.0",
   "description": "Unified provider lifecycle runner for Agent Relay",
   "type": "module",
   "bin": {

package/plugins/claude/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "agent-relay-runner",
   "description": "Thin Agent Relay runner bridge for Claude Code",
-  "version": "0.23.0",
+  "version": "0.24.0",
   "agentRelayContracts": {
     "providerPluginProtocol": 1
   }

package/plugins/claude/hooks/pre-compact.sh CHANGED Viewed

@@ -3,4 +3,10 @@ set -euo pipefail
 source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
 relay_install_hook_guard pre-compact
+payload="$(cat || true)"
+transcript_path="$(relay_json_string_field transcript_path "$payload")"
 relay_post_timeline_status busy provider-turn "" compacting
+# Pre-destroy seam (#183): compaction is about to discard context — capture this segment's
+# Insights (#184) from the full transcript first. The most common boundary in practice.
+relay_post_session_boundary "$transcript_path" compact

package/plugins/claude/hooks/relay-status.sh CHANGED Viewed

@@ -72,10 +72,11 @@ relay_post_user_prompt() {
     -d "$body" >/dev/null 2>&1 || true
 }
-relay_post_session_end() {
-  # Insights #184: tell the runner the session ended so it can compute the
-  # end-of-session context-gathering ratio from the full transcript. Fire-and-forget;
-  # the transcript path is optional (the runner falls back to the last path it saw).
+relay_post_session_boundary() {
+  # Pre-destroy seam (#183): tell the runner a context reset or termination is imminent
+  # (compact/clear/logout/…) so it can run end-of-session work — the #184 context-gathering
+  # ratio capture — from the full transcript before the invasive op. Fire-and-forget; the
+  # transcript path is optional (the runner falls back to the last path it saw).
   local transcript_path="${1:-}"
   local reason="${2:-}"
   local port="${AGENT_RELAY_RUNNER_PORT:-}"
@@ -87,7 +88,7 @@ relay_post_session_end() {
     body="${body}\"reason\":\"$(relay_json_escape "$reason")\""
   fi
   body="${body}}"
-  curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-end" \
+  curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-boundary" \
     -H 'Content-Type: application/json' \
     -d "$body" >/dev/null 2>&1 || true
 }

package/plugins/claude/hooks/session-end.sh CHANGED Viewed

@@ -10,14 +10,17 @@ transcript_path="$(relay_json_string_field transcript_path "$payload")"
 case "$reason" in
   clear)
     relay_post_timeline_status idle provider-turn subagent clearing-context
+    # /clear wipes the context — the most common work boundary for a long-lived agent.
+    # Capture this segment's Insights (#183/#184) before it's gone.
+    relay_post_session_boundary "$transcript_path" clear
     ;;
   resume)
     relay_post_status_clearing_subagents idle
     ;;
   logout|prompt_input_exit|bypass_permissions_disabled|other|*)
     relay_post_status_clearing_subagents offline
-    # Real session termination: capture end-of-session Insights (#184). Order after the
+    # Real session termination: capture end-of-session Insights (#183/#184). Order after the
     # status post is arbitrary — the runner reads the transcript file regardless.
-    relay_post_session_end "$transcript_path" "$reason"
+    relay_post_session_boundary "$transcript_path" "$reason"
     ;;
 esac

package/src/adapter.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import type { AgentProfile, Message } from "agent-relay-sdk";
 import { isRecord } from "agent-relay-sdk";
+import type { SessionEvent } from "./session-insights";
 export type SemanticStatus = "idle" | "busy" | "offline" | "error";
 type ProviderWorkKind = "provider-turn" | "subagent";
@@ -133,6 +134,15 @@ export interface ProviderAdapter {
   shutdown(process: ManagedProcess, opts: { graceful: boolean; timeoutMs: number }): Promise<void>;
   compact?(process: ManagedProcess): Promise<Record<string, unknown> | void>;
   clearContext?(process: ManagedProcess): Promise<Record<string, unknown> | void>;
+  // Normalize the session so far into the provider-agnostic SessionEvent stream the
+  // Insights context-ratio signal (#183/#184) reduces. Called by the runner's
+  // pre-session-destroy seam before any compact/clear/restart/shutdown. The runner owns
+  // the per-segment cursor (it slices events since the last capture), so this returns the
+  // full ordered event list for the current process lifetime. `ctx.transcriptPath` is
+  // supplied for transcript-backed providers (Claude); event-stream providers (Codex)
+  // ignore it and return their accumulated log. Return null when there is nothing to
+  // measure. Best-effort: may be omitted by providers without a session view yet.
+  collectSessionEvents?(process: ManagedProcess, ctx: { transcriptPath?: string }): Promise<SessionEvent[] | null>;
   // Interrupt the in-flight turn without ending the session (ESC for Claude's
   // tmux pane, turn/interrupt for the Codex app-server). Provider-independent at
   // the runner boundary; each adapter does what its provider actually supports.

package/src/adapters/claude-transcript.ts CHANGED Viewed

@@ -10,6 +10,8 @@
 // entry carrying text, not just tool_result blocks). We collect the assistant
 // `text` blocks from that turn — thinking and tool_use are dropped.
+import { computeContextRatio, type SessionAnalysis, type SessionEvent } from "../session-insights";
 interface TranscriptBlock {
   type?: string;
   text?: string;
@@ -194,59 +196,14 @@ export function summarizeToolUse(name: string, input: Record<string, unknown> |
 // paired with cheap outcome proxies (user re-prompts, tool errors) so it's never read
 // alone — see the anti-Goodhart constraint in the epic.
-// Tools that acquire context without changing anything. Anything not matched here is
-// treated as an action (mutation, execution, or a delegation/direction decision) —
-// Bash counts as an action because it executes (a conservative, documented choice for
-// v0; `cat`/`ls` via Bash are misclassified, refine later if the data warrants it).
-const GATHERING_TOOLS = new Set([
-  "Read", "Grep", "Glob", "LS", "NotebookRead", "WebFetch", "WebSearch",
-]);
-const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;
-function isGatheringTool(name: string): boolean {
-  if (GATHERING_TOOLS.has(name)) return true;
-  // MCP / custom tools: classify by name shape (e.g. mcp__callmux__searxng_web_search).
-  return GATHERING_NAME.test(name);
-}
-export interface ContextRatioMetric {
-  /** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
-  ratio: number;
-  gatheringCalls: number;
-  actionCalls: number;
-  totalToolCalls: number;
-  /** Consecutive gathering calls before the first action — the "read N files before moving" signal. */
-  leadingGather: number;
-  /** Substantive assistant turns (turns that produced text or a tool call). */
-  turns: number;
-}
-export interface SessionOutcomeProxy {
-  /** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
-  userPrompts: number;
-  /** tool_result blocks flagged is_error — failures/workarounds the agent hit. */
-  toolErrors: number;
-}
-export interface SessionAnalysis {
-  metric: ContextRatioMetric;
-  outcome: SessionOutcomeProxy;
-}
 /**
- * Walk the full transcript and compute the context-gathering ratio plus paired outcome
- * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
- * trivial sessions have nothing to learn from and shouldn't pollute the baselines.
+ * Normalize a full Claude transcript into the provider-agnostic `SessionEvent` stream
+ * (#183/#184). The classifier and ratio math live in `session-insights.ts` and are shared
+ * across providers; this only knows the Claude JSONL shape. Events are emitted in
+ * transcript order so `leadingGather` is meaningful.
  */
-export function analyzeSession(jsonl: string): SessionAnalysis | null {
-  let gatheringCalls = 0;
-  let actionCalls = 0;
-  let leadingGather = 0;
-  let sawAction = false;
-  let userPrompts = 0;
-  let toolErrors = 0;
-  let turns = 0;
+export function collectClaudeSessionEvents(jsonl: string): SessionEvent[] {
+  const events: SessionEvent[] = [];
   for (const line of jsonl.split("\n")) {
     const trimmed = line.trim();
     if (!trimmed) continue;
@@ -256,10 +213,10 @@ export function analyzeSession(jsonl: string): SessionAnalysis | null {
     } catch {
       continue;
     }
-    if (isRealUserPrompt(entry)) userPrompts++;
+    if (isRealUserPrompt(entry)) events.push({ type: "user_prompt" });
     if (entry.type === "user") {
       for (const b of blocks(entry.message)) {
-        if (b.type === "tool_result" && b.is_error === true) toolErrors++;
+        if (b.type === "tool_result" && b.is_error === true) events.push({ type: "tool_error" });
       }
       continue;
     }
@@ -269,31 +226,20 @@ export function analyzeSession(jsonl: string): SessionAnalysis | null {
       if (b.type === "text" && b.text?.trim()) producedSomething = true;
       if (b.type !== "tool_use" || typeof b.name !== "string" || !b.name) continue;
       producedSomething = true;
-      if (isGatheringTool(b.name)) {
-        gatheringCalls++;
-        if (!sawAction) leadingGather++;
-      } else {
-        actionCalls++;
-        sawAction = true;
-      }
+      events.push({ type: "tool", name: b.name });
     }
-    if (producedSomething) turns++;
+    if (producedSomething) events.push({ type: "turn" });
   }
+  return events;
+}
-  const totalToolCalls = gatheringCalls + actionCalls;
-  if (totalToolCalls === 0) return null;
-  return {
-    metric: {
-      ratio: gatheringCalls / totalToolCalls,
-      gatheringCalls,
-      actionCalls,
-      totalToolCalls,
-      leadingGather,
-      turns,
-    },
-    outcome: { userPrompts, toolErrors },
-  };
+/**
+ * Walk the full transcript and compute the context-gathering ratio plus paired outcome
+ * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
+ * trivial sessions have nothing to learn from and shouldn't pollute the baselines.
+ */
+export function analyzeSession(jsonl: string): SessionAnalysis | null {
+  return computeContextRatio(collectClaudeSessionEvents(jsonl));
 }
 /** Count substantive assistant turns — used by the #185 introspection gate. */

package/src/adapters/claude.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { existsSync, mkdirSync, writeFileSync } from "node:fs";
+import { readFile } from "node:fs/promises";
 import { homedir, tmpdir } from "node:os";
 import { join, resolve } from "node:path";
 import type { Message } from "agent-relay-sdk";
@@ -6,6 +7,8 @@ import { shellEscape as shellQuote } from "agent-relay-sdk/shell-utils";
 import { tmuxCommand, tmuxHasSession } from "agent-relay-sdk/tmux-utils";
 import { sanitizeFsName } from "agent-relay-sdk/fs-name";
 import { profileAllowsRelayFeature, type ManagedProcess, type ProviderAdapter, type ProviderConfig, type ProviderStatusUpdate, type RunnerSpawnConfig, type SemanticStatus, type SpawnArgs } from "../adapter";
+import { collectClaudeSessionEvents } from "./claude-transcript";
+import type { SessionEvent } from "../session-insights";
 import { prepareClaudeProfileHome, profileUsesHostProviderGlobals } from "../profile-home";
 import { relayMcpClaudeConfigArg } from "../relay-mcp";
 import { claudeProviderMessageText } from "./claude-delivery";
@@ -64,6 +67,19 @@ export class ClaudeAdapter implements ProviderAdapter {
     return { method: "tmux-inject", command: "/clear" };
   }
+  // #183/#184: parse the full Claude transcript into the shared SessionEvent stream. The
+  // runner slices per-segment, so we return the whole transcript's events each call.
+  async collectSessionEvents(_process: ManagedProcess, ctx: { transcriptPath?: string }): Promise<SessionEvent[] | null> {
+    if (!ctx.transcriptPath) return null;
+    let jsonl: string;
+    try {
+      jsonl = await readFile(ctx.transcriptPath, "utf8");
+    } catch {
+      return null;
+    }
+    return collectClaudeSessionEvents(jsonl);
+  }
   async interrupt(process: ManagedProcess): Promise<Record<string, unknown>> {
     const session = process.meta?.tmuxSession as string | undefined;
     const socket = process.meta?.tmuxSocket as string | undefined;

package/src/adapters/codex.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { profileAllowsRelayFeature, providerMessageText, RELAY_CONTEXT, type Man
 import { workspaceDepsNoteFromEnv } from "../relay-instructions";
 import { relayMcpCodexConfigArgs, tomlString } from "../relay-mcp";
 import { logger } from "../logger";
+import type { SessionEvent } from "../session-insights";
 /** Relay context prepended to a Codex agent's first turn: the standard relay
  * blurb plus, when running in an isolated workspace, the deps caveat (#159). */
@@ -41,6 +42,13 @@ export class CodexAdapter implements ProviderAdapter {
   private turnMessages: string[] = [];
   private readonly itemTextBuffers = new Map<string, string>();
   private captureMode: "final" | "full" = "final";
+  // #183/#184: the normalized session-event log for the current process lifetime, fed
+  // from the same completed-item stream that drives the chat mirror. The runner slices
+  // this per-segment (since the last compact/clear/restart) via its own cursor, so we
+  // accumulate and never clear mid-session; spawn() resets it for a fresh process. Soft
+  // cap below keeps a runaway session from growing memory unbounded.
+  private sessionEvents: SessionEvent[] = [];
+  private static readonly SESSION_EVENTS_CAP = 50_000;
   onStatusChange(cb: (status: ProviderStatusUpdate) => void): void {
     this.statusCb = cb;
@@ -75,6 +83,7 @@ export class CodexAdapter implements ProviderAdapter {
   async spawn(config: RunnerSpawnConfig): Promise<ManagedProcess> {
     this.captureMode = (config.providerConfig as ProviderConfig).chatCaptureMode ?? "final";
+    this.sessionEvents = []; // fresh process → fresh segment cursor (#184)
     const args = this.buildSpawnArgs(config, config.providerConfig as ProviderConfig);
     const appServer = Bun.spawn([args.command, ...args.args], {
       cwd: args.cwd,
@@ -377,13 +386,19 @@ export class CodexAdapter implements ProviderAdapter {
     const itemId = codexItemId(item);
     if (type === "agentMessage") {
       const text = (stringValue(item.text) ?? (itemId ? this.itemTextBuffers.get(itemId) : undefined))?.trim();
-      if (text) this.turnMessages.push(text);
+      if (text) {
+        this.turnMessages.push(text);
+        this.recordInsightEvent({ type: "turn" }); // a substantive assistant turn
+      }
       if (itemId) this.itemTextBuffers.delete(itemId);
       return;
     }
     if (type === "userMessage") {
       const text = codexUserMessageText(item.content);
-      if (text) this.sessionEventCb({ type: "prompt", origin: "terminal", body: text, ...(turnId ? { turnId } : {}) });
+      if (text) {
+        this.recordInsightEvent({ type: "user_prompt" });
+        this.sessionEventCb({ type: "prompt", origin: "terminal", body: text, ...(turnId ? { turnId } : {}) });
+      }
       return;
     }
     if (type === "reasoning") {
@@ -394,10 +409,31 @@ export class CodexAdapter implements ProviderAdapter {
       return;
     }
     const tool = codexToolSummary(type, item);
-    if (tool) this.sessionEventCb({ type: "tool", origin: "provider", body: tool.body, label: tool.label, status: "completed", ...(turnId ? { turnId } : {}) });
+    if (tool) {
+      this.recordInsightEvent({ type: "tool", name: codexInsightToolName(type, item) });
+      if (codexItemFailed(item)) this.recordInsightEvent({ type: "tool_error" });
+      this.sessionEventCb({ type: "tool", origin: "provider", body: tool.body, label: tool.label, status: "completed", ...(turnId ? { turnId } : {}) });
+    }
     if (itemId) this.itemTextBuffers.delete(itemId);
   }
+  // #183/#184: append to the session-event log with a soft cap. On overflow we drop the
+  // oldest half; the runner detects the resulting length shrink and resets its segment
+  // cursor (worst case: one slightly-truncated datapoint on a pathologically long session).
+  private recordInsightEvent(event: SessionEvent): void {
+    this.sessionEvents.push(event);
+    if (this.sessionEvents.length > CodexAdapter.SESSION_EVENTS_CAP) {
+      this.sessionEvents = this.sessionEvents.slice(this.sessionEvents.length >> 1);
+    }
+  }
+  // Whole-session event stream for the Insights context-ratio signal (#184). Codex learns
+  // about activity through app-server item events, not a transcript, so we replay the log
+  // accumulated since this process started. The runner owns per-segment slicing.
+  async collectSessionEvents(): Promise<SessionEvent[] | null> {
+    return [...this.sessionEvents];
+  }
   private handleCodexItemDelta(method: string, params: Record<string, unknown> | undefined): void {
     if (!method.includes("item/") && !method.includes("item.")) return;
     const item = isRecord(params?.item) ? params.item : undefined;
@@ -492,6 +528,31 @@ export function codexReasoningText(item: Record<string, unknown>): string {
 }
 /** Build a compact { label, body } activity summary for a Codex tool item. */
+// Canonical tool name for the #184 gathering/action classifier. Codex item types map to
+// names the shared classifier already understands: commandExecution executes (→ Bash, an
+// action), fileChange mutates (→ Edit), webSearch gathers (→ WebSearch, in the set), and
+// MCP/dynamic calls carry their real tool name so name-shape classification applies.
+export function codexInsightToolName(type: string | undefined, item: Record<string, unknown>): string {
+  switch (type) {
+    case "commandExecution": return "Bash";
+    case "fileChange": return "Edit";
+    case "webSearch": return "WebSearch";
+    case "mcpToolCall":
+    case "dynamicToolCall":
+    case "collabAgentToolCall":
+      return stringValue(item.tool) ?? type ?? "tool";
+    default: return type ?? "tool";
+  }
+}
+// Did a completed tool item fail? Mirrors Claude's tool_result is_error outcome proxy.
+export function codexItemFailed(item: Record<string, unknown>): boolean {
+  if (stringValue(item.status) === "failed") return true;
+  if (item.error != null && item.error !== false) return true;
+  const exitCode = item.exitCode ?? item.exit_code;
+  return typeof exitCode === "number" && exitCode !== 0;
+}
 export function codexToolSummary(type: string | undefined, item: Record<string, unknown>): { label: string; body: string } | null {
   const oneLine = (value: unknown): string => (typeof value === "string" ? value.replace(/\s+/g, " ").trim() : "");
   const clip = (text: string): string => (text.length > 200 ? `${text.slice(0, 197)}…` : text);

package/src/claim-tracker.ts CHANGED Viewed

@@ -83,14 +83,6 @@ export class ClaimTracker {
     return before !== this.currentStatus();
   }
-  clearKind(kind: ClaimKind): boolean {
-    const before = this.currentStatus();
-    for (const key of [...this.claims.keys()]) {
-      if (key.startsWith(`${kind}:`)) this.claims.delete(key);
-    }
-    return before !== this.currentStatus();
-  }
   expire(now = Date.now()): boolean {
     const before = this.currentStatus();
     for (const [key, claim] of this.claims) {
@@ -111,10 +103,6 @@ export class ClaimTracker {
     return [...reasons];
   }
-  activeClaims(): ClaimRecord[] {
-    return [...this.claims.values()];
-  }
   activeWork(): WorkRecord[] {
     return [...this.work.values()];
   }

package/src/control-server.ts CHANGED Viewed

@@ -39,11 +39,12 @@ interface ControlServerOptions {
   // directly into the session (web terminal / TUI) so the runner can mirror it
   // into the dashboard chat and start tailing the turn transcript for reasoning.
   onUserPrompt?(input: { prompt: string; transcriptPath?: string }): Promise<void>;
-  // A provider SessionEnd hook signals the session is over so the runner can
-  // compute end-of-session Insights signals (#184 context ratio) from the full
-  // transcript. transcriptPath is optional — the runner falls back to the last
-  // path it saw during the session.
-  onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
+  // A provider session-boundary hook (Claude PreCompact / SessionEnd) signals an imminent
+  // context reset or termination so the runner can run end-of-session work (#183 pre-destroy
+  // seam: #184 context-ratio capture) before the invasive operation. `reason` is the raw
+  // provider reason (compact, clear, logout, …); transcriptPath is optional — the runner
+  // falls back to the last path it saw during the session.
+  onSessionBoundary?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
   // Phase 1 observability (#198): a hook reporting an unhandled failure. The
   // control server already logs it FATAL; this is the seam for Phase 2 to also
   // surface it to the server via the runner outbox.
@@ -93,8 +94,8 @@ export function startControlServer(options: ControlServerOptions): ControlServer
       if (url.pathname === "/user-prompt" && req.method === "POST") {
         return handleUserPrompt(req, options);
       }
-      if (url.pathname === "/session-end" && req.method === "POST") {
-        return handleSessionEnd(req, options);
+      if (url.pathname === "/session-boundary" && req.method === "POST") {
+        return handleSessionBoundary(req, options);
       }
       if (url.pathname === "/log-level" && req.method === "GET") {
         return Response.json({ level: logger.getLevel(), levels: LOG_LEVELS });
@@ -375,13 +376,13 @@ async function handleUserPrompt(req: Request, options: ControlServerOptions): Pr
   return Response.json({ ok: true });
 }
-async function handleSessionEnd(req: Request, options: ControlServerOptions): Promise<Response> {
-  if (!options.onSessionEnd) return Response.json({ ok: false, reason: "session-end capture unavailable" });
+async function handleSessionBoundary(req: Request, options: ControlServerOptions): Promise<Response> {
+  if (!options.onSessionBoundary) return Response.json({ ok: false, reason: "session-boundary capture unavailable" });
   const body = await req.json().catch(() => null);
   const reason = isRecord(body) && typeof body.reason === "string" ? body.reason : undefined;
   const transcriptPath = isRecord(body) && typeof body.transcriptPath === "string" ? body.transcriptPath : undefined;
-  // Fire-and-forget: the SessionEnd hook must not block Claude shutting down.
-  void Promise.resolve(options.onSessionEnd({ reason, transcriptPath })).catch(() => {});
+  // Fire-and-forget: a PreCompact/SessionEnd hook must not block Claude compacting or exiting.
+  void Promise.resolve(options.onSessionBoundary({ reason, transcriptPath })).catch(() => {});
   return Response.json({ ok: true });
 }

package/src/runner.ts CHANGED Viewed

@@ -11,7 +11,8 @@ import { ClaimTracker } from "./claim-tracker";
 import { startControlServer, type ControlServer } from "./control-server";
 import { ReplyObligationCache } from "./reply-obligation-cache";
 import { Outbox, type OutboxRecord } from "./outbox";
-import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
+import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
+import { computeContextRatio } from "./session-insights";
 import { agentProfileProjectionReport } from "./profile-projection";
 import { profileUsesHostProviderGlobals } from "./profile-home";
 import { RELAY_MCP_TOKEN_ENV, relayMcpEndpoint } from "./relay-mcp";
@@ -20,6 +21,35 @@ import { runtimeMetadata } from "./version";
 import { logger, parseLogLevel } from "./logger";
 import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type SessionScratchLayout } from "./session-scratch";
+// A destructive session transition. The runner runs end-of-session work (Insights
+// capture, #183/#184) before the invasive operation and, during that window, presents a
+// distinct non-addressable lifecycle state. Bus commands and provider hooks (Claude
+// PreCompact / SessionEnd) both normalize to one of these.
+type SessionDestroyReason = "compact" | "clear" | "restart" | "shutdown" | "kill";
+// `finalizing-<reason>` is the transient pre-destroy window; the others are the executing
+// teardown states the dashboard already renders.
+type LifecycleAction =
+  | "shutting-down" | "killing" | "restarting"
+  | `finalizing-${SessionDestroyReason}`;
+// Pre-destroy work is best-effort and must never hang teardown. Capping it keeps a slow
+// transcript read or a wedged provider from stalling a shutdown the operator asked for.
+const PRE_DESTROY_TIMEOUT_MS = 4_000;
+// Map a lifecycle bus command to its destructive boundary reason, or undefined for
+// non-destructive commands (interrupt, inject, reconnect, permission decisions).
+function boundaryReasonForCommand(type: string): SessionDestroyReason | undefined {
+  switch (type) {
+    case "agent.compact": return "compact";
+    case "agent.clearContext": return "clear";
+    case "agent.restart": return "restart";
+    case "agent.shutdown": return "shutdown";
+    case "agent.kill": return "kill";
+    default: return undefined;
+  }
+}
 interface RunnerOptions {
   provider: string;
   model?: string;
@@ -164,7 +194,17 @@ export class AgentRunner {
   // Last transcript path seen this session — used by end-of-session Insights (#184)
   // when the SessionEnd hook payload omits it.
   private lastTranscriptPath?: string;
-  private lifecycleAction?: "shutting-down" | "killing" | "restarting";
+  private lifecycleAction?: LifecycleAction;
+  // #183/#184 per-segment cursor: how many of the current session's normalized events
+  // have already been folded into an observation, and the key (transcript path / Codex
+  // session) that count belongs to. A boundary captures only events since the last one,
+  // so each datapoint is one work chunk between context resets; a key change or a shrink
+  // (transcript rotated, Codex buffer trimmed) resets the cursor.
+  private insightsObserved = 0;
+  private insightsCursorKey = "";
+  // Coalesces concurrent pre-session-destroy runs (e.g. the shutdown bus command and the
+  // SessionEnd hook both fire for the same teardown) so the cursor isn't raced.
+  private preDestroyPromise?: Promise<void>;
   private readonly unexpectedExitTimes: number[] = [];
   private readonly pendingMessages = new Map<number, Message>();
   private readonly activeTaskClaims = new Map<number, ActiveTaskClaim>();
@@ -293,7 +333,7 @@ export class AgentRunner {
       onReplyObligations: () => Promise.resolve(this.obligationCache.get()),
       onSessionTurn: (input) => this.publishSessionTurn(input),
       onUserPrompt: (input) => this.handleUserPrompt(input),
-      onSessionEnd: (input) => this.handleSessionEnd(input),
+      onSessionBoundary: (input) => this.handleSessionBoundary(input),
       onHookFatal: (report) => this.reportHookFatal(report),
     });
     this.startMcpProxy();
@@ -595,17 +635,22 @@ export class AgentRunner {
     if (type !== "agent.shutdown" && type !== "agent.restart" && type !== "agent.reconnect" && type !== "agent.kill" && type !== "agent.compact" && type !== "agent.clearContext" && type !== "agent.injectContext" && type !== "agent.permissionDecision" && type !== "agent.interrupt" && type !== "prompt.inject") return;
     const exitAfterCommand = type === "agent.shutdown" || type === "agent.kill";
-    if (exitAfterCommand) {
-      this.exitCommandInProgress = true;
-      this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
-    } else if (type === "agent.restart") {
-      this.lifecycleAction = "restarting";
-    }
+    if (exitAfterCommand) this.exitCommandInProgress = true;
     this.claims.startClaim("command", commandId);
-    this.publishStatus();
     try {
       await this.updateCommand(commandId, "accepted");
       await this.updateCommand(commandId, "running");
+      // Pre-session-destroy seam (#183): for destructive transitions, run end-of-session
+      // work (Insights capture, #184) BEFORE the invasive operation, surfaced as a
+      // non-addressable "finalizing" state so the agent isn't mistaken for merely busy.
+      const destroyReason = boundaryReasonForCommand(type);
+      if (destroyReason) await this.runPreSessionDestroy(destroyReason);
+      // Move from the transient finalizing window to the executing teardown state (or drop
+      // it entirely for compact/clear, which complete promptly once capture is done).
+      if (exitAfterCommand) this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
+      else if (type === "agent.restart") this.lifecycleAction = "restarting";
+      else this.lifecycleAction = undefined;
+      this.publishStatus();
       let providerResult: Record<string, unknown> | void = undefined;
       if (type === "agent.restart") await this.restartProvider();
       else if (type === "agent.reconnect") this.publishStatus();
@@ -1206,26 +1251,71 @@ export class AgentRunner {
     if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
   }
-  // SessionEnd: compute end-of-session Insights signals (#184 context-gathering
-  // ratio) from the full transcript and record them with the relay. Mechanical and
-  // model-free — costs zero agent tokens and the agent can't game it. The relay drops
-  // the observation if Insights or this signal is toggled off. Best-effort: never
-  // blocks or fails provider shutdown.
-  private async handleSessionEnd(input: { reason?: string; transcriptPath?: string }): Promise<void> {
-    // Only Claude transcripts have this shape; Codex sessions are skipped for now.
-    if (this.options.provider !== "claude") return;
-    const transcriptPath = input.transcriptPath || this.lastTranscriptPath;
-    if (!transcriptPath) return;
-    let jsonl: string;
-    try {
-      jsonl = await readFile(transcriptPath, "utf8");
-    } catch {
-      return;
+  // A provider lifecycle hook reported a session boundary (Claude PreCompact / SessionEnd
+  // → control server). Normalize the raw provider reason to a SessionDestroyReason and run
+  // the same pre-destroy seam the bus commands use. `clear`/`compact` continue the session;
+  // anything else (logout, prompt_input_exit, other) is a real termination.
+  private async handleSessionBoundary(input: { reason?: string; transcriptPath?: string }): Promise<void> {
+    const reason = input.reason === "compact" ? "compact"
+      : input.reason === "clear" ? "clear"
+      : "shutdown";
+    await this.runPreSessionDestroy(reason, { transcriptPath: input.transcriptPath });
+  }
+  // The pre-session-destroy seam (#183): the single place end-of-session work runs before
+  // an invasive transition (compact/clear/restart/shutdown/kill). Best-effort and
+  // time-boxed so it never hangs teardown; calls arriving while a run is in flight coalesce
+  // onto it regardless of reason (a shutdown bus command and the SessionEnd hook can both
+  // fire). During the window the agent is published non-addressable so the operator sees "wrapping up", not "busy".
+  private runPreSessionDestroy(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
+    if (this.preDestroyPromise) return this.preDestroyPromise;
+    const run = (async () => {
+      this.publishFinalizing(reason);
+      try {
+        await Promise.race([
+          this.captureContextRatio(reason, opts),
+          new Promise<void>((resolve) => setTimeout(resolve, PRE_DESTROY_TIMEOUT_MS)),
+        ]);
+      } catch (error) {
+        this.sessionLog(`insights: pre-destroy capture failed: ${errMessage(error)}`);
+      }
+    })();
+    this.preDestroyPromise = run;
+    void run.finally(() => { this.preDestroyPromise = undefined; });
+    return run;
+  }
+  // Publish the transient pre-destroy state: a non-offline status with ready:false (so the
+  // agent drops out of isAgentOnline fan-out targeting without going "offline") plus a
+  // finalizing-<reason> lifecycleAction the dashboard renders as "wrapping up" with the
+  // composer disabled.
+  private publishFinalizing(reason: SessionDestroyReason): void {
+    this.lifecycleAction = `finalizing-${reason}`;
+    void this.bus.statusAsync({ agentStatus: "busy", ready: false, meta: { lifecycleAction: this.lifecycleAction, lifecycleActionAt: Date.now() } });
+  }
+  // Compute the #184 context-gathering ratio for the segment since the last boundary and
+  // queue it (durable outbox, #196). Provider-agnostic: the adapter normalizes its session
+  // into the shared SessionEvent stream; the math + classifier live in session-insights.ts.
+  // Per-segment via a runner-side cursor, so each datapoint is one work chunk between
+  // context resets. Mechanical, model-free → zero agent tokens, un-gameable.
+  private async captureContextRatio(reason: SessionDestroyReason, opts?: { transcriptPath?: string }): Promise<void> {
+    const adapter = this.options.adapter;
+    if (!adapter.collectSessionEvents || !this.process) return;
+    const transcriptPath = opts?.transcriptPath ?? this.lastTranscriptPath;
+    const events = await adapter.collectSessionEvents(this.process, { transcriptPath });
+    if (!events) return;
+    // Reset the cursor when the underlying log changed identity (transcript rotated on
+    // resume) or shrank (Codex buffer trimmed) — otherwise the slice would be wrong.
+    const key = transcriptPath ?? `session:${this.providerSessionId}`;
+    if (key !== this.insightsCursorKey || events.length < this.insightsObserved) {
+      this.insightsCursorKey = key;
+      this.insightsObserved = 0;
     }
-    const analysis = analyzeSession(jsonl);
-    if (!analysis) return; // no tool calls = nothing substantive to measure
-    // Durable + non-blocking (#196): queue it. SessionEnd can race provider shutdown, so a
-    // direct POST risked being dropped if the server hiccuped; the outbox survives that.
+    const segment = events.slice(this.insightsObserved);
+    this.insightsObserved = events.length;
+    const analysis = computeContextRatio(segment);
+    if (!analysis) return; // no tool calls this segment = nothing substantive to measure
     this.outbox.enqueue({
       kind: "insight",
       payload: {
@@ -1233,12 +1323,12 @@ export class AgentRunner {
         project: this.options.cwd,
         agentId: this.agentId,
         signal: "context_ratio",
-        value: { ...analysis.metric, ...(input.reason ? { endReason: input.reason } : {}) },
+        value: { ...analysis.metric, endReason: reason },
         outcome: { ...analysis.outcome },
         source: "server",
       },
     });
-    this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering) queued`);
+    this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering, ${reason}) queued`);
   }
   // Route a provider-emitted session event (Codex app-server) into the chat mirror.

package/src/session-insights.ts ADDED Viewed

@@ -0,0 +1,118 @@
+// Provider-agnostic core for the #184 context-gathering signal (epic #183).
+//
+// The transcript *format* is provider-specific (Claude JSONL, Codex app-server items,
+// future providers), so each adapter normalizes its session into the same `SessionEvent`
+// stream via `collectSessionEvents`. Everything downstream — the gathering/action
+// classifier and the ratio math — lives here once and is shared, so a tool reclassified
+// for one provider is reclassified for all, and a new provider only implements the
+// normalization.
+//
+// The classifier is model-free and runs in the runner, so it costs zero agent tokens and
+// the agent can't game it.
+// A normalized, ordered session event. Order is significant: `leadingGather` counts the
+// run of gathering tools before the first action.
+export type SessionEvent =
+  // A tool invocation. Gathering-vs-action is decided here by `isGatheringTool(name)`.
+  | { type: "tool"; name: string }
+  // A failed tool result (paired outcome proxy — failures/workarounds the agent hit).
+  | { type: "tool_error" }
+  // A real user prompt (paired outcome proxy — more back-and-forth ~ clarification/correction).
+  | { type: "user_prompt" }
+  // A substantive assistant turn (one that produced text or a tool call).
+  | { type: "turn" };
+// Tools that acquire context without changing anything. Anything not matched here is
+// treated as an action (mutation, execution, or a delegation/direction decision) — Bash
+// counts as an action because it executes (a conservative, documented choice for v0;
+// `cat`/`ls` via Bash are misclassified, refine later if the data warrants it).
+const GATHERING_TOOLS = new Set([
+  "Read", "Grep", "Glob", "LS", "NotebookRead", "WebFetch", "WebSearch",
+]);
+const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;
+export function isGatheringTool(name: string): boolean {
+  if (GATHERING_TOOLS.has(name)) return true;
+  // MCP / custom tools: classify by name shape (e.g. mcp__callmux__searxng_web_search).
+  return GATHERING_NAME.test(name);
+}
+export interface ContextRatioMetric {
+  /** Gathering fraction for the analyzed segment: gatheringCalls / totalToolCalls. The headline metric. */
+  ratio: number;
+  gatheringCalls: number;
+  actionCalls: number;
+  totalToolCalls: number;
+  /** Consecutive gathering calls before the first action — the "read N files before moving" signal. */
+  leadingGather: number;
+  /** Substantive assistant turns (turns that produced text or a tool call). */
+  turns: number;
+}
+export interface SessionOutcomeProxy {
+  /** Real user prompts in the analyzed segment — more back-and-forth ~ more clarification/correction. */
+  userPrompts: number;
+  /** Failed tool results (`tool_error` events) — failures/workarounds the agent hit. */
+  toolErrors: number;
+}
+export interface SessionAnalysis {
+  metric: ContextRatioMetric;
+  outcome: SessionOutcomeProxy;
+}
+/**
+ * Reduce a normalized event stream to the context-gathering ratio plus paired outcome
+ * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
+ * trivial segments have nothing to learn from and shouldn't pollute the baselines.
+ *
+ * Per-segment by construction: callers pass only the events since the last capture
+ * boundary (compact/clear/restart/shutdown), so each result describes one work chunk.
+ */
+export function computeContextRatio(events: SessionEvent[]): SessionAnalysis | null {
+  let gatheringCalls = 0;
+  let actionCalls = 0;
+  let leadingGather = 0;
+  let sawAction = false;
+  let userPrompts = 0;
+  let toolErrors = 0;
+  let turns = 0;
+  for (const event of events) {
+    switch (event.type) {
+      case "user_prompt":
+        userPrompts++;
+        break;
+      case "tool_error":
+        toolErrors++;
+        break;
+      case "turn":
+        turns++;
+        break;
+      case "tool":
+        if (isGatheringTool(event.name)) {
+          gatheringCalls++;
+          if (!sawAction) leadingGather++;
+        } else {
+          actionCalls++;
+          sawAction = true;
+        }
+        break;
+    }
+  }
+  const totalToolCalls = gatheringCalls + actionCalls;
+  if (totalToolCalls === 0) return null;
+  return {
+    metric: {
+      ratio: gatheringCalls / totalToolCalls,
+      gatheringCalls,
+      actionCalls,
+      totalToolCalls,
+      leadingGather,
+      turns,
+    },
+    outcome: { userPrompts, toolErrors },
+  };
+}