npm - agent-relay-runner - Versions diffs - 0.27.2 → 0.29.0 - Mend

agent-relay-runner 0.27.2 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/package.json +2 -2
package/plugins/claude/.claude-plugin/plugin.json +1 -1
package/src/adapter.ts +14 -1
package/src/adapters/claude-delivery.ts +7 -3
package/src/adapters/claude-transcript.ts +49 -7
package/src/adapters/claude.ts +15 -0
package/src/adapters/codex-client.ts +5 -0
package/src/adapters/codex.ts +106 -19
package/src/attachment-cache.ts +3 -3
package/src/control-server.ts +2 -2
package/src/logger.ts +2 -2
package/src/outbox.ts +4 -4
package/src/profile-home.ts +1 -1
package/src/relay-instructions.ts +1 -0
package/src/relay-mcp-proxy.ts +2 -2
package/src/reply-obligation-cache.ts +2 -2
package/src/runner.ts +19 -9
package/src/session-insights.ts +2 -2
package/src/session-scratch.ts +4 -4

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-relay-runner",
-  "version": "0.27.2",
+  "version": "0.29.0",
   "description": "Unified provider lifecycle runner for Agent Relay",
   "type": "module",
   "bin": {
@@ -20,7 +20,7 @@
     "directory": "runner"
   },
   "dependencies": {
-    "agent-relay-sdk": "0.2.16"
+    "agent-relay-sdk": "0.2.18"
   },
   "devDependencies": {
     "@types/bun": "latest",

package/plugins/claude/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "agent-relay-runner",
   "description": "Thin Agent Relay runner bridge for Claude Code",
-  "version": "0.27.2",
+  "version": "0.29.0",
   "agentRelayContracts": {
     "providerPluginProtocol": 1
   }

package/src/adapter.ts CHANGED Viewed

@@ -211,9 +211,19 @@ function isPersistedRelayMessage(message: Message): boolean {
   return Number.isSafeInteger(message.id) && message.id > 0;
 }
+// #283 — one-line nudge that replaces the reply-scaffold footer for notification-class
+// (replyExpected:false) messages. Deliberately tiny so a bloated context can't drown the
+// no-reply rule established at session start. Shared with the Claude delivery path.
+export const NOTIFICATION_NUDGE = "↪ Notification — no reply needed.";
+// A notification is a persisted message the server marked replyExpected:false.
+export function isNotificationMessage(message: Message): boolean {
+  return isPersistedRelayMessage(message) && message.replyExpected === false;
+}
 function latestReplyableMessage(messages: Message[]): Message | undefined {
   return messages
-    .filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message))
+    .filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message) && message.replyExpected !== false)
     .at(-1);
 }
@@ -316,6 +326,9 @@ export function providerMessageText(messages: Message[]): string {
       "If you already delivered the useful response through Relay, do not send a separate status-only confirmation.",
       "If multiple messages arrived together, cover them in one reply instead of answering each line separately.",
     ].join("\n"));
+  } else if (messages.some(isNotificationMessage)) {
+    // #283 — pure notification batch: no scaffold, just the one-line no-reply nudge.
+    sections.push(NOTIFICATION_NUDGE);
   }
   return sections.join("\n\n");
 }

package/src/adapters/claude-delivery.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type { Message } from "agent-relay-sdk";
 import { isRecord } from "agent-relay-sdk";
-import { providerAttachmentText } from "../adapter";
+import { isNotificationMessage, NOTIFICATION_NUDGE, providerAttachmentText } from "../adapter";
 const PROVIDER_MESSAGE_BODY_PREVIEW_CHARS = 4000;
 const REMINDER_EVERY_DELIVERIES = 5;
@@ -61,7 +61,7 @@ function shouldShowReplyReminder(deliveryCount: number): boolean {
 function latestReplyableMessage(messages: Message[]): Message | undefined {
   return messages
-    .filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message))
+    .filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message) && message.replyExpected !== false)
     .at(-1);
 }
@@ -121,9 +121,13 @@ export function claudeProviderMessageText(messages: Message[], options: ClaudeDe
   const relaySurface = options.relaySurface !== false;
   const sections = messages.map((message) => formatMessage(message, relaySurface));
   const replyable = latestReplyableMessage(messages);
-  // Isolated agents have no way to reply through Relay — never append the reminder.
+  // Isolated agents have no way to reply through Relay — never append the reminder/nudge.
   if (relaySurface && replyable && shouldShowReplyReminder(options.deliveryCount)) {
     sections.push(replyReminder(replyable, options.readOnly === true));
+  } else if (relaySurface && !replyable && messages.some(isNotificationMessage)) {
+    // #283 — pure notification batch (no message wants a reply): drop the scaffold, append the
+    // one-line nudge so a long context can't make the agent forget the session-start no-reply rule.
+    sections.push(NOTIFICATION_NUDGE);
   }
   return sections.join("\n\n");
 }

package/src/adapters/claude-transcript.ts CHANGED Viewed

@@ -21,8 +21,8 @@ interface TranscriptBlock {
   is_error?: boolean;
 }
-export interface TurnStep {
-  type: "reasoning" | "tool";
+interface TurnStep {
+  type: "narration" | "reasoning" | "tool";
   text: string;
   label?: string;
 }
@@ -36,6 +36,16 @@ interface TranscriptMessage {
 interface TranscriptEntry {
   type?: string;
   message?: TranscriptMessage;
+  // Claude Code stamps every transcript entry with `isSidechain`: true for
+  // entries belonging to a Task (subagent) run, false for the root session.
+  // Current CC writes sidechains to a separate subagents/*.jsonl so they don't
+  // reach the root transcript the runner tails — but older CC inlined them, and
+  // the behavior can revert, so the chat-mirror parsers below defensively skip
+  // sidechain entries to keep a subagent's reasoning/tools/responses from
+  // leaking into the parent agent's chat. Insights parsers (collectClaudeSession-
+  // Events/countSubstantiveTurns) intentionally do NOT filter — changing them
+  // would shift the #184/#185 baselines, a separate concern.
+  isSidechain?: boolean;
 }
 function blocks(message: TranscriptMessage | undefined): TranscriptBlock[] {
@@ -43,6 +53,11 @@ function blocks(message: TranscriptMessage | undefined): TranscriptBlock[] {
   return message.content.filter((b): b is TranscriptBlock => Boolean(b) && typeof b === "object");
 }
+/** True for a subagent (Task) transcript entry — see the note on TranscriptEntry.isSidechain. */
+function isSidechainEntry(entry: TranscriptEntry): boolean {
+  return entry.isSidechain === true;
+}
 function isRealUserPrompt(entry: TranscriptEntry): boolean {
   if (entry.type !== "user") return false;
   const content = entry.message?.content;
@@ -75,6 +90,7 @@ export function transcriptLooksComplete(jsonl: string): boolean {
     if (!trimmed) continue;
     try {
       const entry = JSON.parse(trimmed) as TranscriptEntry;
+      if (isSidechainEntry(entry)) continue;
       if (entry.type === "assistant") lastAssistantStopReason = entry.message?.stop_reason;
     } catch {
       continue;
@@ -99,6 +115,7 @@ export function extractLastAssistantTurn(jsonl: string): string {
     } catch {
       continue;
     }
+    if (isSidechainEntry(entry)) continue;
     if (isRealUserPrompt(entry)) {
       collected = [];
       continue;
@@ -128,6 +145,7 @@ export function extractFinalAssistantMessage(jsonl: string): string {
     } catch {
       continue;
     }
+    if (isSidechainEntry(entry)) continue;
     if (isRealUserPrompt(entry)) {
       pastLastUserPrompt = true;
       lastText = "";
@@ -147,10 +165,12 @@ export function extractFinalAssistantMessage(jsonl: string): string {
  * Thinking and tool_use blocks are dropped, matching extractLastAssistantTurn.
  */
 /**
- * Extract the ordered reasoning and tool steps for the most recent turn (since
- * the last real user prompt). Used by the reasoning tailer to stream discreet
- * progress into chat while a turn is in flight. Returns steps in transcript order
- * so the tailer can emit only the ones it hasn't seen yet by index.
+ * Extract the ordered narration, reasoning, and tool steps for the most recent
+ * turn (since the last real user prompt). Used by the reasoning tailer to stream
+ * progress into chat while a turn is in flight. `narration` is the assistant's
+ * intermediate `text` between tool calls (the terminal's `●` lines); it is the
+ * primary, default-visible turn content. Returns steps in transcript order so the
+ * tailer can emit only the ones it hasn't seen yet.
  */
 export function extractLatestTurnSteps(jsonl: string): TurnStep[] {
   const lines = jsonl.split("\n");
@@ -164,13 +184,16 @@ export function extractLatestTurnSteps(jsonl: string): TurnStep[] {
     } catch {
       continue;
     }
+    if (isSidechainEntry(entry)) continue;
     if (isRealUserPrompt(entry)) {
       steps = [];
       continue;
     }
     if (entry.type !== "assistant") continue;
     for (const b of blocks(entry.message)) {
-      if (b.type === "thinking" && typeof b.thinking === "string" && b.thinking.trim()) {
+      if (b.type === "text" && typeof b.text === "string" && b.text.trim()) {
+        steps.push({ type: "narration", text: b.text.trim() });
+      } else if (b.type === "thinking" && typeof b.thinking === "string" && b.thinking.trim()) {
         steps.push({ type: "reasoning", text: b.thinking.trim() });
       } else if (b.type === "tool_use" && typeof b.name === "string" && b.name) {
         steps.push({ type: "tool", label: b.name, text: summarizeToolUse(b.name, b.input) });
@@ -180,6 +203,25 @@ export function extractLatestTurnSteps(jsonl: string): TurnStep[] {
   return steps;
 }
+/**
+ * Stable dedup keys for a turn's steps, in order. Each key is salted with how many
+ * identical (type,label,text) steps preceded it in the same window — so running the
+ * same tool twice with identical input within a turn yields two distinct keys and
+ * both show in the activity trace (#265). Keying on occurrence-within-window rather
+ * than raw transcript index keeps the reasoning tailer idempotent when the "latest
+ * turn" window shrinks/resets mid-poll: a surviving step recomputes to the same or a
+ * lower occurrence, so an already-emitted step never re-fires.
+ */
+export function stepDedupKeys(steps: TurnStep[]): string[] {
+  const counts = new Map<string, number>();
+  return steps.map((step) => {
+    const base = JSON.stringify([step.type, step.label ?? "", step.text]);
+    const occ = counts.get(base) ?? 0;
+    counts.set(base, occ + 1);
+    return JSON.stringify([step.type, step.label ?? "", step.text, occ]);
+  });
+}
 /** Compact one-line summary of a tool invocation for the discreet activity row. */
 export function summarizeToolUse(name: string, input: Record<string, unknown> | undefined): string {
   const str = (key: string): string | undefined => (input && typeof input[key] === "string" ? (input[key] as string) : undefined);

package/src/adapters/claude.ts CHANGED Viewed

@@ -453,6 +453,21 @@ function captureTmuxPane(sessionName: string, socketName?: string): string {
   return result.stdout.toString();
 }
+// ⚠ FRAGILE PANE HEURISTICS — both functions below string-match Claude Code's TUI
+// chrome against captured tmux scrollback (~80 lines), so they break whenever CC
+// restyles its footer/banner. They are deliberately substring/regex based because
+// there's no machine-readable ready/busy signal from the TUI. Known break conditions,
+// so the next CC restyle is a fast fix rather than a hunt:
+//   readiness (claudePaneLooksReady) breaks if CC renames/removes ALL of: the
+//     "bypass permissions" / "shift+tab to cycle" / "? for shortcuts" footer hints,
+//     the "/effort" hint, or the "Welcome back" / "Claude Code" banner.
+//   busy (claudePaneIsBusy) breaks if CC drops the live "… (<elapsed>" spinner counter
+//     (the cross-version anchor; the "esc to interrupt" hint was already dropped in 2.1.x).
+//   FALSE POSITIVES: agent output that literally QUOTES any of these strings (e.g. a
+//     transcript discussing "esc to interrupt", or this very comment shown in a pane)
+//     reads as ready/busy. Tolerated because the markers are CC-specific enough to be
+//     rare in real output; if it bites, gate on the LAST N lines only (the live footer).
+// History: 18067b5 (busy counter), and the readiness footer-vs-banner fix below.
 export function claudePaneLooksReady(text: string): boolean {
   // Claude's startup banner ("Claude Code" / "Welcome back") scrolls off the pane once the
   // conversation fills it, so a mid-session delivery (e.g. the budget warning, minutes into

package/src/adapters/codex-client.ts CHANGED Viewed

@@ -74,6 +74,8 @@ interface ThreadLoadedListResponse {
   nextCursor: string | null;
 }
+export const CODEX_APP_CLIENT_EVENT_CAP = 5_000;
 export class CodexAppClient {
   private ws!: WebSocket;
   private nextId = 1;
@@ -256,6 +258,9 @@ export class CodexAppClient {
   private record(event: ClientEvent): void {
     this.events.push(event);
+    if (this.events.length > CODEX_APP_CLIENT_EVENT_CAP) {
+      this.events.splice(0, this.events.length - CODEX_APP_CLIENT_EVENT_CAP);
+    }
     for (const listener of this.listeners) listener(event);
   }

package/src/adapters/codex.ts CHANGED Viewed

@@ -41,6 +41,7 @@ export class CodexAdapter implements ProviderAdapter {
   // flushed as one session response on turn/completed (mirrors Claude's chatCaptureMode).
   private turnMessages: string[] = [];
   private readonly itemTextBuffers = new Map<string, string>();
+  private readonly itemTextBufferTypes = new Map<string, string>();
   private captureMode: "final" | "full" = "final";
   // #183/#184: the normalized session-event log for the current process lifetime, fed
   // from the same completed-item stream that drives the chat mirror. The runner slices
@@ -58,6 +59,20 @@ export class CodexAdapter implements ProviderAdapter {
     this.sessionEventCb = cb;
   }
+  private resetProcessState(): void {
+    this.resetThreadState();
+    this.sessionEvents = []; // fresh process -> fresh segment cursor (#184)
+  }
+  private resetThreadState(): void {
+    this.subagentThreads.clear();
+    this.pendingApprovals.clear();
+    this.activeTurnId = undefined;
+    this.turnMessages = [];
+    this.itemTextBuffers.clear();
+    this.itemTextBufferTypes.clear();
+  }
   async interrupt(process: ManagedProcess): Promise<Record<string, unknown>> {
     const client = process.meta?.client as CodexAppClient | undefined;
     if (!client) throw new Error("Codex App Server client is unavailable");
@@ -68,11 +83,33 @@ export class CodexAdapter implements ProviderAdapter {
     return { method: "turn-interrupt", turnId: this.activeTurnId };
   }
-  // Codex streams thread/status continuously, so the runner's claim state never
-  // goes stale the way Claude's can after an out-of-band interrupt. No cheap probe
-  // is needed — defer to the live status stream.
-  async probeActivity(): Promise<"busy" | "idle" | "unknown"> {
-    return "unknown";
+  async probeActivity(process: ManagedProcess): Promise<"busy" | "idle" | "unknown"> {
+    const client = process.meta?.client as CodexAppClient | undefined;
+    if (!client?.isConnected()) return "unknown";
+    const threadId = typeof process.meta?.threadId === "string" ? process.meta.threadId : "";
+    if (!this.activeTurnId) return "idle";
+    if (!threadId) return "busy";
+    try {
+      const read = await client.threadRead(threadId, true);
+      const thread = isRecord(read.thread) ? read.thread : undefined;
+      const turns = Array.isArray(thread?.turns) ? thread.turns : [];
+      const activeTurn = turns.find((turn) => isRecord(turn) && stringValue(turn.id) === this.activeTurnId);
+      const turnStatus = isRecord(activeTurn) ? stringValue(activeTurn.status) : undefined;
+      if (turnStatus === "inProgress") return "busy";
+      if (turnStatus === "completed" || turnStatus === "interrupted" || turnStatus === "failed") {
+        this.finishMainTurn();
+        return "idle";
+      }
+      const threadStatus = statusType(thread?.status);
+      if (threadStatus === "active") return "busy";
+      if (threadStatus === "idle" || threadStatus === "notLoaded" || threadStatus === "systemError") {
+        this.finishMainTurn();
+        return "idle";
+      }
+    } catch {
+      return "unknown";
+    }
+    return "busy";
   }
   // The Codex app-server is headless and has no tmux session, but an unexpected
@@ -82,8 +119,8 @@ export class CodexAdapter implements ProviderAdapter {
   }
   async spawn(config: RunnerSpawnConfig): Promise<ManagedProcess> {
+    this.resetProcessState();
     this.captureMode = (config.providerConfig as ProviderConfig).chatCaptureMode ?? "final";
-    this.sessionEvents = []; // fresh process → fresh segment cursor (#184)
     const args = this.buildSpawnArgs(config, config.providerConfig as ProviderConfig);
     const appServer = Bun.spawn([args.command, ...args.args], {
       cwd: args.cwd,
@@ -150,7 +187,7 @@ export class CodexAdapter implements ProviderAdapter {
     if (!client) throw new Error("Codex App Server client is unavailable");
     const threadId = typeof process.meta?.threadId === "string" ? process.meta.threadId : "";
     if (!threadId) throw new Error("Codex thread is not ready");
-    await client.threadCompactStart(threadId);
+    this.statusCb({ status: "busy", reason: "provider-turn", timeline: { status: "compacting", timestamp: Date.now() } });
     const currentContext = isContextState(process.meta?.context) ? process.meta.context : undefined;
     if (currentContext) {
       process.meta = {
@@ -158,6 +195,27 @@ export class CodexAdapter implements ProviderAdapter {
         context: { ...currentContext, lifecycleState: "compacting", lastUpdatedAt: Date.now() },
       };
     }
+    try {
+      await client.threadCompactStart(threadId);
+    } catch (error) {
+      this.statusCb({ status: "idle", reason: "provider-turn" });
+      throw error;
+    }
+    const compactedAt = Date.now();
+    const compactingContext = isContextState(process.meta?.context) ? process.meta.context : currentContext;
+    if (compactingContext) {
+      process.meta = {
+        ...(process.meta ?? {}),
+        context: {
+          ...compactingContext,
+          lifecycleState: "cooling",
+          tasksSinceCompact: 0,
+          lastCompactedAt: compactedAt,
+          lastUpdatedAt: compactedAt,
+        },
+      };
+    }
+    this.statusCb({ status: "idle", reason: "provider-turn", timeline: { status: "compacted", timestamp: compactedAt } });
     return { threadId };
   }
@@ -165,7 +223,16 @@ export class CodexAdapter implements ProviderAdapter {
     const client = process.meta?.client as CodexAppClient | undefined;
     if (!client) throw new Error("Codex App Server client is unavailable");
     const previousThreadId = typeof process.meta?.threadId === "string" ? process.meta.threadId : undefined;
-    const started = await client.threadStart({ cwd: typeof process.meta?.cwd === "string" ? process.meta.cwd : globalThis.process.cwd() });
+    this.statusCb({ status: "busy", reason: "provider-turn", timeline: { status: "clearing-context", timestamp: Date.now() } });
+    let started: Awaited<ReturnType<CodexAppClient["threadStart"]>>;
+    try {
+      started = await client.threadStart({ cwd: typeof process.meta?.cwd === "string" ? process.meta.cwd : globalThis.process.cwd() });
+    } catch (error) {
+      this.statusCb({ status: "idle", reason: "provider-turn" });
+      throw error;
+    }
+    const clearedAt = Date.now();
+    this.resetThreadState();
     process.meta = {
       ...(process.meta ?? {}),
       threadId: started.thread.id,
@@ -176,11 +243,13 @@ export class CodexAdapter implements ProviderAdapter {
         warmTopics: [],
         activeMemories: [],
         tasksSinceCompact: 0,
-        lastUpdatedAt: Date.now(),
+        lastCompactedAt: clearedAt,
+        lastUpdatedAt: clearedAt,
         source: "api",
         confidence: "reported",
       } satisfies ContextState,
     };
+    this.statusCb({ status: "idle", reason: "provider-turn", clear: ["subagent"], timeline: { status: "context-cleared", timestamp: clearedAt } });
     return { previousThreadId, threadId: started.thread.id };
   }
@@ -350,28 +419,25 @@ export class CodexAdapter implements ProviderAdapter {
         this.statusCb({ status: "busy", reason: "provider-turn", id: this.activeTurnId });
       }
     }
-    if (method.includes("turn/completed") || method.includes("turn.completed")) {
+    if (method.includes("turn/completed") || method.includes("turn.completed") || method.includes("turn/failed") || method.includes("turn.failed") || method.includes("turn/interrupted") || method.includes("turn.interrupted")) {
       if (threadId && this.subagentThreads.has(threadId)) {
         this.statusCb({ status: "idle", reason: "subagent", id: threadId, ...this.subagentThreads.get(threadId) });
       } else {
-        this.flushTurnResponse();
-        const completedTurnId = this.activeTurnId;
-        this.activeTurnId = undefined;
-        this.statusCb({ status: "idle", reason: "provider-turn", id: completedTurnId });
+        this.finishMainTurn();
       }
     }
     if ((method.includes("item/completed") || method.includes("item.completed")) && !isSubagent) {
       this.handleCodexItem(isRecord(params?.item) ? params.item : undefined);
     }
     if (!isSubagent) this.handleCodexItemDelta(method, params);
-    if (method.includes("thread/status")) {
+    if (method.includes("thread/status") || method.includes("thread.status")) {
       const status = statusType(params?.status);
       if (threadId && this.subagentThreads.has(threadId)) {
         if (status === "active") this.statusCb({ status: "busy", reason: "subagent", id: threadId, ...this.subagentThreads.get(threadId) });
         if (status === "idle" || status === "notLoaded") this.statusCb({ status: "idle", reason: "subagent", id: threadId, ...this.subagentThreads.get(threadId) });
       } else {
         if (status === "active") this.statusCb({ status: "busy", reason: "provider-turn", providerState: this.providerStateFromThreadStatus(params?.status, params) });
-        if (status === "idle") this.statusCb({ status: "idle", reason: "provider-turn" });
+        if (status === "idle" || status === "notLoaded" || status === "systemError") this.finishMainTurn();
       }
     }
   }
@@ -391,6 +457,7 @@ export class CodexAdapter implements ProviderAdapter {
         this.recordInsightEvent({ type: "turn" }); // a substantive assistant turn
       }
       if (itemId) this.itemTextBuffers.delete(itemId);
+      if (itemId) this.itemTextBufferTypes.delete(itemId);
       return;
     }
     if (type === "userMessage") {
@@ -406,6 +473,7 @@ export class CodexAdapter implements ProviderAdapter {
       const text = (codexReasoningText(item) || buffered || "").trim();
       if (text) this.sessionEventCb({ type: "reasoning", origin: "provider", body: text, ...(turnId ? { turnId } : {}) });
       if (itemId) this.itemTextBuffers.delete(itemId);
+      if (itemId) this.itemTextBufferTypes.delete(itemId);
       return;
     }
     const tool = codexToolSummary(type, item);
@@ -415,6 +483,7 @@ export class CodexAdapter implements ProviderAdapter {
       this.sessionEventCb({ type: "tool", origin: "provider", body: tool.body, label: tool.label, status: "completed", ...(turnId ? { turnId } : {}) });
     }
     if (itemId) this.itemTextBuffers.delete(itemId);
+    if (itemId) this.itemTextBufferTypes.delete(itemId);
   }
   // #183/#184: append to the session-event log with a soft cap. On overflow we drop the
@@ -449,7 +518,10 @@ export class CodexAdapter implements ProviderAdapter {
     if (type === "agentMessage" || type === "reasoning" || type === "plan") {
       const delta = codexDeltaText(params);
-      if (delta && itemId) this.itemTextBuffers.set(itemId, `${this.itemTextBuffers.get(itemId) ?? ""}${delta}`);
+      if (delta && itemId) {
+        this.itemTextBuffers.set(itemId, `${this.itemTextBuffers.get(itemId) ?? ""}${delta}`);
+        if (type) this.itemTextBufferTypes.set(itemId, type);
+      }
       return;
     }
@@ -459,13 +531,28 @@ export class CodexAdapter implements ProviderAdapter {
   }
   private flushTurnResponse(): void {
-    if (!this.turnMessages.length) return;
-    const joined = this.captureMode === "full" ? this.turnMessages.join("\n\n") : this.turnMessages[this.turnMessages.length - 1]!;
+    const pendingAgentMessages = [...this.itemTextBuffers.entries()]
+      .filter(([itemId]) => this.itemTextBufferTypes.get(itemId) === "agentMessage")
+      .map(([, text]) => text.trim())
+      .filter(Boolean);
+    const messages = [...this.turnMessages, ...pendingAgentMessages];
+    if (!messages.length) return;
+    const joined = this.captureMode === "full" ? messages.join("\n\n") : messages[messages.length - 1]!;
     this.turnMessages = [];
     const text = joined.trim();
     if (text) this.sessionEventCb({ type: "response", origin: "provider", body: text, ...(this.activeTurnId ? { turnId: this.activeTurnId } : {}) });
   }
+  private finishMainTurn(): void {
+    this.flushTurnResponse();
+    const turnId = this.activeTurnId;
+    this.activeTurnId = undefined;
+    this.pendingApprovals.clear();
+    this.itemTextBuffers.clear();
+    this.itemTextBufferTypes.clear();
+    this.statusCb({ status: "idle", reason: "provider-turn", id: turnId });
+  }
   private providerStateFromThreadStatus(status: unknown, params?: Record<string, unknown>): Record<string, unknown> | undefined {
     const state = codexProviderStateFromThreadStatus(status, params);
     if (state?.state !== "blocked" || state.reason !== "waitingOnApproval" || state.pendingApproval) return state;

package/src/attachment-cache.ts CHANGED Viewed

@@ -7,11 +7,11 @@ import { sanitizeFsName } from "agent-relay-sdk/fs-name";
 const DEFAULT_CACHE_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000;
-export interface AttachmentCacheClient {
+interface AttachmentCacheClient {
   downloadArtifact(id: string): Promise<{ stream: ReadableStream<Uint8Array>; meta: Artifact }>;
 }
-export interface AttachmentCacheOptions {
+interface AttachmentCacheOptions {
   agentId: string;
   rootDir?: string;
   maxAgeMs?: number;
@@ -34,7 +34,7 @@ function attachmentRefs(message: Message): Record<string, unknown>[] {
   return refs.filter(isRecord);
 }
-export function attachmentCacheRoot(agentId: string, rootDir = process.env.AGENT_RELAY_ATTACHMENT_CACHE_DIR): string {
+function attachmentCacheRoot(agentId: string, rootDir = process.env.AGENT_RELAY_ATTACHMENT_CACHE_DIR): string {
   return join(attachmentCacheBase(rootDir), safePathPart(agentId));
 }

package/src/control-server.ts CHANGED Viewed

@@ -237,7 +237,7 @@ async function handlePermissionRequest(
   return Response.json(claudePermissionHookResponse(decision, body));
 }
-export function claudePermissionApprovalView(id: string, body: Record<string, unknown>): Record<string, unknown> {
+function claudePermissionApprovalView(id: string, body: Record<string, unknown>): Record<string, unknown> {
   const toolName = typeof body.tool_name === "string" ? body.tool_name : "Tool";
   const toolInput = isRecord(body.tool_input) ? body.tool_input : {};
   // AskUserQuestion is not a yes/no gate — it asks the user to pick answers.
@@ -299,7 +299,7 @@ export function claudePermissionApprovalView(id: string, body: Record<string, un
   };
 }
-export function claudePermissionHookResponse(decision: ProviderPermissionDecisionInput, body: Record<string, unknown>): Record<string, unknown> {
+function claudePermissionHookResponse(decision: ProviderPermissionDecisionInput, body: Record<string, unknown>): Record<string, unknown> {
   // AskUserQuestion comes through a PreToolUse hook. The only way to satisfy it
   // headlessly is permissionDecision "allow" + updatedInput carrying the answers
   // (echoing back the original questions). A bare "allow" is not sufficient, so

package/src/logger.ts CHANGED Viewed

@@ -16,7 +16,7 @@ import { sanitizeFsName } from "agent-relay-sdk/fs-name";
 // flipped at runtime via the control port (no restart) — so a phase refactor can
 // be watched at debug without bouncing the agent.
-export type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";
+type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";
 const ORDER: Record<LogLevel, number> = { debug: 10, info: 20, warn: 30, error: 40, fatal: 50 };
 export const LOG_LEVELS = Object.keys(ORDER) as LogLevel[];
@@ -33,7 +33,7 @@ function safeLogName(value: string): string {
   return sanitizeFsName(value, { replacement: "_", maxLen: 180 });
 }
-export interface LoggerConfig {
+interface LoggerConfig {
   agentId?: string;
   level?: LogLevel;
   headless?: boolean;

package/src/outbox.ts CHANGED Viewed

@@ -21,9 +21,9 @@ import { logger } from "./logger";
 // last-wins and self-heals on reconnect (so it already satisfies "coalesce, don't replay
 // stale busyes"). The coalesce mode below exists so a future state event could migrate here.
-export type OutboxMode = "append" | "coalesce";
+type OutboxMode = "append" | "coalesce";
-export interface OutboxEventInput {
+interface OutboxEventInput {
   kind: string;
   payload: unknown;
   mode?: OutboxMode;
@@ -46,9 +46,9 @@ export interface OutboxRecord {
 }
 // The transport. Resolve = delivered (row deleted). Reject = failed (retried with backoff).
-export type OutboxSend = (record: OutboxRecord) => Promise<void>;
+type OutboxSend = (record: OutboxRecord) => Promise<void>;
-export interface OutboxOptions {
+interface OutboxOptions {
   agentId: string;
   send: OutboxSend;
   // Storage directory. Defaults to AGENT_RELAY_RUNNER_OUTBOX_DIR, else a per-host temp dir.

package/src/profile-home.ts CHANGED Viewed

@@ -44,7 +44,7 @@ const CLAUDE_AUTH_ITEMS = [".credentials.json", "statsig"];
 // Shared skeleton for both providers: gate on isolated-profile, make the
 // instance-keyed home, run the provider-specific first-run bootstrap. The
 // bootstrap step is the only genuinely provider-specific part.
-export function prepareProviderHome(provider: "claude" | "codex", config: RunnerSpawnConfig): ProviderHome | undefined {
+function prepareProviderHome(provider: "claude" | "codex", config: RunnerSpawnConfig): ProviderHome | undefined {
   if (!profileRequiresIsolatedHome(config)) return undefined;
   const target = providerHomePath(provider, config);
   mkdirSync(target, { recursive: true });

package/src/relay-instructions.ts CHANGED Viewed

@@ -8,6 +8,7 @@ export const CLAUDE_RELAY_MANUAL = `# Agent Relay
 - If multiple Relay messages arrive together, answer once to the latest relevant message and cover the current request. Do not separately acknowledge stale greetings or context.
 - If the useful response was already delivered through Relay, do not send an extra "sent", "done", or "drafts sent" confirmation unless the user explicitly asked for one.
 - No reply is needed for pure info messages, passive acknowledgements, or reactions that do not ask for action.
+- NEVER reply to a notification-class message. The server marks these and renders a single \`↪ Notification — no reply needed.\` line instead of the reply reminder — it is a fire-and-forget signal (a merge notice, lifecycle event, or FYI). Act on the information if relevant, but do not send any reply, status confirmation, or reaction back.
 - Use \`agent-relay /react <messageId> <emoji>\` instead of a text reply for lightweight acknowledgement, approval, thanks, or "good job" after a completed work update.
 - Good reaction uses: acknowledge praise with 👍 or ❤️, mark a completed handoff as seen, approve a proposed next step, or acknowledge a passive FYI.
 - Do not use reactions when the user asked a question, gave a new task, reported a bug, or needs a textual result.

package/src/relay-mcp-proxy.ts CHANGED Viewed

@@ -40,7 +40,7 @@ const SSE_KEEPALIVE_MS = 25_000;
 // The write tools whose loss during a relay outage is unacceptable and whose result the agent
 // does not need synchronously — safe to queue durably and replay on reconnect. Reads, claims
 // (409 contention), spawn/shutdown (need a real ack) are deliberately NOT bufferable.
-export const DEFAULT_BUFFERABLE_TOOLS = new Set<string>([
+const DEFAULT_BUFFERABLE_TOOLS = new Set<string>([
   "relay_send_message",
   "relay_reply",
   "relay_workspace_ready",
@@ -59,7 +59,7 @@ const WORKTREE_ONLY_TOOLS = new Set<string>([
   "relay_workspace_land",
 ]);
-export interface ProxyContext {
+interface ProxyContext {
   // The agent owns a live (non-terminal) isolated git worktree → workspace tools apply.
   isolatedWorktree: boolean;
 }

package/src/reply-obligation-cache.ts CHANGED Viewed

@@ -16,9 +16,9 @@ import { logger } from "./logger";
 // - A background interval keeps the snapshot warm; `markDirty()` requests an extra,
 //   debounced refresh when state likely just changed (a message arrived, a turn ended).
-export type ReplyObligationFetch = () => Promise<ReplyObligation[]>;
+type ReplyObligationFetch = () => Promise<ReplyObligation[]>;
-export interface ReplyObligationCacheOptions {
+interface ReplyObligationCacheOptions {
   fetch: ReplyObligationFetch;
   // Background freshness backstop. Default 10s — well under any turn cadence, cheap.
   intervalMs?: number;

package/src/runner.ts CHANGED Viewed

@@ -11,7 +11,7 @@ import { ClaimTracker } from "./claim-tracker";
 import { startControlServer, type ControlServer } from "./control-server";
 import { ReplyObligationCache } from "./reply-obligation-cache";
 import { Outbox, type OutboxRecord } from "./outbox";
-import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
+import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, stepDedupKeys, transcriptLooksComplete } from "./adapters/claude-transcript";
 import { computeContextRatio } from "./session-insights";
 import { agentProfileProjectionReport } from "./profile-projection";
 import { profileUsesHostProviderGlobals } from "./profile-home";
@@ -1256,6 +1256,12 @@ export class AgentRunner {
   // the same pre-destroy seam the bus commands use. `clear`/`compact` continue the session;
   // anything else (logout, prompt_input_exit, other) is a real termination.
   private async handleSessionBoundary(input: { reason?: string; transcriptPath?: string }): Promise<void> {
+    // Reason mapping is fail-safe-toward-termination: only the two known session-
+    // CONTINUING reasons are special-cased; everything else (logout, prompt_input_exit,
+    // other, AND any future reason) maps to "shutdown" → full pre-destroy capture.
+    // ⚠ If Claude Code adds a new BENIGN/continuing boundary reason, add it here — until
+    // then it will trigger a (harmless but wasteful) full context capture on a session
+    // that isn't actually ending.
     const reason = input.reason === "compact" ? "compact"
       : input.reason === "clear" ? "clear"
       : "shutdown";
@@ -1481,10 +1487,12 @@ export class AgentRunner {
     }, INTERRUPT_RECONCILE_DELAY_MS);
   }
-  // --- Reasoning tailer (item 5) ------------------------------------------------------
-  // Tail the in-flight turn's Claude transcript and surface new reasoning/tool steps
-  // as discreet session events. Coalesced and coarse; the final response still comes
-  // through publishSessionTurn.
+  // --- Turn-step tailer (item 5) ------------------------------------------------------
+  // Tail the in-flight turn's Claude transcript and surface new narration/reasoning/tool
+  // steps as session events, in transcript order. `narration` (the agent's intermediate
+  // text) is the primary visible content; reasoning visibility is a client-side toggle.
+  // Coalesced and coarse; the final response still comes through publishSessionTurn.
+  // `reasoningCapture: false` disables the whole live trace (server-side kill switch).
   private startReasoningTail(transcriptPath: string): void {
     if (this.options.providerConfig.reasoningCapture === false) return;
     this.stopReasoningTail();
@@ -1492,6 +1500,8 @@ export class AgentRunner {
     // turn" window in the transcript can shrink/reset (a tool_result entry, a
     // mid-turn user line), and an index cursor would then either re-emit or stall
     // and drop the rest of the turn. A seen-set is idempotent under any reshuffle.
+    // The signature is salted with each step's occurrence-within-window (stepDedupKeys)
+    // so two identical steps in one turn — same tool, same input — both surface (#265).
     const seen = new Set<string>();
     const turnIdAtStart = this.currentTurnId;
     // On the first poll the new prompt usually hasn't landed in the transcript yet,
@@ -1507,16 +1517,16 @@ export class AgentRunner {
       try { jsonl = await readFile(transcriptPath, "utf8"); } catch { return; }
       let steps: ReturnType<typeof extractLatestTurnSteps>;
       try { steps = extractLatestTurnSteps(jsonl); } catch { return; }
+      const keyed = stepDedupKeys(steps).map((sig, i) => ({ sig, step: steps[i]! }));
       if (!seeded) {
         seeded = true;
         if (transcriptLooksComplete(jsonl)) {
-          for (const s of steps) seen.add(JSON.stringify([s.type, s.label ?? "", s.text]));
+          for (const { sig } of keyed) seen.add(sig);
         }
       }
       const turnId = this.currentTurnId ?? turnIdAtStart;
       let emitted = 0;
-      for (const step of steps) {
-        const sig = JSON.stringify([step.type, step.label ?? "", step.text]);
+      for (const { sig, step } of keyed) {
         if (seen.has(sig)) continue;
         seen.add(sig);
         emitted += 1;
@@ -2054,7 +2064,7 @@ export function latestClaudeResumeIdFromText(text: string): string | undefined {
   return latest;
 }
-export function latestClaudeResumeIdFromLogFile(path: string): string | undefined {
+function latestClaudeResumeIdFromLogFile(path: string): string | undefined {
   let fd: number | undefined;
   try {
     const stat = statSync(path);

package/src/session-insights.ts CHANGED Viewed

@@ -37,7 +37,7 @@ export function isGatheringTool(name: string): boolean {
   return GATHERING_NAME.test(name);
 }
-export interface ContextRatioMetric {
+interface ContextRatioMetric {
   /** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
   ratio: number;
   gatheringCalls: number;
@@ -49,7 +49,7 @@ export interface ContextRatioMetric {
   turns: number;
 }
-export interface SessionOutcomeProxy {
+interface SessionOutcomeProxy {
   /** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
   userPrompts: number;
   /** tool_result blocks flagged is_error — failures/workarounds the agent hit. */

package/src/session-scratch.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import { execFileSync } from "node:child_process";
 import { accessSync, appendFileSync, constants, mkdirSync, readdirSync, readFileSync, rmSync, statSync } from "node:fs";
 import { dirname, isAbsolute, join, resolve } from "node:path";
-export const SCRATCH_DIR_NAME = ".agent-relay";
+const SCRATCH_DIR_NAME = ".agent-relay";
 // The local-ignore entry. Leading + trailing slash scopes it to the dir at the
 // base, matching git's gitignore semantics.
 const EXCLUDE_ENTRY = "/.agent-relay/";
@@ -16,7 +16,7 @@ export interface SessionScratchLayout {
   replyFile: string; // <tmp>/reply.md
 }
-export interface SessionScratchTarget {
+interface SessionScratchTarget {
   agentId: string;
   cwd: string;
   // Orchestrator base dir, used only when cwd is not writable. NEVER home — a
@@ -44,7 +44,7 @@ export function resolveScratchBase(cwd: string, fallbackBaseDir?: string): strin
   return cwd;
 }
-export function sessionScratchLayout(baseDir: string, agentId: string): SessionScratchLayout {
+function sessionScratchLayout(baseDir: string, agentId: string): SessionScratchLayout {
   const rootDir = join(baseDir, SCRATCH_DIR_NAME);
   const sessionsDir = join(rootDir, "sessions");
   const sessionDir = join(sessionsDir, agentId);
@@ -131,7 +131,7 @@ export function reapSessionScratch(target: SessionScratchTarget): void {
   }
 }
-export interface SweepOptions {
+interface SweepOptions {
   cwd: string;
   fallbackBaseDir?: string;
   // Agent ids to keep (currently-known agents + self). Any session dir whose id