agent-relay-runner 0.27.2 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.27.2",
3
+ "version": "0.29.0",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -20,7 +20,7 @@
20
20
  "directory": "runner"
21
21
  },
22
22
  "dependencies": {
23
- "agent-relay-sdk": "0.2.16"
23
+ "agent-relay-sdk": "0.2.18"
24
24
  },
25
25
  "devDependencies": {
26
26
  "@types/bun": "latest",
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.27.2",
4
+ "version": "0.29.0",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
package/src/adapter.ts CHANGED
@@ -211,9 +211,19 @@ function isPersistedRelayMessage(message: Message): boolean {
211
211
  return Number.isSafeInteger(message.id) && message.id > 0;
212
212
  }
213
213
 
214
+ // #283 — one-line nudge that replaces the reply-scaffold footer for notification-class
215
+ // (replyExpected:false) messages. Deliberately tiny so a bloated context can't drown the
216
+ // no-reply rule established at session start. Shared with the Claude delivery path.
217
+ export const NOTIFICATION_NUDGE = "↪ Notification — no reply needed.";
218
+
219
+ // A notification is a persisted message the server marked replyExpected:false.
220
+ export function isNotificationMessage(message: Message): boolean {
221
+ return isPersistedRelayMessage(message) && message.replyExpected === false;
222
+ }
223
+
214
224
  function latestReplyableMessage(messages: Message[]): Message | undefined {
215
225
  return messages
216
- .filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message))
226
+ .filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message) && message.replyExpected !== false)
217
227
  .at(-1);
218
228
  }
219
229
 
@@ -316,6 +326,9 @@ export function providerMessageText(messages: Message[]): string {
316
326
  "If you already delivered the useful response through Relay, do not send a separate status-only confirmation.",
317
327
  "If multiple messages arrived together, cover them in one reply instead of answering each line separately.",
318
328
  ].join("\n"));
329
+ } else if (messages.some(isNotificationMessage)) {
330
+ // #283 — pure notification batch: no scaffold, just the one-line no-reply nudge.
331
+ sections.push(NOTIFICATION_NUDGE);
319
332
  }
320
333
  return sections.join("\n\n");
321
334
  }
@@ -1,6 +1,6 @@
1
1
  import type { Message } from "agent-relay-sdk";
2
2
  import { isRecord } from "agent-relay-sdk";
3
- import { providerAttachmentText } from "../adapter";
3
+ import { isNotificationMessage, NOTIFICATION_NUDGE, providerAttachmentText } from "../adapter";
4
4
 
5
5
  const PROVIDER_MESSAGE_BODY_PREVIEW_CHARS = 4000;
6
6
  const REMINDER_EVERY_DELIVERIES = 5;
@@ -61,7 +61,7 @@ function shouldShowReplyReminder(deliveryCount: number): boolean {
61
61
 
62
62
  function latestReplyableMessage(messages: Message[]): Message | undefined {
63
63
  return messages
64
- .filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message))
64
+ .filter((message) => isPersistedRelayMessage(message) && !isMemoryInjection(message) && !isReactionNotification(message) && message.replyExpected !== false)
65
65
  .at(-1);
66
66
  }
67
67
 
@@ -121,9 +121,13 @@ export function claudeProviderMessageText(messages: Message[], options: ClaudeDe
121
121
  const relaySurface = options.relaySurface !== false;
122
122
  const sections = messages.map((message) => formatMessage(message, relaySurface));
123
123
  const replyable = latestReplyableMessage(messages);
124
- // Isolated agents have no way to reply through Relay — never append the reminder.
124
+ // Isolated agents have no way to reply through Relay — never append the reminder/nudge.
125
125
  if (relaySurface && replyable && shouldShowReplyReminder(options.deliveryCount)) {
126
126
  sections.push(replyReminder(replyable, options.readOnly === true));
127
+ } else if (relaySurface && !replyable && messages.some(isNotificationMessage)) {
128
+ // #283 — pure notification batch (no message wants a reply): drop the scaffold, append the
129
+ // one-line nudge so a long context can't make the agent forget the session-start no-reply rule.
130
+ sections.push(NOTIFICATION_NUDGE);
127
131
  }
128
132
  return sections.join("\n\n");
129
133
  }
@@ -21,8 +21,8 @@ interface TranscriptBlock {
21
21
  is_error?: boolean;
22
22
  }
23
23
 
24
- export interface TurnStep {
25
- type: "reasoning" | "tool";
24
+ interface TurnStep {
25
+ type: "narration" | "reasoning" | "tool";
26
26
  text: string;
27
27
  label?: string;
28
28
  }
@@ -36,6 +36,16 @@ interface TranscriptMessage {
36
36
  interface TranscriptEntry {
37
37
  type?: string;
38
38
  message?: TranscriptMessage;
39
+ // Claude Code stamps every transcript entry with `isSidechain`: true for
40
+ // entries belonging to a Task (subagent) run, false for the root session.
41
+ // Current CC writes sidechains to a separate subagents/*.jsonl so they don't
42
+ // reach the root transcript the runner tails — but older CC inlined them, and
43
+ // the behavior can revert, so the chat-mirror parsers below defensively skip
44
+ // sidechain entries to keep a subagent's reasoning/tools/responses from
45
+ // leaking into the parent agent's chat. Insights parsers
46
+ // (collectClaudeSessionEvents/countSubstantiveTurns) intentionally do NOT filter —
47
+ // changing them would shift the #184/#185 baselines, a separate concern.
48
+ isSidechain?: boolean;
39
49
  }
40
50
 
41
51
  function blocks(message: TranscriptMessage | undefined): TranscriptBlock[] {
@@ -43,6 +53,11 @@ function blocks(message: TranscriptMessage | undefined): TranscriptBlock[] {
43
53
  return message.content.filter((b): b is TranscriptBlock => Boolean(b) && typeof b === "object");
44
54
  }
45
55
 
56
+ /** True for a subagent (Task) transcript entry — see the note on TranscriptEntry.isSidechain. */
57
+ function isSidechainEntry(entry: TranscriptEntry): boolean {
58
+ return entry.isSidechain === true;
59
+ }
60
+
46
61
  function isRealUserPrompt(entry: TranscriptEntry): boolean {
47
62
  if (entry.type !== "user") return false;
48
63
  const content = entry.message?.content;
@@ -75,6 +90,7 @@ export function transcriptLooksComplete(jsonl: string): boolean {
75
90
  if (!trimmed) continue;
76
91
  try {
77
92
  const entry = JSON.parse(trimmed) as TranscriptEntry;
93
+ if (isSidechainEntry(entry)) continue;
78
94
  if (entry.type === "assistant") lastAssistantStopReason = entry.message?.stop_reason;
79
95
  } catch {
80
96
  continue;
@@ -99,6 +115,7 @@ export function extractLastAssistantTurn(jsonl: string): string {
99
115
  } catch {
100
116
  continue;
101
117
  }
118
+ if (isSidechainEntry(entry)) continue;
102
119
  if (isRealUserPrompt(entry)) {
103
120
  collected = [];
104
121
  continue;
@@ -128,6 +145,7 @@ export function extractFinalAssistantMessage(jsonl: string): string {
128
145
  } catch {
129
146
  continue;
130
147
  }
148
+ if (isSidechainEntry(entry)) continue;
131
149
  if (isRealUserPrompt(entry)) {
132
150
  pastLastUserPrompt = true;
133
151
  lastText = "";
@@ -147,10 +165,12 @@ export function extractFinalAssistantMessage(jsonl: string): string {
147
165
  * Thinking and tool_use blocks are dropped, matching extractLastAssistantTurn.
148
166
  */
149
167
  /**
150
- * Extract the ordered reasoning and tool steps for the most recent turn (since
151
- * the last real user prompt). Used by the reasoning tailer to stream discreet
152
- * progress into chat while a turn is in flight. Returns steps in transcript order
153
- * so the tailer can emit only the ones it hasn't seen yet by index.
168
+ * Extract the ordered narration, reasoning, and tool steps for the most recent
169
+ * turn (since the last real user prompt). Used by the reasoning tailer to stream
170
+ * progress into chat while a turn is in flight. `narration` is the assistant's
171
+ * intermediate `text` between tool calls (the terminal's `●` lines); it is the
172
+ * primary, default-visible turn content. Returns steps in transcript order so the
173
+ * tailer can emit only the ones it hasn't seen yet.
154
174
  */
155
175
  export function extractLatestTurnSteps(jsonl: string): TurnStep[] {
156
176
  const lines = jsonl.split("\n");
@@ -164,13 +184,16 @@ export function extractLatestTurnSteps(jsonl: string): TurnStep[] {
164
184
  } catch {
165
185
  continue;
166
186
  }
187
+ if (isSidechainEntry(entry)) continue;
167
188
  if (isRealUserPrompt(entry)) {
168
189
  steps = [];
169
190
  continue;
170
191
  }
171
192
  if (entry.type !== "assistant") continue;
172
193
  for (const b of blocks(entry.message)) {
173
- if (b.type === "thinking" && typeof b.thinking === "string" && b.thinking.trim()) {
194
+ if (b.type === "text" && typeof b.text === "string" && b.text.trim()) {
195
+ steps.push({ type: "narration", text: b.text.trim() });
196
+ } else if (b.type === "thinking" && typeof b.thinking === "string" && b.thinking.trim()) {
174
197
  steps.push({ type: "reasoning", text: b.thinking.trim() });
175
198
  } else if (b.type === "tool_use" && typeof b.name === "string" && b.name) {
176
199
  steps.push({ type: "tool", label: b.name, text: summarizeToolUse(b.name, b.input) });
@@ -180,6 +203,25 @@ export function extractLatestTurnSteps(jsonl: string): TurnStep[] {
180
203
  return steps;
181
204
  }
182
205
 
206
+ /**
207
+ * Stable dedup keys for a turn's steps, in order. Each key is salted with how many
208
+ * identical (type,label,text) steps preceded it in the same window — so running the
209
+ * same tool twice with identical input within a turn yields two distinct keys and
210
+ * both show in the activity trace (#265). Keying on occurrence-within-window rather
211
+ * than raw transcript index keeps the reasoning tailer idempotent when the "latest
212
+ * turn" window shrinks/resets mid-poll: a surviving step recomputes to the same or a
213
+ * lower occurrence, so an already-emitted step never re-fires.
214
+ */
215
+ export function stepDedupKeys(steps: TurnStep[]): string[] {
216
+ const counts = new Map<string, number>();
217
+ return steps.map((step) => {
218
+ const base = JSON.stringify([step.type, step.label ?? "", step.text]);
219
+ const occ = counts.get(base) ?? 0;
220
+ counts.set(base, occ + 1);
221
+ return JSON.stringify([step.type, step.label ?? "", step.text, occ]);
222
+ });
223
+ }
224
+
183
225
  /** Compact one-line summary of a tool invocation for the discreet activity row. */
184
226
  export function summarizeToolUse(name: string, input: Record<string, unknown> | undefined): string {
185
227
  const str = (key: string): string | undefined => (input && typeof input[key] === "string" ? (input[key] as string) : undefined);
@@ -453,6 +453,21 @@ function captureTmuxPane(sessionName: string, socketName?: string): string {
453
453
  return result.stdout.toString();
454
454
  }
455
455
 
456
+ // ⚠ FRAGILE PANE HEURISTICS — both functions below string-match Claude Code's TUI
457
+ // chrome against captured tmux scrollback (~80 lines), so they break whenever CC
458
+ // restyles its footer/banner. They are deliberately substring/regex based because
459
+ // there's no machine-readable ready/busy signal from the TUI. Known break conditions,
460
+ // so the next CC restyle is a fast fix rather than a hunt:
461
+ // readiness (claudePaneLooksReady) breaks if CC renames/removes ALL of: the
462
+ // "bypass permissions" / "shift+tab to cycle" / "? for shortcuts" footer hints,
463
+ // the "/effort" hint, and the "Welcome back" / "Claude Code" banner.
464
+ // busy (claudePaneIsBusy) breaks if CC drops the live "… (<elapsed>" spinner counter
465
+ // (the cross-version anchor; the "esc to interrupt" hint was already dropped in 2.1.x).
466
+ // FALSE POSITIVES: agent output that literally QUOTES any of these strings (e.g. a
467
+ // transcript discussing "esc to interrupt", or this very comment shown in a pane)
468
+ // reads as ready/busy. Tolerated because the markers are CC-specific enough to be
469
+ // rare in real output; if it bites, gate on the LAST N lines only (the live footer).
470
+ // History: 18067b5 (busy counter), and the readiness footer-vs-banner fix below.
456
471
  export function claudePaneLooksReady(text: string): boolean {
457
472
  // Claude's startup banner ("Claude Code" / "Welcome back") scrolls off the pane once the
458
473
  // conversation fills it, so a mid-session delivery (e.g. the budget warning, minutes into
@@ -74,6 +74,8 @@ interface ThreadLoadedListResponse {
74
74
  nextCursor: string | null;
75
75
  }
76
76
 
77
+ export const CODEX_APP_CLIENT_EVENT_CAP = 5_000;
78
+
77
79
  export class CodexAppClient {
78
80
  private ws!: WebSocket;
79
81
  private nextId = 1;
@@ -256,6 +258,9 @@ export class CodexAppClient {
256
258
 
257
259
  private record(event: ClientEvent): void {
258
260
  this.events.push(event);
261
+ if (this.events.length > CODEX_APP_CLIENT_EVENT_CAP) {
262
+ this.events.splice(0, this.events.length - CODEX_APP_CLIENT_EVENT_CAP);
263
+ }
259
264
  for (const listener of this.listeners) listener(event);
260
265
  }
261
266
 
@@ -41,6 +41,7 @@ export class CodexAdapter implements ProviderAdapter {
41
41
  // flushed as one session response on turn/completed (mirrors Claude's chatCaptureMode).
42
42
  private turnMessages: string[] = [];
43
43
  private readonly itemTextBuffers = new Map<string, string>();
44
+ private readonly itemTextBufferTypes = new Map<string, string>();
44
45
  private captureMode: "final" | "full" = "final";
45
46
  // #183/#184: the normalized session-event log for the current process lifetime, fed
46
47
  // from the same completed-item stream that drives the chat mirror. The runner slices
@@ -58,6 +59,20 @@ export class CodexAdapter implements ProviderAdapter {
58
59
  this.sessionEventCb = cb;
59
60
  }
60
61
 
62
+ private resetProcessState(): void {
63
+ this.resetThreadState();
64
+ this.sessionEvents = []; // fresh process -> fresh segment cursor (#184)
65
+ }
66
+
67
+ private resetThreadState(): void {
68
+ this.subagentThreads.clear();
69
+ this.pendingApprovals.clear();
70
+ this.activeTurnId = undefined;
71
+ this.turnMessages = [];
72
+ this.itemTextBuffers.clear();
73
+ this.itemTextBufferTypes.clear();
74
+ }
75
+
61
76
  async interrupt(process: ManagedProcess): Promise<Record<string, unknown>> {
62
77
  const client = process.meta?.client as CodexAppClient | undefined;
63
78
  if (!client) throw new Error("Codex App Server client is unavailable");
@@ -68,11 +83,33 @@ export class CodexAdapter implements ProviderAdapter {
68
83
  return { method: "turn-interrupt", turnId: this.activeTurnId };
69
84
  }
70
85
 
71
- // Codex streams thread/status continuously, so the runner's claim state never
72
- // goes stale the way Claude's can after an out-of-band interrupt. No cheap probe
73
- // is needed — defer to the live status stream.
74
- async probeActivity(): Promise<"busy" | "idle" | "unknown"> {
75
- return "unknown";
86
+ async probeActivity(process: ManagedProcess): Promise<"busy" | "idle" | "unknown"> {
87
+ const client = process.meta?.client as CodexAppClient | undefined;
88
+ if (!client?.isConnected()) return "unknown";
89
+ const threadId = typeof process.meta?.threadId === "string" ? process.meta.threadId : "";
90
+ if (!this.activeTurnId) return "idle";
91
+ if (!threadId) return "busy";
92
+ try {
93
+ const read = await client.threadRead(threadId, true);
94
+ const thread = isRecord(read.thread) ? read.thread : undefined;
95
+ const turns = Array.isArray(thread?.turns) ? thread.turns : [];
96
+ const activeTurn = turns.find((turn) => isRecord(turn) && stringValue(turn.id) === this.activeTurnId);
97
+ const turnStatus = isRecord(activeTurn) ? stringValue(activeTurn.status) : undefined;
98
+ if (turnStatus === "inProgress") return "busy";
99
+ if (turnStatus === "completed" || turnStatus === "interrupted" || turnStatus === "failed") {
100
+ this.finishMainTurn();
101
+ return "idle";
102
+ }
103
+ const threadStatus = statusType(thread?.status);
104
+ if (threadStatus === "active") return "busy";
105
+ if (threadStatus === "idle" || threadStatus === "notLoaded" || threadStatus === "systemError") {
106
+ this.finishMainTurn();
107
+ return "idle";
108
+ }
109
+ } catch {
110
+ return "unknown";
111
+ }
112
+ return "busy";
76
113
  }
77
114
 
78
115
  // The Codex app-server is headless and has no tmux session, but an unexpected
@@ -82,8 +119,8 @@ export class CodexAdapter implements ProviderAdapter {
82
119
  }
83
120
 
84
121
  async spawn(config: RunnerSpawnConfig): Promise<ManagedProcess> {
122
+ this.resetProcessState();
85
123
  this.captureMode = (config.providerConfig as ProviderConfig).chatCaptureMode ?? "final";
86
- this.sessionEvents = []; // fresh process → fresh segment cursor (#184)
87
124
  const args = this.buildSpawnArgs(config, config.providerConfig as ProviderConfig);
88
125
  const appServer = Bun.spawn([args.command, ...args.args], {
89
126
  cwd: args.cwd,
@@ -150,7 +187,7 @@ export class CodexAdapter implements ProviderAdapter {
150
187
  if (!client) throw new Error("Codex App Server client is unavailable");
151
188
  const threadId = typeof process.meta?.threadId === "string" ? process.meta.threadId : "";
152
189
  if (!threadId) throw new Error("Codex thread is not ready");
153
- await client.threadCompactStart(threadId);
190
+ this.statusCb({ status: "busy", reason: "provider-turn", timeline: { status: "compacting", timestamp: Date.now() } });
154
191
  const currentContext = isContextState(process.meta?.context) ? process.meta.context : undefined;
155
192
  if (currentContext) {
156
193
  process.meta = {
@@ -158,6 +195,27 @@ export class CodexAdapter implements ProviderAdapter {
158
195
  context: { ...currentContext, lifecycleState: "compacting", lastUpdatedAt: Date.now() },
159
196
  };
160
197
  }
198
+ try {
199
+ await client.threadCompactStart(threadId);
200
+ } catch (error) {
201
+ this.statusCb({ status: "idle", reason: "provider-turn" });
202
+ throw error;
203
+ }
204
+ const compactedAt = Date.now();
205
+ const compactingContext = isContextState(process.meta?.context) ? process.meta.context : currentContext;
206
+ if (compactingContext) {
207
+ process.meta = {
208
+ ...(process.meta ?? {}),
209
+ context: {
210
+ ...compactingContext,
211
+ lifecycleState: "cooling",
212
+ tasksSinceCompact: 0,
213
+ lastCompactedAt: compactedAt,
214
+ lastUpdatedAt: compactedAt,
215
+ },
216
+ };
217
+ }
218
+ this.statusCb({ status: "idle", reason: "provider-turn", timeline: { status: "compacted", timestamp: compactedAt } });
161
219
  return { threadId };
162
220
  }
163
221
 
@@ -165,7 +223,16 @@ export class CodexAdapter implements ProviderAdapter {
165
223
  const client = process.meta?.client as CodexAppClient | undefined;
166
224
  if (!client) throw new Error("Codex App Server client is unavailable");
167
225
  const previousThreadId = typeof process.meta?.threadId === "string" ? process.meta.threadId : undefined;
168
- const started = await client.threadStart({ cwd: typeof process.meta?.cwd === "string" ? process.meta.cwd : globalThis.process.cwd() });
226
+ this.statusCb({ status: "busy", reason: "provider-turn", timeline: { status: "clearing-context", timestamp: Date.now() } });
227
+ let started: Awaited<ReturnType<CodexAppClient["threadStart"]>>;
228
+ try {
229
+ started = await client.threadStart({ cwd: typeof process.meta?.cwd === "string" ? process.meta.cwd : globalThis.process.cwd() });
230
+ } catch (error) {
231
+ this.statusCb({ status: "idle", reason: "provider-turn" });
232
+ throw error;
233
+ }
234
+ const clearedAt = Date.now();
235
+ this.resetThreadState();
169
236
  process.meta = {
170
237
  ...(process.meta ?? {}),
171
238
  threadId: started.thread.id,
@@ -176,11 +243,13 @@ export class CodexAdapter implements ProviderAdapter {
176
243
  warmTopics: [],
177
244
  activeMemories: [],
178
245
  tasksSinceCompact: 0,
179
- lastUpdatedAt: Date.now(),
246
+ lastCompactedAt: clearedAt,
247
+ lastUpdatedAt: clearedAt,
180
248
  source: "api",
181
249
  confidence: "reported",
182
250
  } satisfies ContextState,
183
251
  };
252
+ this.statusCb({ status: "idle", reason: "provider-turn", clear: ["subagent"], timeline: { status: "context-cleared", timestamp: clearedAt } });
184
253
  return { previousThreadId, threadId: started.thread.id };
185
254
  }
186
255
 
@@ -350,28 +419,25 @@ export class CodexAdapter implements ProviderAdapter {
350
419
  this.statusCb({ status: "busy", reason: "provider-turn", id: this.activeTurnId });
351
420
  }
352
421
  }
353
- if (method.includes("turn/completed") || method.includes("turn.completed")) {
422
+ if (method.includes("turn/completed") || method.includes("turn.completed") || method.includes("turn/failed") || method.includes("turn.failed") || method.includes("turn/interrupted") || method.includes("turn.interrupted")) {
354
423
  if (threadId && this.subagentThreads.has(threadId)) {
355
424
  this.statusCb({ status: "idle", reason: "subagent", id: threadId, ...this.subagentThreads.get(threadId) });
356
425
  } else {
357
- this.flushTurnResponse();
358
- const completedTurnId = this.activeTurnId;
359
- this.activeTurnId = undefined;
360
- this.statusCb({ status: "idle", reason: "provider-turn", id: completedTurnId });
426
+ this.finishMainTurn();
361
427
  }
362
428
  }
363
429
  if ((method.includes("item/completed") || method.includes("item.completed")) && !isSubagent) {
364
430
  this.handleCodexItem(isRecord(params?.item) ? params.item : undefined);
365
431
  }
366
432
  if (!isSubagent) this.handleCodexItemDelta(method, params);
367
- if (method.includes("thread/status")) {
433
+ if (method.includes("thread/status") || method.includes("thread.status")) {
368
434
  const status = statusType(params?.status);
369
435
  if (threadId && this.subagentThreads.has(threadId)) {
370
436
  if (status === "active") this.statusCb({ status: "busy", reason: "subagent", id: threadId, ...this.subagentThreads.get(threadId) });
371
437
  if (status === "idle" || status === "notLoaded") this.statusCb({ status: "idle", reason: "subagent", id: threadId, ...this.subagentThreads.get(threadId) });
372
438
  } else {
373
439
  if (status === "active") this.statusCb({ status: "busy", reason: "provider-turn", providerState: this.providerStateFromThreadStatus(params?.status, params) });
374
- if (status === "idle") this.statusCb({ status: "idle", reason: "provider-turn" });
440
+ if (status === "idle" || status === "notLoaded" || status === "systemError") this.finishMainTurn();
375
441
  }
376
442
  }
377
443
  }
@@ -391,6 +457,7 @@ export class CodexAdapter implements ProviderAdapter {
391
457
  this.recordInsightEvent({ type: "turn" }); // a substantive assistant turn
392
458
  }
393
459
  if (itemId) this.itemTextBuffers.delete(itemId);
460
+ if (itemId) this.itemTextBufferTypes.delete(itemId);
394
461
  return;
395
462
  }
396
463
  if (type === "userMessage") {
@@ -406,6 +473,7 @@ export class CodexAdapter implements ProviderAdapter {
406
473
  const text = (codexReasoningText(item) || buffered || "").trim();
407
474
  if (text) this.sessionEventCb({ type: "reasoning", origin: "provider", body: text, ...(turnId ? { turnId } : {}) });
408
475
  if (itemId) this.itemTextBuffers.delete(itemId);
476
+ if (itemId) this.itemTextBufferTypes.delete(itemId);
409
477
  return;
410
478
  }
411
479
  const tool = codexToolSummary(type, item);
@@ -415,6 +483,7 @@ export class CodexAdapter implements ProviderAdapter {
415
483
  this.sessionEventCb({ type: "tool", origin: "provider", body: tool.body, label: tool.label, status: "completed", ...(turnId ? { turnId } : {}) });
416
484
  }
417
485
  if (itemId) this.itemTextBuffers.delete(itemId);
486
+ if (itemId) this.itemTextBufferTypes.delete(itemId);
418
487
  }
419
488
 
420
489
  // #183/#184: append to the session-event log with a soft cap. On overflow we drop the
@@ -449,7 +518,10 @@ export class CodexAdapter implements ProviderAdapter {
449
518
 
450
519
  if (type === "agentMessage" || type === "reasoning" || type === "plan") {
451
520
  const delta = codexDeltaText(params);
452
- if (delta && itemId) this.itemTextBuffers.set(itemId, `${this.itemTextBuffers.get(itemId) ?? ""}${delta}`);
521
+ if (delta && itemId) {
522
+ this.itemTextBuffers.set(itemId, `${this.itemTextBuffers.get(itemId) ?? ""}${delta}`);
523
+ if (type) this.itemTextBufferTypes.set(itemId, type);
524
+ }
453
525
  return;
454
526
  }
455
527
 
@@ -459,13 +531,28 @@ export class CodexAdapter implements ProviderAdapter {
459
531
  }
460
532
 
461
533
  private flushTurnResponse(): void {
462
- if (!this.turnMessages.length) return;
463
- const joined = this.captureMode === "full" ? this.turnMessages.join("\n\n") : this.turnMessages[this.turnMessages.length - 1]!;
534
+ const pendingAgentMessages = [...this.itemTextBuffers.entries()]
535
+ .filter(([itemId]) => this.itemTextBufferTypes.get(itemId) === "agentMessage")
536
+ .map(([, text]) => text.trim())
537
+ .filter(Boolean);
538
+ const messages = [...this.turnMessages, ...pendingAgentMessages];
539
+ if (!messages.length) return;
540
+ const joined = this.captureMode === "full" ? messages.join("\n\n") : messages[messages.length - 1]!;
464
541
  this.turnMessages = [];
465
542
  const text = joined.trim();
466
543
  if (text) this.sessionEventCb({ type: "response", origin: "provider", body: text, ...(this.activeTurnId ? { turnId: this.activeTurnId } : {}) });
467
544
  }
468
545
 
546
+ private finishMainTurn(): void {
547
+ this.flushTurnResponse();
548
+ const turnId = this.activeTurnId;
549
+ this.activeTurnId = undefined;
550
+ this.pendingApprovals.clear();
551
+ this.itemTextBuffers.clear();
552
+ this.itemTextBufferTypes.clear();
553
+ this.statusCb({ status: "idle", reason: "provider-turn", id: turnId });
554
+ }
555
+
469
556
  private providerStateFromThreadStatus(status: unknown, params?: Record<string, unknown>): Record<string, unknown> | undefined {
470
557
  const state = codexProviderStateFromThreadStatus(status, params);
471
558
  if (state?.state !== "blocked" || state.reason !== "waitingOnApproval" || state.pendingApproval) return state;
@@ -7,11 +7,11 @@ import { sanitizeFsName } from "agent-relay-sdk/fs-name";
7
7
 
8
8
  const DEFAULT_CACHE_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000;
9
9
 
10
- export interface AttachmentCacheClient {
10
+ interface AttachmentCacheClient {
11
11
  downloadArtifact(id: string): Promise<{ stream: ReadableStream<Uint8Array>; meta: Artifact }>;
12
12
  }
13
13
 
14
- export interface AttachmentCacheOptions {
14
+ interface AttachmentCacheOptions {
15
15
  agentId: string;
16
16
  rootDir?: string;
17
17
  maxAgeMs?: number;
@@ -34,7 +34,7 @@ function attachmentRefs(message: Message): Record<string, unknown>[] {
34
34
  return refs.filter(isRecord);
35
35
  }
36
36
 
37
- export function attachmentCacheRoot(agentId: string, rootDir = process.env.AGENT_RELAY_ATTACHMENT_CACHE_DIR): string {
37
+ function attachmentCacheRoot(agentId: string, rootDir = process.env.AGENT_RELAY_ATTACHMENT_CACHE_DIR): string {
38
38
  return join(attachmentCacheBase(rootDir), safePathPart(agentId));
39
39
  }
40
40
 
@@ -237,7 +237,7 @@ async function handlePermissionRequest(
237
237
  return Response.json(claudePermissionHookResponse(decision, body));
238
238
  }
239
239
 
240
- export function claudePermissionApprovalView(id: string, body: Record<string, unknown>): Record<string, unknown> {
240
+ function claudePermissionApprovalView(id: string, body: Record<string, unknown>): Record<string, unknown> {
241
241
  const toolName = typeof body.tool_name === "string" ? body.tool_name : "Tool";
242
242
  const toolInput = isRecord(body.tool_input) ? body.tool_input : {};
243
243
  // AskUserQuestion is not a yes/no gate — it asks the user to pick answers.
@@ -299,7 +299,7 @@ export function claudePermissionApprovalView(id: string, body: Record<string, un
299
299
  };
300
300
  }
301
301
 
302
- export function claudePermissionHookResponse(decision: ProviderPermissionDecisionInput, body: Record<string, unknown>): Record<string, unknown> {
302
+ function claudePermissionHookResponse(decision: ProviderPermissionDecisionInput, body: Record<string, unknown>): Record<string, unknown> {
303
303
  // AskUserQuestion comes through a PreToolUse hook. The only way to satisfy it
304
304
  // headlessly is permissionDecision "allow" + updatedInput carrying the answers
305
305
  // (echoing back the original questions). A bare "allow" is not sufficient, so
package/src/logger.ts CHANGED
@@ -16,7 +16,7 @@ import { sanitizeFsName } from "agent-relay-sdk/fs-name";
16
16
  // flipped at runtime via the control port (no restart) — so a phase refactor can
17
17
  // be watched at debug without bouncing the agent.
18
18
 
19
- export type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";
19
+ type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";
20
20
 
21
21
  const ORDER: Record<LogLevel, number> = { debug: 10, info: 20, warn: 30, error: 40, fatal: 50 };
22
22
  export const LOG_LEVELS = Object.keys(ORDER) as LogLevel[];
@@ -33,7 +33,7 @@ function safeLogName(value: string): string {
33
33
  return sanitizeFsName(value, { replacement: "_", maxLen: 180 });
34
34
  }
35
35
 
36
- export interface LoggerConfig {
36
+ interface LoggerConfig {
37
37
  agentId?: string;
38
38
  level?: LogLevel;
39
39
  headless?: boolean;
package/src/outbox.ts CHANGED
@@ -21,9 +21,9 @@ import { logger } from "./logger";
21
21
  // last-wins and self-heals on reconnect (so it already satisfies "coalesce, don't replay
22
22
  // stale busy states"). The coalesce mode below exists so a future state event could migrate here.
23
23
 
24
- export type OutboxMode = "append" | "coalesce";
24
+ type OutboxMode = "append" | "coalesce";
25
25
 
26
- export interface OutboxEventInput {
26
+ interface OutboxEventInput {
27
27
  kind: string;
28
28
  payload: unknown;
29
29
  mode?: OutboxMode;
@@ -46,9 +46,9 @@ export interface OutboxRecord {
46
46
  }
47
47
 
48
48
  // The transport. Resolve = delivered (row deleted). Reject = failed (retried with backoff).
49
- export type OutboxSend = (record: OutboxRecord) => Promise<void>;
49
+ type OutboxSend = (record: OutboxRecord) => Promise<void>;
50
50
 
51
- export interface OutboxOptions {
51
+ interface OutboxOptions {
52
52
  agentId: string;
53
53
  send: OutboxSend;
54
54
  // Storage directory. Defaults to AGENT_RELAY_RUNNER_OUTBOX_DIR, else a per-host temp dir.
@@ -44,7 +44,7 @@ const CLAUDE_AUTH_ITEMS = [".credentials.json", "statsig"];
44
44
  // Shared skeleton for both providers: gate on isolated-profile, make the
45
45
  // instance-keyed home, run the provider-specific first-run bootstrap. The
46
46
  // bootstrap step is the only genuinely provider-specific part.
47
- export function prepareProviderHome(provider: "claude" | "codex", config: RunnerSpawnConfig): ProviderHome | undefined {
47
+ function prepareProviderHome(provider: "claude" | "codex", config: RunnerSpawnConfig): ProviderHome | undefined {
48
48
  if (!profileRequiresIsolatedHome(config)) return undefined;
49
49
  const target = providerHomePath(provider, config);
50
50
  mkdirSync(target, { recursive: true });
@@ -8,6 +8,7 @@ export const CLAUDE_RELAY_MANUAL = `# Agent Relay
8
8
  - If multiple Relay messages arrive together, answer once to the latest relevant message and cover the current request. Do not separately acknowledge stale greetings or context.
9
9
  - If the useful response was already delivered through Relay, do not send an extra "sent", "done", or "drafts sent" confirmation unless the user explicitly asked for one.
10
10
  - No reply is needed for pure info messages, passive acknowledgements, or reactions that do not ask for action.
11
+ - NEVER reply to a notification-class message. The server marks these and renders a single \`↪ Notification — no reply needed.\` line instead of the reply reminder — it is a fire-and-forget signal (a merge notice, lifecycle event, or FYI). Act on the information if relevant, but do not send any reply, status confirmation, or reaction back.
11
12
  - Use \`agent-relay /react <messageId> <emoji>\` instead of a text reply for lightweight acknowledgement, approval, thanks, or "good job" after a completed work update.
12
13
  - Good reaction uses: acknowledge praise with 👍 or ❤️, mark a completed handoff as seen, approve a proposed next step, or acknowledge a passive FYI.
13
14
  - Do not use reactions when the user asked a question, gave a new task, reported a bug, or needs a textual result.
@@ -40,7 +40,7 @@ const SSE_KEEPALIVE_MS = 25_000;
40
40
  // The write tools whose loss during a relay outage is unacceptable and whose result the agent
41
41
  // does not need synchronously — safe to queue durably and replay on reconnect. Reads, claims
42
42
  // (409 contention), spawn/shutdown (need a real ack) are deliberately NOT bufferable.
43
- export const DEFAULT_BUFFERABLE_TOOLS = new Set<string>([
43
+ const DEFAULT_BUFFERABLE_TOOLS = new Set<string>([
44
44
  "relay_send_message",
45
45
  "relay_reply",
46
46
  "relay_workspace_ready",
@@ -59,7 +59,7 @@ const WORKTREE_ONLY_TOOLS = new Set<string>([
59
59
  "relay_workspace_land",
60
60
  ]);
61
61
 
62
- export interface ProxyContext {
62
+ interface ProxyContext {
63
63
  // The agent owns a live (non-terminal) isolated git worktree → workspace tools apply.
64
64
  isolatedWorktree: boolean;
65
65
  }
@@ -16,9 +16,9 @@ import { logger } from "./logger";
16
16
  // - A background interval keeps the snapshot warm; `markDirty()` requests an extra,
17
17
  // debounced refresh when state likely just changed (a message arrived, a turn ended).
18
18
 
19
- export type ReplyObligationFetch = () => Promise<ReplyObligation[]>;
19
+ type ReplyObligationFetch = () => Promise<ReplyObligation[]>;
20
20
 
21
- export interface ReplyObligationCacheOptions {
21
+ interface ReplyObligationCacheOptions {
22
22
  fetch: ReplyObligationFetch;
23
23
  // Background freshness backstop. Default 10s — well under any turn cadence, cheap.
24
24
  intervalMs?: number;
package/src/runner.ts CHANGED
@@ -11,7 +11,7 @@ import { ClaimTracker } from "./claim-tracker";
11
11
  import { startControlServer, type ControlServer } from "./control-server";
12
12
  import { ReplyObligationCache } from "./reply-obligation-cache";
13
13
  import { Outbox, type OutboxRecord } from "./outbox";
14
- import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
14
+ import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, stepDedupKeys, transcriptLooksComplete } from "./adapters/claude-transcript";
15
15
  import { computeContextRatio } from "./session-insights";
16
16
  import { agentProfileProjectionReport } from "./profile-projection";
17
17
  import { profileUsesHostProviderGlobals } from "./profile-home";
@@ -1256,6 +1256,12 @@ export class AgentRunner {
1256
1256
  // the same pre-destroy seam the bus commands use. `clear`/`compact` continue the session;
1257
1257
  // anything else (logout, prompt_input_exit, other) is a real termination.
1258
1258
  private async handleSessionBoundary(input: { reason?: string; transcriptPath?: string }): Promise<void> {
1259
+ // Reason mapping is fail-safe-toward-termination: only the two known session-
1260
+ // CONTINUING reasons are special-cased; everything else (logout, prompt_input_exit,
1261
+ // other, AND any future reason) maps to "shutdown" → full pre-destroy capture.
1262
+ // ⚠ If Claude Code adds a new BENIGN/continuing boundary reason, add it here — until
1263
+ // then it will trigger a (harmless but wasteful) full context capture on a session
1264
+ // that isn't actually ending.
1259
1265
  const reason = input.reason === "compact" ? "compact"
1260
1266
  : input.reason === "clear" ? "clear"
1261
1267
  : "shutdown";
@@ -1481,10 +1487,12 @@ export class AgentRunner {
1481
1487
  }, INTERRUPT_RECONCILE_DELAY_MS);
1482
1488
  }
1483
1489
 
1484
- // --- Reasoning tailer (item 5) ------------------------------------------------------
1485
- // Tail the in-flight turn's Claude transcript and surface new reasoning/tool steps
1486
- // as discreet session events. Coalesced and coarse; the final response still comes
1487
- // through publishSessionTurn.
1490
+ // --- Turn-step tailer (item 5) ------------------------------------------------------
1491
+ // Tail the in-flight turn's Claude transcript and surface new narration/reasoning/tool
1492
+ // steps as session events, in transcript order. `narration` (the agent's intermediate
1493
+ // text) is the primary visible content; reasoning visibility is a client-side toggle.
1494
+ // Coalesced and coarse; the final response still comes through publishSessionTurn.
1495
+ // `reasoningCapture: false` disables the whole live trace (server-side kill switch).
1488
1496
  private startReasoningTail(transcriptPath: string): void {
1489
1497
  if (this.options.providerConfig.reasoningCapture === false) return;
1490
1498
  this.stopReasoningTail();
@@ -1492,6 +1500,8 @@ export class AgentRunner {
1492
1500
  // turn" window in the transcript can shrink/reset (a tool_result entry, a
1493
1501
  // mid-turn user line), and an index cursor would then either re-emit or stall
1494
1502
  // and drop the rest of the turn. A seen-set is idempotent under any reshuffle.
1503
+ // The signature is salted with each step's occurrence-within-window (stepDedupKeys)
1504
+ // so two identical steps in one turn — same tool, same input — both surface (#265).
1495
1505
  const seen = new Set<string>();
1496
1506
  const turnIdAtStart = this.currentTurnId;
1497
1507
  // On the first poll the new prompt usually hasn't landed in the transcript yet,
@@ -1507,16 +1517,16 @@ export class AgentRunner {
1507
1517
  try { jsonl = await readFile(transcriptPath, "utf8"); } catch { return; }
1508
1518
  let steps: ReturnType<typeof extractLatestTurnSteps>;
1509
1519
  try { steps = extractLatestTurnSteps(jsonl); } catch { return; }
1520
+ const keyed = stepDedupKeys(steps).map((sig, i) => ({ sig, step: steps[i]! }));
1510
1521
  if (!seeded) {
1511
1522
  seeded = true;
1512
1523
  if (transcriptLooksComplete(jsonl)) {
1513
- for (const s of steps) seen.add(JSON.stringify([s.type, s.label ?? "", s.text]));
1524
+ for (const { sig } of keyed) seen.add(sig);
1514
1525
  }
1515
1526
  }
1516
1527
  const turnId = this.currentTurnId ?? turnIdAtStart;
1517
1528
  let emitted = 0;
1518
- for (const step of steps) {
1519
- const sig = JSON.stringify([step.type, step.label ?? "", step.text]);
1529
+ for (const { sig, step } of keyed) {
1520
1530
  if (seen.has(sig)) continue;
1521
1531
  seen.add(sig);
1522
1532
  emitted += 1;
@@ -2054,7 +2064,7 @@ export function latestClaudeResumeIdFromText(text: string): string | undefined {
2054
2064
  return latest;
2055
2065
  }
2056
2066
 
2057
- export function latestClaudeResumeIdFromLogFile(path: string): string | undefined {
2067
+ function latestClaudeResumeIdFromLogFile(path: string): string | undefined {
2058
2068
  let fd: number | undefined;
2059
2069
  try {
2060
2070
  const stat = statSync(path);
@@ -37,7 +37,7 @@ export function isGatheringTool(name: string): boolean {
37
37
  return GATHERING_NAME.test(name);
38
38
  }
39
39
 
40
- export interface ContextRatioMetric {
40
+ interface ContextRatioMetric {
41
41
  /** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
42
42
  ratio: number;
43
43
  gatheringCalls: number;
@@ -49,7 +49,7 @@ export interface ContextRatioMetric {
49
49
  turns: number;
50
50
  }
51
51
 
52
- export interface SessionOutcomeProxy {
52
+ interface SessionOutcomeProxy {
53
53
  /** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
54
54
  userPrompts: number;
55
55
  /** tool_result blocks flagged is_error — failures/workarounds the agent hit. */
@@ -2,7 +2,7 @@ import { execFileSync } from "node:child_process";
2
2
  import { accessSync, appendFileSync, constants, mkdirSync, readdirSync, readFileSync, rmSync, statSync } from "node:fs";
3
3
  import { dirname, isAbsolute, join, resolve } from "node:path";
4
4
 
5
- export const SCRATCH_DIR_NAME = ".agent-relay";
5
+ const SCRATCH_DIR_NAME = ".agent-relay";
6
6
  // The local-ignore entry. Leading + trailing slash scopes it to the dir at the
7
7
  // base, matching git's gitignore semantics.
8
8
  const EXCLUDE_ENTRY = "/.agent-relay/";
@@ -16,7 +16,7 @@ export interface SessionScratchLayout {
16
16
  replyFile: string; // <tmp>/reply.md
17
17
  }
18
18
 
19
- export interface SessionScratchTarget {
19
+ interface SessionScratchTarget {
20
20
  agentId: string;
21
21
  cwd: string;
22
22
  // Orchestrator base dir, used only when cwd is not writable. NEVER home — a
@@ -44,7 +44,7 @@ export function resolveScratchBase(cwd: string, fallbackBaseDir?: string): strin
44
44
  return cwd;
45
45
  }
46
46
 
47
- export function sessionScratchLayout(baseDir: string, agentId: string): SessionScratchLayout {
47
+ function sessionScratchLayout(baseDir: string, agentId: string): SessionScratchLayout {
48
48
  const rootDir = join(baseDir, SCRATCH_DIR_NAME);
49
49
  const sessionsDir = join(rootDir, "sessions");
50
50
  const sessionDir = join(sessionsDir, agentId);
@@ -131,7 +131,7 @@ export function reapSessionScratch(target: SessionScratchTarget): void {
131
131
  }
132
132
  }
133
133
 
134
- export interface SweepOptions {
134
+ interface SweepOptions {
135
135
  cwd: string;
136
136
  fallbackBaseDir?: string;
137
137
  // Agent ids to keep (currently-known agents + self). Any session dir whose id