agent-relay-runner 0.12.3 → 0.13.0

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.12.3",
3
+ "version": "0.13.0",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -20,7 +20,7 @@
20
20
  "directory": "runner"
21
21
  },
22
22
  "dependencies": {
23
- "agent-relay-sdk": "0.2.6"
23
+ "agent-relay-sdk": "0.2.7"
24
24
  },
25
25
  "devDependencies": {
26
26
  "@types/bun": "latest",
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.12.3",
4
+ "version": "0.13.0",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
@@ -72,6 +72,26 @@ relay_post_user_prompt() {
72
72
  -d "$body" >/dev/null 2>&1 || true
73
73
  }
74
74
 
75
+ relay_post_session_end() {
76
+ # Insights #184: tell the runner the session ended so it can compute the
77
+ # end-of-session context-gathering ratio from the full transcript. Fire-and-forget;
78
+ # the transcript path is optional (the runner falls back to the last path it saw).
79
+ local transcript_path="${1:-}"
80
+ local reason="${2:-}"
81
+ local port="${AGENT_RELAY_RUNNER_PORT:-}"
82
+ [ -z "$port" ] && return 0
83
+ local body="{"
84
+ [ -n "$transcript_path" ] && body="${body}\"transcriptPath\":\"$(relay_json_escape "$transcript_path")\""
85
+ if [ -n "$reason" ]; then
86
+ [ "$body" != "{" ] && body="${body},"
87
+ body="${body}\"reason\":\"$(relay_json_escape "$reason")\""
88
+ fi
89
+ body="${body}}"
90
+ curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-end" \
91
+ -H 'Content-Type: application/json' \
92
+ -d "$body" >/dev/null 2>&1 || true
93
+ }
94
+
75
95
  relay_pending_reply_stop_decision() {
76
96
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
77
97
  [ -z "$port" ] && return 0
@@ -4,6 +4,7 @@ source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/
4
4
 
5
5
  payload="$(cat || true)"
6
6
  reason="$(relay_json_string_field reason "$payload")"
7
+ transcript_path="$(relay_json_string_field transcript_path "$payload")"
7
8
 
8
9
  case "$reason" in
9
10
  clear)
@@ -14,5 +15,8 @@ case "$reason" in
14
15
  ;;
15
16
  logout|prompt_input_exit|bypass_permissions_disabled|other|*)
16
17
  relay_post_status_clearing_subagents offline
18
+ # Real session termination: capture end-of-session Insights (#184). Order after the
19
+ # status post is arbitrary — the runner reads the transcript file regardless.
20
+ relay_post_session_end "$transcript_path" "$reason"
17
21
  ;;
18
22
  esac
@@ -16,6 +16,7 @@ interface TranscriptBlock {
16
16
  thinking?: string;
17
17
  name?: string;
18
18
  input?: Record<string, unknown>;
19
+ is_error?: boolean;
19
20
  }
20
21
 
21
22
  export interface TurnStep {
@@ -186,6 +187,136 @@ export function summarizeToolUse(name: string, input: Record<string, unknown> |
186
187
  return summary.length > 200 ? `${summary.slice(0, 197)}…` : summary;
187
188
  }
188
189
 
190
+ // --- Insights #184: context-gathering ratio (epic #183, docs/self-improvement.md) ---
191
+ //
192
+ // Computed mechanically from the whole-session transcript at session end — no model
193
+ // involvement, so it costs zero agent tokens and the agent can't game it. The ratio is
194
+ // paired with cheap outcome proxies (user re-prompts, tool errors) so it's never read
195
+ // alone — see the anti-Goodhart constraint in the epic.
196
+
197
+ // Tools that acquire context without changing anything. A tool matched by neither
198
+ // this set nor GATHERING_NAME is treated as an action (mutation, execution, or a
199
+ // delegation/direction decision). Bash counts as an action because it executes — a
200
+ // conservative, documented choice for v0; `cat`/`ls` via Bash are misclassified, refine later if the data warrants it.
201
+ const GATHERING_TOOLS = new Set([
202
+ "Read", "Grep", "Glob", "LS", "NotebookRead", "WebFetch", "WebSearch",
203
+ ]);
204
+ const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;
205
+
206
+ function isGatheringTool(name: string): boolean {
207
+ if (GATHERING_TOOLS.has(name)) return true;
208
+ // MCP / custom tools: classify by name shape (e.g. mcp__callmux__searxng_web_search).
209
+ return GATHERING_NAME.test(name);
210
+ }
211
+
212
+ export interface ContextRatioMetric {
213
+ /** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
214
+ ratio: number;
215
+ gatheringCalls: number;
216
+ actionCalls: number;
217
+ totalToolCalls: number;
218
+ /** Consecutive gathering calls before the first action — the "read N files before moving" signal. */
219
+ leadingGather: number;
220
+ /** Substantive assistant turns: assistant transcript entries containing non-empty text or a named tool call. */
221
+ turns: number;
222
+ }
223
+
224
+ export interface SessionOutcomeProxy {
225
+ /** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
226
+ userPrompts: number;
227
+ /** tool_result blocks flagged is_error — failures/workarounds the agent hit. */
228
+ toolErrors: number;
229
+ }
230
+
231
+ export interface SessionAnalysis {
232
+ metric: ContextRatioMetric;
233
+ outcome: SessionOutcomeProxy;
234
+ }
235
+
236
+ /**
237
+ * Walk the full transcript and compute the context-gathering ratio plus paired outcome
238
+ * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
239
+ * trivial sessions have nothing to learn from and shouldn't pollute the baselines.
240
+ */
241
+ export function analyzeSession(jsonl: string): SessionAnalysis | null {
242
+ let gatheringCalls = 0;
243
+ let actionCalls = 0;
244
+ let leadingGather = 0;
245
+ let sawAction = false;
246
+ let userPrompts = 0;
247
+ let toolErrors = 0;
248
+ let turns = 0;
249
+
250
+ for (const line of jsonl.split("\n")) {
251
+ const trimmed = line.trim();
252
+ if (!trimmed) continue;
253
+ let entry: TranscriptEntry;
254
+ try {
255
+ entry = JSON.parse(trimmed) as TranscriptEntry;
256
+ } catch {
257
+ continue;
258
+ }
259
+ if (isRealUserPrompt(entry)) userPrompts++;
260
+ if (entry.type === "user") {
261
+ for (const b of blocks(entry.message)) {
262
+ if (b.type === "tool_result" && b.is_error === true) toolErrors++;
263
+ }
264
+ continue;
265
+ }
266
+ if (entry.type !== "assistant") continue;
267
+ let producedSomething = false;
268
+ for (const b of blocks(entry.message)) {
269
+ if (b.type === "text" && b.text?.trim()) producedSomething = true;
270
+ if (b.type !== "tool_use" || typeof b.name !== "string" || !b.name) continue;
271
+ producedSomething = true;
272
+ if (isGatheringTool(b.name)) {
273
+ gatheringCalls++;
274
+ if (!sawAction) leadingGather++;
275
+ } else {
276
+ actionCalls++;
277
+ sawAction = true;
278
+ }
279
+ }
280
+ if (producedSomething) turns++;
281
+ }
282
+
283
+ const totalToolCalls = gatheringCalls + actionCalls;
284
+ if (totalToolCalls === 0) return null;
285
+
286
+ return {
287
+ metric: {
288
+ ratio: gatheringCalls / totalToolCalls,
289
+ gatheringCalls,
290
+ actionCalls,
291
+ totalToolCalls,
292
+ leadingGather,
293
+ turns,
294
+ },
295
+ outcome: { userPrompts, toolErrors },
296
+ };
297
+ }
298
+
299
+ /** Count substantive assistant turns — used by the #185 introspection gate. */
300
+ export function countSubstantiveTurns(jsonl: string): number {
301
+ let turns = 0;
302
+ for (const line of jsonl.split("\n")) {
303
+ const trimmed = line.trim();
304
+ if (!trimmed) continue;
305
+ let entry: TranscriptEntry;
306
+ try {
307
+ entry = JSON.parse(trimmed) as TranscriptEntry;
308
+ } catch {
309
+ continue;
310
+ }
311
+ if (entry.type !== "assistant") continue;
312
+ const hasContent = blocks(entry.message).some(
313
+ (b) => (b.type === "text" && b.text?.trim()) || (b.type === "tool_use" && b.name),
314
+ );
315
+ if (hasContent) turns++;
316
+ }
317
+ return turns;
318
+ }
319
+
189
320
  export function extractHookAssistantMessage(content: unknown): string {
190
321
  if (typeof content === "string") return content.trim();
191
322
  if (!Array.isArray(content)) return "";
@@ -355,6 +355,13 @@ export function sessionStatusLineSettingsArgs(...argLists: string[][]): string[]
355
355
  command: "agent-relay context-probe --wrap",
356
356
  refreshInterval: 30,
357
357
  },
358
+ // Force readable thinking text for managed sessions so the session-mirror can
359
+ // surface reasoning in the dashboard. With showThinkingSummaries:false the API
360
+ // redacts thinking to a signature-only stub (empty text), leaving the transcript
361
+ // tail nothing to mirror. --settings merges per-key, so this overrides only this
362
+ // key for managed sessions — a host rig default of false still governs the
363
+ // operator's own interactive TUI sessions.
364
+ showThinkingSummaries: true,
358
365
  })];
359
366
  }
360
367
 
@@ -448,12 +455,19 @@ export function claudePaneLooksReady(text: string): boolean {
448
455
  || text.includes("Claude Code");
449
456
  }
450
457
 
458
+ // The working-spinner footer carries a live elapsed-time counter while a turn is in
459
+ // flight, e.g. "✶ Perambulating… (2m 17s · ↓ 8.7k tokens)" — gerund, "… (", then
460
+ // "[Nh ][Nm ]Ns". Anchored on the gerund ellipsis so it can't match the "… +N lines
461
+ // (ctrl+o to expand)" truncation marker, the idle input box, or the persistent
462
+ // "/btw … without interrupting Claude's current work" queue hint.
463
+ const CLAUDE_BUSY_SPINNER_RE = /…\s*\((?:\d+h\s+)?(?:\d+m\s+)?\d+s\b/;
464
+
451
465
  export function claudePaneIsBusy(text: string): boolean {
452
- // Claude renders "(esc to interrupt)" in its working spinner footer while a turn
453
- // is in flight and removes it once the turn completes and the input box is idle.
454
- // The persistent "…without interrupting Claude" queue hint does NOT contain this
455
- // exact phrase, so it won't false-positive.
456
- return text.includes("esc to interrupt");
466
+ // Claude Code <2.1.x rendered "(esc to interrupt)" in the spinner footer; 2.1.x
467
+ // dropped that hint but kept the "(<elapsed>" counter, which is the stable busy
468
+ // signal across versions. Match either so the busy probe (and the reconciler
469
+ // backstop that depends on it) keep working as the footer wording changes.
470
+ return CLAUDE_BUSY_SPINNER_RE.test(text) || text.includes("esc to interrupt");
457
471
  }
458
472
 
459
473
  async function waitForClaudeInputReady(sessionName: string, timeoutMs = CLAUDE_TMUX_READY_TIMEOUT_MS, socketName?: string): Promise<void> {
@@ -453,6 +453,18 @@ export function codexToolSummary(type: string | undefined, item: Record<string,
453
453
  if (type === "webSearch") {
454
454
  return { label: "Search", body: clip(oneLine(item.query) || "web search") };
455
455
  }
456
+ if (type === "plan") {
457
+ return { label: "Plan", body: clip(oneLine(item.text) || "updated plan") };
458
+ }
459
+ if (type === "collabAgentToolCall") {
460
+ const tool = stringValue(item.tool) ?? "collab";
461
+ const prompt = oneLine(item.prompt);
462
+ const targets = Array.isArray(item.receiverThreadIds)
463
+ ? item.receiverThreadIds.filter((t): t is string => typeof t === "string").length
464
+ : 0;
465
+ const detail = prompt || (targets ? `${targets} agent${targets === 1 ? "" : "s"}` : tool);
466
+ return { label: `Collab/${tool}`, body: clip(detail) };
467
+ }
456
468
  return null;
457
469
  }
458
470
 
@@ -28,6 +28,11 @@ interface ControlServerOptions {
28
28
  // directly into the session (web terminal / TUI) so the runner can mirror it
29
29
  // into the dashboard chat and start tailing the turn transcript for reasoning.
30
30
  onUserPrompt?(input: { prompt: string; transcriptPath?: string }): Promise<void>;
31
+ // A provider SessionEnd hook signals the session is over so the runner can
32
+ // compute end-of-session Insights signals (#184 context ratio) from the full
33
+ // transcript. transcriptPath is optional — the runner falls back to the last
34
+ // path it saw during the session.
35
+ onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
31
36
  }
32
37
 
33
38
  export function startControlServer(options: ControlServerOptions): ControlServer {
@@ -73,6 +78,9 @@ export function startControlServer(options: ControlServerOptions): ControlServer
73
78
  if (url.pathname === "/user-prompt" && req.method === "POST") {
74
79
  return handleUserPrompt(req, options);
75
80
  }
81
+ if (url.pathname === "/session-end" && req.method === "POST") {
82
+ return handleSessionEnd(req, options);
83
+ }
76
84
  if (url.pathname === "/monitor") {
77
85
  const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
78
86
  return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
@@ -343,6 +351,16 @@ async function handleUserPrompt(req: Request, options: ControlServerOptions): Pr
343
351
  return Response.json({ ok: true });
344
352
  }
345
353
 
354
+ async function handleSessionEnd(req: Request, options: ControlServerOptions): Promise<Response> {
355
+ if (!options.onSessionEnd) return Response.json({ ok: false, reason: "session-end capture unavailable" });
356
+ const body = await req.json().catch(() => null);
357
+ const reason = isRecord(body) && typeof body.reason === "string" ? body.reason : undefined;
358
+ const transcriptPath = isRecord(body) && typeof body.transcriptPath === "string" ? body.transcriptPath : undefined;
359
+ // Fire-and-forget: the SessionEnd hook must not block Claude shutting down.
360
+ void Promise.resolve(options.onSessionEnd({ reason, transcriptPath })).catch(() => {});
361
+ return Response.json({ ok: true });
362
+ }
363
+
346
364
  async function handleStatus(req: Request, options: ControlServerOptions): Promise<Response> {
347
365
  const body = await req.json().catch(() => null) as Partial<ProviderStatusEvent> | null;
348
366
  const status = body?.status;
package/src/runner.ts CHANGED
@@ -9,7 +9,7 @@ import type { ManagedProcess, ProviderAdapter, ProviderConfig, ProviderPermissio
9
9
  import { messagesWithCachedAttachments } from "./attachment-cache";
10
10
  import { ClaimTracker } from "./claim-tracker";
11
11
  import { startControlServer, type ControlServer } from "./control-server";
12
- import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
12
+ import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
13
13
  import { agentProfileProjectionReport } from "./profile-projection";
14
14
  import { profileUsesHostProviderGlobals } from "./profile-home";
15
15
  import { runtimeMetadata } from "./version";
@@ -63,6 +63,9 @@ const CLAIM_RENEW_INTERVAL_MS = 5 * 60 * 1000;
63
63
  const HTTP_LIVENESS_INTERVAL_MS = 20_000;
64
64
  const HTTP_LIVENESS_LOG_INTERVAL_MS = 5 * 60 * 1000;
65
65
  const TOKEN_RENEW_RETRY_MS = 60_000;
66
+ // Debounce reactive token recovery so a burst of 401-ing calls in the same window
67
+ // triggers a single re-mint attempt, not one per failing request.
68
+ const REACTIVE_TOKEN_RECOVERY_DEBOUNCE_MS = 10_000;
66
69
  const UNEXPECTED_EXIT_WINDOW_MS = 2 * 60 * 1000;
67
70
  const RAPID_EXIT_MS = 30 * 1000;
68
71
  const MAX_RAPID_UNEXPECTED_EXITS = 3;
@@ -128,8 +131,12 @@ export class AgentRunner {
128
131
  private tokenRenewTimer?: Timer;
129
132
  private tokenRenewInFlight = false;
130
133
  private tokenRenewLastLog?: { key: string; at: number };
134
+ private reactiveTokenRecoveryAt?: number;
131
135
  private processStartedAt = 0;
132
136
  private providerSessionId = crypto.randomUUID();
137
+ // Last transcript path seen this session — used by end-of-session Insights (#184)
138
+ // when the SessionEnd hook payload omits it.
139
+ private lastTranscriptPath?: string;
133
140
  private lifecycleAction?: "shutting-down" | "killing" | "restarting";
134
141
  private readonly unexpectedExitTimes: number[] = [];
135
142
  private readonly pendingMessages = new Map<number, Message>();
@@ -139,10 +146,12 @@ export class AgentRunner {
139
146
  // Session-mirror: a synthesized id grouping a turn's reasoning/tool steps and
140
147
  // its final response. Set when a provider-turn starts, cleared when it ends.
141
148
  private currentTurnId?: string;
142
- // Prompt-echo dedup: the last prompt the runner itself injected (chat box or
143
- // initial prompt). A UserPromptSubmit hook echo matching this within the window
144
- // is the same prompt arriving back from the provider and must not double-post.
145
- private lastInjectedPrompt?: { text: string; at: number };
149
+ // Prompt-echo dedup: a short, time-bounded queue of prompts the runner itself
150
+ // injected (chat box or initial prompt) that are still awaiting their matching
151
+ // UserPromptSubmit echo. A single slot dropped earlier entries when several prompts
152
+ // were injected before their echoes returned (rapid sends while the provider is busy
153
+ // and queues them) — the evicted ones then double-posted. Match consumes one entry.
154
+ private injectedPrompts: Array<{ text: string; at: number }> = [];
146
155
  // Busy reconciler: consecutive idle probes observed while claims still say busy.
147
156
  private busyReconcileIdleStreak = 0;
148
157
  private busyReconcileTimer?: ReturnType<typeof setInterval>;
@@ -239,6 +248,7 @@ export class AgentRunner {
239
248
  onReplyObligations: () => this.http.listReplyObligations(this.agentId),
240
249
  onSessionTurn: (input) => this.publishSessionTurn(input),
241
250
  onUserPrompt: (input) => this.handleUserPrompt(input),
251
+ onSessionEnd: (input) => this.handleSessionEnd(input),
242
252
  });
243
253
  this.writeRunnerInfoFile();
244
254
  this.options.adapter.onStatusChange((status) => {
@@ -303,6 +313,7 @@ export class AgentRunner {
303
313
 
304
314
  private async spawnProvider(): Promise<ManagedProcess> {
305
315
  this.providerSessionId = crypto.randomUUID();
316
+ this.lastTranscriptPath = undefined;
306
317
  const includeProviderGlobals = profileUsesHostProviderGlobals(this.options);
307
318
  const env = {
308
319
  ...process.env as Record<string, string>,
@@ -600,7 +611,7 @@ export class AgentRunner {
600
611
  if (messageId) this.pendingPromptMessageId = messageId;
601
612
  // Mark so the matching UserPromptSubmit echo isn't double-posted: a chat-box
602
613
  // prompt already created its own session message shown in the dashboard.
603
- this.lastInjectedPrompt = { text: body.trim(), at: Date.now() };
614
+ this.recordInjectedPrompt(body.trim());
604
615
  await this.options.adapter.deliverInitialPrompt(this.process, body);
605
616
  return { injected: true, messageId };
606
617
  }
@@ -891,6 +902,7 @@ export class AgentRunner {
891
902
  // no relay message) are mirrored too. A reply obligation, when present, is still
892
903
  // used as replyTo so the Stop hook stops nagging the agent to /reply.
893
904
  private async publishSessionTurn(input: { transcriptPath: string; lastAssistantMessage?: unknown }): Promise<void> {
905
+ if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
894
906
  const turnId = this.currentTurnId;
895
907
  this.stopReasoningTail();
896
908
  // Optional correlation for threading + obligation clearing — never a capture gate.
@@ -972,6 +984,7 @@ export class AgentRunner {
972
984
  });
973
985
  } catch (error) {
974
986
  this.logRunnerDiagnostic(`session ${input.session.type} capture failed: ${error instanceof Error ? error.message : String(error)}`);
987
+ if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("session-capture");
975
988
  }
976
989
  }
977
990
 
@@ -980,6 +993,7 @@ export class AgentRunner {
980
993
  // tailing for the turn. Skips prompts the runner itself injected (chat box, relay
981
994
  // deliveries) so those aren't double-posted.
982
995
  private async handleUserPrompt(input: { prompt: string; transcriptPath?: string }): Promise<void> {
996
+ if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
983
997
  if (!this.currentTurnId) this.currentTurnId = crypto.randomUUID();
984
998
  const text = input.prompt.trim();
985
999
  if (text && !this.isRunnerInjectedPrompt(text)) {
@@ -996,6 +1010,42 @@ export class AgentRunner {
996
1010
  if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
997
1011
  }
998
1012
 
1013
+ // SessionEnd: compute end-of-session Insights signals (#184 context-gathering
1014
+ // ratio) from the full transcript and record them with the relay. Mechanical and
1015
+ // model-free — costs zero agent tokens and the agent can't game it. The relay drops
1016
+ // the observation if Insights or this signal is toggled off. Best-effort: never
1017
+ // blocks or fails provider shutdown.
1018
+ private async handleSessionEnd(input: { reason?: string; transcriptPath?: string }): Promise<void> {
1019
+ // Only Claude transcripts have this shape; Codex sessions are skipped for now.
1020
+ if (this.options.provider !== "claude") return;
1021
+ const transcriptPath = input.transcriptPath || this.lastTranscriptPath;
1022
+ if (!transcriptPath) return;
1023
+ let jsonl: string;
1024
+ try {
1025
+ jsonl = await readFile(transcriptPath, "utf8");
1026
+ } catch {
1027
+ return;
1028
+ }
1029
+ const analysis = analyzeSession(jsonl);
1030
+ if (!analysis) return; // no tool calls = nothing substantive to measure
1031
+ try {
1032
+ await this.http.recordInsightObservation({
1033
+ sessionId: this.providerSessionId,
1034
+ project: this.options.cwd,
1035
+ agentId: this.agentId,
1036
+ signal: "context_ratio",
1037
+ value: { ...analysis.metric, ...(input.reason ? { endReason: input.reason } : {}) },
1038
+ outcome: { ...analysis.outcome },
1039
+ source: "server",
1040
+ });
1041
+ this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering)`);
1042
+ } catch (error) {
1043
+ // 409 = Insights/feature toggled off; anything else is best-effort too.
1044
+ this.sessionDebug(`insights context_ratio skipped: ${error instanceof Error ? error.message : String(error)}`);
1045
+ if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("insights");
1046
+ }
1047
+ }
1048
+
999
1049
  // Route a provider-emitted session event (Codex app-server) into the chat mirror.
1000
1050
  // Mirrors the same semantics as the Claude lane: prompts are echoed with dedup,
1001
1051
  // and a response is only auto-captured when the agent won't separately reply to a
@@ -1048,11 +1098,23 @@ export class AgentRunner {
1048
1098
  });
1049
1099
  }
1050
1100
 
1101
+ // Remember an injected prompt so its UserPromptSubmit echo can be suppressed. Prunes
1102
+ // expired entries first; a defensive length cap guards against echoes that never
1103
+ // arrive (e.g. the provider drops a queued prompt) so the queue can't grow unbounded.
1104
+ private recordInjectedPrompt(text: string): void {
1105
+ const now = Date.now();
1106
+ this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
1107
+ this.injectedPrompts.push({ text, at: now });
1108
+ if (this.injectedPrompts.length > 50) this.injectedPrompts.shift();
1109
+ }
1110
+
1051
1111
  private isRunnerInjectedPrompt(text: string): boolean {
1052
1112
  if (RELAY_INJECTION_MARKERS.some((marker) => text.startsWith(marker))) return true;
1053
- const recent = this.lastInjectedPrompt;
1054
- if (recent && recent.text === text && Date.now() - recent.at < PROMPT_ECHO_DEDUP_MS) {
1055
- this.lastInjectedPrompt = undefined;
1113
+ const now = Date.now();
1114
+ this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
1115
+ const idx = this.injectedPrompts.findIndex((p) => p.text === text);
1116
+ if (idx !== -1) {
1117
+ this.injectedPrompts.splice(idx, 1); // consume one — identical repeats each match once
1056
1118
  return true;
1057
1119
  }
1058
1120
  return false;
@@ -1259,6 +1321,25 @@ export class AgentRunner {
1259
1321
  this.httpLivenessAuthFailed = true;
1260
1322
  if (this.httpLivenessTimer) clearInterval(this.httpLivenessTimer);
1261
1323
  this.httpLivenessTimer = undefined;
1324
+ // A 401/403 here is the only timely signal that the token died — stopping the
1325
+ // liveness timer means there is no second chance, so recover from THIS failure.
1326
+ this.recoverRuntimeTokenAfterAuthFailure("http-liveness");
1327
+ }
1328
+
1329
+ // A definitive relay auth failure (401/403) means the runtime token is dead right
1330
+ // now — expired, or (the common case) revoked when the relay marked this agent
1331
+ // stale across its own restart/reconnect. The proactive renew timer is keyed to
1332
+ // TTL and structurally cannot catch a revocation, so the auth failure itself must
1333
+ // drive recovery. renewRuntimeToken() prefers an orchestrator re-mint, which heals
1334
+ // even a revoked token. Debounced so a burst of failing calls re-mints once.
1335
+ private recoverRuntimeTokenAfterAuthFailure(source: string): void {
1336
+ if (this.stopped || this.tokenRenewInFlight) return;
1337
+ if (!this.isRuntimeTokenRenewable() && !this.canRemintViaOrchestrator()) return;
1338
+ const now = Date.now();
1339
+ if (this.reactiveTokenRecoveryAt && now - this.reactiveTokenRecoveryAt < REACTIVE_TOKEN_RECOVERY_DEBOUNCE_MS) return;
1340
+ this.reactiveTokenRecoveryAt = now;
1341
+ this.logRunnerDiagnostic(`[runner] relay auth failure on ${source}; recovering runtime token`);
1342
+ void this.renewRuntimeToken();
1262
1343
  }
1263
1344
 
1264
1345
  private logHttpLivenessFailure(error: unknown, authFailed: boolean): void {
@@ -1432,6 +1513,11 @@ export class AgentRunner {
1432
1513
  this.http.setToken(token);
1433
1514
  this.bus.setToken(token);
1434
1515
  this.httpLivenessAuthFailed = false;
1516
+ this.reactiveTokenRecoveryAt = undefined;
1517
+ // An earlier auth failure may have stopped the liveness loop; restart it so the
1518
+ // agent reports live again on the fresh token. startHttpLiveness clears any
1519
+ // existing timer first, so this is safe on the normal (proactive) renew path too.
1520
+ this.startHttpLiveness();
1435
1521
  this.pendingTimelineEvent = { status, id: record.jti, timestamp: Date.now() };
1436
1522
  this.bus.reconnectTransport(status === "runtime-token-reminted" ? "runtime token re-minted" : "runtime token renewed");
1437
1523
  this.publishStatus();