npm - agent-relay-runner - Versions diffs - 0.12.3 → 0.13.0 - Mend

agent-relay-runner 0.12.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json +2 -2
package/plugins/claude/.claude-plugin/plugin.json +1 -1
package/plugins/claude/hooks/relay-status.sh +20 -0
package/plugins/claude/hooks/session-end.sh +4 -0
package/src/adapters/claude-transcript.ts +131 -0
package/src/adapters/claude.ts +19 -5
package/src/adapters/codex.ts +12 -0
package/src/control-server.ts +18 -0
package/src/runner.ts +95 -9

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-relay-runner",
-  "version": "0.12.3",
+  "version": "0.13.0",
   "description": "Unified provider lifecycle runner for Agent Relay",
   "type": "module",
   "bin": {
@@ -20,7 +20,7 @@
     "directory": "runner"
   },
   "dependencies": {
-    "agent-relay-sdk": "0.2.6"
+    "agent-relay-sdk": "0.2.7"
   },
   "devDependencies": {
     "@types/bun": "latest",

package/plugins/claude/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "agent-relay-runner",
   "description": "Thin Agent Relay runner bridge for Claude Code",
-  "version": "0.12.3",
+  "version": "0.13.0",
   "agentRelayContracts": {
     "providerPluginProtocol": 1
   }

package/plugins/claude/hooks/relay-status.sh CHANGED Viewed

@@ -72,6 +72,26 @@ relay_post_user_prompt() {
     -d "$body" >/dev/null 2>&1 || true
 }
+relay_post_session_end() {
+  # Insights #184: tell the runner the session ended so it can compute the
+  # end-of-session context-gathering ratio from the full transcript. Fire-and-forget;
+  # the transcript path is optional (the runner falls back to the last path it saw).
+  local transcript_path="${1:-}"
+  local reason="${2:-}"
+  local port="${AGENT_RELAY_RUNNER_PORT:-}"
+  [ -z "$port" ] && return 0
+  local body="{"
+  [ -n "$transcript_path" ] && body="${body}\"transcriptPath\":\"$(relay_json_escape "$transcript_path")\""
+  if [ -n "$reason" ]; then
+    [ "$body" != "{" ] && body="${body},"
+    body="${body}\"reason\":\"$(relay_json_escape "$reason")\""
+  fi
+  body="${body}}"
+  curl -fsS --max-time 3 -X POST "http://127.0.0.1:${port}/session-end" \
+    -H 'Content-Type: application/json' \
+    -d "$body" >/dev/null 2>&1 || true
+}
 relay_pending_reply_stop_decision() {
   local port="${AGENT_RELAY_RUNNER_PORT:-}"
   [ -z "$port" ] && return 0

package/plugins/claude/hooks/session-end.sh CHANGED Viewed

@@ -4,6 +4,7 @@ source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/
 payload="$(cat || true)"
 reason="$(relay_json_string_field reason "$payload")"
+transcript_path="$(relay_json_string_field transcript_path "$payload")"
 case "$reason" in
   clear)
@@ -14,5 +15,8 @@ case "$reason" in
     ;;
   logout|prompt_input_exit|bypass_permissions_disabled|other|*)
     relay_post_status_clearing_subagents offline
+    # Real session termination: capture end-of-session Insights (#184). Order after the
+    # status post is arbitrary — the runner reads the transcript file regardless.
+    relay_post_session_end "$transcript_path" "$reason"
     ;;
 esac

package/src/adapters/claude-transcript.ts CHANGED Viewed

@@ -16,6 +16,7 @@ interface TranscriptBlock {
   thinking?: string;
   name?: string;
   input?: Record<string, unknown>;
+  is_error?: boolean;
 }
 export interface TurnStep {
@@ -186,6 +187,136 @@ export function summarizeToolUse(name: string, input: Record<string, unknown> |
   return summary.length > 200 ? `${summary.slice(0, 197)}…` : summary;
 }
+// --- Insights #184: context-gathering ratio (epic #183, docs/self-improvement.md) ---
+//
+// Computed mechanically from the whole-session transcript at session end — no model
+// involvement, so it costs zero agent tokens and the agent can't game it. The ratio is
+// paired with cheap outcome proxies (user re-prompts, tool errors) so it's never read
+// alone — see the anti-Goodhart constraint in the epic.
+// Tools that acquire context without changing anything. Anything not matched here is
+// treated as an action (mutation, execution, or a delegation/direction decision) —
+// Bash counts as an action because it executes (a conservative, documented choice for
+// v0; `cat`/`ls` via Bash are misclassified, refine later if the data warrants it).
+const GATHERING_TOOLS = new Set([
+  "Read", "Grep", "Glob", "LS", "NotebookRead", "WebFetch", "WebSearch",
+]);
+const GATHERING_NAME = /(?:^|[._-])(read|get|list|search|grep|glob|find|fetch|query|browse|view|show|cat|status|inspect|lookup|symbols|snippet)/i;
+function isGatheringTool(name: string): boolean {
+  if (GATHERING_TOOLS.has(name)) return true;
+  // MCP / custom tools: classify by name shape (e.g. mcp__callmux__searxng_web_search).
+  return GATHERING_NAME.test(name);
+}
+export interface ContextRatioMetric {
+  /** Session-wide gathering fraction: gatheringCalls / totalToolCalls. The headline metric. */
+  ratio: number;
+  gatheringCalls: number;
+  actionCalls: number;
+  totalToolCalls: number;
+  /** Consecutive gathering calls before the first action — the "read N files before moving" signal. */
+  leadingGather: number;
+  /** Substantive assistant turns (turns that produced text or a tool call). */
+  turns: number;
+}
+export interface SessionOutcomeProxy {
+  /** Real user prompts in the session — more back-and-forth ~ more clarification/correction. */
+  userPrompts: number;
+  /** tool_result blocks flagged is_error — failures/workarounds the agent hit. */
+  toolErrors: number;
+}
+export interface SessionAnalysis {
+  metric: ContextRatioMetric;
+  outcome: SessionOutcomeProxy;
+}
+/**
+ * Walk the full transcript and compute the context-gathering ratio plus paired outcome
+ * proxies. Returns null when there's nothing substantive to measure (no tool calls) —
+ * trivial sessions have nothing to learn from and shouldn't pollute the baselines.
+ */
+export function analyzeSession(jsonl: string): SessionAnalysis | null {
+  let gatheringCalls = 0;
+  let actionCalls = 0;
+  let leadingGather = 0;
+  let sawAction = false;
+  let userPrompts = 0;
+  let toolErrors = 0;
+  let turns = 0;
+  for (const line of jsonl.split("\n")) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    let entry: TranscriptEntry;
+    try {
+      entry = JSON.parse(trimmed) as TranscriptEntry;
+    } catch {
+      continue;
+    }
+    if (isRealUserPrompt(entry)) userPrompts++;
+    if (entry.type === "user") {
+      for (const b of blocks(entry.message)) {
+        if (b.type === "tool_result" && b.is_error === true) toolErrors++;
+      }
+      continue;
+    }
+    if (entry.type !== "assistant") continue;
+    let producedSomething = false;
+    for (const b of blocks(entry.message)) {
+      if (b.type === "text" && b.text?.trim()) producedSomething = true;
+      if (b.type !== "tool_use" || typeof b.name !== "string" || !b.name) continue;
+      producedSomething = true;
+      if (isGatheringTool(b.name)) {
+        gatheringCalls++;
+        if (!sawAction) leadingGather++;
+      } else {
+        actionCalls++;
+        sawAction = true;
+      }
+    }
+    if (producedSomething) turns++;
+  }
+  const totalToolCalls = gatheringCalls + actionCalls;
+  if (totalToolCalls === 0) return null;
+  return {
+    metric: {
+      ratio: gatheringCalls / totalToolCalls,
+      gatheringCalls,
+      actionCalls,
+      totalToolCalls,
+      leadingGather,
+      turns,
+    },
+    outcome: { userPrompts, toolErrors },
+  };
+}
+/** Count substantive assistant turns — used by the #185 introspection gate. */
+export function countSubstantiveTurns(jsonl: string): number {
+  let turns = 0;
+  for (const line of jsonl.split("\n")) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    let entry: TranscriptEntry;
+    try {
+      entry = JSON.parse(trimmed) as TranscriptEntry;
+    } catch {
+      continue;
+    }
+    if (entry.type !== "assistant") continue;
+    const hasContent = blocks(entry.message).some(
+      (b) => (b.type === "text" && b.text?.trim()) || (b.type === "tool_use" && b.name),
+    );
+    if (hasContent) turns++;
+  }
+  return turns;
+}
 export function extractHookAssistantMessage(content: unknown): string {
   if (typeof content === "string") return content.trim();
   if (!Array.isArray(content)) return "";

package/src/adapters/claude.ts CHANGED Viewed

@@ -355,6 +355,13 @@ export function sessionStatusLineSettingsArgs(...argLists: string[][]): string[]
       command: "agent-relay context-probe --wrap",
       refreshInterval: 30,
     },
+    // Force readable thinking text for managed sessions so the session-mirror can
+    // surface reasoning in the dashboard. With showThinkingSummaries:false the API
+    // redacts thinking to a signature-only stub (empty text), leaving the transcript
+    // tail nothing to mirror. --settings merges per-key, so this overrides only this
+    // key for managed sessions — a host rig default of false still governs the
+    // operator's own interactive TUI sessions.
+    showThinkingSummaries: true,
   })];
 }
@@ -448,12 +455,19 @@ export function claudePaneLooksReady(text: string): boolean {
     || text.includes("Claude Code");
 }
+// The working-spinner footer carries a live elapsed-time counter while a turn is in
+// flight, e.g. "✶ Perambulating… (2m 17s · ↓ 8.7k tokens)" — gerund, "… (", then
+// "[Nh ][Nm ]Ns". Anchored on the gerund ellipsis so it can't match the "… +N lines
+// (ctrl+o to expand)" truncation marker, the idle input box, or the persistent
+// "/btw … without interrupting Claude's current work" queue hint.
+const CLAUDE_BUSY_SPINNER_RE = /…\s*\((?:\d+h\s+)?(?:\d+m\s+)?\d+s\b/;
 export function claudePaneIsBusy(text: string): boolean {
-  // Claude renders "(esc to interrupt)" in its working spinner footer while a turn
-  // is in flight and removes it once the turn completes and the input box is idle.
-  // The persistent "…without interrupting Claude" queue hint does NOT contain this
-  // exact phrase, so it won't false-positive.
-  return text.includes("esc to interrupt");
+  // Claude Code <2.1.x rendered "(esc to interrupt)" in the spinner footer; 2.1.x
+  // dropped that hint but kept the "(<elapsed>" counter, which is the stable busy
+  // signal across versions. Match either so the busy probe (and the reconciler
+  // backstop that depends on it) keep working as the footer wording changes.
+  return CLAUDE_BUSY_SPINNER_RE.test(text) || text.includes("esc to interrupt");
 }
 async function waitForClaudeInputReady(sessionName: string, timeoutMs = CLAUDE_TMUX_READY_TIMEOUT_MS, socketName?: string): Promise<void> {

package/src/adapters/codex.ts CHANGED Viewed

@@ -453,6 +453,18 @@ export function codexToolSummary(type: string | undefined, item: Record<string,
   if (type === "webSearch") {
     return { label: "Search", body: clip(oneLine(item.query) || "web search") };
   }
+  if (type === "plan") {
+    return { label: "Plan", body: clip(oneLine(item.text) || "updated plan") };
+  }
+  if (type === "collabAgentToolCall") {
+    const tool = stringValue(item.tool) ?? "collab";
+    const prompt = oneLine(item.prompt);
+    const targets = Array.isArray(item.receiverThreadIds)
+      ? item.receiverThreadIds.filter((t): t is string => typeof t === "string").length
+      : 0;
+    const detail = prompt || (targets ? `${targets} agent${targets === 1 ? "" : "s"}` : tool);
+    return { label: `Collab/${tool}`, body: clip(detail) };
+  }
   return null;
 }

package/src/control-server.ts CHANGED Viewed

@@ -28,6 +28,11 @@ interface ControlServerOptions {
   // directly into the session (web terminal / TUI) so the runner can mirror it
   // into the dashboard chat and start tailing the turn transcript for reasoning.
   onUserPrompt?(input: { prompt: string; transcriptPath?: string }): Promise<void>;
+  // A provider SessionEnd hook signals the session is over so the runner can
+  // compute end-of-session Insights signals (#184 context ratio) from the full
+  // transcript. transcriptPath is optional — the runner falls back to the last
+  // path it saw during the session.
+  onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
 }
 export function startControlServer(options: ControlServerOptions): ControlServer {
@@ -73,6 +78,9 @@ export function startControlServer(options: ControlServerOptions): ControlServer
       if (url.pathname === "/user-prompt" && req.method === "POST") {
         return handleUserPrompt(req, options);
       }
+      if (url.pathname === "/session-end" && req.method === "POST") {
+        return handleSessionEnd(req, options);
+      }
       if (url.pathname === "/monitor") {
         const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
         return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
@@ -343,6 +351,16 @@ async function handleUserPrompt(req: Request, options: ControlServerOptions): Pr
   return Response.json({ ok: true });
 }
+async function handleSessionEnd(req: Request, options: ControlServerOptions): Promise<Response> {
+  if (!options.onSessionEnd) return Response.json({ ok: false, reason: "session-end capture unavailable" });
+  const body = await req.json().catch(() => null);
+  const reason = isRecord(body) && typeof body.reason === "string" ? body.reason : undefined;
+  const transcriptPath = isRecord(body) && typeof body.transcriptPath === "string" ? body.transcriptPath : undefined;
+  // Fire-and-forget: the SessionEnd hook must not block Claude shutting down.
+  void Promise.resolve(options.onSessionEnd({ reason, transcriptPath })).catch(() => {});
+  return Response.json({ ok: true });
+}
 async function handleStatus(req: Request, options: ControlServerOptions): Promise<Response> {
   const body = await req.json().catch(() => null) as Partial<ProviderStatusEvent> | null;
   const status = body?.status;

package/src/runner.ts CHANGED Viewed

@@ -9,7 +9,7 @@ import type { ManagedProcess, ProviderAdapter, ProviderConfig, ProviderPermissio
 import { messagesWithCachedAttachments } from "./attachment-cache";
 import { ClaimTracker } from "./claim-tracker";
 import { startControlServer, type ControlServer } from "./control-server";
-import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete } from "./adapters/claude-transcript";
+import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
 import { agentProfileProjectionReport } from "./profile-projection";
 import { profileUsesHostProviderGlobals } from "./profile-home";
 import { runtimeMetadata } from "./version";
@@ -63,6 +63,9 @@ const CLAIM_RENEW_INTERVAL_MS = 5 * 60 * 1000;
 const HTTP_LIVENESS_INTERVAL_MS = 20_000;
 const HTTP_LIVENESS_LOG_INTERVAL_MS = 5 * 60 * 1000;
 const TOKEN_RENEW_RETRY_MS = 60_000;
+// Debounce reactive token recovery so a burst of 401-ing calls in the same window
+// triggers a single re-mint attempt, not one per failing request.
+const REACTIVE_TOKEN_RECOVERY_DEBOUNCE_MS = 10_000;
 const UNEXPECTED_EXIT_WINDOW_MS = 2 * 60 * 1000;
 const RAPID_EXIT_MS = 30 * 1000;
 const MAX_RAPID_UNEXPECTED_EXITS = 3;
@@ -128,8 +131,12 @@ export class AgentRunner {
   private tokenRenewTimer?: Timer;
   private tokenRenewInFlight = false;
   private tokenRenewLastLog?: { key: string; at: number };
+  private reactiveTokenRecoveryAt?: number;
   private processStartedAt = 0;
   private providerSessionId = crypto.randomUUID();
+  // Last transcript path seen this session — used by end-of-session Insights (#184)
+  // when the SessionEnd hook payload omits it.
+  private lastTranscriptPath?: string;
   private lifecycleAction?: "shutting-down" | "killing" | "restarting";
   private readonly unexpectedExitTimes: number[] = [];
   private readonly pendingMessages = new Map<number, Message>();
@@ -139,10 +146,12 @@ export class AgentRunner {
   // Session-mirror: a synthesized id grouping a turn's reasoning/tool steps and
   // its final response. Set when a provider-turn starts, cleared when it ends.
   private currentTurnId?: string;
-  // Prompt-echo dedup: the last prompt the runner itself injected (chat box or
-  // initial prompt). A UserPromptSubmit hook echo matching this within the window
-  // is the same prompt arriving back from the provider and must not double-post.
-  private lastInjectedPrompt?: { text: string; at: number };
+  // Prompt-echo dedup: a short, time-bounded queue of prompts the runner itself
+  // injected (chat box or initial prompt) that are still awaiting their matching
+  // UserPromptSubmit echo. A single slot dropped earlier entries when several prompts
+  // were injected before their echoes returned (rapid sends while the provider is busy
+  // and queues them) — the evicted ones then double-posted. Match consumes one entry.
+  private injectedPrompts: Array<{ text: string; at: number }> = [];
   // Busy reconciler: consecutive idle probes observed while claims still say busy.
   private busyReconcileIdleStreak = 0;
   private busyReconcileTimer?: ReturnType<typeof setInterval>;
@@ -239,6 +248,7 @@ export class AgentRunner {
       onReplyObligations: () => this.http.listReplyObligations(this.agentId),
       onSessionTurn: (input) => this.publishSessionTurn(input),
       onUserPrompt: (input) => this.handleUserPrompt(input),
+      onSessionEnd: (input) => this.handleSessionEnd(input),
     });
     this.writeRunnerInfoFile();
     this.options.adapter.onStatusChange((status) => {
@@ -303,6 +313,7 @@ export class AgentRunner {
   private async spawnProvider(): Promise<ManagedProcess> {
     this.providerSessionId = crypto.randomUUID();
+    this.lastTranscriptPath = undefined;
     const includeProviderGlobals = profileUsesHostProviderGlobals(this.options);
     const env = {
       ...process.env as Record<string, string>,
@@ -600,7 +611,7 @@ export class AgentRunner {
     if (messageId) this.pendingPromptMessageId = messageId;
     // Mark so the matching UserPromptSubmit echo isn't double-posted: a chat-box
     // prompt already created its own session message shown in the dashboard.
-    this.lastInjectedPrompt = { text: body.trim(), at: Date.now() };
+    this.recordInjectedPrompt(body.trim());
     await this.options.adapter.deliverInitialPrompt(this.process, body);
     return { injected: true, messageId };
   }
@@ -891,6 +902,7 @@ export class AgentRunner {
   // no relay message) are mirrored too. A reply obligation, when present, is still
   // used as replyTo so the Stop hook stops nagging the agent to /reply.
   private async publishSessionTurn(input: { transcriptPath: string; lastAssistantMessage?: unknown }): Promise<void> {
+    if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
     const turnId = this.currentTurnId;
     this.stopReasoningTail();
     // Optional correlation for threading + obligation clearing — never a capture gate.
@@ -972,6 +984,7 @@ export class AgentRunner {
       });
     } catch (error) {
       this.logRunnerDiagnostic(`session ${input.session.type} capture failed: ${error instanceof Error ? error.message : String(error)}`);
+      if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("session-capture");
     }
   }
@@ -980,6 +993,7 @@ export class AgentRunner {
   // tailing for the turn. Skips prompts the runner itself injected (chat box, relay
   // deliveries) so those aren't double-posted.
   private async handleUserPrompt(input: { prompt: string; transcriptPath?: string }): Promise<void> {
+    if (input.transcriptPath) this.lastTranscriptPath = input.transcriptPath;
     if (!this.currentTurnId) this.currentTurnId = crypto.randomUUID();
     const text = input.prompt.trim();
     if (text && !this.isRunnerInjectedPrompt(text)) {
@@ -996,6 +1010,42 @@ export class AgentRunner {
     if (input.transcriptPath) this.startReasoningTail(input.transcriptPath);
   }
+  // SessionEnd: compute end-of-session Insights signals (#184 context-gathering
+  // ratio) from the full transcript and record them with the relay. Mechanical and
+  // model-free — costs zero agent tokens and the agent can't game it. The relay drops
+  // the observation if Insights or this signal is toggled off. Best-effort: never
+  // blocks or fails provider shutdown.
+  private async handleSessionEnd(input: { reason?: string; transcriptPath?: string }): Promise<void> {
+    // Only Claude transcripts have this shape; Codex sessions are skipped for now.
+    if (this.options.provider !== "claude") return;
+    const transcriptPath = input.transcriptPath || this.lastTranscriptPath;
+    if (!transcriptPath) return;
+    let jsonl: string;
+    try {
+      jsonl = await readFile(transcriptPath, "utf8");
+    } catch {
+      return;
+    }
+    const analysis = analyzeSession(jsonl);
+    if (!analysis) return; // no tool calls = nothing substantive to measure
+    try {
+      await this.http.recordInsightObservation({
+        sessionId: this.providerSessionId,
+        project: this.options.cwd,
+        agentId: this.agentId,
+        signal: "context_ratio",
+        value: { ...analysis.metric, ...(input.reason ? { endReason: input.reason } : {}) },
+        outcome: { ...analysis.outcome },
+        source: "server",
+      });
+      this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering)`);
+    } catch (error) {
+      // 409 = Insights/feature toggled off; anything else is best-effort too.
+      this.sessionDebug(`insights context_ratio skipped: ${error instanceof Error ? error.message : String(error)}`);
+      if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("insights");
+    }
+  }
   // Route a provider-emitted session event (Codex app-server) into the chat mirror.
   // Mirrors the same semantics as the Claude lane: prompts are echoed with dedup,
   // and a response is only auto-captured when the agent won't separately reply to a
@@ -1048,11 +1098,23 @@ export class AgentRunner {
     });
   }
+  // Remember an injected prompt so its UserPromptSubmit echo can be suppressed. Prunes
+  // expired entries first; a defensive length cap guards against echoes that never
+  // arrive (e.g. the provider drops a queued prompt) so the queue can't grow unbounded.
+  private recordInjectedPrompt(text: string): void {
+    const now = Date.now();
+    this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
+    this.injectedPrompts.push({ text, at: now });
+    if (this.injectedPrompts.length > 50) this.injectedPrompts.shift();
+  }
   private isRunnerInjectedPrompt(text: string): boolean {
     if (RELAY_INJECTION_MARKERS.some((marker) => text.startsWith(marker))) return true;
-    const recent = this.lastInjectedPrompt;
-    if (recent && recent.text === text && Date.now() - recent.at < PROMPT_ECHO_DEDUP_MS) {
-      this.lastInjectedPrompt = undefined;
+    const now = Date.now();
+    this.injectedPrompts = this.injectedPrompts.filter((p) => now - p.at < PROMPT_ECHO_DEDUP_MS);
+    const idx = this.injectedPrompts.findIndex((p) => p.text === text);
+    if (idx !== -1) {
+      this.injectedPrompts.splice(idx, 1); // consume one — identical repeats each match once
       return true;
     }
     return false;
@@ -1259,6 +1321,25 @@ export class AgentRunner {
     this.httpLivenessAuthFailed = true;
     if (this.httpLivenessTimer) clearInterval(this.httpLivenessTimer);
     this.httpLivenessTimer = undefined;
+    // A 401/403 here is the only timely signal that the token died — stopping the
+    // liveness timer means there is no second chance, so recover from THIS failure.
+    this.recoverRuntimeTokenAfterAuthFailure("http-liveness");
+  }
+  // A definitive relay auth failure (401/403) means the runtime token is dead right
+  // now — expired, or (the common case) revoked when the relay marked this agent
+  // stale across its own restart/reconnect. The proactive renew timer is keyed to
+  // TTL and structurally cannot catch a revocation, so the auth failure itself must
+  // drive recovery. renewRuntimeToken() prefers an orchestrator re-mint, which heals
+  // even a revoked token. Debounced so a burst of failing calls re-mints once.
+  private recoverRuntimeTokenAfterAuthFailure(source: string): void {
+    if (this.stopped || this.tokenRenewInFlight) return;
+    if (!this.isRuntimeTokenRenewable() && !this.canRemintViaOrchestrator()) return;
+    const now = Date.now();
+    if (this.reactiveTokenRecoveryAt && now - this.reactiveTokenRecoveryAt < REACTIVE_TOKEN_RECOVERY_DEBOUNCE_MS) return;
+    this.reactiveTokenRecoveryAt = now;
+    this.logRunnerDiagnostic(`[runner] relay auth failure on ${source}; recovering runtime token`);
+    void this.renewRuntimeToken();
   }
   private logHttpLivenessFailure(error: unknown, authFailed: boolean): void {
@@ -1432,6 +1513,11 @@ export class AgentRunner {
     this.http.setToken(token);
     this.bus.setToken(token);
     this.httpLivenessAuthFailed = false;
+    this.reactiveTokenRecoveryAt = undefined;
+    // An earlier auth failure may have stopped the liveness loop; restart it so the
+    // agent reports live again on the fresh token. startHttpLiveness clears any
+    // existing timer first, so this is safe on the normal (proactive) renew path too.
+    this.startHttpLiveness();
     this.pendingTimelineEvent = { status, id: record.jti, timestamp: Date.now() };
     this.bus.reconnectTransport(status === "runtime-token-reminted" ? "runtime token re-minted" : "runtime token renewed");
     this.publishStatus();