agent-relay-runner 0.15.1 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
- "version": "0.15.1",
3
+ "version": "0.16.0",
4
4
  "description": "Unified provider lifecycle runner for Agent Relay",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "agent-relay-runner",
3
3
  "description": "Thin Agent Relay runner bridge for Claude Code",
4
- "version": "0.15.1",
4
+ "version": "0.16.0",
5
5
  "agentRelayContracts": {
6
6
  "providerPluginProtocol": 1
7
7
  }
@@ -1,5 +1,7 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
+ source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
+ relay_install_hook_guard permission-request
3
5
 
4
6
  port="${AGENT_RELAY_RUNNER_PORT:-}"
5
7
  if [[ -z "$port" ]]; then
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
  source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
+ relay_install_hook_guard post-compact
4
5
 
5
6
  relay_post_timeline_status idle provider-turn "" compacted
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
  source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
+ relay_install_hook_guard pre-compact
4
5
 
5
6
  relay_post_timeline_status busy provider-turn "" compacting
@@ -92,6 +92,30 @@ relay_post_session_end() {
92
92
  -d "$body" >/dev/null 2>&1 || true
93
93
  }
94
94
 
95
+ # --- Hook FATAL surfacing (#198) -------------------------------------------
96
+ # A hook that dies unexpectedly must never be silent. relay_install_hook_guard
97
+ # arms an ERR trap that reports the failure FATAL to the runner control port,
98
+ # which logs it to the dashboard-surfaced per-agent log. Best-effort and bounded
99
+ # (--max-time 2) so the report itself can never blow the hook's timeout budget.
100
# Report a hook failure to the runner control port.
#   $1  hook name (defaults to "unknown")
#   $2  free-form failure detail (defaults to empty)
# Does nothing when AGENT_RELAY_RUNNER_PORT is unset or empty. The POST is capped
# at two seconds and any curl failure is ignored, so this always returns 0.
relay_hook_fatal_report() {
  local hook_name="${1:-unknown}"
  local failure_detail="${2:-}"
  local runner_port="${AGENT_RELAY_RUNNER_PORT:-}"
  if [ -z "$runner_port" ]; then
    return 0
  fi
  # Both fields go through relay_json_escape before being spliced into the JSON body.
  local escaped_hook="$(relay_json_escape "$hook_name")"
  local escaped_detail="$(relay_json_escape "$failure_detail")"
  local payload="{\"hook\":\"${escaped_hook}\",\"error\":\"${escaped_detail}\"}"
  curl -fsS --max-time 2 -X POST "http://127.0.0.1:${runner_port}/hook-fatal" \
    -H 'Content-Type: application/json' \
    -d "$payload" >/dev/null 2>&1 || true
}
109
+
110
# Arm the failure guard for the calling hook script.
#   $1  hook name used in the report (defaults to "unknown")
# Sets the global RELAY_HOOK_NAME and installs an ERR trap that calls
# relay_hook_fatal_report with the failing command and its exit status.
relay_install_hook_guard() {
  RELAY_HOOK_NAME="${1:-unknown}"
  # Fires when a command in the hook's main body returns non-zero without being
  # handled (the failures `set -e`/pipefail abort on), just before the shell exits.
  # Reports, then lets the exit proceed.
  # NOTE(review): an unbound-variable abort under `set -u` is a shell error rather
  # than a failed command, so it is not expected to run this trap — confirm.
  # (ERR is not inherited into functions without `set -E`; this covers the top-level
  # flow, which is where a silent death actually wedges a turn.)
  trap 'relay_hook_err_rc=$?; relay_hook_fatal_report "${RELAY_HOOK_NAME:-unknown}" "exit ${relay_hook_err_rc}: ${BASH_COMMAND}"' ERR
}
118
+
95
119
  relay_pending_reply_stop_decision() {
96
120
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
97
121
  [ -z "$port" ] && return 0
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
  source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
+ relay_install_hook_guard session-end
4
5
 
5
6
  payload="$(cat || true)"
6
7
  reason="$(relay_json_string_field reason "$payload")"
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
  source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
+ relay_install_hook_guard session-start
4
5
 
5
6
  payload="$(cat || true)"
6
7
  source_kind="$(relay_json_string_field source "$payload")"
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
  source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
+ relay_install_hook_guard stop-failure
4
5
 
5
6
  payload="$(cat || true)"
6
7
  error="$(relay_json_string_field error "$payload")"
@@ -1,6 +1,14 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
  source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
+ relay_install_hook_guard stop
5
+
6
+ # Clearing the turn's busy state is the critical path (#199). Register it on EXIT
7
+ # so it runs even if a side-call below fails or times out under `set -e`. The one
8
+ # exception is the reply-obligation block path, which deliberately keeps the agent
9
+ # busy to answer — it opts out via the flag before exiting.
10
+ _relay_clear_idle_on_exit=1
11
+ trap '[ "${_relay_clear_idle_on_exit:-0}" = "1" ] && relay_post_status_clearing_subagents idle' EXIT
4
12
 
5
13
  payload="$(cat || true)"
6
14
  stop_hook_active="$(relay_json_bool_field stop_hook_active "$payload")"
@@ -8,12 +16,14 @@ if [ "$stop_hook_active" != "true" ]; then
8
16
  last_assistant_msg="$(echo "$payload" | jq -c '.last_assistant_message // empty' 2>/dev/null || true)"
9
17
  relay_post_session_turn "$(relay_json_string_field transcript_path "$payload")" "$last_assistant_msg"
10
18
  # `|| true`: under `set -e`, a non-zero from the obligation check must never abort
11
- # the hook before the idle-clear below — clearing the turn is the critical path (#199).
19
+ # the hook before the idle-clear — clearing the turn is the critical path (#199).
12
20
  stop_decision="$(relay_pending_reply_stop_decision || true)"
13
21
  if [ -n "$stop_decision" ]; then
22
+ _relay_clear_idle_on_exit=0
14
23
  printf '%s\n' "$stop_decision"
15
24
  exit 0
16
25
  fi
17
26
  fi
18
27
 
19
- relay_post_status_clearing_subagents idle
28
+ # Normal turn end → the EXIT trap posts idle (always, even on an unexpected abort above).
29
+ exit 0
@@ -4,6 +4,7 @@ set -euo pipefail
4
4
  PLUGIN_ROOT="${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
5
5
  # shellcheck source=/dev/null
6
6
  source "${PLUGIN_ROOT}/hooks/relay-status.sh"
7
+ relay_install_hook_guard subagent-start
7
8
 
8
9
  payload="$(cat || true)"
9
10
  agent_id="$(relay_json_string_field agent_id "$payload")"
@@ -4,6 +4,7 @@ set -euo pipefail
4
4
  PLUGIN_ROOT="${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
5
5
  # shellcheck source=/dev/null
6
6
  source "${PLUGIN_ROOT}/hooks/relay-status.sh"
7
+ relay_install_hook_guard subagent-stop
7
8
 
8
9
  payload="$(cat || true)"
9
10
  agent_id="$(relay_json_string_field agent_id "$payload")"
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
  source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
4
+ relay_install_hook_guard user-prompt-submit
4
5
  payload="$(cat || true)"
5
6
  relay_post_status busy
6
7
  # Mirror a terminal/TUI-typed prompt into the dashboard chat and start reasoning
@@ -4,6 +4,7 @@ import { basename, join, resolve } from "node:path";
4
4
  import type { ContextState, Message } from "agent-relay-sdk";
5
5
  import { profileAllowsRelayFeature, providerMessageText, RELAY_CONTEXT, type ManagedProcess, type ProviderAdapter, type ProviderConfig, type ProviderPermissionDecisionInput, type ProviderSessionEvent, type ProviderStatusUpdate, type RunnerSpawnConfig, type SpawnArgs, type TerminalAttachSpec } from "../adapter";
6
6
  import { workspaceDepsNoteFromEnv } from "../relay-instructions";
7
+ import { logger } from "../logger";
7
8
 
8
9
  /** Relay context prepended to a Codex agent's first turn: the standard relay
9
10
  * blurb plus, when running in an isolated workspace, the deps caveat (#159). */
@@ -199,7 +200,7 @@ export class CodexAdapter implements ProviderAdapter {
199
200
  input = codexRelayContextBlock() + "\n\n" + input;
200
201
  process.meta = { ...(process.meta ?? {}), relayContextSent: true };
201
202
  }
202
- console.error(`[agent-relay] starting Codex initial prompt in thread ${threadId}`);
203
+ logger.info("codex", `starting Codex initial prompt in thread ${threadId}`);
203
204
  const client = process.meta?.client as CodexAppClient;
204
205
  await client.turnStart(threadId, input);
205
206
  }
@@ -211,7 +212,7 @@ export class CodexAdapter implements ProviderAdapter {
211
212
  text = codexRelayContextBlock() + "\n\n" + text;
212
213
  process.meta = { ...(process.meta ?? {}), relayContextSent: true };
213
214
  }
214
- console.error(codexDeliveryNotice(messages, threadId));
215
+ logger.info("codex", codexDeliveryNotice(messages, threadId));
215
216
  const client = process.meta?.client as CodexAppClient;
216
217
  await client.turnStart(threadId, text);
217
218
  }
@@ -1,6 +1,16 @@
1
1
  import type { Server, ServerWebSocket } from "bun";
2
2
  import type { Message, ReplyObligation } from "agent-relay-sdk";
3
3
  import type { ProviderPermissionDecisionInput, ProviderStatusEvent, SemanticStatus, TerminalAttachSpec } from "./adapter";
4
+ import { logger, parseLogLevel, LOG_LEVELS } from "./logger";
5
+
6
+ // A hook that failed in a way it could not handle itself reports here so the
7
+ // failure is never silent (#198 item 5). Phase 1 logs it FATAL to the per-agent
8
+ // log; Phase 2 (#196) will additionally route it through the runner outbox to the
9
+ // server.
10
export interface HookFatalReport {
  /** Name of the hook that failed; the control server substitutes "unknown" when none is supplied. */
  hook: string;
  /** Failure detail sent by the hook; the control server substitutes "(no detail)" when none is supplied. */
  error: string;
}
4
14
 
5
15
  interface MonitorSocketData {
6
16
  kind: "monitor";
@@ -33,6 +43,10 @@ interface ControlServerOptions {
33
43
  // transcript. transcriptPath is optional — the runner falls back to the last
34
44
  // path it saw during the session.
35
45
  onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
46
+ // Phase 1 observability (#198): a hook reporting an unhandled failure. The
47
+ // control server already logs it FATAL; this is the seam for Phase 2 to also
48
+ // surface it to the server via the runner outbox.
49
+ onHookFatal?(report: HookFatalReport): void;
36
50
  }
37
51
 
38
52
  export function startControlServer(options: ControlServerOptions): ControlServer {
@@ -81,6 +95,15 @@ export function startControlServer(options: ControlServerOptions): ControlServer
81
95
  if (url.pathname === "/session-end" && req.method === "POST") {
82
96
  return handleSessionEnd(req, options);
83
97
  }
98
+ if (url.pathname === "/log-level" && req.method === "GET") {
99
+ return Response.json({ level: logger.getLevel(), levels: LOG_LEVELS });
100
+ }
101
+ if (url.pathname === "/log-level" && req.method === "POST") {
102
+ return handleLogLevel(req);
103
+ }
104
+ if (url.pathname === "/hook-fatal" && req.method === "POST") {
105
+ return handleHookFatal(req, options);
106
+ }
84
107
  if (url.pathname === "/monitor") {
85
108
  const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
86
109
  return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
@@ -361,6 +384,26 @@ async function handleSessionEnd(req: Request, options: ControlServerOptions): Pr
361
384
  return Response.json({ ok: true });
362
385
  }
363
386
 
387
/**
 * POST /log-level: switch the process-global logger's level at runtime.
 *
 * Expects a JSON body of the form `{ "level": "<name>" }`. Responds 400 with the
 * list of accepted names when the body is missing, malformed, or names an
 * unknown level; otherwise responds with the new and the previous level.
 */
async function handleLogLevel(req: Request): Promise<Response> {
  const body = await req.json().catch(() => null);
  const requested = isRecord(body) && typeof body.level === "string" ? body.level : undefined;
  const level = parseLogLevel(requested);
  if (!level) {
    return Response.json({ error: `level must be one of: ${LOG_LEVELS.join(", ")}` }, { status: 400 });
  }
  const previous = logger.getLevel();
  const note = `log level set to ${level} (was ${previous}) via control port`;
  // Record the change while "info" is still visible. Logging only after the switch
  // would drop the record whenever the level is raised above info (e.g. info -> error),
  // leaving no trace of who silenced the log.
  if (logger.isEnabled("info")) {
    logger.info("logger", note);
    logger.setLevel(level);
  } else {
    logger.setLevel(level);
    logger.info("logger", note);
  }
  return Response.json({ ok: true, level, previous });
}
396
+
397
/**
 * POST /hook-fatal: record a failure that a hook could not handle itself.
 *
 * Accepts a JSON body `{ hook, error }`. Missing or blank fields fall back to
 * "unknown" / "(no detail)" so the FATAL record is never empty. Always responds
 * `{ ok: true }`; reporting must never fail back at the hook.
 */
async function handleHookFatal(req: Request, options: ControlServerOptions): Promise<Response> {
  const body = await req.json().catch(() => null);
  const hook = isRecord(body) && typeof body.hook === "string" && body.hook.trim() ? body.hook.trim() : "unknown";
  // A whitespace-only detail would produce an empty log record; treat it as absent.
  const error = isRecord(body) && typeof body.error === "string" && body.error.trim() ? body.error : "(no detail)";
  // Never silent: a hook that couldn't handle its own failure lands here as FATAL.
  logger.fatal(`hook:${hook}`, error);
  try {
    options.onHookFatal?.({ hook, error });
  } catch (callbackError) {
    // Still never thrown back at the hook, but a broken callback is logged rather than hidden.
    logger.error(`hook:${hook}`, `onHookFatal callback failed: ${String(callbackError)}`);
  }
  return Response.json({ ok: true });
}
406
+
364
407
  async function handleStatus(req: Request, options: ControlServerOptions): Promise<Response> {
365
408
  const body = await req.json().catch(() => null) as Partial<ProviderStatusEvent> | null;
366
409
  const status = body?.status;
package/src/logger.ts ADDED
@@ -0,0 +1,97 @@
1
+ import { appendFileSync, mkdirSync } from "node:fs";
2
+ import { join } from "node:path";
3
+
4
+ // Phase 1 observability (#198): one leveled, runtime-togglable logger for the
5
+ // Runner and the provider adapters below it. Replaces the ad-hoc scatter of
6
+ // `console.error`, `logRunnerDiagnostic` (-> runner-<agent>.log) and
7
+ // `sessionLog`/`sessionDebug` (-> session-mirror-<agent>.log) with a single
8
+ // switch and a single greppable, ANSI-free sink.
9
+ //
10
+ // Sink: the per-agent `session-mirror-<agent>.log` — the file the orchestrator
11
+ // already surfaces to the dashboard log-viewer (captureSessionMirror). One place
12
+ // to look when anything in the Runner misbehaves.
13
+ //
14
+ // Level is read once from AGENT_RELAY_LOG_LEVEL (default "info") and can be
15
+ // flipped at runtime via the control port (no restart) — so a phase refactor can
16
+ // be watched at debug without bouncing the agent.
17
+
18
/** Severity names understood by the logger, from least to most severe. */
export type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";

/** Numeric rank of each level; levels are compared through these ranks. */
const ORDER: Record<LogLevel, number> = {
  debug: 10,
  info: 20,
  warn: 30,
  error: 40,
  fatal: 50,
};

/** Every level name, in the key order of ORDER. */
export const LOG_LEVELS = Object.keys(ORDER) as LogLevel[];

/**
 * Normalise a user-supplied level name.
 *
 * @param value Raw text such as an environment variable or a request field.
 * @returns The matching level after trimming and lower-casing, or `undefined`
 *          when the input is empty, nullish, or not a known level.
 */
export function parseLogLevel(value: string | undefined | null): LogLevel | undefined {
  if (!value) return undefined;
  const normalized = value.trim().toLowerCase();
  return LOG_LEVELS.find((candidate) => candidate === normalized);
}
28
+
29
+ // Matches the runner's safeLogName and the orchestrator's safeMirrorLogName so all
30
+ // three resolve the identical filename for a given agent id.
31
/**
 * Turn an arbitrary id into a filename-safe token: each run of characters outside
 * `[a-zA-Z0-9_.-]` becomes a single underscore, and the result is capped at 180
 * characters.
 */
function safeLogName(value: string): string {
  const sanitized = value.replace(/[^a-zA-Z0-9_.-]+/g, "_");
  return sanitized.substring(0, 180);
}
34
+
35
export interface LoggerConfig {
  /** Agent id used to name the log file (`session-mirror-<agent>.log`); Logger defaults it to "runner". */
  agentId?: string;
  /** Minimum level to emit; when omitted Logger falls back to AGENT_RELAY_LOG_LEVEL, then "info". */
  level?: LogLevel;
  /** NOTE(review): stored by Logger but not read anywhere in the code shown — confirm intended use. */
  headless?: boolean;
  /** Directory the log file is written to; Logger defaults it to `<HOME or .>/.agent-relay/logs`. */
  logDir?: string;
}
41
+
42
/**
 * Leveled logger that appends one line per record to
 * `<logDir>/session-mirror-<agent>.log`.
 */
export class Logger {
  private level: LogLevel;
  private agentId: string;
  // Kept in sync by the constructor and configure(); nothing in this class reads it.
  private headless: boolean;
  private logDir: string;

  /** Explicit config wins; the level otherwise comes from AGENT_RELAY_LOG_LEVEL, then "info". */
  constructor(config: LoggerConfig = {}) {
    const { level, agentId, headless, logDir } = config;
    this.level = level ?? parseLogLevel(process.env.AGENT_RELAY_LOG_LEVEL) ?? "info";
    this.agentId = agentId ?? "runner";
    this.headless = headless ?? false;
    this.logDir = logDir ?? join(process.env.HOME || ".", ".agent-relay", "logs");
  }

  /**
   * Rebind the logger once the runner knows its agent id. Only fields present in
   * `config` are overwritten, so a level set earlier survives unless one is passed.
   */
  configure(config: LoggerConfig): void {
    const { agentId, headless, logDir, level } = config;
    if (agentId !== undefined) this.agentId = agentId;
    if (headless !== undefined) this.headless = headless;
    if (logDir !== undefined) this.logDir = logDir;
    if (level !== undefined) this.level = level;
  }

  /** Replace the active threshold. */
  setLevel(level: LogLevel): void {
    this.level = level;
  }

  /** Current threshold. */
  getLevel(): LogLevel {
    return this.level;
  }

  /** True when a record at `level` ranks at or above the current threshold. */
  isEnabled(level: LogLevel): boolean {
    return ORDER[level] >= ORDER[this.level];
  }

  debug(component: string, message: string): void {
    this.log("debug", component, message);
  }

  info(component: string, message: string): void {
    this.log("info", component, message);
  }

  warn(component: string, message: string): void {
    this.log("warn", component, message);
  }

  error(component: string, message: string): void {
    this.log("error", component, message);
  }

  fatal(component: string, message: string): void {
    this.log("fatal", component, message);
  }

  /**
   * Format and append one record. The message is collapsed to a single line first.
   * Writing is best-effort: when the file cannot be written, error and fatal
   * records go to stderr instead and lower levels are dropped.
   */
  log(level: LogLevel, component: string, message: string): void {
    if (!this.isEnabled(level)) return;
    const timestamp = new Date().toISOString();
    const tag = level.toUpperCase().padEnd(5);
    const line = `[${timestamp}] ${tag} [${component}] ${oneLine(message)}\n`;
    try {
      mkdirSync(this.logDir, { recursive: true });
      const target = join(this.logDir, `session-mirror-${safeLogName(this.agentId)}.log`);
      appendFileSync(target, line);
    } catch {
      // Below error severity the record is simply lost; error/fatal still reach stderr.
      if (ORDER[level] < ORDER.error) return;
      try {
        console.error(line.trimEnd());
      } catch {
        /* give up */
      }
    }
  }
}
87
+
88
+ // Newlines would split one record across several log lines and break greppability;
89
+ // collapse them so a multi-line message stays one line.
90
/**
 * Collapse a message onto one line so each log record stays a single greppable line.
 * Every line break (`\r\n`, lone `\n`, or lone `\r`) becomes " ⏎ ". A bare carriage
 * return is included because it would otherwise pass through and break the record
 * in viewers that treat it as a line break.
 */
function oneLine(message: string): string {
  return message.replace(/\r\n|\r|\n/g, " ⏎ ");
}
93
+
94
+ // Process-global logger. A runner process serves exactly one agent, so a singleton
95
+ // is the right scope; the runner calls configure() once it knows its id, and
96
+ // adapters import this instance directly (no constructor threading).
97
+ export const logger = new Logger();
package/src/runner.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { hostname } from "node:os";
2
- import { appendFileSync, closeSync, mkdirSync, openSync, readSync, statSync, writeFileSync } from "node:fs";
2
+ import { closeSync, mkdirSync, openSync, readSync, statSync, writeFileSync } from "node:fs";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { dirname, join } from "node:path";
5
5
  import type { AgentProfile, ContextState, Message, MessageSessionMeta, ProviderCapabilities, TaskStatusInput, WorkspaceMetadata } from "agent-relay-sdk";
@@ -13,6 +13,7 @@ import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssi
13
13
  import { agentProfileProjectionReport } from "./profile-projection";
14
14
  import { profileUsesHostProviderGlobals } from "./profile-home";
15
15
  import { runtimeMetadata } from "./version";
16
+ import { logger, parseLogLevel } from "./logger";
16
17
  import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type SessionScratchLayout } from "./session-scratch";
17
18
 
18
19
  interface RunnerOptions {
@@ -177,6 +178,14 @@ export class AgentRunner {
177
178
 
178
179
  constructor(private readonly options: RunnerOptions) {
179
180
  this.agentId = options.agentId ?? options.runnerId;
181
+ // Bind the process-global logger to this agent. AGENT_RELAY_SESSION_DEBUG=1 is
182
+ // kept as a back-compat alias for the verbose probe/emit lines, now expressed
183
+ // as log level "debug" (AGENT_RELAY_LOG_LEVEL still wins when both are set).
184
+ logger.configure({
185
+ agentId: this.agentId,
186
+ headless: options.headless,
187
+ ...(this.sessionDebugVerbose && !parseLogLevel(process.env.AGENT_RELAY_LOG_LEVEL) ? { level: "debug" as const } : {}),
188
+ });
180
189
  this.currentToken = options.token;
181
190
  this.currentTokenJti = options.tokenJti;
182
191
  this.currentTokenProfileId = options.tokenProfileId;
@@ -387,7 +396,7 @@ export class AgentRunner {
387
396
  startedAt: this.options.startedAt,
388
397
  }, null, 2) + "\n", { mode: 0o600 });
389
398
  } catch (error) {
390
- console.error(`[runner] failed to write runner info file: ${error}`);
399
+ logger.error("runner", `failed to write runner info file: ${error}`);
391
400
  }
392
401
  }
393
402
 
@@ -403,7 +412,7 @@ export class AgentRunner {
403
412
  const messages = await this.http.pollMessages({ for: this.agentId, unread: true, limit: 100 });
404
413
  for (const message of messages) this.enqueueMessage(message);
405
414
  } catch (error) {
406
- console.error(`[runner] inbox bootstrap failed: ${error}`);
415
+ logger.error("runner", `inbox bootstrap failed: ${error}`);
407
416
  }
408
417
  }
409
418
 
@@ -413,7 +422,7 @@ export class AgentRunner {
413
422
  try {
414
423
  await this.options.adapter.deliverInitialPrompt(this.process, prompt);
415
424
  } catch (error) {
416
- console.error(`[runner] initial prompt delivery failed: ${error}`);
425
+ logger.error("runner", `initial prompt delivery failed: ${error}`);
417
426
  }
418
427
  }
419
428
 
@@ -450,7 +459,7 @@ export class AgentRunner {
450
459
  status: "in_progress",
451
460
  agentId: this.agentId,
452
461
  metadata: { messageId: message.id, completedBy: "runner" },
453
- }).catch((error) => console.error(`[runner] task ${taskId} in_progress update failed: ${error}`));
462
+ }).catch((error) => logger.error("task", `task ${taskId} in_progress update failed: ${error}`));
454
463
  // Runner owns claim + status here; drop the server's self-claim instruction
455
464
  // so the agent doesn't improvise a stray claim send (see stripRunnerClaimedGuidance).
456
465
  toDeliver = { ...message, body: stripRunnerClaimedGuidance(message.body) };
@@ -468,7 +477,7 @@ export class AgentRunner {
468
477
  try {
469
478
  const prepared = await messagesWithCachedAttachments(deliverable, this.http, {
470
479
  agentId: this.agentId,
471
- onError: (message) => console.error(`[runner] ${message}`),
480
+ onError: (message) => logger.error("runner", message),
472
481
  });
473
482
  await this.options.adapter.deliver(this.process, prepared);
474
483
  for (const message of deliverable) {
@@ -477,7 +486,7 @@ export class AgentRunner {
477
486
  }
478
487
  } catch (error) {
479
488
  failed = true;
480
- if (shouldLogDeliveryFailure(error)) console.error(`[runner] message delivery failed: ${error}`);
489
+ if (shouldLogDeliveryFailure(error)) logger.warn("delivery", `message delivery failed: ${error}`);
481
490
  for (const message of deliverable) {
482
491
  this.clearActiveClaim(message);
483
492
  this.pendingMessages.set(message.id, message);
@@ -545,7 +554,7 @@ export class AgentRunner {
545
554
  await this.http.deleteAgent(this.agentId).catch(() => {});
546
555
  if (this.options.exitProcessOnShutdown !== false) {
547
556
  setTimeout(() => void this.stop().catch((error) => {
548
- console.error(`[runner] stop after command failed: ${error}`);
557
+ logger.error("lifecycle", `stop after command failed: ${error}`);
549
558
  }).finally(() => process.exit(0)), 10);
550
559
  }
551
560
  } else if (!this.stopped) {
@@ -680,7 +689,7 @@ export class AgentRunner {
680
689
 
681
690
  if (this.shouldStopUnexpectedProviderExit(diagnostics)) {
682
691
  const hasResumeId = typeof diagnostics.claudeResumeId === "string" && diagnostics.claudeResumeId.length > 0;
683
- console.warn(`[runner] ${this.options.provider} exited; leaving agent offline for manual recovery`);
692
+ logger.warn("lifecycle", `${this.options.provider} exited; leaving agent offline for manual recovery`);
684
693
  this.publishRunnerTimelineEvent({
685
694
  status: "provider.restart_decision",
686
695
  id: `provider-restart-decision-${this.providerSessionId}-${now}`,
@@ -708,7 +717,7 @@ export class AgentRunner {
708
717
  }
709
718
 
710
719
  if (runtimeMs < RAPID_EXIT_MS && recent.length > MAX_RAPID_UNEXPECTED_EXITS) {
711
- console.error(`[runner] provider session exited ${recent.length} times within ${Math.round(UNEXPECTED_EXIT_WINDOW_MS / 1000)}s; giving up`);
720
+ logger.error("lifecycle", `provider session exited ${recent.length} times within ${Math.round(UNEXPECTED_EXIT_WINDOW_MS / 1000)}s; giving up`);
712
721
  this.publishRunnerTimelineEvent({
713
722
  status: "provider.restart_decision",
714
723
  id: `provider-restart-decision-${this.providerSessionId}-${now}`,
@@ -732,7 +741,7 @@ export class AgentRunner {
732
741
  }
733
742
 
734
743
  const delayMs = Math.min(10_000, Math.max(500, 500 * recent.length));
735
- console.warn(`[runner] provider session exited unexpectedly after ${Math.round(runtimeMs / 1000)}s; restarting in ${delayMs}ms`);
744
+ logger.warn("lifecycle", `provider session exited unexpectedly after ${Math.round(runtimeMs / 1000)}s; restarting in ${delayMs}ms`);
736
745
  this.publishRunnerTimelineEvent({
737
746
  status: "provider.restart_decision",
738
747
  id: `provider-restart-decision-${this.providerSessionId}-${now}`,
@@ -757,7 +766,7 @@ export class AgentRunner {
757
766
  this.publishStatus();
758
767
  this.scheduleDrain();
759
768
  } catch (error) {
760
- console.error(`[runner] provider restart after unexpected exit failed: ${error}`);
769
+ logger.error("lifecycle", `provider restart after unexpected exit failed: ${error}`);
761
770
  this.setProviderStatus("error");
762
771
  this.options.onProviderExit?.(1);
763
772
  }
@@ -832,10 +841,10 @@ export class AgentRunner {
832
841
  private handleBusError(code: string, message: string): void {
833
842
  const action = runnerBusErrorAction(code, this.stopped);
834
843
  if (action === "ignore") return;
835
- console.error(`[runner] bus error ${code}: ${message}`);
844
+ logger.error("bus", `bus error ${code}: ${message}`);
836
845
  if (action === "stop") {
837
846
  void this.stop().catch((error) => {
838
- console.error(`[runner] stop after bus error failed: ${error}`);
847
+ logger.error("bus", `stop after bus error failed: ${error}`);
839
848
  }).finally(() => process.exit(0));
840
849
  }
841
850
  }
@@ -1363,36 +1372,24 @@ export class AgentRunner {
1363
1372
  this.logRunnerDiagnostic(`[runner] HTTP liveness update failed: ${suffix}`);
1364
1373
  }
1365
1374
 
1375
+ // Runner operational diagnostics (HTTP liveness, token renewal failures). Routed
1376
+ // through the leveled logger at warn — see logger.ts. Kept as a thin wrapper so
1377
+ // the existing call sites and their `[runner]` framing stay put.
1366
1378
  private logRunnerDiagnostic(message: string): void {
1367
- if (this.options.headless) {
1368
- console.error(message);
1369
- return;
1370
- }
1371
- try {
1372
- const logDir = join(process.env.HOME || ".", ".agent-relay", "logs");
1373
- mkdirSync(logDir, { recursive: true });
1374
- appendFileSync(join(logDir, `runner-${safeLogName(this.agentId)}.log`), `[${new Date().toISOString()}] ${message}\n`);
1375
- } catch {
1376
- // Do not write runner diagnostics into an interactive provider TUI.
1377
- }
1379
+ logger.warn("runner", message.replace(/^\[runner\]\s*/, ""));
1378
1380
  }
1379
1381
 
1380
- // Session-mirror diagnostics → a dedicated, ANSI-free, greppable log per agent
1381
- // (NOT the provider's TUI stdout, which is unreadable). This is the single place
1382
- // to look when chat/terminal sync misbehaves. Key transitions always log here.
1382
+ // Session-mirror diagnostics → the leveled logger (component "mirror"), written
1383
+ // to the dashboard-surfaced session-mirror-<agent>.log. Key transitions log at
1384
+ // info; the single place to look when chat/terminal sync misbehaves.
1383
1385
  private sessionLog(message: string): void {
1384
- try {
1385
- const logDir = join(process.env.HOME || ".", ".agent-relay", "logs");
1386
- mkdirSync(logDir, { recursive: true });
1387
- appendFileSync(join(logDir, `session-mirror-${safeLogName(this.agentId)}.log`), `[${new Date().toISOString()}] ${message}\n`);
1388
- } catch {
1389
- // best-effort
1390
- }
1386
+ logger.info("mirror", message);
1391
1387
  }
1392
1388
 
1393
- // Verbose, high-frequency lines (per-probe, per-emit) — only when AGENT_RELAY_SESSION_DEBUG=1.
1389
+ // Verbose, high-frequency lines (per-probe, per-emit) — surfaced only at log
1390
+ // level "debug" (AGENT_RELAY_LOG_LEVEL=debug, or flip live via /log-level).
1394
1391
  private sessionDebug(message: string): void {
1395
- if (this.sessionDebugVerbose) this.sessionLog(message);
1392
+ logger.debug("mirror", message);
1396
1393
  }
1397
1394
 
1398
1395
  private ensureScratch(): void {
@@ -1657,7 +1654,7 @@ export class AgentRunner {
1657
1654
  })
1658
1655
  .then(() => true)
1659
1656
  .catch((error) => {
1660
- console.error(`[runner] task ${claim.taskId} completion update failed: ${error}`);
1657
+ logger.error("task", `task ${claim.taskId} completion update failed: ${error}`);
1661
1658
  return false;
1662
1659
  });
1663
1660
  if (!ok) continue;
@@ -1962,10 +1959,6 @@ function httpErrorKey(error: unknown): string {
1962
1959
  return String(error);
1963
1960
  }
1964
1961
 
1965
- function safeLogName(value: string): string {
1966
- return value.replace(/[^a-zA-Z0-9_.-]+/g, "_").slice(0, 180);
1967
- }
1968
-
1969
1962
  function isContextState(value: unknown): value is ContextState {
1970
1963
  if (!value || typeof value !== "object" || Array.isArray(value)) return false;
1971
1964
  const state = value as Record<string, unknown>;