agent-relay-runner 0.15.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/plugins/claude/.claude-plugin/plugin.json +1 -1
- package/plugins/claude/hooks/permission-request.sh +2 -0
- package/plugins/claude/hooks/post-compact.sh +1 -0
- package/plugins/claude/hooks/pre-compact.sh +1 -0
- package/plugins/claude/hooks/relay-status.sh +24 -0
- package/plugins/claude/hooks/session-end.sh +1 -0
- package/plugins/claude/hooks/session-start.sh +1 -0
- package/plugins/claude/hooks/stop-failure.sh +1 -0
- package/plugins/claude/hooks/stop.sh +12 -2
- package/plugins/claude/hooks/subagent-start.sh +1 -0
- package/plugins/claude/hooks/subagent-stop.sh +1 -0
- package/plugins/claude/hooks/user-prompt-submit.sh +1 -0
- package/src/adapters/codex.ts +3 -2
- package/src/control-server.ts +43 -0
- package/src/logger.ts +97 -0
- package/src/runner.ts +35 -42
package/package.json
CHANGED
|
@@ -92,6 +92,30 @@ relay_post_session_end() {
|
|
|
92
92
|
-d "$body" >/dev/null 2>&1 || true
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
+
# --- Hook FATAL surfacing (#198) -------------------------------------------
|
|
96
|
+
# A hook that dies unexpectedly must never be silent. relay_install_hook_guard
|
|
97
|
+
# arms an ERR trap that reports the failure FATAL to the runner control port,
|
|
98
|
+
# which logs it to the dashboard-surfaced per-agent log. Best-effort and bounded
|
|
99
|
+
# (--max-time 2) so the report itself can never blow the hook's timeout budget.
|
|
100
|
+
relay_hook_fatal_report() {
  # $1 = hook name (defaults to "unknown"), $2 = free-form failure detail (may be empty).
  local hook="${1:-unknown}"
  local detail="${2:-}"

  # Without a runner control port there is nowhere to report to; succeed quietly.
  local port="${AGENT_RELAY_RUNNER_PORT:-}"
  if [ -z "$port" ]; then
    return 0
  fi

  # Both fields go through relay_json_escape before being embedded in the JSON body.
  local hook_json="$(relay_json_escape "$hook")"
  local detail_json="$(relay_json_escape "$detail")"
  local body="{\"hook\":\"${hook_json}\",\"error\":\"${detail_json}\"}"
  local url="http://127.0.0.1:${port}/hook-fatal"

  # Best-effort POST: capped at 2 seconds, all output discarded, and any curl
  # failure swallowed so the report can never fail the calling hook.
  curl -fsS --max-time 2 -X POST "$url" \
    -H 'Content-Type: application/json' \
    -d "$body" >/dev/null 2>&1 || true
}
|
|
109
|
+
|
|
110
|
+
relay_install_hook_guard() {
  # $1 = hook name; kept in a global so the ERR handler can read it when it fires.
  RELAY_HOOK_NAME="${1:-unknown}"

  # The handler saves the failing command's exit status first (before anything can
  # overwrite $?), then reports it together with BASH_COMMAND via
  # relay_hook_fatal_report. It does not exit by itself; the shell's own handling
  # of the failure proceeds after the report.
  #
  # Scope: ERR is not inherited into functions without `set -E`, so this guards the
  # hook's top-level flow only.
  # NOTE(review): bash does not appear to run the ERR trap when `set -u` aborts on
  # an unbound variable, so that kind of death is probably NOT reported here —
  # confirm against the bash manual before relying on it.
  local on_err='relay_hook_err_rc=$?; relay_hook_fatal_report "${RELAY_HOOK_NAME:-unknown}" "exit ${relay_hook_err_rc}: ${BASH_COMMAND}"'
  trap "$on_err" ERR
}
|
|
118
|
+
|
|
95
119
|
relay_pending_reply_stop_decision() {
|
|
96
120
|
local port="${AGENT_RELAY_RUNNER_PORT:-}"
|
|
97
121
|
[ -z "$port" ] && return 0
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
|
|
4
|
+
relay_install_hook_guard session-end
|
|
4
5
|
|
|
5
6
|
payload="$(cat || true)"
|
|
6
7
|
reason="$(relay_json_string_field reason "$payload")"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
|
|
4
|
+
relay_install_hook_guard session-start
|
|
4
5
|
|
|
5
6
|
payload="$(cat || true)"
|
|
6
7
|
source_kind="$(relay_json_string_field source "$payload")"
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
|
|
4
|
+
relay_install_hook_guard stop
|
|
5
|
+
|
|
6
|
+
# Clearing the turn's busy state is the critical path (#199). Register it on EXIT
|
|
7
|
+
# so it runs even if a side-call below fails or times out under `set -e`. The one
|
|
8
|
+
# exception is the reply-obligation block path, which deliberately keeps the agent
|
|
9
|
+
# busy to answer — it opts out via the flag before exiting.
|
|
10
|
+
_relay_clear_idle_on_exit=1
|
|
11
|
+
trap '[ "${_relay_clear_idle_on_exit:-0}" = "1" ] && relay_post_status_clearing_subagents idle' EXIT
|
|
4
12
|
|
|
5
13
|
payload="$(cat || true)"
|
|
6
14
|
stop_hook_active="$(relay_json_bool_field stop_hook_active "$payload")"
|
|
@@ -8,12 +16,14 @@ if [ "$stop_hook_active" != "true" ]; then
|
|
|
8
16
|
last_assistant_msg="$(echo "$payload" | jq -c '.last_assistant_message // empty' 2>/dev/null || true)"
|
|
9
17
|
relay_post_session_turn "$(relay_json_string_field transcript_path "$payload")" "$last_assistant_msg"
|
|
10
18
|
# `|| true`: under `set -e`, a non-zero from the obligation check must never abort
|
|
11
|
-
# the hook before the idle-clear
|
|
19
|
+
# the hook before the idle-clear — clearing the turn is the critical path (#199).
|
|
12
20
|
stop_decision="$(relay_pending_reply_stop_decision || true)"
|
|
13
21
|
if [ -n "$stop_decision" ]; then
|
|
22
|
+
_relay_clear_idle_on_exit=0
|
|
14
23
|
printf '%s\n' "$stop_decision"
|
|
15
24
|
exit 0
|
|
16
25
|
fi
|
|
17
26
|
fi
|
|
18
27
|
|
|
19
|
-
|
|
28
|
+
# Normal turn end → the EXIT trap posts idle (always, even on an unexpected abort above).
|
|
29
|
+
exit 0
|
|
@@ -4,6 +4,7 @@ set -euo pipefail
|
|
|
4
4
|
PLUGIN_ROOT="${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
|
|
5
5
|
# shellcheck source=/dev/null
|
|
6
6
|
source "${PLUGIN_ROOT}/hooks/relay-status.sh"
|
|
7
|
+
relay_install_hook_guard subagent-start
|
|
7
8
|
|
|
8
9
|
payload="$(cat || true)"
|
|
9
10
|
agent_id="$(relay_json_string_field agent_id "$payload")"
|
|
@@ -4,6 +4,7 @@ set -euo pipefail
|
|
|
4
4
|
PLUGIN_ROOT="${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
|
|
5
5
|
# shellcheck source=/dev/null
|
|
6
6
|
source "${PLUGIN_ROOT}/hooks/relay-status.sh"
|
|
7
|
+
relay_install_hook_guard subagent-stop
|
|
7
8
|
|
|
8
9
|
payload="$(cat || true)"
|
|
9
10
|
agent_id="$(relay_json_string_field agent_id "$payload")"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
source "${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}/hooks/relay-status.sh"
|
|
4
|
+
relay_install_hook_guard user-prompt-submit
|
|
4
5
|
payload="$(cat || true)"
|
|
5
6
|
relay_post_status busy
|
|
6
7
|
# Mirror a terminal/TUI-typed prompt into the dashboard chat and start reasoning
|
package/src/adapters/codex.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { basename, join, resolve } from "node:path";
|
|
|
4
4
|
import type { ContextState, Message } from "agent-relay-sdk";
|
|
5
5
|
import { profileAllowsRelayFeature, providerMessageText, RELAY_CONTEXT, type ManagedProcess, type ProviderAdapter, type ProviderConfig, type ProviderPermissionDecisionInput, type ProviderSessionEvent, type ProviderStatusUpdate, type RunnerSpawnConfig, type SpawnArgs, type TerminalAttachSpec } from "../adapter";
|
|
6
6
|
import { workspaceDepsNoteFromEnv } from "../relay-instructions";
|
|
7
|
+
import { logger } from "../logger";
|
|
7
8
|
|
|
8
9
|
/** Relay context prepended to a Codex agent's first turn: the standard relay
|
|
9
10
|
* blurb plus, when running in an isolated workspace, the deps caveat (#159). */
|
|
@@ -199,7 +200,7 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
199
200
|
input = codexRelayContextBlock() + "\n\n" + input;
|
|
200
201
|
process.meta = { ...(process.meta ?? {}), relayContextSent: true };
|
|
201
202
|
}
|
|
202
|
-
|
|
203
|
+
logger.info("codex", `starting Codex initial prompt in thread ${threadId}`);
|
|
203
204
|
const client = process.meta?.client as CodexAppClient;
|
|
204
205
|
await client.turnStart(threadId, input);
|
|
205
206
|
}
|
|
@@ -211,7 +212,7 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
211
212
|
text = codexRelayContextBlock() + "\n\n" + text;
|
|
212
213
|
process.meta = { ...(process.meta ?? {}), relayContextSent: true };
|
|
213
214
|
}
|
|
214
|
-
|
|
215
|
+
logger.info("codex", codexDeliveryNotice(messages, threadId));
|
|
215
216
|
const client = process.meta?.client as CodexAppClient;
|
|
216
217
|
await client.turnStart(threadId, text);
|
|
217
218
|
}
|
package/src/control-server.ts
CHANGED
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
import type { Server, ServerWebSocket } from "bun";
|
|
2
2
|
import type { Message, ReplyObligation } from "agent-relay-sdk";
|
|
3
3
|
import type { ProviderPermissionDecisionInput, ProviderStatusEvent, SemanticStatus, TerminalAttachSpec } from "./adapter";
|
|
4
|
+
import { logger, parseLogLevel, LOG_LEVELS } from "./logger";
|
|
5
|
+
|
|
6
|
+
/**
 * Report posted by a hook that failed in a way it could not handle itself, so the
 * failure is never silent (#198 item 5). Phase 1 logs it FATAL to the per-agent
 * log; Phase 2 (#196) will additionally route it through the runner outbox to the
 * server.
 */
export interface HookFatalReport {
  /** Name of the hook that reported the failure. */
  hook: string;
  /** Free-form failure detail supplied by the hook. */
  error: string;
}
|
|
4
14
|
|
|
5
15
|
interface MonitorSocketData {
|
|
6
16
|
kind: "monitor";
|
|
@@ -33,6 +43,10 @@ interface ControlServerOptions {
|
|
|
33
43
|
// transcript. transcriptPath is optional — the runner falls back to the last
|
|
34
44
|
// path it saw during the session.
|
|
35
45
|
onSessionEnd?(input: { reason?: string; transcriptPath?: string }): Promise<void>;
|
|
46
|
+
// Phase 1 observability (#198): a hook reporting an unhandled failure. The
|
|
47
|
+
// control server already logs it FATAL; this is the seam for Phase 2 to also
|
|
48
|
+
// surface it to the server via the runner outbox.
|
|
49
|
+
onHookFatal?(report: HookFatalReport): void;
|
|
36
50
|
}
|
|
37
51
|
|
|
38
52
|
export function startControlServer(options: ControlServerOptions): ControlServer {
|
|
@@ -81,6 +95,15 @@ export function startControlServer(options: ControlServerOptions): ControlServer
|
|
|
81
95
|
if (url.pathname === "/session-end" && req.method === "POST") {
|
|
82
96
|
return handleSessionEnd(req, options);
|
|
83
97
|
}
|
|
98
|
+
if (url.pathname === "/log-level" && req.method === "GET") {
|
|
99
|
+
return Response.json({ level: logger.getLevel(), levels: LOG_LEVELS });
|
|
100
|
+
}
|
|
101
|
+
if (url.pathname === "/log-level" && req.method === "POST") {
|
|
102
|
+
return handleLogLevel(req);
|
|
103
|
+
}
|
|
104
|
+
if (url.pathname === "/hook-fatal" && req.method === "POST") {
|
|
105
|
+
return handleHookFatal(req, options);
|
|
106
|
+
}
|
|
84
107
|
if (url.pathname === "/monitor") {
|
|
85
108
|
const upgraded = srv.upgrade(req, { data: { kind: "monitor" } });
|
|
86
109
|
return upgraded ? undefined : new Response("WebSocket upgrade failed", { status: 400 });
|
|
@@ -361,6 +384,26 @@ async function handleSessionEnd(req: Request, options: ControlServerOptions): Pr
|
|
|
361
384
|
return Response.json({ ok: true });
|
|
362
385
|
}
|
|
363
386
|
|
|
387
|
+
/**
 * POST /log-level: switch the process-global logger's level at runtime.
 *
 * Expects a JSON body of the form `{ "level": "<name>" }`. An unparseable body, a
 * missing/non-string `level`, or an unknown level name yields a 400 listing the
 * accepted names. On success the change is logged and both the new and the
 * previous level are returned.
 */
async function handleLogLevel(req: Request): Promise<Response> {
  // A body that is not valid JSON is treated the same as "no level supplied".
  const payload = await req.json().catch(() => null);
  const requested = isRecord(payload) && typeof payload.level === "string" ? payload.level : undefined;

  const level = parseLogLevel(requested);
  if (!level) {
    const error = `level must be one of: ${LOG_LEVELS.join(", ")}`;
    return Response.json({ error }, { status: 400 });
  }

  // Capture the old level before switching so it can be logged and echoed back.
  const previous = logger.getLevel();
  logger.setLevel(level);
  logger.info("logger", `log level set to ${level} (was ${previous}) via control port`);

  return Response.json({ ok: true, level, previous });
}
|
|
396
|
+
|
|
397
|
+
/**
 * POST /hook-fatal: record a failure that a hook could not handle itself.
 *
 * The body is expected to be `{ hook, error }`; anything missing or malformed is
 * replaced by a placeholder rather than rejected, so a report is never dropped.
 * The failure is logged FATAL, forwarded to the optional `onHookFatal` callback,
 * and the request always answers `{ ok: true }`.
 */
async function handleHookFatal(req: Request, options: ControlServerOptions): Promise<Response> {
  const payload = await req.json().catch(() => null);

  // Placeholders used when the body is unusable or a field is absent.
  let hook = "unknown";
  let error = "(no detail)";
  if (isRecord(payload)) {
    if (typeof payload.hook === "string") {
      const trimmed = payload.hook.trim();
      if (trimmed) hook = trimmed;
    }
    if (typeof payload.error === "string") error = payload.error;
  }

  // Never silent: a hook that couldn't handle its own failure lands here as FATAL.
  logger.fatal(`hook:${hook}`, error);

  try {
    options.onHookFatal?.({ hook, error });
  } catch {
    /* reporting must never throw back at the hook */
  }

  return Response.json({ ok: true });
}
|
|
406
|
+
|
|
364
407
|
async function handleStatus(req: Request, options: ControlServerOptions): Promise<Response> {
|
|
365
408
|
const body = await req.json().catch(() => null) as Partial<ProviderStatusEvent> | null;
|
|
366
409
|
const status = body?.status;
|
package/src/logger.ts
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { appendFileSync, mkdirSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
// Phase 1 observability (#198): one leveled, runtime-togglable logger for the
|
|
5
|
+
// Runner and the provider adapters below it. Replaces the ad-hoc scatter of
|
|
6
|
+
// `console.error`, `logRunnerDiagnostic` (-> runner-<agent>.log) and
|
|
7
|
+
// `sessionLog`/`sessionDebug` (-> session-mirror-<agent>.log) with a single
|
|
8
|
+
// switch and a single greppable, ANSI-free sink.
|
|
9
|
+
//
|
|
10
|
+
// Sink: the per-agent `session-mirror-<agent>.log` — the file the orchestrator
|
|
11
|
+
// already surfaces to the dashboard log-viewer (captureSessionMirror). One place
|
|
12
|
+
// to look when anything in the Runner misbehaves.
|
|
13
|
+
//
|
|
14
|
+
// Level is read once from AGENT_RELAY_LOG_LEVEL (default "info") and can be
|
|
15
|
+
// flipped at runtime via the control port (no restart) — so a phase refactor can
|
|
16
|
+
// be watched at debug without bouncing the agent.
|
|
17
|
+
|
|
18
|
+
/** Severity names understood by the logger, from least to most severe. */
export type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";

/** Numeric rank of each level; a higher number means a more severe level. */
const ORDER: Record<LogLevel, number> = {
  debug: 10,
  info: 20,
  warn: 30,
  error: 40,
  fatal: 50,
};

/** Every level name, in the key order of ORDER. */
export const LOG_LEVELS = Object.keys(ORDER) as LogLevel[];
|
|
22
|
+
|
|
23
|
+
/**
 * Turn free-form text into a known log level.
 *
 * Matching ignores surrounding whitespace and letter case.
 *
 * @param value Candidate level name; may be absent.
 * @returns The matching level, or `undefined` when the value is missing, empty,
 *   or not one of LOG_LEVELS.
 */
export function parseLogLevel(value: string | undefined | null): LogLevel | undefined {
  if (value === undefined || value === null || value === "") return undefined;
  const normalized = value.trim().toLowerCase();
  return LOG_LEVELS.find((candidate) => candidate === normalized);
}
|
|
28
|
+
|
|
29
|
+
// Filename-safe form of an agent id: every run of characters outside
// [a-zA-Z0-9_.-] becomes a single "_", and the result is capped at 180 characters.
// Per the original note this must stay in step with the runner's safeLogName and
// the orchestrator's safeMirrorLogName so all of them resolve the identical
// filename for a given agent id.
function safeLogName(value: string): string {
  const sanitized = value.replace(/[^a-zA-Z0-9_.-]+/g, "_");
  return sanitized.length > 180 ? sanitized.substring(0, 180) : sanitized;
}
|
|
34
|
+
|
|
35
|
+
/** Settings accepted by the Logger constructor and by Logger.configure(); all optional. */
export interface LoggerConfig {
  /** Agent id used to name the log file (`session-mirror-<agent>.log`). */
  agentId?: string;
  /** Minimum level that gets written. */
  level?: LogLevel;
  /** Whether the runner is headless. NOTE(review): stored by Logger but not read in the code shown — confirm intended use. */
  headless?: boolean;
  /** Directory the log file is written to. */
  logDir?: string;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Leveled, runtime-togglable logger that appends one line per record to the
 * per-agent `session-mirror-<agent>.log` file inside `logDir`.
 */
export class Logger {
  private level: LogLevel;
  private agentId: string;
  private headless: boolean;
  private logDir: string;

  /**
   * @param config Optional overrides. Defaults: level from AGENT_RELAY_LOG_LEVEL
   *   (falling back to "info"), agent id "runner", headless false, and log
   *   directory `<HOME or ".">/.agent-relay/logs`.
   */
  constructor(config: LoggerConfig = {}) {
    this.level = config.level ?? parseLogLevel(process.env.AGENT_RELAY_LOG_LEVEL) ?? "info";
    this.agentId = config.agentId ?? "runner";
    this.headless = config.headless ?? false;
    this.logDir = config.logDir ?? join(process.env.HOME || ".", ".agent-relay", "logs");
  }

  /**
   * Bind the logger to a concrete agent once the runner knows its id. Only the
   * fields present in `config` are changed, so a level already set via env or at
   * runtime is preserved unless an explicit level is passed.
   */
  configure(config: LoggerConfig): void {
    if (config.agentId !== undefined) this.agentId = config.agentId;
    if (config.headless !== undefined) this.headless = config.headless;
    if (config.logDir !== undefined) this.logDir = config.logDir;
    if (config.level !== undefined) this.level = config.level;
  }

  /** Replace the minimum level that gets written. */
  setLevel(level: LogLevel): void { this.level = level; }

  /** Current minimum level. */
  getLevel(): LogLevel { return this.level; }

  /** True when a record at `level` would be written under the current minimum level. */
  isEnabled(level: LogLevel): boolean { return ORDER[level] >= ORDER[this.level]; }

  debug(component: string, message: string): void { this.log("debug", component, message); }
  info(component: string, message: string): void { this.log("info", component, message); }
  warn(component: string, message: string): void { this.log("warn", component, message); }
  error(component: string, message: string): void { this.log("error", component, message); }
  fatal(component: string, message: string): void { this.log("fatal", component, message); }

  /**
   * Write one record: `[ISO timestamp] LEVEL [component] message`.
   *
   * Records below the current level are dropped. Writing is best-effort: the log
   * directory is created on demand, and a write failure never throws to the caller.
   *
   * @param level Severity of the record.
   * @param component Short tag naming the subsystem that produced the record.
   * @param message Record text; line breaks are collapsed so it stays on one line.
   */
  log(level: LogLevel, component: string, message: string): void {
    if (!this.isEnabled(level)) return;
    // Collapse line breaks in the component as well as the message: callers may
    // build the component from external input (e.g. `hook:<name>` taken from a
    // request body), and an embedded newline would split one record across lines.
    const line = `[${new Date().toISOString()}] ${level.toUpperCase().padEnd(5)} [${oneLine(component)}] ${oneLine(message)}\n`;
    try {
      mkdirSync(this.logDir, { recursive: true });
      appendFileSync(join(this.logDir, `session-mirror-${safeLogName(this.agentId)}.log`), line);
    } catch {
      // Best-effort. If the per-agent file can't be written, surface error/fatal to
      // stderr so it is not lost entirely (headless: lands in the orchestrator log).
      if (ORDER[level] >= ORDER.error) { try { console.error(line.trimEnd()); } catch { /* give up */ } }
    }
  }
}
|
|
87
|
+
|
|
88
|
+
// Line breaks would split one record across several log lines and break
// greppability; collapse them so a multi-line message stays one line. All three
// conventions are handled — "\r\n", a lone "\n", and a lone "\r" — each becoming a
// single " ⏎ " marker ("\r\n" is matched first so it yields one marker, not two).
function oneLine(message: string): string {
  return message.replace(/\r\n|\r|\n/g, " ⏎ ");
}
|
|
93
|
+
|
|
94
|
+
// Process-global logger. A runner process serves exactly one agent, so a singleton
|
|
95
|
+
// is the right scope; the runner calls configure() once it knows its id, and
|
|
96
|
+
// adapters import this instance directly (no constructor threading).
|
|
97
|
+
export const logger = new Logger();
|
package/src/runner.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { hostname } from "node:os";
|
|
2
|
-
import {
|
|
2
|
+
import { closeSync, mkdirSync, openSync, readSync, statSync, writeFileSync } from "node:fs";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
5
|
import type { AgentProfile, ContextState, Message, MessageSessionMeta, ProviderCapabilities, TaskStatusInput, WorkspaceMetadata } from "agent-relay-sdk";
|
|
@@ -13,6 +13,7 @@ import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssi
|
|
|
13
13
|
import { agentProfileProjectionReport } from "./profile-projection";
|
|
14
14
|
import { profileUsesHostProviderGlobals } from "./profile-home";
|
|
15
15
|
import { runtimeMetadata } from "./version";
|
|
16
|
+
import { logger, parseLogLevel } from "./logger";
|
|
16
17
|
import { ensureSessionScratch, reapSessionScratch, sweepStaleSessions, type SessionScratchLayout } from "./session-scratch";
|
|
17
18
|
|
|
18
19
|
interface RunnerOptions {
|
|
@@ -177,6 +178,14 @@ export class AgentRunner {
|
|
|
177
178
|
|
|
178
179
|
constructor(private readonly options: RunnerOptions) {
|
|
179
180
|
this.agentId = options.agentId ?? options.runnerId;
|
|
181
|
+
// Bind the process-global logger to this agent. AGENT_RELAY_SESSION_DEBUG=1 is
|
|
182
|
+
// kept as a back-compat alias for the verbose probe/emit lines, now expressed
|
|
183
|
+
// as log level "debug" (AGENT_RELAY_LOG_LEVEL still wins when both are set).
|
|
184
|
+
logger.configure({
|
|
185
|
+
agentId: this.agentId,
|
|
186
|
+
headless: options.headless,
|
|
187
|
+
...(this.sessionDebugVerbose && !parseLogLevel(process.env.AGENT_RELAY_LOG_LEVEL) ? { level: "debug" as const } : {}),
|
|
188
|
+
});
|
|
180
189
|
this.currentToken = options.token;
|
|
181
190
|
this.currentTokenJti = options.tokenJti;
|
|
182
191
|
this.currentTokenProfileId = options.tokenProfileId;
|
|
@@ -387,7 +396,7 @@ export class AgentRunner {
|
|
|
387
396
|
startedAt: this.options.startedAt,
|
|
388
397
|
}, null, 2) + "\n", { mode: 0o600 });
|
|
389
398
|
} catch (error) {
|
|
390
|
-
|
|
399
|
+
logger.error("runner", `failed to write runner info file: ${error}`);
|
|
391
400
|
}
|
|
392
401
|
}
|
|
393
402
|
|
|
@@ -403,7 +412,7 @@ export class AgentRunner {
|
|
|
403
412
|
const messages = await this.http.pollMessages({ for: this.agentId, unread: true, limit: 100 });
|
|
404
413
|
for (const message of messages) this.enqueueMessage(message);
|
|
405
414
|
} catch (error) {
|
|
406
|
-
|
|
415
|
+
logger.error("runner", `inbox bootstrap failed: ${error}`);
|
|
407
416
|
}
|
|
408
417
|
}
|
|
409
418
|
|
|
@@ -413,7 +422,7 @@ export class AgentRunner {
|
|
|
413
422
|
try {
|
|
414
423
|
await this.options.adapter.deliverInitialPrompt(this.process, prompt);
|
|
415
424
|
} catch (error) {
|
|
416
|
-
|
|
425
|
+
logger.error("runner", `initial prompt delivery failed: ${error}`);
|
|
417
426
|
}
|
|
418
427
|
}
|
|
419
428
|
|
|
@@ -450,7 +459,7 @@ export class AgentRunner {
|
|
|
450
459
|
status: "in_progress",
|
|
451
460
|
agentId: this.agentId,
|
|
452
461
|
metadata: { messageId: message.id, completedBy: "runner" },
|
|
453
|
-
}).catch((error) =>
|
|
462
|
+
}).catch((error) => logger.error("task", `task ${taskId} in_progress update failed: ${error}`));
|
|
454
463
|
// Runner owns claim + status here; drop the server's self-claim instruction
|
|
455
464
|
// so the agent doesn't improvise a stray claim send (see stripRunnerClaimedGuidance).
|
|
456
465
|
toDeliver = { ...message, body: stripRunnerClaimedGuidance(message.body) };
|
|
@@ -468,7 +477,7 @@ export class AgentRunner {
|
|
|
468
477
|
try {
|
|
469
478
|
const prepared = await messagesWithCachedAttachments(deliverable, this.http, {
|
|
470
479
|
agentId: this.agentId,
|
|
471
|
-
onError: (message) =>
|
|
480
|
+
onError: (message) => logger.error("runner", message),
|
|
472
481
|
});
|
|
473
482
|
await this.options.adapter.deliver(this.process, prepared);
|
|
474
483
|
for (const message of deliverable) {
|
|
@@ -477,7 +486,7 @@ export class AgentRunner {
|
|
|
477
486
|
}
|
|
478
487
|
} catch (error) {
|
|
479
488
|
failed = true;
|
|
480
|
-
if (shouldLogDeliveryFailure(error))
|
|
489
|
+
if (shouldLogDeliveryFailure(error)) logger.warn("delivery", `message delivery failed: ${error}`);
|
|
481
490
|
for (const message of deliverable) {
|
|
482
491
|
this.clearActiveClaim(message);
|
|
483
492
|
this.pendingMessages.set(message.id, message);
|
|
@@ -545,7 +554,7 @@ export class AgentRunner {
|
|
|
545
554
|
await this.http.deleteAgent(this.agentId).catch(() => {});
|
|
546
555
|
if (this.options.exitProcessOnShutdown !== false) {
|
|
547
556
|
setTimeout(() => void this.stop().catch((error) => {
|
|
548
|
-
|
|
557
|
+
logger.error("lifecycle", `stop after command failed: ${error}`);
|
|
549
558
|
}).finally(() => process.exit(0)), 10);
|
|
550
559
|
}
|
|
551
560
|
} else if (!this.stopped) {
|
|
@@ -680,7 +689,7 @@ export class AgentRunner {
|
|
|
680
689
|
|
|
681
690
|
if (this.shouldStopUnexpectedProviderExit(diagnostics)) {
|
|
682
691
|
const hasResumeId = typeof diagnostics.claudeResumeId === "string" && diagnostics.claudeResumeId.length > 0;
|
|
683
|
-
|
|
692
|
+
logger.warn("lifecycle", `${this.options.provider} exited; leaving agent offline for manual recovery`);
|
|
684
693
|
this.publishRunnerTimelineEvent({
|
|
685
694
|
status: "provider.restart_decision",
|
|
686
695
|
id: `provider-restart-decision-${this.providerSessionId}-${now}`,
|
|
@@ -708,7 +717,7 @@ export class AgentRunner {
|
|
|
708
717
|
}
|
|
709
718
|
|
|
710
719
|
if (runtimeMs < RAPID_EXIT_MS && recent.length > MAX_RAPID_UNEXPECTED_EXITS) {
|
|
711
|
-
|
|
720
|
+
logger.error("lifecycle", `provider session exited ${recent.length} times within ${Math.round(UNEXPECTED_EXIT_WINDOW_MS / 1000)}s; giving up`);
|
|
712
721
|
this.publishRunnerTimelineEvent({
|
|
713
722
|
status: "provider.restart_decision",
|
|
714
723
|
id: `provider-restart-decision-${this.providerSessionId}-${now}`,
|
|
@@ -732,7 +741,7 @@ export class AgentRunner {
|
|
|
732
741
|
}
|
|
733
742
|
|
|
734
743
|
const delayMs = Math.min(10_000, Math.max(500, 500 * recent.length));
|
|
735
|
-
|
|
744
|
+
logger.warn("lifecycle", `provider session exited unexpectedly after ${Math.round(runtimeMs / 1000)}s; restarting in ${delayMs}ms`);
|
|
736
745
|
this.publishRunnerTimelineEvent({
|
|
737
746
|
status: "provider.restart_decision",
|
|
738
747
|
id: `provider-restart-decision-${this.providerSessionId}-${now}`,
|
|
@@ -757,7 +766,7 @@ export class AgentRunner {
|
|
|
757
766
|
this.publishStatus();
|
|
758
767
|
this.scheduleDrain();
|
|
759
768
|
} catch (error) {
|
|
760
|
-
|
|
769
|
+
logger.error("lifecycle", `provider restart after unexpected exit failed: ${error}`);
|
|
761
770
|
this.setProviderStatus("error");
|
|
762
771
|
this.options.onProviderExit?.(1);
|
|
763
772
|
}
|
|
@@ -832,10 +841,10 @@ export class AgentRunner {
|
|
|
832
841
|
private handleBusError(code: string, message: string): void {
|
|
833
842
|
const action = runnerBusErrorAction(code, this.stopped);
|
|
834
843
|
if (action === "ignore") return;
|
|
835
|
-
|
|
844
|
+
logger.error("bus", `bus error ${code}: ${message}`);
|
|
836
845
|
if (action === "stop") {
|
|
837
846
|
void this.stop().catch((error) => {
|
|
838
|
-
|
|
847
|
+
logger.error("bus", `stop after bus error failed: ${error}`);
|
|
839
848
|
}).finally(() => process.exit(0));
|
|
840
849
|
}
|
|
841
850
|
}
|
|
@@ -1363,36 +1372,24 @@ export class AgentRunner {
|
|
|
1363
1372
|
this.logRunnerDiagnostic(`[runner] HTTP liveness update failed: ${suffix}`);
|
|
1364
1373
|
}
|
|
1365
1374
|
|
|
1375
|
+
// Runner operational diagnostics (HTTP liveness, token renewal failures). Routed
|
|
1376
|
+
// through the leveled logger at warn — see logger.ts. Kept as a thin wrapper so
|
|
1377
|
+
// the existing call sites and their `[runner]` framing stay put.
|
|
1366
1378
|
private logRunnerDiagnostic(message: string): void {
|
|
1367
|
-
|
|
1368
|
-
console.error(message);
|
|
1369
|
-
return;
|
|
1370
|
-
}
|
|
1371
|
-
try {
|
|
1372
|
-
const logDir = join(process.env.HOME || ".", ".agent-relay", "logs");
|
|
1373
|
-
mkdirSync(logDir, { recursive: true });
|
|
1374
|
-
appendFileSync(join(logDir, `runner-${safeLogName(this.agentId)}.log`), `[${new Date().toISOString()}] ${message}\n`);
|
|
1375
|
-
} catch {
|
|
1376
|
-
// Do not write runner diagnostics into an interactive provider TUI.
|
|
1377
|
-
}
|
|
1379
|
+
logger.warn("runner", message.replace(/^\[runner\]\s*/, ""));
|
|
1378
1380
|
}
|
|
1379
1381
|
|
|
1380
|
-
// Session-mirror diagnostics →
|
|
1381
|
-
//
|
|
1382
|
-
// to look when chat/terminal sync misbehaves.
|
|
1382
|
+
// Session-mirror diagnostics → the leveled logger (component "mirror"), written
|
|
1383
|
+
// to the dashboard-surfaced session-mirror-<agent>.log. Key transitions log at
|
|
1384
|
+
// info; the single place to look when chat/terminal sync misbehaves.
|
|
1383
1385
|
private sessionLog(message: string): void {
|
|
1384
|
-
|
|
1385
|
-
const logDir = join(process.env.HOME || ".", ".agent-relay", "logs");
|
|
1386
|
-
mkdirSync(logDir, { recursive: true });
|
|
1387
|
-
appendFileSync(join(logDir, `session-mirror-${safeLogName(this.agentId)}.log`), `[${new Date().toISOString()}] ${message}\n`);
|
|
1388
|
-
} catch {
|
|
1389
|
-
// best-effort
|
|
1390
|
-
}
|
|
1386
|
+
logger.info("mirror", message);
|
|
1391
1387
|
}
|
|
1392
1388
|
|
|
1393
|
-
// Verbose, high-frequency lines (per-probe, per-emit) — only
|
|
1389
|
+
// Verbose, high-frequency lines (per-probe, per-emit) — surfaced only at log
|
|
1390
|
+
// level "debug" (AGENT_RELAY_LOG_LEVEL=debug, or flip live via /log-level).
|
|
1394
1391
|
private sessionDebug(message: string): void {
|
|
1395
|
-
|
|
1392
|
+
logger.debug("mirror", message);
|
|
1396
1393
|
}
|
|
1397
1394
|
|
|
1398
1395
|
private ensureScratch(): void {
|
|
@@ -1657,7 +1654,7 @@ export class AgentRunner {
|
|
|
1657
1654
|
})
|
|
1658
1655
|
.then(() => true)
|
|
1659
1656
|
.catch((error) => {
|
|
1660
|
-
|
|
1657
|
+
logger.error("task", `task ${claim.taskId} completion update failed: ${error}`);
|
|
1661
1658
|
return false;
|
|
1662
1659
|
});
|
|
1663
1660
|
if (!ok) continue;
|
|
@@ -1962,10 +1959,6 @@ function httpErrorKey(error: unknown): string {
|
|
|
1962
1959
|
return String(error);
|
|
1963
1960
|
}
|
|
1964
1961
|
|
|
1965
|
-
function safeLogName(value: string): string {
|
|
1966
|
-
return value.replace(/[^a-zA-Z0-9_.-]+/g, "_").slice(0, 180);
|
|
1967
|
-
}
|
|
1968
|
-
|
|
1969
1962
|
function isContextState(value: unknown): value is ContextState {
|
|
1970
1963
|
if (!value || typeof value !== "object" || Array.isArray(value)) return false;
|
|
1971
1964
|
const state = value as Record<string, unknown>;
|