agent-relay-runner 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/plugins/claude/.claude-plugin/plugin.json +1 -1
- package/src/adapter.ts +1 -4
- package/src/adapters/claude-delivery.ts +1 -4
- package/src/adapters/codex.ts +1 -7
- package/src/attachment-cache.ts +1 -4
- package/src/config.ts +1 -4
- package/src/control-server.ts +1 -4
- package/src/index.ts +2 -1
- package/src/outbox.ts +303 -0
- package/src/relay-instructions.ts +36 -4
- package/src/reply-obligation-cache.ts +109 -0
- package/src/runner.ts +111 -32
- package/src/version.ts +4 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-relay-runner",
|
|
3
|
-
"version": "0.16.0",
|
|
3
|
+
"version": "0.18.0",
|
|
4
4
|
"description": "Unified provider lifecycle runner for Agent Relay",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"directory": "runner"
|
|
21
21
|
},
|
|
22
22
|
"dependencies": {
|
|
23
|
-
"agent-relay-sdk": "0.2.
|
|
23
|
+
"agent-relay-sdk": "0.2.9"
|
|
24
24
|
},
|
|
25
25
|
"devDependencies": {
|
|
26
26
|
"@types/bun": "latest",
|
package/src/adapter.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { AgentProfile, Message } from "agent-relay-sdk";
|
|
2
|
+
import { isRecord } from "agent-relay-sdk";
|
|
2
3
|
|
|
3
4
|
export type SemanticStatus = "idle" | "busy" | "offline" | "error";
|
|
4
5
|
type ProviderWorkKind = "provider-turn" | "subagent";
|
|
@@ -160,10 +161,6 @@ export const RELAY_CONTEXT = `[agent-relay] You are connected to Agent Relay, a
|
|
|
160
161
|
|
|
161
162
|
const PROVIDER_MESSAGE_BODY_PREVIEW_CHARS = 4000;
|
|
162
163
|
|
|
163
|
-
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
164
|
-
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
165
|
-
}
|
|
166
|
-
|
|
167
164
|
function attachmentRefs(message: Message): Record<string, unknown>[] {
|
|
168
165
|
const payloadRefs = message.payload?.attachments;
|
|
169
166
|
const topLevelRefs = (message as Message & { attachments?: unknown }).attachments;
|
package/src/adapters/claude-delivery.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { Message } from "agent-relay-sdk";
|
|
2
|
+
import { isRecord } from "agent-relay-sdk";
|
|
2
3
|
import { providerAttachmentText } from "../adapter";
|
|
3
4
|
|
|
4
5
|
const PROVIDER_MESSAGE_BODY_PREVIEW_CHARS = 4000;
|
|
@@ -30,10 +31,6 @@ function stripRelayScaffolding(body: string): string {
|
|
|
30
31
|
return lines.join("\n");
|
|
31
32
|
}
|
|
32
33
|
|
|
33
|
-
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
34
|
-
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
35
|
-
}
|
|
36
|
-
|
|
37
34
|
function isMemoryInjection(message: Message): boolean {
|
|
38
35
|
return isRecord(message.payload) && message.payload.memoryInjection === true;
|
|
39
36
|
}
|
package/src/adapters/codex.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { accessSync, constants, existsSync, readFileSync, realpathSync, readdirS
|
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
3
|
import { basename, join, resolve } from "node:path";
|
|
4
4
|
import type { ContextState, Message } from "agent-relay-sdk";
|
|
5
|
+
import { isRecord, stringValue } from "agent-relay-sdk";
|
|
5
6
|
import { profileAllowsRelayFeature, providerMessageText, RELAY_CONTEXT, type ManagedProcess, type ProviderAdapter, type ProviderConfig, type ProviderPermissionDecisionInput, type ProviderSessionEvent, type ProviderStatusUpdate, type RunnerSpawnConfig, type SpawnArgs, type TerminalAttachSpec } from "../adapter";
|
|
6
7
|
import { workspaceDepsNoteFromEnv } from "../relay-instructions";
|
|
7
8
|
import { logger } from "../logger";
|
|
@@ -742,10 +743,6 @@ function activeFlags(value: unknown): string[] {
|
|
|
742
743
|
return value.activeFlags.filter((flag): flag is string => typeof flag === "string" && flag.length > 0);
|
|
743
744
|
}
|
|
744
745
|
|
|
745
|
-
function stringValue(value: unknown): string | undefined {
|
|
746
|
-
return typeof value === "string" && value ? value : undefined;
|
|
747
|
-
}
|
|
748
|
-
|
|
749
746
|
function numberValue(value: unknown): number | undefined {
|
|
750
747
|
return typeof value === "number" && Number.isFinite(value) ? value : undefined;
|
|
751
748
|
}
|
|
@@ -759,9 +756,6 @@ function isContextState(value: unknown): value is ContextState {
|
|
|
759
756
|
typeof state.confidence === "string";
|
|
760
757
|
}
|
|
761
758
|
|
|
762
|
-
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
763
|
-
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
764
|
-
}
|
|
765
759
|
|
|
766
760
|
export function codexModelConfigArgs(model?: string, effort?: string): string[] {
|
|
767
761
|
const args: string[] = [];
|
package/src/attachment-cache.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { existsSync, mkdirSync, readdirSync, renameSync, rmSync, statSync, write
|
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
3
|
import { basename, join } from "node:path";
|
|
4
4
|
import type { Artifact, Message } from "agent-relay-sdk";
|
|
5
|
+
import { isRecord } from "agent-relay-sdk";
|
|
5
6
|
|
|
6
7
|
const DEFAULT_CACHE_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000;
|
|
7
8
|
|
|
@@ -25,10 +26,6 @@ interface CachedAttachment {
|
|
|
25
26
|
digest: string;
|
|
26
27
|
}
|
|
27
28
|
|
|
28
|
-
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
29
|
-
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
30
|
-
}
|
|
31
|
-
|
|
32
29
|
function attachmentRefs(message: Message): Record<string, unknown>[] {
|
|
33
30
|
const payloadRefs = message.payload?.attachments;
|
|
34
31
|
const topLevelRefs = (message as Message & { attachments?: unknown }).attachments;
|
package/src/config.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
2
|
import { homedir, hostname } from "node:os";
|
|
3
3
|
import { join, resolve } from "node:path";
|
|
4
|
+
import { stringValue } from "agent-relay-sdk";
|
|
4
5
|
import type { ProviderConfig } from "./adapter";
|
|
5
6
|
|
|
6
7
|
interface GlobalRunnerConfig {
|
|
@@ -115,10 +116,6 @@ function readJson(path: string): Record<string, unknown> {
|
|
|
115
116
|
}
|
|
116
117
|
}
|
|
117
118
|
|
|
118
|
-
function stringValue(value: unknown): string | undefined {
|
|
119
|
-
return typeof value === "string" && value.length > 0 ? value : undefined;
|
|
120
|
-
}
|
|
121
|
-
|
|
122
119
|
function positiveInteger(value: unknown): number | undefined {
|
|
123
120
|
return typeof value === "number" && Number.isSafeInteger(value) && value > 0 ? value : undefined;
|
|
124
121
|
}
|
package/src/control-server.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { Server, ServerWebSocket } from "bun";
|
|
2
2
|
import type { Message, ReplyObligation } from "agent-relay-sdk";
|
|
3
|
+
import { isRecord } from "agent-relay-sdk";
|
|
3
4
|
import type { ProviderPermissionDecisionInput, ProviderStatusEvent, SemanticStatus, TerminalAttachSpec } from "./adapter";
|
|
4
5
|
import { logger, parseLogLevel, LOG_LEVELS } from "./logger";
|
|
5
6
|
|
|
@@ -453,7 +454,3 @@ function parseJson(raw: string | Buffer): Record<string, unknown> | null {
|
|
|
453
454
|
return null;
|
|
454
455
|
}
|
|
455
456
|
}
|
|
456
|
-
|
|
457
|
-
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
458
|
-
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
459
|
-
}
|
package/src/index.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { AgentRunner } from "./runner";
|
|
|
8
8
|
import { loadGlobalConfig, loadProviderConfig, resolveCwd, runnerId } from "./config";
|
|
9
9
|
import { VERSION } from "./version";
|
|
10
10
|
import type { AgentProfile, WorkspaceMetadata } from "agent-relay-sdk";
|
|
11
|
+
import { RELAY_TOKEN_HEADER } from "agent-relay-sdk";
|
|
11
12
|
|
|
12
13
|
interface CliOptions {
|
|
13
14
|
provider: "claude" | "codex";
|
|
@@ -139,7 +140,7 @@ export async function resolveRunnerToken(input: {
|
|
|
139
140
|
method: "POST",
|
|
140
141
|
headers: {
|
|
141
142
|
"Content-Type": "application/json",
|
|
142
|
-
|
|
143
|
+
[RELAY_TOKEN_HEADER]: input.token,
|
|
143
144
|
},
|
|
144
145
|
body: JSON.stringify({
|
|
145
146
|
provider: input.provider,
|
package/src/outbox.ts
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { mkdirSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { logger } from "./logger";
|
|
6
|
+
|
|
7
|
+
// Phase 2 (#196) — the "nothing is ever lost" half. Runner→server events that used to be
|
|
8
|
+
// fire-and-forget over HTTP (session turns, reasoning/tool traces, prompt echoes, insights,
|
|
9
|
+
// hook-fatal reports) were silently dropped whenever the server was momentarily down. This
|
|
10
|
+
// is a durable, FIFO, disk-backed queue that:
|
|
11
|
+
// - survives Runner/server restart (bun:sqlite file under AGENT_RELAY_RUNNER_OUTBOX_DIR, else a directory in the OS temp dir),
|
|
12
|
+
// - stamps true event time (`occurredAt`) once at enqueue and preserves it through retries,
|
|
13
|
+
// - retries with capped exponential backoff, strictly in order (an append log must not
|
|
14
|
+
// reorder turns),
|
|
15
|
+
// - poisons a permanently-failing head after maxAttempts so it can't block the queue,
|
|
16
|
+
// - is bounded with a logged drop policy (never silently truncates).
|
|
17
|
+
//
|
|
18
|
+
// Status deliberately does NOT go through here: it rides the WebSocket bus, which is
|
|
19
|
+
// last-wins and self-heals on reconnect (so it already satisfies "coalesce, don't replay
|
|
20
|
+
// stale busy states"). The coalesce mode below exists so a future state event could migrate here.
|
|
21
|
+
|
|
22
|
+
/**
 * How an event is queued. `append` inserts a new row for every event; `coalesce` first
 * removes earlier un-poisoned rows that share the same dedupeKey.
 */
export type OutboxMode = "append" | "coalesce";

/** What a caller hands to `Outbox.enqueue`. */
export interface OutboxEventInput {
  kind: string;
  payload: unknown;
  mode?: OutboxMode;
  /** Required for coalesce mode: prior un-poisoned rows with the same dedupeKey are replaced. */
  dedupeKey?: string;
  /** Defaults to now. Set explicitly only to backdate (e.g. replaying a captured timestamp). */
  occurredAt?: number;
  /** Defaults to a stable derived key so server-side dedup makes retries exactly-once. */
  idempotencyKey?: string;
}

/** A queued event as it is presented to the transport. */
export interface OutboxRecord {
  seq: number;
  kind: string;
  mode: OutboxMode;
  occurredAt: number;
  idempotencyKey: string;
  payload: unknown;
  attempts: number;
}

/** The transport. Resolve = delivered (row deleted). Reject = failed (retried with backoff). */
export type OutboxSend = (record: OutboxRecord) => Promise<void>;

/** Construction options for `Outbox`; every numeric knob falls back to `DEFAULTS`. */
export interface OutboxOptions {
  agentId: string;
  send: OutboxSend;
  /** Storage directory. Defaults to AGENT_RELAY_RUNNER_OUTBOX_DIR, else a per-host temp dir. */
  dir?: string;
  maxRows?: number;
  maxAttempts?: number;
  baseBackoffMs?: number;
  maxBackoffMs?: number;
  pollMs?: number;
}

/** Fallback values for the optional numeric fields of `OutboxOptions`. */
const DEFAULTS = {
  maxRows: 5000,
  maxAttempts: 12,
  baseBackoffMs: 1_000,
  maxBackoffMs: 60_000,
  pollMs: 5_000,
};

/** Raw shape of a row read back from the `outbox` table (snake_case column names). */
interface Row {
  seq: number;
  kind: string;
  mode: string;
  occurred_at: number;
  idempotency_key: string;
  /** JSON text; parsed before it is handed to the transport. */
  payload: string;
  attempts: number;
  next_attempt_at: number;
  /** 0 or 1. */
  poisoned: number;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Durable, FIFO queue for runner→server events, stored in a bun:sqlite table.
 *
 * Rows are handed to the injected `send` transport strictly in `seq` order. A row is deleted
 * once `send` resolves. When `send` rejects, the row is retried with capped, jittered
 * exponential backoff and is marked poisoned after `maxAttempts` failures so it stops
 * blocking the rows behind it. The table is bounded at `maxRows`; overflow is dropped and
 * logged.
 */
export class Outbox {
  private readonly db: Database;
  private readonly agentId: string;
  private readonly send: OutboxSend;
  private readonly maxRows: number;
  private readonly maxAttempts: number;
  private readonly baseBackoffMs: number;
  private readonly maxBackoffMs: number;
  private readonly pollMs: number;
  readonly path: string;

  private draining = false;
  private rerun = false;
  private pollTimer?: ReturnType<typeof setInterval>;
  private dueTimer?: ReturnType<typeof setTimeout>;
  private stopped = false;
  // Set by close(). Once true the database handle must never be touched again, even by a
  // drain that was already awaiting `send` when close() ran.
  private closed = false;

  /**
   * Opens (creating if needed) the queue database and its `outbox` table.
   *
   * @param options Agent id, transport, and optional storage/limit overrides. Passing
   *   `dir: ":memory:"` uses an in-memory database instead of a file.
   */
  constructor(options: OutboxOptions) {
    this.agentId = options.agentId;
    this.send = options.send;
    this.maxRows = options.maxRows ?? DEFAULTS.maxRows;
    this.maxAttempts = options.maxAttempts ?? DEFAULTS.maxAttempts;
    this.baseBackoffMs = options.baseBackoffMs ?? DEFAULTS.baseBackoffMs;
    this.maxBackoffMs = options.maxBackoffMs ?? DEFAULTS.maxBackoffMs;
    this.pollMs = options.pollMs ?? DEFAULTS.pollMs;

    const dir = options.dir ?? process.env.AGENT_RELAY_RUNNER_OUTBOX_DIR ?? join(tmpdir(), "agent-relay-outbox");
    this.path = options.dir === ":memory:" ? ":memory:" : join(dir, `outbox-${safeName(this.agentId)}.sqlite`);
    if (this.path !== ":memory:") mkdirSync(dirname(this.path), { recursive: true });

    this.db = new Database(this.path, { create: true });
    this.db.exec("PRAGMA journal_mode = WAL");
    this.db.exec("PRAGMA busy_timeout = 2000");
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS outbox (
        seq INTEGER PRIMARY KEY AUTOINCREMENT,
        kind TEXT NOT NULL,
        mode TEXT NOT NULL DEFAULT 'append',
        dedupe_key TEXT,
        occurred_at INTEGER NOT NULL,
        idempotency_key TEXT NOT NULL,
        payload TEXT NOT NULL,
        attempts INTEGER NOT NULL DEFAULT 0,
        next_attempt_at INTEGER NOT NULL DEFAULT 0,
        poisoned INTEGER NOT NULL DEFAULT 0,
        created_at INTEGER NOT NULL
      )
    `);
    // A restart is a fresh start: clear any backoff timers left by the prior process so
    // pending events get an immediate retry (the down server may now be back). `attempts`
    // is kept so the poison threshold still counts cumulative failures.
    this.db.exec("UPDATE outbox SET next_attempt_at = 0 WHERE next_attempt_at > 0");
  }

  /**
   * Persists an event and schedules a drain.
   *
   * @param input The event; `occurredAt` defaults to now and `idempotencyKey` to a key
   *   derived from agent id, kind, `occurredAt` and a hash of the serialized payload.
   * @returns The `seq` assigned to the new row.
   * @throws Error if the outbox is stopped, or if coalesce mode is used without a dedupeKey.
   */
  enqueue(input: OutboxEventInput): number {
    if (this.stopped) throw new Error("outbox is stopped");
    const mode: OutboxMode = input.mode ?? "append";
    const occurredAt = input.occurredAt ?? Date.now();
    const payloadJson = JSON.stringify(input.payload ?? null);
    const idempotencyKey = input.idempotencyKey ?? `${this.agentId}:${input.kind}:${occurredAt}:${shortHash(payloadJson)}`;

    if (mode === "coalesce") {
      if (!input.dedupeKey) throw new Error("coalesce mode requires a dedupeKey");
      this.db.query("DELETE FROM outbox WHERE dedupe_key = ? AND poisoned = 0").run(input.dedupeKey);
    }

    const info = this.db
      .query(`INSERT INTO outbox (kind, mode, dedupe_key, occurred_at, idempotency_key, payload, created_at)
              VALUES (?, ?, ?, ?, ?, ?, ?)`)
      .run(input.kind, mode, input.dedupeKey ?? null, occurredAt, idempotencyKey, payloadJson, Date.now());
    const seq = Number(info.lastInsertRowid);

    this.enforceBound();
    // Defer the drain to a microtask so a synchronous burst of enqueues (e.g. several
    // coalesce updates) all land — and coalesce — before the pump pulls the head.
    queueMicrotask(() => { void this.drain(); });
    return seq;
  }

  /**
   * Keeps the table at or below `maxRows`. Victims are chosen poisoned-first, then oldest
   * by seq, and the drop is always logged.
   */
  private enforceBound(): void {
    const { n } = this.db.query("SELECT count(*) AS n FROM outbox").get() as { n: number };
    if (n <= this.maxRows) return;
    const overflow = n - this.maxRows;
    // One statement with a single bound parameter: the victim set is selected inside SQLite
    // rather than expanded into a dynamic `IN (?,?,…)` list, so its size is not limited by
    // how many parameters one statement may bind.
    const { changes } = this.db
      .query("DELETE FROM outbox WHERE seq IN (SELECT seq FROM outbox ORDER BY poisoned DESC, seq ASC LIMIT ?)")
      .run(overflow);
    if (changes === 0) return;
    logger.warn("outbox", `bound exceeded (${n}/${this.maxRows}) — dropped ${changes} oldest event(s)`);
  }

  /** Begins the background pump: an initial drain plus a poll timer as a backstop. */
  start(): void {
    if (this.pollTimer || this.stopped) return;
    void this.drain();
    this.pollTimer = setInterval(() => { void this.drain(); }, this.pollMs);
    this.pollTimer.unref?.();
  }

  /**
   * Processes the queue strictly oldest-first. Concurrent calls are coalesced: a call made
   * while a drain is running only requests one more pass at the end, so an enqueue that
   * lands during a send is not missed.
   */
  async drain(): Promise<void> {
    if (this.stopped) return;
    if (this.draining) { this.rerun = true; return; }
    this.draining = true;
    try {
      do {
        this.rerun = false;
        await this.drainOnce();
      } while (this.rerun && !this.stopped);
    } finally {
      this.draining = false;
    }
  }

  /** One pass: deliver heads until the queue is empty, the head is not yet due, or a send fails. */
  private async drainOnce(): Promise<void> {
    for (;;) {
      if (this.stopped) return;
      const row = this.db
        .query("SELECT * FROM outbox WHERE poisoned = 0 ORDER BY seq ASC LIMIT 1")
        .get() as Row | null;
      if (!row) return;

      const now = Date.now();
      if (row.next_attempt_at > now) {
        // Head isn't due yet. Don't reorder past it (FIFO) — schedule a wake-up and stop.
        this.scheduleDue(row.next_attempt_at - now);
        return;
      }

      const record: OutboxRecord = {
        seq: row.seq,
        kind: row.kind,
        mode: row.mode as OutboxMode,
        occurredAt: row.occurred_at,
        idempotencyKey: row.idempotency_key,
        payload: safeParse(row.payload),
        attempts: row.attempts,
      };

      try {
        await this.send(record);
        // close() may have run while the send was in flight; the handle is gone, so leave
        // the row on disk for the next process instead of throwing on a closed database.
        if (this.closed) return;
        this.db.query("DELETE FROM outbox WHERE seq = ?").run(row.seq);
      } catch (error) {
        if (this.closed) return;
        const attempts = row.attempts + 1;
        const reason = error instanceof Error ? error.message : String(error);
        if (attempts >= this.maxAttempts) {
          this.db.query("UPDATE outbox SET attempts = ?, poisoned = 1 WHERE seq = ?").run(attempts, row.seq);
          logger.fatal("outbox", `event seq=${row.seq} kind=${row.kind} poisoned after ${attempts} attempts: ${reason}`);
          // Move on — the next iteration picks the new head (poison no longer blocks).
          continue;
        }
        const delay = this.backoff(attempts);
        // Measure the backoff from the moment the attempt failed, not from before the send
        // started: a send that takes long to fail would otherwise already be "due" again.
        const failedAt = Date.now();
        this.db.query("UPDATE outbox SET attempts = ?, next_attempt_at = ? WHERE seq = ?").run(attempts, failedAt + delay, row.seq);
        logger.debug("outbox", `event seq=${row.seq} kind=${row.kind} retry ${attempts}/${this.maxAttempts} in ${delay}ms: ${reason}`);
        this.scheduleDue(delay);
        return; // head is now scheduled; stop until it's due (preserve order)
      }
    }
  }

  /** Capped exponential backoff with jitter; the result is never below half the capped value. */
  private backoff(attempts: number): number {
    const exp = Math.min(this.maxBackoffMs, this.baseBackoffMs * 2 ** (attempts - 1));
    return Math.round(exp / 2 + Math.random() * (exp / 2));
  }

  /** Arms a one-shot wake-up for the head row; a no-op if one is already armed or the outbox is stopped. */
  private scheduleDue(delayMs: number): void {
    if (this.stopped || this.dueTimer) return;
    this.dueTimer = setTimeout(() => {
      this.dueTimer = undefined;
      void this.drain();
    }, Math.max(0, delayMs));
    this.dueTimer.unref?.();
  }

  /** Number of rows still eligible for delivery (observability / tests). */
  pendingCount(): number {
    return (this.db.query("SELECT count(*) AS n FROM outbox WHERE poisoned = 0").get() as { n: number }).n;
  }

  /** Number of rows that exhausted their attempts (observability / tests). */
  poisonedCount(): number {
    return (this.db.query("SELECT count(*) AS n FROM outbox WHERE poisoned = 1").get() as { n: number }).n;
  }

  /** Stops the pump and cancels timers. The database stays open; `enqueue` throws afterwards. */
  stop(): void {
    this.stopped = true;
    if (this.pollTimer) clearInterval(this.pollTimer);
    this.pollTimer = undefined;
    if (this.dueTimer) clearTimeout(this.dueTimer);
    this.dueTimer = undefined;
  }

  /** Stops the pump and closes the database handle. Safe to call more than once. */
  close(): void {
    this.stop();
    this.closed = true;
    try { this.db.close(); } catch { /* already closed */ }
  }
}
|
|
285
|
+
|
|
286
|
+
/**
 * Turns an arbitrary identifier into a filename-safe token: every run of characters outside
 * `[a-zA-Z0-9_.-]` becomes a single underscore and the result is cut to 180 characters.
 * An empty result falls back to "agent".
 */
function safeName(value: string): string {
  const sanitized = value.replace(/[^a-zA-Z0-9_.-]+/g, "_");
  const truncated = sanitized.slice(0, 180);
  return truncated === "" ? "agent" : truncated;
}
|
|
289
|
+
|
|
290
|
+
/** Parses JSON text, yielding `null` instead of throwing when the text is not valid JSON. */
function safeParse(json: string): unknown {
  let parsed: unknown = null;
  try {
    parsed = JSON.parse(json);
  } catch {
    // Malformed input: keep the null default.
  }
  return parsed;
}
|
|
293
|
+
|
|
294
|
+
/**
 * Small, fast, stable string hash (FNV-1a style, 32-bit, folded over UTF-16 code units) —
 * enough to tell apart payloads that share a kind and timestamp in the idempotency key.
 * Not security-sensitive. Returns the unsigned hash rendered in base 36.
 */
function shortHash(value: string): string {
  const FNV_PRIME = 0x01000193;
  let hash = 0x811c9dc5; // FNV offset basis
  let index = 0;
  while (index < value.length) {
    hash = Math.imul(hash ^ value.charCodeAt(index), FNV_PRIME);
    index += 1;
  }
  return (hash >>> 0).toString(36);
}
|
package/src/relay-instructions.ts
CHANGED
|
@@ -45,15 +45,47 @@ export function workspaceDepsNote(input: { mode?: string | null; depsMode?: stri
|
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
-
/**
|
|
48
|
+
/**
 * Builds the lifecycle briefing for an agent spawned into an isolated workspace (#205).
 * The text states that the agent sits on its own branch off a base that other agents keep
 * moving, and how to hand work off, so it neither rediscovers this every session nor
 * hand-rolls a rebase/push.
 *
 * @param input Resolved workspace metadata. `branch` and `baseRef` fall back to generic
 *   wording when missing or empty.
 * @returns The two-line note, or "" when `mode` is anything other than "isolated".
 */
export function workspaceLifecycleNote(input: { mode?: string | null; branch?: string | null; baseRef?: string | null }): string {
  const { mode, branch: branchName, baseRef } = input;
  if (mode !== "isolated") {
    return "";
  }

  // Wrap a ref in backticks so it renders as inline code in the note.
  const asCode = (ref: string): string => "`" + ref + "`";
  const branch = branchName ? asCode(branchName) : "an isolated agent branch";
  const base = baseRef ? asCode(baseRef) : "the base branch";

  const situation = `[agent-relay] Isolated workspace: you are in a git worktree on branch ${branch}, based on ${base} — NOT the main checkout. Other agents may work in parallel and land to ${base}, so ${base} will move under you. That is expected; don't fight it.`;
  const handoff = `Do NOT manually rebase, merge, or \`git push\` your branch. Just commit your work here. When the task is done, run \`agent-relay workspace ready\` — Relay rebases onto the latest ${base}, lands your work, and pushes for you. (\`agent-relay workspace status\` shows the current state.)`;
  return `${situation}\n${handoff}`;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Caveat for untracked paths symlinked from main into an isolated worktree
 * (WorkspaceConfig.symlinkPaths, e.g. AGENTS.md, .claude-rig). Edits to these
 * write THROUGH to the main checkout — the agent must know so it doesn't mutate
 * shared config thinking it's worktree-local.
 *
 * @param linked Paths that were symlinked. The value may originate from parsed JSON that
 *   was only cast, not validated, so anything that is not an array, and any entry that is
 *   not a non-empty string, is ignored rather than rendered or allowed to throw.
 * @returns The caveat listing the usable paths, or "" when there are none.
 */
export function workspaceSymlinksNote(linked: string[]): string {
  const paths: string[] = Array.isArray(linked)
    ? linked.filter((entry): entry is string => typeof entry === "string" && entry.length > 0)
    : [];
  if (paths.length === 0) return "";
  return `[agent-relay] Isolated workspace: these untracked paths are SYMLINKED from the main checkout: ${paths.join(", ")}. They resolve to the real files in main, so editing or deleting them writes THROUGH to main — treat them as read-only unless you intend to change main.`;
}
|
|
75
|
+
|
|
76
|
+
/** Resolve the workspace caveats from the runner/monitor environment.
|
|
49
77
|
* AGENT_RELAY_WORKSPACE_JSON carries the resolved workspace metadata (mode +
|
|
50
|
-
* deps) and is the authoritative source. Best-effort: never throws. */
|
|
78
|
+
* deps + symlinks) and is the authoritative source. Best-effort: never throws. */
|
|
51
79
|
export function workspaceDepsNoteFromEnv(env: Record<string, string | undefined> = process.env): string {
|
|
52
80
|
const json = env.AGENT_RELAY_WORKSPACE_JSON;
|
|
53
81
|
if (!json) return "";
|
|
54
82
|
try {
|
|
55
|
-
const parsed = JSON.parse(json) as { mode?: string; deps?: { mode?: string } };
|
|
56
|
-
return
|
|
83
|
+
const parsed = JSON.parse(json) as { mode?: string; branch?: string; baseRef?: string; deps?: { mode?: string }; symlinks?: { linked?: string[] } };
|
|
84
|
+
return [
|
|
85
|
+
workspaceLifecycleNote({ mode: parsed.mode ?? null, branch: parsed.branch ?? null, baseRef: parsed.baseRef ?? null }),
|
|
86
|
+
workspaceDepsNote({ mode: parsed.mode ?? null, depsMode: parsed.deps?.mode ?? null }),
|
|
87
|
+
parsed.mode === "isolated" ? workspaceSymlinksNote(parsed.symlinks?.linked ?? []) : "",
|
|
88
|
+
].filter(Boolean).join("\n\n");
|
|
57
89
|
} catch {
|
|
58
90
|
return "";
|
|
59
91
|
}
|
package/src/reply-obligation-cache.ts
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import type { ReplyObligation } from "agent-relay-sdk";
|
|
2
|
+
import { logger } from "./logger";
|
|
3
|
+
|
|
4
|
+
// Phase 2 (#196) — the crux. The Claude Stop hook used to ask the server, synchronously
|
|
5
|
+
// and in the hot path, "does this agent owe a reply?" before clearing the turn. A slow
|
|
6
|
+
// server answer (the unindexed reply_to scan, #199) blew the hook's timeout and wedged the
|
|
7
|
+
// agent in `busy` forever. The fix: the hook asks the Runner, the Runner answers instantly
|
|
8
|
+
// from this local snapshot, and the snapshot is refreshed from the server only in the
|
|
9
|
+
// background — never on the path that ends a turn.
|
|
10
|
+
//
|
|
11
|
+
// Design rules:
|
|
12
|
+
// - `get()` is synchronous, never throws, never touches the network.
|
|
13
|
+
// - `refresh()` is the only thing that talks to the server; it coalesces concurrent calls
|
|
14
|
+
// and, on failure, keeps the last-known snapshot (stale-but-serving beats blocking).
|
|
15
|
+
// - A background interval keeps the snapshot warm; `markDirty()` requests an extra,
|
|
16
|
+
// debounced refresh when state likely just changed (a message arrived, a turn ended).
|
|
17
|
+
|
|
18
|
+
/** Loads the current reply obligations from the server. */
export type ReplyObligationFetch = () => Promise<ReplyObligation[]>;

/** Construction options for `ReplyObligationCache`. */
export interface ReplyObligationCacheOptions {
  fetch: ReplyObligationFetch;
  /** Background freshness backstop. Default 10s — well under any turn cadence, cheap. */
  intervalMs?: number;
  /**
   * Debounce window for markDirty()-triggered refreshes so a burst of events
   * (e.g. a fan-out of messages) collapses into one server round-trip.
   */
  dirtyDebounceMs?: number;
}

/** Used when `intervalMs` is not supplied. */
const DEFAULT_INTERVAL_MS = 10_000;
/** Used when `dirtyDebounceMs` is not supplied. */
const DEFAULT_DIRTY_DEBOUNCE_MS = 400;
|
|
31
|
+
|
|
32
|
+
/**
 * In-memory snapshot of the agent's reply obligations.
 *
 * `get()` answers synchronously from the snapshot and never touches the network. Only
 * `refresh()` calls the injected `fetch`; a failed fetch leaves the previous snapshot in
 * place. `start()` keeps the snapshot warm on an interval and `markDirty()` asks for an
 * extra, debounced refresh.
 */
export class ReplyObligationCache {
  private readonly fetch: ReplyObligationFetch;
  private readonly intervalMs: number;
  private readonly dirtyDebounceMs: number;

  private snapshot: ReplyObligation[] = [];
  private lastRefreshedAt = 0;
  private inFlight: Promise<void> | null = null;
  private intervalTimer?: ReturnType<typeof setInterval>;
  private dirtyTimer?: ReturnType<typeof setTimeout>;
  private stopped = false;
  // True when a markDirty() refresh came due while a fetch was already running. That fetch
  // may have been answered before the change that triggered markDirty(), so one follow-up
  // fetch is issued as soon as it settles.
  private dirtyDuringRefresh = false;

  /** @param options The fetch function plus optional interval/debounce overrides. */
  constructor(options: ReplyObligationCacheOptions) {
    this.fetch = options.fetch;
    this.intervalMs = options.intervalMs ?? DEFAULT_INTERVAL_MS;
    this.dirtyDebounceMs = options.dirtyDebounceMs ?? DEFAULT_DIRTY_DEBOUNCE_MS;
  }

  /** Synchronous, hot-path-safe read. Returns a copy so callers can't mutate the snapshot. */
  get(): ReplyObligation[] {
    return this.snapshot.slice();
  }

  /** Epoch milliseconds of the last successful refresh, or 0 if none has succeeded yet. */
  getLastRefreshedAt(): number {
    return this.lastRefreshedAt;
  }

  /** Begins the background freshness loop and primes the first snapshot immediately. */
  start(): void {
    if (this.intervalTimer || this.stopped) return;
    void this.refresh();
    this.intervalTimer = setInterval(() => { void this.refresh(); }, this.intervalMs);
    // Don't keep the process alive solely for cache refreshes.
    this.intervalTimer.unref?.();
  }

  /** Cancels all timers and turns later refresh()/markDirty() calls into no-ops. */
  stop(): void {
    this.stopped = true;
    this.dirtyDuringRefresh = false;
    if (this.intervalTimer) clearInterval(this.intervalTimer);
    this.intervalTimer = undefined;
    if (this.dirtyTimer) clearTimeout(this.dirtyTimer);
    this.dirtyTimer = undefined;
  }

  /**
   * Requests a refresh because state likely changed (message arrived / turn ended).
   * Debounced so a burst collapses into a single server round-trip.
   */
  markDirty(): void {
    if (this.stopped || this.dirtyTimer) return;
    this.dirtyTimer = setTimeout(() => {
      this.dirtyTimer = undefined;
      if (this.inFlight) {
        // Joining the running fetch could return data older than the change that made us
        // dirty; remember to fetch once more when it finishes.
        this.dirtyDuringRefresh = true;
        return;
      }
      void this.refresh();
    }, this.dirtyDebounceMs);
    this.dirtyTimer.unref?.();
  }

  /**
   * Fetches from the server and replaces the snapshot. Concurrent callers share a single
   * in-flight request. Never rejects — a failed fetch leaves the prior snapshot in place.
   *
   * @returns A promise that settles when the shared fetch has finished.
   */
  refresh(): Promise<void> {
    if (this.stopped) return Promise.resolve();
    if (this.inFlight) return this.inFlight;
    this.inFlight = this.doRefresh().finally(() => {
      this.inFlight = null;
      if (this.dirtyDuringRefresh) {
        this.dirtyDuringRefresh = false;
        void this.refresh(); // no-op if stop() ran in the meantime
      }
    });
    return this.inFlight;
  }

  /** Performs one fetch; swallows and debug-logs any failure. */
  private async doRefresh(): Promise<void> {
    try {
      const obligations = await this.fetch();
      if (this.stopped) return;
      this.snapshot = Array.isArray(obligations) ? obligations : [];
      this.lastRefreshedAt = Date.now();
    } catch (error) {
      // Server-down is a non-event: keep serving the last snapshot. Debug, not error —
      // this is expected during outages and must not spam the log.
      logger.debug("obligation-cache", `refresh failed, serving cached snapshot (${this.snapshot.length}): ${error instanceof Error ? error.message : String(error)}`);
    }
  }
}
|
package/src/runner.ts
CHANGED
|
@@ -2,13 +2,15 @@ import { hostname } from "node:os";
|
|
|
2
2
|
import { closeSync, mkdirSync, openSync, readSync, statSync, writeFileSync } from "node:fs";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
|
-
import type { AgentProfile, ContextState, Message, MessageSessionMeta, ProviderCapabilities, TaskStatusInput, WorkspaceMetadata } from "agent-relay-sdk";
|
|
5
|
+
import type { AgentProfile, ContextState, Message, MessageSessionMeta, ProviderCapabilities, SendMessageInput, TaskStatusInput, WorkspaceMetadata } from "agent-relay-sdk";
|
|
6
6
|
import { RelayBusClient, RelayHttpClient } from "agent-relay-sdk";
|
|
7
7
|
import { contextStateFromProbeMetrics, readContextProbeState } from "agent-relay-sdk/context-probe";
|
|
8
8
|
import type { ManagedProcess, ProviderAdapter, ProviderConfig, ProviderPermissionDecision, ProviderPermissionDecisionInput, ProviderSessionEvent, ProviderStatusUpdate, RunnerSpawnConfig, SemanticStatus, TerminalAttachSpec } from "./adapter";
|
|
9
9
|
import { messagesWithCachedAttachments } from "./attachment-cache";
|
|
10
10
|
import { ClaimTracker } from "./claim-tracker";
|
|
11
11
|
import { startControlServer, type ControlServer } from "./control-server";
|
|
12
|
+
import { ReplyObligationCache } from "./reply-obligation-cache";
|
|
13
|
+
import { Outbox, type OutboxRecord } from "./outbox";
|
|
12
14
|
import { extractLastAssistantTurn, extractFinalAssistantMessage, extractHookAssistantMessage, extractLatestTurnSteps, transcriptLooksComplete, analyzeSession } from "./adapters/claude-transcript";
|
|
13
15
|
import { agentProfileProjectionReport } from "./profile-projection";
|
|
14
16
|
import { profileUsesHostProviderGlobals } from "./profile-home";
|
|
@@ -116,6 +118,13 @@ export class AgentRunner {
|
|
|
116
118
|
private readonly claims = new ClaimTracker();
|
|
117
119
|
private readonly http: RelayHttpClient;
|
|
118
120
|
private readonly bus: RelayBusClient;
|
|
121
|
+
// Phase 2 (#196): the Stop hook reads reply obligations from this local snapshot, never
|
|
122
|
+
// from the server — so a slow server can no longer wedge a turn (the crux fix).
|
|
123
|
+
private readonly obligationCache: ReplyObligationCache;
|
|
124
|
+
// Phase 2 (#196): Runner→server append-log events (session turns, reasoning, prompts,
|
|
125
|
+
// insights, hook-fatal) go through this durable, disk-backed, timestamped queue instead of
|
|
126
|
+
// direct fire-and-forget HTTP — so nothing is lost across a server/Runner restart.
|
|
127
|
+
private readonly outbox: Outbox;
|
|
119
128
|
private currentToken?: string;
|
|
120
129
|
private currentTokenJti?: string;
|
|
121
130
|
private currentTokenProfileId?: string;
|
|
@@ -192,6 +201,12 @@ export class AgentRunner {
|
|
|
192
201
|
this.currentTokenExpiresAt = options.tokenExpiresAt;
|
|
193
202
|
const runtime = runtimeMetadata(options.provider);
|
|
194
203
|
this.http = new RelayHttpClient({ baseUrl: options.relayUrl, token: this.currentToken });
|
|
204
|
+
this.obligationCache = new ReplyObligationCache({ fetch: () => this.http.listReplyObligations(this.agentId) });
|
|
205
|
+
// Co-locate the durable outbox with the runner's runtime state (survives reboot) when the
|
|
206
|
+
// orchestrator told us where that is; otherwise the Outbox falls back to a temp dir.
|
|
207
|
+
const outboxDir = process.env.AGENT_RELAY_RUNNER_OUTBOX_DIR
|
|
208
|
+
?? (process.env.AGENT_RELAY_RUNNER_INFO_FILE ? join(dirname(process.env.AGENT_RELAY_RUNNER_INFO_FILE), "outbox") : undefined);
|
|
209
|
+
this.outbox = new Outbox({ agentId: this.agentId, dir: outboxDir, send: (record) => this.deliverOutboxEvent(record) });
|
|
195
210
|
this.bus = new RelayBusClient({
|
|
196
211
|
url: relayBusUrl(options.relayUrl),
|
|
197
212
|
role: "provider",
|
|
@@ -260,10 +275,13 @@ export class AgentRunner {
|
|
|
260
275
|
this.control = startControlServer({
|
|
261
276
|
onStatus: (status) => this.setProviderStatus(status),
|
|
262
277
|
onTerminalAttachSpec: () => this.terminalAttachSpec(),
|
|
263
|
-
|
|
278
|
+
// Hot-path-safe: answered instantly from the local snapshot, never a server
|
|
279
|
+
// round-trip. The snapshot is kept warm by the background refresh below (#196).
|
|
280
|
+
onReplyObligations: () => Promise.resolve(this.obligationCache.get()),
|
|
264
281
|
onSessionTurn: (input) => this.publishSessionTurn(input),
|
|
265
282
|
onUserPrompt: (input) => this.handleUserPrompt(input),
|
|
266
283
|
onSessionEnd: (input) => this.handleSessionEnd(input),
|
|
284
|
+
onHookFatal: (report) => this.reportHookFatal(report),
|
|
267
285
|
});
|
|
268
286
|
this.writeRunnerInfoFile();
|
|
269
287
|
this.options.adapter.onStatusChange((status) => {
|
|
@@ -277,12 +295,19 @@ export class AgentRunner {
|
|
|
277
295
|
if (runnerShouldResolveProviderExit(semanticStatus, this.exitCommandInProgress)) this.options.onProviderExit?.(semanticStatus === "offline" ? 0 : 1);
|
|
278
296
|
});
|
|
279
297
|
this.options.adapter.onSessionEvent?.((event) => { void this.publishProviderSessionEvent(event); });
|
|
280
|
-
this.bus.on("message.new", (message) =>
|
|
298
|
+
this.bus.on("message.new", (message) => {
|
|
299
|
+
// A delivered message may create a new reply obligation — warm the snapshot so the
|
|
300
|
+
// next turn-end sees it without a hot-path server read.
|
|
301
|
+
this.obligationCache.markDirty();
|
|
302
|
+
this.enqueueMessage(message as Message);
|
|
303
|
+
});
|
|
281
304
|
this.bus.on("command", (type, params, commandId, command) => {
|
|
282
305
|
void this.handleCommand(type, params, commandId, command);
|
|
283
306
|
});
|
|
284
307
|
this.bus.on("error", (code, message) => this.handleBusError(String(code), String(message)));
|
|
285
308
|
await this.bus.connect();
|
|
309
|
+
this.obligationCache.start();
|
|
310
|
+
this.outbox.start();
|
|
286
311
|
this.ensureScratch();
|
|
287
312
|
void this.sweepStaleScratch();
|
|
288
313
|
this.process = await this.spawnProvider();
|
|
@@ -322,6 +347,8 @@ export class AgentRunner {
|
|
|
322
347
|
this.tokenRenewTimer = undefined;
|
|
323
348
|
this.disarmBusyReconciler();
|
|
324
349
|
this.stopReasoningTail();
|
|
350
|
+
this.obligationCache.stop();
|
|
351
|
+
this.outbox.close();
|
|
325
352
|
this.control?.stop();
|
|
326
353
|
await this.bus.close();
|
|
327
354
|
}
|
|
@@ -927,13 +954,10 @@ export class AgentRunner {
|
|
|
927
954
|
replyToMessageId = pendingPrompt;
|
|
928
955
|
this.pendingPromptMessageId = undefined;
|
|
929
956
|
} else {
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
} catch {
|
|
935
|
-
// fall through and capture without correlation
|
|
936
|
-
}
|
|
957
|
+
// Correlation-only (threading + obligation clearing) — the local snapshot is fresh
|
|
958
|
+
// enough and never blocks the response-capture path (#196).
|
|
959
|
+
const obligation = [...this.obligationCache.get()].reverse().find((o) => o.from === "user");
|
|
960
|
+
replyToMessageId = obligation?.messageId;
|
|
937
961
|
}
|
|
938
962
|
|
|
939
963
|
// The Stop hook can fire before the final assistant entry is flushed to disk.
|
|
@@ -975,31 +999,86 @@ export class AgentRunner {
|
|
|
975
999
|
...(replyToMessageId ? { replyTo: replyToMessageId } : {}),
|
|
976
1000
|
session: { type: "response", origin: "provider", ...(turnId ? { turnId } : {}) },
|
|
977
1001
|
});
|
|
1002
|
+
// The agent's reply may have cleared an obligation — refresh the snapshot so the next
|
|
1003
|
+
// turn-end doesn't re-prompt for a message already answered (#196).
|
|
1004
|
+
if (replyToMessageId) this.obligationCache.markDirty();
|
|
978
1005
|
}
|
|
979
1006
|
|
|
980
1007
|
// Post one session-mirror event (prompt echo, assistant response, reasoning or
|
|
981
1008
|
// tool step) as a `kind: "session"` relay message tagged with payload.session so
|
|
982
1009
|
// the dashboard can render the live provider session faithfully. Display-only:
|
|
983
1010
|
// session messages are never delivered back into a provider.
|
|
984
|
-
private
|
|
1011
|
+
private publishSessionEvent(input: {
|
|
985
1012
|
from: string;
|
|
986
1013
|
to: string;
|
|
987
1014
|
body: string;
|
|
988
1015
|
session: MessageSessionMeta;
|
|
989
1016
|
replyTo?: number;
|
|
990
|
-
}):
|
|
991
|
-
|
|
992
|
-
|
|
1017
|
+
}): void {
|
|
1018
|
+
// Durable, ordered, timestamped (#196): the actual POST happens in deliverOutboxEvent,
|
|
1019
|
+
// retried until it lands. occurredAt is stamped now so a queued event reports when it
|
|
1020
|
+
// truly happened, not when the server finally accepted it.
|
|
1021
|
+
this.outbox.enqueue({
|
|
1022
|
+
kind: "session-message",
|
|
1023
|
+
payload: {
|
|
993
1024
|
from: input.from,
|
|
994
1025
|
to: input.to,
|
|
995
1026
|
...(input.replyTo ? { replyTo: input.replyTo } : {}),
|
|
996
1027
|
kind: "session",
|
|
997
1028
|
body: input.body,
|
|
998
1029
|
payload: { session: { provider: this.options.provider, ...input.session } },
|
|
1030
|
+
} satisfies SendMessageInput,
|
|
1031
|
+
});
|
|
1032
|
+
}
|
|
1033
|
+
|
|
1034
|
+
// Outbox transport: turns one queued record into the matching HTTP call.
// Returning normally acknowledges the record; throwing asks the outbox to retry it.
// The record's occurredAt (and, for session messages, its idempotencyKey) is merged
// into the request so a retried event keeps its original event time and key.
private async deliverOutboxEvent(record: OutboxRecord): Promise<void> {
  try {
    switch (record.kind) {
      case "session-message":
        await this.http.sendMessage({
          ...(record.payload as SendMessageInput),
          occurredAt: record.occurredAt,
          idempotencyKey: record.idempotencyKey,
        });
        return;
      case "insight":
        await this.http.recordInsightObservation({
          ...(record.payload as Parameters<RelayHttpClient["recordInsightObservation"]>[0]),
          occurredAt: record.occurredAt,
        });
        return;
      default:
        // Unrecognised kinds are acknowledged (not retried) after a warning.
        logger.warn("outbox", `dropping event with unknown kind: ${record.kind}`);
    }
  } catch (error) {
    // 409 = the server intentionally rejected it (e.g. Insights/feature toggled off).
    // That is a permanent refusal, not a transient failure — acknowledge so it is not retried.
    if (isHttpStatusError(error, 409)) return;
    // Auth failures kick off token recovery, then fall through to the retry path.
    if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("outbox");
    // NOTE(review): every other status, including other 4xx responses, is rethrown as
    // retryable — confirm the Outbox bounds retries for permanently invalid records.
    throw error;
  }
}
|
|
1063
|
+
|
|
1064
|
+
// A hook reported an unhandled failure (#198 seam). Already logged FATAL by the control
|
|
1065
|
+
// server; here we additionally surface it durably to the server as a generic insight so
|
|
1066
|
+
// it shows up in observability rather than only in the per-agent log (#196).
|
|
1067
|
+
private reportHookFatal(report: { hook: string; error: string }): void {
|
|
1068
|
+
try {
|
|
1069
|
+
this.outbox.enqueue({
|
|
1070
|
+
kind: "insight",
|
|
1071
|
+
payload: {
|
|
1072
|
+
sessionId: this.providerSessionId,
|
|
1073
|
+
project: this.options.cwd,
|
|
1074
|
+
agentId: this.agentId,
|
|
1075
|
+
signal: "hook_fatal",
|
|
1076
|
+
value: { hook: report.hook, error: report.error },
|
|
1077
|
+
source: "server",
|
|
1078
|
+
},
|
|
999
1079
|
});
|
|
1000
1080
|
} catch (error) {
|
|
1001
|
-
|
|
1002
|
-
if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("session-capture");
|
|
1081
|
+
logger.error("outbox", `failed to queue hook-fatal report: ${error instanceof Error ? error.message : String(error)}`);
|
|
1003
1082
|
}
|
|
1004
1083
|
}
|
|
1005
1084
|
|
|
@@ -1043,8 +1122,11 @@ export class AgentRunner {
|
|
|
1043
1122
|
}
|
|
1044
1123
|
const analysis = analyzeSession(jsonl);
|
|
1045
1124
|
if (!analysis) return; // no tool calls = nothing substantive to measure
|
|
1046
|
-
|
|
1047
|
-
|
|
1125
|
+
// Durable + non-blocking (#196): queue it. SessionEnd can race provider shutdown, so a
|
|
1126
|
+
// direct POST risked being dropped if the server hiccuped; the outbox survives that.
|
|
1127
|
+
this.outbox.enqueue({
|
|
1128
|
+
kind: "insight",
|
|
1129
|
+
payload: {
|
|
1048
1130
|
sessionId: this.providerSessionId,
|
|
1049
1131
|
project: this.options.cwd,
|
|
1050
1132
|
agentId: this.agentId,
|
|
@@ -1052,13 +1134,9 @@ export class AgentRunner {
|
|
|
1052
1134
|
value: { ...analysis.metric, ...(input.reason ? { endReason: input.reason } : {}) },
|
|
1053
1135
|
outcome: { ...analysis.outcome },
|
|
1054
1136
|
source: "server",
|
|
1055
|
-
}
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
// 409 = Insights/feature toggled off; anything else is best-effort too.
|
|
1059
|
-
this.sessionDebug(`insights context_ratio skipped: ${error instanceof Error ? error.message : String(error)}`);
|
|
1060
|
-
if (isHttpAuthError(error)) this.recoverRuntimeTokenAfterAuthFailure("insights");
|
|
1061
|
-
}
|
|
1137
|
+
},
|
|
1138
|
+
});
|
|
1139
|
+
this.sessionLog(`insights: context_ratio ${analysis.metric.ratio.toFixed(2)} (${analysis.metric.gatheringCalls}/${analysis.metric.totalToolCalls} gathering) queued`);
|
|
1062
1140
|
}
|
|
1063
1141
|
|
|
1064
1142
|
// Route a provider-emitted session event (Codex app-server) into the chat mirror.
|
|
@@ -1087,13 +1165,9 @@ export class AgentRunner {
|
|
|
1087
1165
|
if (pendingPrompt) {
|
|
1088
1166
|
replyToMessageId = pendingPrompt;
|
|
1089
1167
|
this.pendingPromptMessageId = undefined;
|
|
1090
|
-
} else {
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
if (obligations.some((o) => o.from === "user")) return;
|
|
1094
|
-
} catch {
|
|
1095
|
-
// capture anyway on lookup failure
|
|
1096
|
-
}
|
|
1168
|
+
} else if (this.obligationCache.get().some((o) => o.from === "user")) {
|
|
1169
|
+
// The agent will answer the relay obligation itself — don't double-post (#196).
|
|
1170
|
+
return;
|
|
1097
1171
|
}
|
|
1098
1172
|
await this.publishSessionEvent({
|
|
1099
1173
|
from: this.agentId,
|
|
@@ -1953,6 +2027,11 @@ function isHttpAuthError(error: unknown): boolean {
|
|
|
1953
2027
|
return status === 401 || status === 403;
|
|
1954
2028
|
}
|
|
1955
2029
|
|
|
2030
|
+
/**
 * Reports whether a caught value carries a `status` property strictly equal to `code`.
 *
 * @param error - Any thrown value; non-objects and `null` never match.
 * @param code - The HTTP status code to compare against.
 * @returns `true` only when `error` is a non-null object whose `status` is exactly `code`.
 */
function isHttpStatusError(error: unknown, code: number): boolean {
  let status: unknown;
  if (typeof error === "object" && error !== null) {
    status = (error as { status?: unknown }).status;
  }
  return status === code;
}
|
|
2034
|
+
|
|
1956
2035
|
function httpErrorKey(error: unknown): string {
|
|
1957
2036
|
const status = typeof error === "object" && error !== null ? (error as { status?: unknown }).status : undefined;
|
|
1958
2037
|
if (typeof status === "number") return `status:${status}`;
|
package/src/version.ts
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
import { readFileSync } from "node:fs";
|
|
2
2
|
import { dirname, join } from "node:path";
|
|
3
3
|
import { fileURLToPath } from "node:url";
|
|
4
|
+
import { CONTRACT_VERSIONS } from "agent-relay-sdk";
|
|
4
5
|
|
|
5
6
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
6
7
|
const pkg = JSON.parse(readFileSync(join(__dirname, "../package.json"), "utf8")) as { name?: string; version?: string };
|
|
7
8
|
|
|
8
9
|
export const PACKAGE_NAME = pkg.name || "agent-relay-runner";
|
|
9
10
|
export const VERSION = pkg.version || "0.0.0";
|
|
10
|
-
|
|
11
|
-
export const
|
|
11
|
+
// Protocol versions are owned by the SDK (CONTRACT_VERSIONS) — derive, never redeclare.
|
|
12
|
+
export const RUNNER_PROTOCOL_VERSION = CONTRACT_VERSIONS.runnerProtocol;
|
|
13
|
+
export const PROVIDER_PLUGIN_PROTOCOL_VERSION = CONTRACT_VERSIONS.providerPluginProtocol;
|
|
12
14
|
|
|
13
15
|
export const CONTRACTS = {
|
|
14
16
|
runnerProtocol: RUNNER_PROTOCOL_VERSION,
|