agent-relay-runner 0.10.19 → 0.10.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/plugins/claude/.claude-plugin/plugin.json +4 -1
- package/plugins/claude/hooks/hooks.json +114 -0
- package/plugins/claude/hooks/permission-request.sh +20 -0
- package/plugins/claude/hooks/post-compact.sh +5 -0
- package/plugins/claude/hooks/pre-compact.sh +5 -0
- package/plugins/claude/hooks/relay-status.sh +66 -0
- package/plugins/claude/hooks/session-end.sh +16 -3
- package/plugins/claude/hooks/session-start.sh +14 -0
- package/plugins/claude/hooks/stop-failure.sh +15 -0
- package/plugins/claude/hooks/stop.sh +13 -3
- package/plugins/claude/hooks/subagent-start.sh +12 -0
- package/plugins/claude/hooks/subagent-stop.sh +12 -0
- package/plugins/claude/hooks/user-prompt-submit.sh +2 -3
- package/plugins/claude/monitors/relay-monitor.ts +16 -4
- package/plugins/claude/skills/react/SKILL.md +18 -0
- package/plugins/claude/skills/read-message/SKILL.md +24 -0
- package/plugins/claude/skills/reply/SKILL.md +7 -3
- package/plugins/codex/skills/guide/SKILL.md +15 -0
- package/plugins/codex/skills/react/SKILL.md +17 -0
- package/plugins/codex/skills/read-message/SKILL.md +23 -0
- package/plugins/codex/skills/reply/SKILL.md +6 -2
- package/src/adapter.ts +207 -6
- package/src/adapters/claude-delivery.ts +108 -0
- package/src/adapters/claude.ts +232 -31
- package/src/adapters/codex-client.ts +27 -1
- package/src/adapters/codex.ts +635 -26
- package/src/attachment-cache.ts +190 -0
- package/src/claim-tracker.ts +48 -5
- package/src/control-server.ts +193 -6
- package/src/index.ts +203 -6
- package/src/profile-home.ts +85 -0
- package/src/profile-projection.ts +146 -0
- package/src/relay-instructions.ts +25 -0
- package/src/runner.ts +811 -40
- package/src/version.ts +39 -0
package/src/runner.ts
CHANGED
|
@@ -1,58 +1,120 @@
|
|
|
1
1
|
import { hostname } from "node:os";
|
|
2
|
-
import
|
|
2
|
+
import { appendFileSync, mkdirSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { dirname, join } from "node:path";
|
|
4
|
+
import type { AgentProfile, ContextState, Message, ProviderCapabilities, TaskStatusInput, WorkspaceMetadata } from "agent-relay-sdk";
|
|
3
5
|
import { RelayBusClient, RelayHttpClient } from "agent-relay-sdk";
|
|
4
|
-
import
|
|
6
|
+
import { contextStateFromProbeMetrics, readContextProbeState } from "agent-relay-sdk/context-probe";
|
|
7
|
+
import type { ManagedProcess, ProviderAdapter, ProviderConfig, ProviderPermissionDecision, ProviderPermissionDecisionInput, ProviderStatusUpdate, RunnerSpawnConfig, SemanticStatus, TerminalAttachSpec } from "./adapter";
|
|
8
|
+
import { messagesWithCachedAttachments } from "./attachment-cache";
|
|
5
9
|
import { ClaimTracker } from "./claim-tracker";
|
|
6
10
|
import { startControlServer, type ControlServer } from "./control-server";
|
|
11
|
+
import { agentProfileProjectionReport } from "./profile-projection";
|
|
12
|
+
import { profileUsesHostProviderGlobals } from "./profile-home";
|
|
13
|
+
import { runtimeMetadata } from "./version";
|
|
7
14
|
|
|
8
15
|
interface RunnerOptions {
|
|
9
16
|
provider: string;
|
|
17
|
+
model?: string;
|
|
18
|
+
effort?: string;
|
|
10
19
|
runnerId: string;
|
|
11
20
|
instanceId: string;
|
|
12
21
|
agentId?: string;
|
|
13
22
|
relayUrl: string;
|
|
14
23
|
token?: string;
|
|
24
|
+
tokenJti?: string;
|
|
25
|
+
tokenProfileId?: string;
|
|
26
|
+
tokenExpiresAt?: number;
|
|
27
|
+
rootTokenFallback?: boolean;
|
|
15
28
|
cwd: string;
|
|
16
29
|
headless: boolean;
|
|
17
30
|
approvalMode: string;
|
|
18
31
|
label?: string;
|
|
19
32
|
rig?: string;
|
|
33
|
+
profile?: string;
|
|
34
|
+
agentProfile?: AgentProfile;
|
|
20
35
|
prompt?: string;
|
|
36
|
+
systemPromptAppend?: string;
|
|
21
37
|
tags: string[];
|
|
22
38
|
capabilities: string[];
|
|
23
39
|
providerArgs: string[];
|
|
24
40
|
policyName?: string;
|
|
25
41
|
spawnRequestId?: string;
|
|
42
|
+
automationId?: string;
|
|
43
|
+
automationRunId?: string;
|
|
44
|
+
workspace?: WorkspaceMetadata;
|
|
26
45
|
tmuxSession?: string;
|
|
27
46
|
startedAt: number;
|
|
28
47
|
providerConfig: ProviderConfig;
|
|
29
48
|
adapter: ProviderAdapter;
|
|
30
49
|
onProviderExit?: (code: number | null) => void;
|
|
50
|
+
exitProcessOnShutdown?: boolean;
|
|
31
51
|
}
|
|
32
52
|
|
|
53
|
+
interface ActiveTaskClaim {
|
|
54
|
+
messageId: number;
|
|
55
|
+
taskId: number;
|
|
56
|
+
observedProviderBusy: boolean;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
const CLAIM_RENEW_INTERVAL_MS = 5 * 60 * 1000;
|
|
60
|
+
const HTTP_LIVENESS_INTERVAL_MS = 20_000;
|
|
61
|
+
const HTTP_LIVENESS_LOG_INTERVAL_MS = 5 * 60 * 1000;
|
|
62
|
+
const TOKEN_RENEW_RETRY_MS = 60_000;
|
|
63
|
+
const UNEXPECTED_EXIT_WINDOW_MS = 2 * 60 * 1000;
|
|
64
|
+
const RAPID_EXIT_MS = 30 * 1000;
|
|
65
|
+
const MAX_RAPID_UNEXPECTED_EXITS = 3;
|
|
66
|
+
const MAX_TIMER_DELAY_MS = 2_147_483_647;
|
|
67
|
+
|
|
33
68
|
export class AgentRunner {
|
|
34
69
|
private readonly agentId: string;
|
|
35
70
|
private readonly claims = new ClaimTracker();
|
|
36
71
|
private readonly http: RelayHttpClient;
|
|
37
72
|
private readonly bus: RelayBusClient;
|
|
73
|
+
private currentToken?: string;
|
|
74
|
+
private currentTokenJti?: string;
|
|
75
|
+
private currentTokenProfileId?: string;
|
|
76
|
+
private currentTokenExpiresAt?: number;
|
|
38
77
|
private control?: ControlServer;
|
|
39
78
|
private process?: ManagedProcess;
|
|
40
79
|
private stopped = false;
|
|
41
80
|
private exitCommandInProgress = false;
|
|
81
|
+
private restartInProgress = false;
|
|
82
|
+
// Set for the whole unexpected-exit restart, including the pre-restart backoff
|
|
83
|
+
// sleep, so a second exit during that window can't launch a parallel restart.
|
|
84
|
+
private restartPending = false;
|
|
42
85
|
private delivering = false;
|
|
43
86
|
private drainTimer?: Timer;
|
|
87
|
+
private claimRenewTimer?: Timer;
|
|
88
|
+
private httpLivenessTimer?: Timer;
|
|
89
|
+
private httpLivenessInFlight = false;
|
|
90
|
+
private httpLivenessAuthFailed = false;
|
|
91
|
+
private httpLivenessLastLog?: { key: string; at: number };
|
|
92
|
+
private tokenRenewTimer?: Timer;
|
|
93
|
+
private tokenRenewInFlight = false;
|
|
94
|
+
private tokenRenewLastLog?: { key: string; at: number };
|
|
95
|
+
private processStartedAt = 0;
|
|
96
|
+
private providerSessionId = crypto.randomUUID();
|
|
97
|
+
private lifecycleAction?: "shutting-down" | "killing" | "restarting";
|
|
98
|
+
private readonly unexpectedExitTimes: number[] = [];
|
|
44
99
|
private readonly pendingMessages = new Map<number, Message>();
|
|
100
|
+
private readonly activeTaskClaims = new Map<number, ActiveTaskClaim>();
|
|
101
|
+
private pendingTimelineEvent?: { status: string; id?: string; timestamp: number };
|
|
45
102
|
|
|
46
103
|
constructor(private readonly options: RunnerOptions) {
|
|
47
104
|
this.agentId = options.agentId ?? options.runnerId;
|
|
48
|
-
this.
|
|
105
|
+
this.currentToken = options.token;
|
|
106
|
+
this.currentTokenJti = options.tokenJti;
|
|
107
|
+
this.currentTokenProfileId = options.tokenProfileId;
|
|
108
|
+
this.currentTokenExpiresAt = options.tokenExpiresAt;
|
|
109
|
+
const runtime = runtimeMetadata(options.provider);
|
|
110
|
+
this.http = new RelayHttpClient({ baseUrl: options.relayUrl, token: this.currentToken });
|
|
49
111
|
this.bus = new RelayBusClient({
|
|
50
112
|
url: relayBusUrl(options.relayUrl),
|
|
51
113
|
role: "provider",
|
|
52
114
|
componentId: options.runnerId,
|
|
53
115
|
agentId: this.agentId,
|
|
54
116
|
instanceId: options.instanceId,
|
|
55
|
-
token:
|
|
117
|
+
token: this.currentToken,
|
|
56
118
|
machine: hostname(),
|
|
57
119
|
capabilities: [
|
|
58
120
|
...new Set([
|
|
@@ -70,18 +132,37 @@ export class AgentRunner {
|
|
|
70
132
|
],
|
|
71
133
|
tags: [...new Set([options.provider, ...csvTags(process.env.AGENT_RELAY_TAGS), ...options.tags, ...options.providerConfig.defaultTags, ...(options.headless ? ["headless"] : [])])],
|
|
72
134
|
meta: {
|
|
135
|
+
...runtime,
|
|
136
|
+
version: runtime.package.version,
|
|
73
137
|
provider: options.provider,
|
|
138
|
+
model: options.model ?? null,
|
|
139
|
+
effort: options.effort ?? null,
|
|
140
|
+
profile: options.profile ?? null,
|
|
141
|
+
agentProfile: options.agentProfile ? appliedAgentProfileMetadata(options.provider, options.agentProfile) : null,
|
|
74
142
|
runnerId: options.runnerId,
|
|
75
143
|
startedAt: options.startedAt,
|
|
76
|
-
tmuxSession: options.tmuxSession ?? null,
|
|
144
|
+
tmuxSession: this.providerTerminalSession() ?? options.tmuxSession ?? null,
|
|
145
|
+
tmuxSocket: this.providerTerminalSocket() ?? null,
|
|
77
146
|
policyName: options.policyName ?? null,
|
|
78
147
|
spawnRequestId: options.spawnRequestId ?? null,
|
|
148
|
+
automationId: options.automationId ?? null,
|
|
149
|
+
automationRunId: options.automationRunId ?? null,
|
|
150
|
+
workspace: options.workspace ?? null,
|
|
151
|
+
workspaceMode: options.workspace?.requestedMode ?? options.workspace?.mode ?? process.env.AGENT_RELAY_WORKSPACE_MODE ?? null,
|
|
79
152
|
runnerManaged: true,
|
|
80
153
|
cwd: options.cwd,
|
|
81
154
|
approvalMode: options.approvalMode,
|
|
82
155
|
label: options.label ?? null,
|
|
156
|
+
auth: {
|
|
157
|
+
profileId: this.currentTokenProfileId ?? null,
|
|
158
|
+
jti: this.currentTokenJti ?? null,
|
|
159
|
+
expiresAt: this.currentTokenExpiresAt ?? null,
|
|
160
|
+
rootTokenFallback: options.rootTokenFallback === true,
|
|
161
|
+
},
|
|
83
162
|
lifecycleCapabilities: lifecycleCapabilities(),
|
|
163
|
+
providerCapabilities: runtimeProviderCapabilities(options),
|
|
84
164
|
},
|
|
165
|
+
heartbeatMeta: () => this.heartbeatRuntimeMeta(),
|
|
85
166
|
heartbeatIntervalMs: 30_000,
|
|
86
167
|
initialStatus: "idle",
|
|
87
168
|
});
|
|
@@ -92,10 +173,21 @@ export class AgentRunner {
|
|
|
92
173
|
}
|
|
93
174
|
|
|
94
175
|
async run(): Promise<void> {
|
|
95
|
-
this.control = startControlServer({
|
|
176
|
+
this.control = startControlServer({
|
|
177
|
+
onStatus: (status) => this.setProviderStatus(status),
|
|
178
|
+
onTerminalAttachSpec: () => this.terminalAttachSpec(),
|
|
179
|
+
onReplyObligations: () => this.http.listReplyObligations(this.agentId),
|
|
180
|
+
});
|
|
181
|
+
this.writeRunnerInfoFile();
|
|
96
182
|
this.options.adapter.onStatusChange((status) => {
|
|
183
|
+
if (this.restartInProgress || this.restartPending) return;
|
|
184
|
+
const semanticStatus = typeof status === "string" ? status : status.status;
|
|
185
|
+
if (this.shouldRestartUnexpectedProviderExit(semanticStatus)) {
|
|
186
|
+
void this.restartUnexpectedProviderExit(semanticStatus);
|
|
187
|
+
return;
|
|
188
|
+
}
|
|
97
189
|
this.setProviderStatus(status);
|
|
98
|
-
if (runnerShouldResolveProviderExit(
|
|
190
|
+
if (runnerShouldResolveProviderExit(semanticStatus, this.exitCommandInProgress)) this.options.onProviderExit?.(semanticStatus === "offline" ? 0 : 1);
|
|
99
191
|
});
|
|
100
192
|
this.bus.on("message.new", (message) => this.enqueueMessage(message as Message));
|
|
101
193
|
this.bus.on("command", (type, params, commandId, command) => {
|
|
@@ -104,8 +196,14 @@ export class AgentRunner {
|
|
|
104
196
|
this.bus.on("error", (code, message) => this.handleBusError(String(code), String(message)));
|
|
105
197
|
await this.bus.connect();
|
|
106
198
|
this.process = await this.spawnProvider();
|
|
199
|
+
this.writeRunnerInfoFile();
|
|
200
|
+
this.processStartedAt = Date.now();
|
|
107
201
|
this.publishStatus();
|
|
202
|
+
await this.deliverInitialPrompt();
|
|
108
203
|
await this.bootstrapUnreadMessages();
|
|
204
|
+
this.startClaimRenewer();
|
|
205
|
+
this.startHttpLiveness();
|
|
206
|
+
this.scheduleRuntimeTokenRenewal();
|
|
109
207
|
this.scheduleDrain();
|
|
110
208
|
}
|
|
111
209
|
|
|
@@ -121,23 +219,37 @@ export class AgentRunner {
|
|
|
121
219
|
}
|
|
122
220
|
if (this.drainTimer) clearTimeout(this.drainTimer);
|
|
123
221
|
this.drainTimer = undefined;
|
|
222
|
+
if (this.claimRenewTimer) clearInterval(this.claimRenewTimer);
|
|
223
|
+
this.claimRenewTimer = undefined;
|
|
224
|
+
if (this.httpLivenessTimer) clearInterval(this.httpLivenessTimer);
|
|
225
|
+
this.httpLivenessTimer = undefined;
|
|
226
|
+
if (this.tokenRenewTimer) clearTimeout(this.tokenRenewTimer);
|
|
227
|
+
this.tokenRenewTimer = undefined;
|
|
124
228
|
this.control?.stop();
|
|
125
229
|
await this.bus.close();
|
|
126
230
|
}
|
|
127
231
|
|
|
128
232
|
private async spawnProvider(): Promise<ManagedProcess> {
|
|
233
|
+
this.providerSessionId = crypto.randomUUID();
|
|
234
|
+
const includeProviderGlobals = profileUsesHostProviderGlobals(this.options);
|
|
129
235
|
const env = {
|
|
130
236
|
...process.env as Record<string, string>,
|
|
131
|
-
...this.options.providerConfig.env,
|
|
237
|
+
...(includeProviderGlobals ? this.options.providerConfig.env : {}),
|
|
132
238
|
AGENT_RELAY_RUNNER_PORT: String(this.control!.port),
|
|
133
239
|
AGENT_RELAY_RUNNER_ID: this.options.runnerId,
|
|
240
|
+
AGENT_RELAY_PROVIDER_SESSION_ID: this.providerSessionId,
|
|
134
241
|
AGENT_RELAY_ID: this.agentId,
|
|
135
242
|
AGENT_RELAY_URL: this.options.relayUrl,
|
|
136
243
|
AGENT_RELAY_APPROVAL: this.options.approvalMode,
|
|
137
|
-
...(this.
|
|
244
|
+
...(this.currentToken ? { AGENT_RELAY_TOKEN: this.currentToken } : {}),
|
|
245
|
+
...(this.currentTokenJti ? { AGENT_RELAY_TOKEN_JTI: this.currentTokenJti } : {}),
|
|
246
|
+
...(this.currentTokenProfileId ? { AGENT_RELAY_TOKEN_PROFILE: this.currentTokenProfileId } : {}),
|
|
247
|
+
...(this.currentTokenExpiresAt ? { AGENT_RELAY_TOKEN_EXPIRES_AT: String(this.currentTokenExpiresAt) } : {}),
|
|
138
248
|
};
|
|
139
249
|
const config: RunnerSpawnConfig = {
|
|
140
250
|
provider: this.options.provider,
|
|
251
|
+
model: this.options.model,
|
|
252
|
+
effort: this.options.effort,
|
|
141
253
|
runnerId: this.options.runnerId,
|
|
142
254
|
instanceId: this.options.instanceId,
|
|
143
255
|
agentId: this.agentId,
|
|
@@ -147,7 +259,11 @@ export class AgentRunner {
|
|
|
147
259
|
approvalMode: this.options.approvalMode,
|
|
148
260
|
...(this.options.label ? { label: this.options.label } : {}),
|
|
149
261
|
...(this.options.rig ? { rig: this.options.rig } : {}),
|
|
262
|
+
...(this.options.profile ? { profile: this.options.profile } : {}),
|
|
263
|
+
...(this.options.agentProfile ? { agentProfile: this.options.agentProfile } : {}),
|
|
150
264
|
...(this.options.prompt ? { prompt: this.options.prompt } : {}),
|
|
265
|
+
...(this.options.systemPromptAppend ? { systemPromptAppend: this.options.systemPromptAppend } : {}),
|
|
266
|
+
...(this.options.tmuxSession ? { tmuxSession: this.options.tmuxSession } : {}),
|
|
151
267
|
providerArgs: this.options.providerArgs,
|
|
152
268
|
providerConfig: this.options.providerConfig,
|
|
153
269
|
env,
|
|
@@ -159,6 +275,32 @@ export class AgentRunner {
|
|
|
159
275
|
return this.options.adapter.spawn(config);
|
|
160
276
|
}
|
|
161
277
|
|
|
278
|
+
private async terminalAttachSpec(): Promise<TerminalAttachSpec> {
|
|
279
|
+
if (!this.process) throw new Error("provider process is unavailable");
|
|
280
|
+
if (!this.options.adapter.terminalAttachSpec) throw new Error("provider does not support terminal attach");
|
|
281
|
+
return this.options.adapter.terminalAttachSpec(this.process);
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
private writeRunnerInfoFile(): void {
|
|
285
|
+
const file = process.env.AGENT_RELAY_RUNNER_INFO_FILE;
|
|
286
|
+
if (!file || !this.control) return;
|
|
287
|
+
try {
|
|
288
|
+
mkdirSync(dirname(file), { recursive: true });
|
|
289
|
+
writeFileSync(file, JSON.stringify({
|
|
290
|
+
agentId: this.agentId,
|
|
291
|
+
runnerId: this.options.runnerId,
|
|
292
|
+
provider: this.options.provider,
|
|
293
|
+
controlUrl: this.control.url,
|
|
294
|
+
pid: process.pid,
|
|
295
|
+
...(this.providerTerminalSession() ? { tmuxSession: this.providerTerminalSession() } : {}),
|
|
296
|
+
...(this.providerTerminalSocket() ? { tmuxSocket: this.providerTerminalSocket() } : {}),
|
|
297
|
+
startedAt: this.options.startedAt,
|
|
298
|
+
}, null, 2) + "\n", { mode: 0o600 });
|
|
299
|
+
} catch (error) {
|
|
300
|
+
console.error(`[runner] failed to write runner info file: ${error}`);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
162
304
|
private enqueueMessage(message: Message): void {
|
|
163
305
|
if (!this.matchesMessage(message)) return;
|
|
164
306
|
this.pendingMessages.set(message.id, message);
|
|
@@ -174,6 +316,16 @@ export class AgentRunner {
|
|
|
174
316
|
}
|
|
175
317
|
}
|
|
176
318
|
|
|
319
|
+
private async deliverInitialPrompt(): Promise<void> {
|
|
320
|
+
const prompt = this.options.prompt?.trim();
|
|
321
|
+
if (!prompt || !this.process || !this.options.adapter.deliverInitialPrompt) return;
|
|
322
|
+
try {
|
|
323
|
+
await this.options.adapter.deliverInitialPrompt(this.process, prompt);
|
|
324
|
+
} catch (error) {
|
|
325
|
+
console.error(`[runner] initial prompt delivery failed: ${error}`);
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
|
|
177
329
|
private scheduleDrain(delayMs = 0): void {
|
|
178
330
|
if (this.stopped || this.drainTimer) return;
|
|
179
331
|
this.drainTimer = setTimeout(() => {
|
|
@@ -191,25 +343,50 @@ export class AgentRunner {
|
|
|
191
343
|
this.delivering = true;
|
|
192
344
|
const messages = [...this.pendingMessages.values()].sort((a, b) => a.id - b.id);
|
|
193
345
|
this.pendingMessages.clear();
|
|
346
|
+
const deliverable: Message[] = [];
|
|
347
|
+
const providerAlreadyBusy = this.claims.reasons().includes("provider-turn");
|
|
194
348
|
for (const message of messages) {
|
|
195
349
|
if (message.claimable) {
|
|
196
|
-
const claimed = await this.http.
|
|
197
|
-
if (!claimed) continue;
|
|
198
|
-
this.claims.startClaim("message", String(message.id),
|
|
350
|
+
const claimed = await this.http.claimMessageResult(message.id, this.agentId).catch(() => ({ ok: false, claimExpiresAt: undefined }));
|
|
351
|
+
if (!claimed.ok) continue;
|
|
352
|
+
this.claims.startClaim("message", String(message.id), claimed.claimExpiresAt);
|
|
353
|
+
const taskId = taskIdFromMessage(message);
|
|
354
|
+
if (taskId) {
|
|
355
|
+
this.activeTaskClaims.set(message.id, { messageId: message.id, taskId, observedProviderBusy: providerAlreadyBusy });
|
|
356
|
+
this.claims.startClaim("task", String(taskId), claimed.claimExpiresAt);
|
|
357
|
+
await this.updateTaskStatus(taskId, {
|
|
358
|
+
status: "in_progress",
|
|
359
|
+
agentId: this.agentId,
|
|
360
|
+
metadata: { messageId: message.id, completedBy: "runner" },
|
|
361
|
+
}).catch((error) => console.error(`[runner] task ${taskId} in_progress update failed: ${error}`));
|
|
362
|
+
}
|
|
199
363
|
}
|
|
364
|
+
deliverable.push(message);
|
|
365
|
+
}
|
|
366
|
+
if (deliverable.length === 0) {
|
|
367
|
+
this.delivering = false;
|
|
368
|
+
this.publishStatus();
|
|
369
|
+
return;
|
|
200
370
|
}
|
|
201
371
|
this.publishStatus();
|
|
202
372
|
let failed = false;
|
|
203
373
|
try {
|
|
204
|
-
await
|
|
205
|
-
|
|
374
|
+
const prepared = await messagesWithCachedAttachments(deliverable, this.http, {
|
|
375
|
+
agentId: this.agentId,
|
|
376
|
+
onError: (message) => console.error(`[runner] ${message}`),
|
|
377
|
+
});
|
|
378
|
+
await this.options.adapter.deliver(this.process, prepared);
|
|
379
|
+
for (const message of deliverable) {
|
|
206
380
|
await this.http.markRead(message.id, this.agentId).catch(() => {});
|
|
207
|
-
this.claims.finishClaim("message", String(message.id));
|
|
381
|
+
if (!taskIdFromMessage(message)) this.claims.finishClaim("message", String(message.id));
|
|
208
382
|
}
|
|
209
383
|
} catch (error) {
|
|
210
384
|
failed = true;
|
|
211
385
|
if (shouldLogDeliveryFailure(error)) console.error(`[runner] message delivery failed: ${error}`);
|
|
212
|
-
for (const message of
|
|
386
|
+
for (const message of deliverable) {
|
|
387
|
+
this.clearActiveClaim(message);
|
|
388
|
+
this.pendingMessages.set(message.id, message);
|
|
389
|
+
}
|
|
213
390
|
} finally {
|
|
214
391
|
this.delivering = false;
|
|
215
392
|
this.publishStatus();
|
|
@@ -220,17 +397,35 @@ export class AgentRunner {
|
|
|
220
397
|
private async handleCommand(type: string, params: Record<string, unknown>, commandId: string, command?: Record<string, unknown>): Promise<void> {
|
|
221
398
|
const target = typeof command?.target === "string" ? command.target : this.agentId;
|
|
222
399
|
if (target !== this.agentId && target !== this.options.runnerId) return;
|
|
223
|
-
if (type !== "agent.shutdown" && type !== "agent.restart" && type !== "agent.kill") return;
|
|
400
|
+
if (type !== "agent.shutdown" && type !== "agent.restart" && type !== "agent.reconnect" && type !== "agent.kill" && type !== "agent.compact" && type !== "agent.clearContext" && type !== "agent.injectContext" && type !== "agent.permissionDecision") return;
|
|
224
401
|
|
|
225
|
-
const exitAfterCommand = type
|
|
226
|
-
if (exitAfterCommand)
|
|
402
|
+
const exitAfterCommand = type === "agent.shutdown" || type === "agent.kill";
|
|
403
|
+
if (exitAfterCommand) {
|
|
404
|
+
this.exitCommandInProgress = true;
|
|
405
|
+
this.lifecycleAction = type === "agent.kill" ? "killing" : "shutting-down";
|
|
406
|
+
} else if (type === "agent.restart") {
|
|
407
|
+
this.lifecycleAction = "restarting";
|
|
408
|
+
}
|
|
227
409
|
this.claims.startClaim("command", commandId);
|
|
228
410
|
this.publishStatus();
|
|
229
|
-
await this.updateCommand(commandId, "accepted");
|
|
230
|
-
await this.updateCommand(commandId, "running");
|
|
231
411
|
try {
|
|
412
|
+
await this.updateCommand(commandId, "accepted");
|
|
413
|
+
await this.updateCommand(commandId, "running");
|
|
414
|
+
let providerResult: Record<string, unknown> | void = undefined;
|
|
232
415
|
if (type === "agent.restart") await this.restartProvider();
|
|
233
|
-
else
|
|
416
|
+
else if (type === "agent.reconnect") this.publishStatus();
|
|
417
|
+
else if (type === "agent.compact") {
|
|
418
|
+
if (!this.options.adapter.compact || !this.process) throw new Error("provider does not support compact");
|
|
419
|
+
providerResult = await this.options.adapter.compact(this.process);
|
|
420
|
+
} else if (type === "agent.clearContext") {
|
|
421
|
+
if (!this.options.adapter.clearContext || !this.process) throw new Error("provider does not support clearContext");
|
|
422
|
+
providerResult = await this.options.adapter.clearContext(this.process);
|
|
423
|
+
} else if (type === "agent.injectContext") {
|
|
424
|
+
if (!this.process) throw new Error("provider process is unavailable");
|
|
425
|
+
providerResult = await this.injectContext(params);
|
|
426
|
+
} else if (type === "agent.permissionDecision") {
|
|
427
|
+
providerResult = await this.respondToPermissionDecision(params);
|
|
428
|
+
} else await this.shutdownProvider(type === "agent.kill", commandTimeoutMs(params));
|
|
234
429
|
await this.updateCommand(commandId, "succeeded", {
|
|
235
430
|
action: type,
|
|
236
431
|
agentId: this.agentId,
|
|
@@ -238,6 +433,7 @@ export class AgentRunner {
|
|
|
238
433
|
policyName: this.options.policyName,
|
|
239
434
|
spawnRequestId: this.options.spawnRequestId,
|
|
240
435
|
reason: typeof params.reason === "string" ? params.reason : undefined,
|
|
436
|
+
...(providerResult ? { providerResult } : {}),
|
|
241
437
|
});
|
|
242
438
|
} catch (error) {
|
|
243
439
|
await this.updateCommand(commandId, "failed", undefined, error instanceof Error ? error.message : String(error)).catch(() => {});
|
|
@@ -245,32 +441,136 @@ export class AgentRunner {
|
|
|
245
441
|
this.claims.finishClaim("command", commandId);
|
|
246
442
|
if (exitAfterCommand) {
|
|
247
443
|
await this.http.deleteAgent(this.agentId).catch(() => {});
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
444
|
+
if (this.options.exitProcessOnShutdown !== false) {
|
|
445
|
+
setTimeout(() => void this.stop().catch((error) => {
|
|
446
|
+
console.error(`[runner] stop after command failed: ${error}`);
|
|
447
|
+
}).finally(() => process.exit(0)), 10);
|
|
448
|
+
}
|
|
251
449
|
} else if (!this.stopped) {
|
|
450
|
+
this.lifecycleAction = undefined;
|
|
252
451
|
this.publishStatus();
|
|
253
452
|
}
|
|
254
453
|
}
|
|
255
454
|
}
|
|
256
455
|
|
|
456
|
+
private async injectContext(params: Record<string, unknown>): Promise<Record<string, unknown>> {
|
|
457
|
+
const content = typeof params.content === "string" ? params.content.trim() : "";
|
|
458
|
+
if (!content) throw new Error("content required");
|
|
459
|
+
const memoryIds = Array.isArray(params.memoryIds) ? params.memoryIds.filter((id): id is string => typeof id === "string") : [];
|
|
460
|
+
await this.options.adapter.deliver(this.process!, [{
|
|
461
|
+
id: 0,
|
|
462
|
+
from: "system",
|
|
463
|
+
to: this.agentId,
|
|
464
|
+
kind: "system",
|
|
465
|
+
subject: "Agent Relay memory context",
|
|
466
|
+
body: content,
|
|
467
|
+
payload: {
|
|
468
|
+
memoryInjection: true,
|
|
469
|
+
reason: typeof params.reason === "string" ? params.reason : undefined,
|
|
470
|
+
memoryIds,
|
|
471
|
+
},
|
|
472
|
+
readBy: [],
|
|
473
|
+
createdAt: Date.now(),
|
|
474
|
+
}]);
|
|
475
|
+
return { memoryIds, injectedMemoryCount: memoryIds.length };
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
private async respondToPermissionDecision(params: Record<string, unknown>): Promise<Record<string, unknown>> {
|
|
479
|
+
const approvalId = typeof params.approvalId === "string" ? params.approvalId : "";
|
|
480
|
+
const decision = typeof params.decision === "string" ? params.decision : "";
|
|
481
|
+
if (!approvalId) throw new Error("approvalId required");
|
|
482
|
+
if (decision !== "approve" && decision !== "approve-session" && decision !== "deny" && decision !== "abort") {
|
|
483
|
+
throw new Error("decision must be approve, approve-session, deny, or abort");
|
|
484
|
+
}
|
|
485
|
+
const input: ProviderPermissionDecisionInput = {
|
|
486
|
+
approvalId,
|
|
487
|
+
decision: decision as ProviderPermissionDecision,
|
|
488
|
+
...(typeof params.reason === "string" ? { reason: params.reason } : {}),
|
|
489
|
+
};
|
|
490
|
+
if (this.control?.resolvePermissionDecision(input)) {
|
|
491
|
+
return { approvalId, decision, provider: "claude" };
|
|
492
|
+
}
|
|
493
|
+
if (!this.process) throw new Error("provider process is unavailable");
|
|
494
|
+
if (!this.options.adapter.respondToPermissionDecision) throw new Error("provider does not support permission decisions");
|
|
495
|
+
const result = await this.options.adapter.respondToPermissionDecision(this.process, input);
|
|
496
|
+
return { approvalId, decision, ...(result ? { providerResult: result } : {}) };
|
|
497
|
+
}
|
|
498
|
+
|
|
257
499
|
private async restartProvider(): Promise<void> {
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
500
|
+
this.restartInProgress = true;
|
|
501
|
+
try {
|
|
502
|
+
if (this.process) {
|
|
503
|
+
await this.options.adapter.shutdown(this.process, {
|
|
504
|
+
graceful: true,
|
|
505
|
+
timeoutMs: this.options.providerConfig.headless.shutdownTimeoutMs,
|
|
506
|
+
});
|
|
507
|
+
}
|
|
508
|
+
this.claims.clearTerminalStatus();
|
|
509
|
+
this.claims.clearWorkKind("provider-turn");
|
|
510
|
+
this.claims.clearWorkKind("subagent");
|
|
511
|
+
if (this.stopped) return;
|
|
512
|
+
this.process = await this.spawnProvider();
|
|
513
|
+
this.processStartedAt = Date.now();
|
|
514
|
+
} finally {
|
|
515
|
+
this.restartInProgress = false;
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
private shouldRestartUnexpectedProviderExit(status: SemanticStatus): boolean {
|
|
520
|
+
return runnerShouldRestartUnexpectedProviderExit(status, {
|
|
521
|
+
exitCommandInProgress: this.exitCommandInProgress,
|
|
522
|
+
stopped: this.stopped,
|
|
523
|
+
restartInProgress: this.restartInProgress,
|
|
524
|
+
provider: this.options.provider,
|
|
525
|
+
headless: this.options.headless,
|
|
526
|
+
hasTerminalSession: typeof this.process?.meta?.tmuxSession === "string"
|
|
527
|
+
|| (this.options.adapter.supportsUnexpectedExitRestart?.() ?? false),
|
|
528
|
+
});
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
private async restartUnexpectedProviderExit(status: SemanticStatus): Promise<void> {
|
|
532
|
+
if (this.restartPending) return;
|
|
533
|
+
this.restartPending = true;
|
|
534
|
+
try {
|
|
535
|
+
const now = Date.now();
|
|
536
|
+
const runtimeMs = this.processStartedAt ? now - this.processStartedAt : Number.POSITIVE_INFINITY;
|
|
537
|
+
const recent = this.unexpectedExitTimes.filter((time) => now - time <= UNEXPECTED_EXIT_WINDOW_MS);
|
|
538
|
+
recent.push(now);
|
|
539
|
+
this.unexpectedExitTimes.splice(0, this.unexpectedExitTimes.length, ...recent);
|
|
540
|
+
|
|
541
|
+
if (runtimeMs < RAPID_EXIT_MS && recent.length > MAX_RAPID_UNEXPECTED_EXITS) {
|
|
542
|
+
console.error(`[runner] provider session exited ${recent.length} times within ${Math.round(UNEXPECTED_EXIT_WINDOW_MS / 1000)}s; giving up`);
|
|
543
|
+
this.setProviderStatus(status);
|
|
544
|
+
this.options.onProviderExit?.(0);
|
|
545
|
+
return;
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
const delayMs = Math.min(10_000, Math.max(500, 500 * recent.length));
|
|
549
|
+
console.warn(`[runner] provider session exited unexpectedly after ${Math.round(runtimeMs / 1000)}s; restarting in ${delayMs}ms`);
|
|
550
|
+
await Bun.sleep(delayMs);
|
|
551
|
+
if (this.stopped || this.exitCommandInProgress) return;
|
|
552
|
+
try {
|
|
553
|
+
await this.restartProvider();
|
|
554
|
+
this.publishStatus();
|
|
555
|
+
this.scheduleDrain();
|
|
556
|
+
} catch (error) {
|
|
557
|
+
console.error(`[runner] provider restart after unexpected exit failed: ${error}`);
|
|
558
|
+
this.setProviderStatus("error");
|
|
559
|
+
this.options.onProviderExit?.(1);
|
|
560
|
+
}
|
|
561
|
+
} finally {
|
|
562
|
+
this.restartPending = false;
|
|
263
563
|
}
|
|
264
|
-
if (this.stopped) return;
|
|
265
|
-
this.process = await this.spawnProvider();
|
|
266
564
|
}
|
|
267
565
|
|
|
268
|
-
private async shutdownProvider(hard: boolean): Promise<void> {
|
|
269
|
-
|
|
566
|
+
private async shutdownProvider(hard: boolean, timeoutMs = this.options.providerConfig.headless.shutdownTimeoutMs): Promise<void> {
|
|
567
|
+
this.lifecycleAction = hard ? "killing" : "shutting-down";
|
|
568
|
+
this.publishStatus();
|
|
569
|
+
await this.bus.statusAsync({ agentStatus: "idle", ready: false, meta: { lifecycleAction: this.lifecycleAction, lifecycleActionAt: Date.now() } });
|
|
270
570
|
if (this.process) {
|
|
271
571
|
await this.options.adapter.shutdown(this.process, {
|
|
272
572
|
graceful: !hard,
|
|
273
|
-
timeoutMs
|
|
573
|
+
timeoutMs,
|
|
274
574
|
});
|
|
275
575
|
}
|
|
276
576
|
this.claims.setTerminalStatus("offline");
|
|
@@ -293,33 +593,269 @@ export class AgentRunner {
|
|
|
293
593
|
}
|
|
294
594
|
}
|
|
295
595
|
|
|
296
|
-
private setProviderStatus(
|
|
297
|
-
|
|
298
|
-
|
|
596
|
+
/**
 * Applies a provider status update to the claim tracker and publishes the result.
 *
 * `update` is either a bare status string or an object carrying `status` plus the
 * optional `providerSessionId`, `reason`, `id`, `timeline`, `label`, `role`,
 * `parentId`, `metadata`, `providerState` and `clear` fields read below.
 *
 * @param update - The status reported by the provider.
 */
private setProviderStatus(update: ProviderStatusUpdate): void {
  const detail = typeof update === "string" ? undefined : update;
  const status = typeof update === "string" ? update : update.status;

  // An update tagged with a session id other than the current one is dropped entirely.
  if (detail?.providerSessionId && detail.providerSessionId !== this.providerSessionId) return;

  // Bare string updates always describe the default "provider-turn" work item.
  const reason = detail?.reason ?? "provider-turn";
  const id = detail?.id ?? reason;

  const timeline = detail?.timeline;
  if (timeline) {
    // Held until the next publishStatus() call, which attaches and clears it.
    this.pendingTimelineEvent = {
      status: timeline.status,
      ...(timeline.id ? { id: timeline.id } : {}),
      timestamp: timeline.timestamp ?? Date.now(),
    };
  }

  switch (status) {
    case "busy":
      this.claims.clearTerminalStatus();
      this.claims.startWork(reason, id, detail
        ? {
            label: detail.label,
            role: detail.role,
            parentId: detail.parentId,
            metadata: {
              ...(detail.metadata ?? {}),
              ...(detail.providerState ? { providerState: detail.providerState } : {}),
            },
          }
        : {});
      if (reason === "provider-turn") {
        // Mark every active task claim as having seen the provider go busy.
        for (const claim of this.activeTaskClaims.values()) claim.observedProviderBusy = true;
      }
      break;
    case "idle":
      this.claims.clearTerminalStatus();
      this.claims.finishWork(reason, id);
      // Fire-and-forget: the completion routine logs its own failures.
      if (reason === "provider-turn") void this.completeObservedTaskClaims();
      break;
    case "offline":
    case "error":
      this.claims.setTerminalStatus(status);
      break;
  }

  for (const kind of detail?.clear ?? []) this.claims.clearWorkKind(kind);
  this.publishStatus();
}
|
|
302
634
|
|
|
303
635
|
/**
 * Publishes the runner's current status and a metadata snapshot on the bus.
 *
 * Any pending timeline event is consumed here: it is attached to this status
 * message and cleared, so it is sent at most once.
 */
private publishStatus(): void {
  const status = this.claims.currentStatus();
  const agentStatus = runnerAgentStatus(status);
  const activeWork = this.claims.activeWork();
  const activeSubagents = activeWork.filter((item) => item.kind === "subagent");
  const providerState = providerStateFromActiveWork(activeWork);

  // The semantic status sent to the bus reports "idle" in place of the terminal statuses.
  this.bus.setSemanticStatus(status === "offline" || status === "error" ? "idle" : status);

  // Take-and-clear the pending timeline event.
  const timelineEvent = this.pendingTimelineEvent;
  this.pendingTimelineEvent = undefined;

  const { options } = this;
  const ready = agentStatus !== "offline" && !this.stopped;

  this.bus.status({
    agentStatus,
    ready,
    meta: {
      runnerId: options.runnerId,
      startedAt: options.startedAt,
      // Prefer the session reported by the live provider process over the configured one.
      tmuxSession: this.providerTerminalSession() ?? options.tmuxSession ?? null,
      tmuxSocket: this.providerTerminalSocket() ?? null,
      policyName: options.policyName ?? null,
      spawnRequestId: options.spawnRequestId ?? null,
      automationId: options.automationId ?? null,
      automationRunId: options.automationRunId ?? null,
      workspace: options.workspace ?? null,
      workspaceMode: options.workspace?.requestedMode ?? options.workspace?.mode ?? process.env.AGENT_RELAY_WORKSPACE_MODE ?? null,
      lifecycleAction: this.lifecycleAction ?? null,
      profile: options.profile ?? null,
      ...(status === "error" ? { terminalStatus: "error" } : {}),
      busyReasons: this.claims.reasons(),
      activeWork,
      activeSubagents,
      activeSubagentCount: activeSubagents.length,
      providerState,
      auth: {
        profileId: this.currentTokenProfileId ?? null,
        jti: this.currentTokenJti ?? null,
        expiresAt: this.currentTokenExpiresAt ?? null,
        rootTokenFallback: options.rootTokenFallback === true,
      },
      ...(timelineEvent ? { timelineEvent } : {}),
      transport: this.bus.transportState,
    },
  });
}
|
|
322
677
|
|
|
678
|
+
/**
 * (Re)starts the periodic HTTP liveness publisher.
 *
 * Any existing interval is cleared first; one update is sent immediately and
 * then again every `HTTP_LIVENESS_INTERVAL_MS`.
 */
private startHttpLiveness(): void {
  if (this.httpLivenessTimer) clearInterval(this.httpLivenessTimer);
  const tick = (): void => {
    // Fire-and-forget: publishHttpLiveness handles its own failures.
    void this.publishHttpLiveness();
  };
  tick();
  this.httpLivenessTimer = setInterval(tick, HTTP_LIVENESS_INTERVAL_MS);
}
|
|
685
|
+
|
|
686
|
+
/**
 * Sends one liveness update over HTTP: status, readiness, heartbeat, then an
 * unread-message bootstrap.
 *
 * Does nothing when the runner is stopped, when a previous update is still in
 * flight, or after an auth failure has disabled liveness.
 *
 * Failures are routed to `handleHttpLivenessFailure` unless the runner has
 * stopped in the meantime; this method itself does not reject.
 */
private async publishHttpLiveness(): Promise<void> {
  if (this.stopped || this.httpLivenessInFlight || this.httpLivenessAuthFailed) return;
  this.httpLivenessInFlight = true;
  try {
    // Computed inside the try so that a throw here cannot leave the in-flight
    // flag set, which would silently disable every later liveness update.
    const agentStatus = runnerAgentStatus(this.claims.currentStatus());
    await this.http.setStatus(this.agentId, agentStatus, this.options.instanceId);
    await this.http.setReady(this.agentId, agentStatus !== "offline", this.options.instanceId);
    await this.http.heartbeat(this.agentId, this.options.instanceId);
    await this.bootstrapUnreadMessages();
  } catch (error) {
    if (!this.stopped) this.handleHttpLivenessFailure(error);
  } finally {
    this.httpLivenessInFlight = false;
  }
}
|
|
702
|
+
|
|
703
|
+
/**
 * Logs a liveness failure and, for auth failures, stops further liveness attempts.
 *
 * @param error - The value caught while publishing liveness.
 */
private handleHttpLivenessFailure(error: unknown): void {
  const isAuthFailure = isHttpAuthError(error);
  this.logHttpLivenessFailure(error, isAuthFailure);
  if (isAuthFailure) {
    // Latch the failure and cancel the interval so no more requests are attempted.
    this.httpLivenessAuthFailed = true;
    if (this.httpLivenessTimer) clearInterval(this.httpLivenessTimer);
    this.httpLivenessTimer = undefined;
  }
}
|
|
711
|
+
|
|
712
|
+
/**
 * Writes a rate-limited diagnostic for a liveness failure.
 *
 * A failure with the same key as the previously logged one is suppressed while
 * it falls within `HTTP_LIVENESS_LOG_INTERVAL_MS` of that log entry.
 *
 * @param error - The value caught while publishing liveness.
 * @param authFailed - Whether the failure was classified as an auth error.
 */
private logHttpLivenessFailure(error: unknown, authFailed: boolean): void {
  const key = httpErrorKey(error);
  const now = Date.now();
  const previous = this.httpLivenessLastLog;
  if (previous && previous.key === key && now - previous.at < HTTP_LIVENESS_LOG_INTERVAL_MS) {
    return;
  }
  this.httpLivenessLastLog = { key, at: now };

  let suffix: string;
  if (authFailed) {
    suffix = "auth failed; stopping HTTP liveness retries until restart";
  } else {
    suffix = String(error);
  }
  this.logRunnerDiagnostic(`[runner] HTTP liveness update failed: ${suffix}`);
}
|
|
723
|
+
|
|
724
|
+
/**
 * Emits a runner diagnostic message.
 *
 * Headless runners write to stderr. Otherwise the message is appended, with an
 * ISO timestamp prefix, to `~/.agent-relay/logs/runner-<agent>.log` (falling back
 * to the current directory when `HOME` is unset); file errors are ignored.
 *
 * @param message - The line to record.
 */
private logRunnerDiagnostic(message: string): void {
  if (!this.options.headless) {
    try {
      const logDir = join(process.env.HOME || ".", ".agent-relay", "logs");
      mkdirSync(logDir, { recursive: true });
      const logFile = join(logDir, `runner-${safeLogName(this.agentId)}.log`);
      const stamp = new Date().toISOString();
      appendFileSync(logFile, `[${stamp}] ${message}\n`);
    } catch {
      // Do not write runner diagnostics into an interactive provider TUI.
    }
    return;
  }
  console.error(message);
}
|
|
737
|
+
|
|
738
|
+
/**
 * Arms (or re-arms) the timer that renews the runtime token.
 *
 * Any existing timer is cancelled first. Nothing is scheduled when the token is
 * not renewable or when no usable delay can be derived.
 *
 * @param delayMs - Explicit delay; when omitted it is derived from the token expiry.
 */
private scheduleRuntimeTokenRenewal(delayMs?: number): void {
  if (this.tokenRenewTimer) clearTimeout(this.tokenRenewTimer);
  this.tokenRenewTimer = undefined;
  if (!this.isRuntimeTokenRenewable()) return;

  // isRuntimeTokenRenewable() has just confirmed the expiry is set.
  const wait = delayMs ?? runtimeTokenRenewDelayMs(this.currentTokenExpiresAt!, Date.now());
  if (wait === undefined) return;

  const plan = runtimeTokenRenewTimerSchedule(wait);
  if (!plan) return;

  const onTimer = (): void => {
    this.tokenRenewTimer = undefined;
    if (plan.renew) {
      void this.renewRuntimeToken();
    } else {
      // The delay was capped; schedule again with a freshly computed delay.
      this.scheduleRuntimeTokenRenewal();
    }
  };
  this.tokenRenewTimer = setTimeout(onTimer, plan.delayMs);
}
|
|
755
|
+
|
|
756
|
+
/**
 * Reports whether the current runtime token can be renewed.
 *
 * Requires a token, an expiry (in seconds) that is still in the future, and a
 * token profile of `provider-agent` or `provider-interactive`.
 */
private isRuntimeTokenRenewable(): boolean {
  if (!this.currentToken || !this.currentTokenExpiresAt) return false;

  const notYetExpired = this.currentTokenExpiresAt * 1000 > Date.now();
  if (!notYetExpired) return false;

  const profileId = this.currentTokenProfileId;
  return profileId === "provider-agent" || profileId === "provider-interactive";
}
|
|
764
|
+
|
|
765
|
+
/**
 * Renews the runtime token over HTTP and propagates it to the runner state,
 * the options object, the HTTP client and the bus.
 *
 * On success the transport is reconnected, a `runtime-token-renewed` timeline
 * event is published and the next renewal is scheduled. On failure the error is
 * logged, a `runtime-token-renewal-failed` event is published and a retry is
 * scheduled after `TOKEN_RENEW_RETRY_MS`.
 */
private async renewRuntimeToken(): Promise<void> {
  if (this.stopped || this.tokenRenewInFlight || !this.isRuntimeTokenRenewable()) return;
  this.tokenRenewInFlight = true;
  try {
    const { token, record } = await this.http.renewRuntimeToken();

    // Mirror the new credentials onto the runner fields...
    this.currentToken = token;
    this.currentTokenJti = record.jti;
    this.currentTokenProfileId = record.profileId ?? this.currentTokenProfileId;
    this.currentTokenExpiresAt = record.expiresAt;

    // ...and onto the options object.
    this.options.token = token;
    this.options.tokenJti = record.jti;
    this.options.tokenProfileId = this.currentTokenProfileId;
    this.options.tokenExpiresAt = this.currentTokenExpiresAt;

    this.http.setToken(token);
    this.bus.setToken(token);

    // A fresh token clears the latch set by an earlier liveness auth failure.
    this.httpLivenessAuthFailed = false;

    this.pendingTimelineEvent = {
      status: "runtime-token-renewed",
      id: record.jti,
      timestamp: Date.now(),
    };
    this.bus.reconnectTransport("runtime token renewed");
    this.publishStatus();
    this.scheduleRuntimeTokenRenewal();
  } catch (error) {
    this.logRuntimeTokenRenewalFailure(error);
    this.pendingTimelineEvent = {
      status: "runtime-token-renewal-failed",
      timestamp: Date.now(),
    };
    this.publishStatus();
    this.scheduleRuntimeTokenRenewal(TOKEN_RENEW_RETRY_MS);
  } finally {
    this.tokenRenewInFlight = false;
  }
}
|
|
801
|
+
|
|
802
|
+
/**
 * Writes a rate-limited diagnostic for a failed runtime token renewal.
 *
 * A failure with the same key as the previously logged one is suppressed while
 * it falls within `HTTP_LIVENESS_LOG_INTERVAL_MS` of that log entry.
 *
 * @param error - The value caught while renewing the token.
 */
private logRuntimeTokenRenewalFailure(error: unknown): void {
  const key = httpErrorKey(error);
  const now = Date.now();
  const previous = this.tokenRenewLastLog;
  if (previous && previous.key === key && now - previous.at < HTTP_LIVENESS_LOG_INTERVAL_MS) {
    return;
  }
  this.tokenRenewLastLog = { key, at: now };
  const detail = String(error);
  this.logRunnerDiagnostic(`[runner] runtime token renewal failed: ${detail}`);
}
|
|
812
|
+
|
|
813
|
+
/**
 * Builds the runtime metadata object for heartbeats.
 *
 * The context state comes from the provider process when available, otherwise
 * from the context probe metrics. The result always contains
 * `providerCapabilities`; `tmuxSession`, `tmuxSocket` and `context` are added
 * only when they have a value.
 *
 * @returns The metadata object (this implementation never returns `undefined`).
 */
private heartbeatRuntimeMeta(): Record<string, unknown> | undefined {
  const processContext = this.latestProcessContext();
  const probeMetrics = this.latestProbeMetrics();
  const probeContext = probeMetrics ? contextStateFromProbeMetrics(probeMetrics) : undefined;
  const context = processContext ?? probeContext;
  const tmuxSession = this.providerTerminalSession();
  const tmuxSocket = this.providerTerminalSocket();

  // Only forward probe model info when the probe reported a model or an effort.
  let probeModel: ProbeModelInfo | undefined;
  if (probeMetrics?.model || probeMetrics?.effort) {
    probeModel = { model: probeMetrics.model, effort: probeMetrics.effort };
  }

  const contextStats = context
    ? { source: context.source, confidence: context.confidence }
    : undefined;

  const meta: Record<string, unknown> = {
    providerCapabilities: runtimeProviderCapabilities(this.options, contextStats, probeModel),
  };
  if (tmuxSession) meta.tmuxSession = tmuxSession;
  if (tmuxSocket) meta.tmuxSocket = tmuxSocket;
  if (context) meta.context = context;
  return meta;
}
|
|
835
|
+
|
|
836
|
+
/**
 * Returns the tmux session name recorded in the provider process metadata,
 * or `undefined` when there is no process or the value is not a string.
 */
private providerTerminalSession(): string | undefined {
  const session = this.process?.meta?.tmuxSession;
  if (typeof session !== "string") return undefined;
  return session;
}
|
|
839
|
+
|
|
840
|
+
/**
 * Returns the tmux socket recorded in the provider process metadata,
 * or `undefined` when there is no process or the value is not a string.
 */
private providerTerminalSocket(): string | undefined {
  const socket = this.process?.meta?.tmuxSocket;
  if (typeof socket !== "string") return undefined;
  return socket;
}
|
|
843
|
+
|
|
844
|
+
/**
 * Reads the context state exposed by the provider process metadata.
 *
 * A `getContext` function on the metadata is tried first; if it is absent or its
 * result is not a valid context state, the static `context` value is used.
 *
 * @returns A validated context state, or `undefined` when none is available.
 */
private latestProcessContext(): ContextState | undefined {
  const getContext = this.process?.meta?.getContext;
  if (typeof getContext === "function") {
    const live = getContext();
    if (isContextState(live)) return live;
  }

  const stored = this.process?.meta?.context;
  if (isContextState(stored)) return stored;
  return undefined;
}
|
|
853
|
+
|
|
854
|
+
/**
 * Reads the context probe state for this agent, using the directory named by
 * `AGENT_RELAY_CONTEXT_STATE_DIR` (which may be unset).
 */
private latestProbeMetrics() {
  return readContextProbeState(this.agentId, process.env.AGENT_RELAY_CONTEXT_STATE_DIR);
}
|
|
858
|
+
|
|
323
859
|
private matchesMessage(message: Message): boolean {
|
|
324
860
|
return runnerMessageMatches(message, {
|
|
325
861
|
agentId: this.agentId,
|
|
@@ -329,8 +865,61 @@ export class AgentRunner {
|
|
|
329
865
|
capabilities: this.options.capabilities,
|
|
330
866
|
defaultTags: this.options.providerConfig.defaultTags,
|
|
331
867
|
defaultCapabilities: this.options.providerConfig.defaultCapabilities,
|
|
868
|
+
allowBroadTargets: this.options.agentProfile?.relay?.context !== false,
|
|
332
869
|
});
|
|
333
870
|
}
|
|
871
|
+
|
|
872
|
+
/**
 * Starts the interval that renews active task claims every
 * `CLAIM_RENEW_INTERVAL_MS`. Does nothing when the interval is already running.
 */
private startClaimRenewer(): void {
  if (!this.claimRenewTimer) {
    const renew = (): void => {
      void this.renewActiveTaskClaims();
    };
    this.claimRenewTimer = setInterval(renew, CLAIM_RENEW_INTERVAL_MS);
  }
}
|
|
878
|
+
|
|
879
|
+
/**
 * Renews every active task claim over HTTP and refreshes the matching message
 * and task claims in the claim tracker with the new expiry.
 *
 * A failed renewal request is treated as "not renewed" and the claim is left
 * untouched. Status is published once after all claims have been processed.
 */
private async renewActiveTaskClaims(): Promise<void> {
  for (const claim of this.activeTaskClaims.values()) {
    const renewed = await this.http
      .renewMessageClaim(claim.messageId, this.agentId)
      .catch(() => ({ ok: false, claimExpiresAt: undefined }));
    if (!renewed.ok) continue;

    // The claim may have been completed or cleared while the renewal request was
    // in flight. Re-registering it then would leave a claim in the tracker that
    // nothing finishes, so only refresh claims that are still active.
    if (!this.activeTaskClaims.has(claim.messageId)) continue;

    this.claims.startClaim("message", String(claim.messageId), renewed.claimExpiresAt);
    this.claims.startClaim("task", String(claim.taskId), renewed.claimExpiresAt);
  }
  this.publishStatus();
}
|
|
889
|
+
|
|
890
|
+
/**
 * Marks as done every active task claim for which the provider was observed busy.
 *
 * Each task is updated to `done`; only after a successful update is the claim
 * removed from the active set and finished in the claim tracker. A failed update
 * is logged and the claim is left in place. Status is published when at least
 * one claim was a candidate.
 */
private async completeObservedTaskClaims(): Promise<void> {
  const observed = [...this.activeTaskClaims.values()].filter((claim) => claim.observedProviderBusy);
  for (const claim of observed) {
    try {
      await this.updateTaskStatus(claim.taskId, {
        status: "done",
        agentId: this.agentId,
        body: `Runner observed provider completion for message #${claim.messageId}`,
        metadata: { messageId: claim.messageId, completedBy: "runner" },
      });
    } catch (error) {
      console.error(`[runner] task ${claim.taskId} completion update failed: ${error}`);
      continue;
    }
    this.activeTaskClaims.delete(claim.messageId);
    this.claims.finishClaim("message", String(claim.messageId));
    this.claims.finishClaim("task", String(claim.taskId));
  }
  if (observed.length > 0) this.publishStatus();
}
|
|
911
|
+
|
|
912
|
+
/**
 * Forwards a task status update to the HTTP client and discards its result.
 *
 * @param taskId - The task to update.
 * @param input - The status payload passed through unchanged.
 */
private async updateTaskStatus(taskId: number, input: TaskStatusInput): Promise<void> {
  const { http } = this;
  await http.updateTaskStatus(taskId, input);
}
|
|
915
|
+
|
|
916
|
+
/**
 * Releases the claims held for a message.
 *
 * The message claim is always finished. When the message carries a task id, the
 * entry in the active task claims and the task claim are released as well.
 *
 * @param message - The message whose claims should be cleared.
 */
private clearActiveClaim(message: Message): void {
  const taskId = taskIdFromMessage(message);
  this.claims.finishClaim("message", String(message.id));
  if (taskId) {
    this.activeTaskClaims.delete(message.id);
    this.claims.finishClaim("task", String(taskId));
  }
}
|
|
334
923
|
}
|
|
335
924
|
|
|
336
925
|
export function runnerMessageMatches(message: Pick<Message, "to" | "resolvedToAgent">, target: {
|
|
@@ -341,10 +930,13 @@ export function runnerMessageMatches(message: Pick<Message, "to" | "resolvedToAg
|
|
|
341
930
|
capabilities: string[];
|
|
342
931
|
defaultTags: string[];
|
|
343
932
|
defaultCapabilities: string[];
|
|
933
|
+
allowBroadTargets?: boolean;
|
|
344
934
|
}): boolean {
|
|
345
935
|
if (message.resolvedToAgent === target.agentId) return true;
|
|
346
|
-
if (message.to === target.agentId
|
|
936
|
+
if (message.to === target.agentId) return true;
|
|
347
937
|
if (target.label && message.to === `label:${target.label}`) return true;
|
|
938
|
+
if (target.allowBroadTargets === false) return false;
|
|
939
|
+
if (message.to === "broadcast") return true;
|
|
348
940
|
if (message.to.startsWith("tag:")) {
|
|
349
941
|
const tag = message.to.slice("tag:".length);
|
|
350
942
|
return target.tags.includes(tag) || target.defaultTags.includes(tag) || tag === target.provider;
|
|
@@ -356,6 +948,11 @@ export function runnerMessageMatches(message: Pick<Message, "to" | "resolvedToAg
|
|
|
356
948
|
return false;
|
|
357
949
|
}
|
|
358
950
|
|
|
951
|
+
/**
 * Extracts the task id from a message payload.
 *
 * @param message - A message (only `payload` is read).
 * @returns `payload.taskId` when it is a safe integer, otherwise `undefined`.
 */
export function taskIdFromMessage(message: Pick<Message, "payload">): number | undefined {
  const candidate = message.payload?.taskId;
  if (!Number.isSafeInteger(candidate)) return undefined;
  return candidate as number;
}
|
|
955
|
+
|
|
359
956
|
/**
 * Splits a comma-separated string into trimmed, non-empty tags.
 *
 * @param raw - The raw list; `undefined` or an empty string yields no tags.
 * @returns The tags in their original order.
 */
function csvTags(raw: string | undefined): string[] {
  const tags: string[] = [];
  for (const piece of (raw || "").split(",")) {
    const tag = piece.trim();
    if (tag) tags.push(tag);
  }
  return tags;
}
|
|
@@ -381,11 +978,51 @@ export function runnerShouldResolveProviderExit(status: SemanticStatus, exitComm
|
|
|
381
978
|
return !exitCommandInProgress && (status === "offline" || status === "error");
|
|
382
979
|
}
|
|
383
980
|
|
|
981
|
+
/**
 * Decides whether the runner should restart the provider after it exited.
 *
 * A restart is warranted only when the status is `offline`, no exit command,
 * stop or restart is in progress, and the runner is headless with a terminal
 * session. `input.provider` is accepted but not consulted.
 *
 * @param status - The current semantic status.
 * @param input - Flags describing the runner state.
 */
export function runnerShouldRestartUnexpectedProviderExit(
  status: SemanticStatus,
  input: {
    exitCommandInProgress: boolean;
    stopped: boolean;
    restartInProgress: boolean;
    provider?: string;
    headless: boolean;
    hasTerminalSession: boolean;
  },
): boolean {
  if (status !== "offline") return false;
  if (input.exitCommandInProgress || input.stopped || input.restartInProgress) return false;
  return input.headless && input.hasTerminalSession;
}
|
|
999
|
+
|
|
1000
|
+
/**
 * Resolves the timeout for a command from its parameters.
 *
 * @param params - Command parameters; `timeoutMs` is read from it.
 * @param fallback - Value returned when `timeoutMs` is not a positive safe integer.
 * @returns `timeoutMs` capped at 60 seconds, or `fallback` (which is not capped).
 */
function commandTimeoutMs(params: Record<string, unknown>, fallback = 10_000): number {
  const requested = params.timeoutMs;
  if (typeof requested === "number" && Number.isSafeInteger(requested) && requested > 0) {
    return Math.min(requested, 60_000);
  }
  return fallback;
}
|
|
1005
|
+
|
|
384
1006
|
/**
 * Tells whether a delivery failure is worth logging.
 *
 * @param error - The caught value; an `Error` is compared by its message, anything
 *   else by its string form.
 * @returns `false` only for the "no Claude monitor connected" failure.
 */
export function shouldLogDeliveryFailure(error: unknown): boolean {
  let text: string;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }
  return !(text === "no Claude monitor connected");
}
|
|
388
1010
|
|
|
1011
|
+
/**
 * Computes how long to wait before renewing a runtime token.
 *
 * Renewal is aimed ahead of expiry by a lead of 20% of the remaining lifetime
 * (at least one minute, at most one hour), minus a random jitter of up to half
 * the lead (capped at 30 seconds). The result is never below one second.
 *
 * @param expiresAtSeconds - Token expiry as a Unix timestamp in seconds.
 * @param nowMs - Current time in milliseconds.
 * @returns The delay in milliseconds, or `undefined` when the token has already
 *   expired or the remaining lifetime is not finite.
 */
export function runtimeTokenRenewDelayMs(expiresAtSeconds: number, nowMs = Date.now()): number | undefined {
  const remainingMs = expiresAtSeconds * 1000 - nowMs;
  if (!Number.isFinite(remainingMs) || remainingMs <= 0) return undefined;

  const proportionalLead = Math.floor(remainingMs * 0.2);
  const lead = Math.min(60 * 60 * 1000, Math.max(60_000, proportionalLead));

  const jitterCeiling = Math.min(30_000, Math.max(0, lead / 2));
  const jitter = Math.floor(Math.random() * jitterCeiling);

  return Math.max(1_000, remainingMs - lead - jitter);
}
|
|
1019
|
+
|
|
1020
|
+
/**
 * Turns a desired renewal delay into a timer plan.
 *
 * @param delayMs - The desired delay in milliseconds.
 * @returns `undefined` for a non-finite or non-positive delay. A delay above
 *   `MAX_TIMER_DELAY_MS` is capped to it with `renew: false`, meaning the timer
 *   should only reschedule; otherwise the delay is used as-is with `renew: true`.
 */
export function runtimeTokenRenewTimerSchedule(delayMs: number): { delayMs: number; renew: boolean } | undefined {
  const usable = Number.isFinite(delayMs) && delayMs > 0;
  if (!usable) return undefined;

  return delayMs > MAX_TIMER_DELAY_MS
    ? { delayMs: MAX_TIMER_DELAY_MS, renew: false }
    : { delayMs, renew: true };
}
|
|
1025
|
+
|
|
389
1026
|
function lifecycleCapabilities(): Record<string, true> {
|
|
390
1027
|
return {
|
|
391
1028
|
shutdownHard: true,
|
|
@@ -394,3 +1031,137 @@ function lifecycleCapabilities(): Record<string, true> {
|
|
|
394
1031
|
transportReconnect: true,
|
|
395
1032
|
};
|
|
396
1033
|
}
|
|
1034
|
+
|
|
1035
|
+
/**
 * Model details taken from the context probe metrics. They are used as a
 * fallback when the runner options do not specify a model or effort.
 */
interface ProbeModelInfo {
  /** Model name reported by the probe, if any. */
  model?: string;
  /** Effort setting reported by the probe, if any. */
  effort?: string;
}
|
|
1039
|
+
|
|
1040
|
+
/**
 * Builds the provider capability report advertised by the runner.
 *
 * @param options - Runner options; provider, headless flag, model, effort,
 *   approval mode and start time are read from it.
 * @param contextStats - Source and confidence of the context statistics, if any.
 * @param probeModel - Model info from the context probe, used when the options
 *   do not set a model or effort.
 * @returns The capability object covering lifecycle, model, session, context
 *   and terminal sections.
 */
function runtimeProviderCapabilities(options: RunnerOptions, contextStats?: { source: "api" | "statusline" | "hook" | "estimate"; confidence: "exact" | "reported" | "estimated" }, probeModel?: ProbeModelInfo): ProviderCapabilities {
  const model = options.model ?? probeModel?.model;
  const effort = options.effort ?? probeModel?.effort;

  // The model counts as provider-sourced only when the options leave it unset
  // and the probe supplied one.
  let modelSource: "runtime" | "provider" = "runtime";
  if (!options.model && probeModel?.model) modelSource = "provider";
  const modelConfidence = options.model || probeModel?.model ? "reported" : "unknown";

  // For Claude, hard shutdown/restart is advertised only in headless mode.
  const hardLifecycle = options.provider === "claude" ? options.headless : true;
  const writable = options.approvalMode !== "read-only";
  const reportedAt = options.startedAt;

  return {
    lifecycle: {
      managed: true,
      shutdownHard: hardLifecycle,
      restartHard: hardLifecycle,
      semanticStatus: true,
      reconnect: true,
    },
    model: {
      provider: options.provider,
      alias: model,
      id: model,
      effort,
      source: modelSource,
      confidence: modelConfidence,
      lastUpdatedAt: reportedAt,
    },
    session: {
      approvalMode: options.approvalMode,
      fileRead: true,
      fileWrite: writable,
      shell: writable,
      source: "runtime",
      confidence: "reported",
      lastUpdatedAt: reportedAt,
    },
    ...runtimeProviderContextCapabilities(options, contextStats),
    ...runtimeProviderTerminalCapabilities(options),
    source: "runtime",
    confidence: "reported",
    lastUpdatedAt: reportedAt,
  };
}
|
|
1077
|
+
|
|
1078
|
+
/**
 * Describes the terminal capabilities for the configured provider.
 *
 * @param options - Runner options; `provider` and `headless` are read.
 * @returns An `attach` terminal section for Codex, a `live` section for headless
 *   Claude, and an empty object for anything else.
 */
function runtimeProviderTerminalCapabilities(options: RunnerOptions): Pick<ProviderCapabilities, "terminal"> {
  const { provider, headless } = options;

  if (provider === "codex") {
    return {
      terminal: {
        attach: { create: true, read: true, write: true, detach: true },
      },
    };
  }

  if (provider === "claude" && headless) {
    return {
      terminal: {
        live: { read: true, write: true },
      },
    };
  }

  return {};
}
|
|
1103
|
+
|
|
1104
|
+
/**
 * Summarizes an applied agent profile as plain metadata.
 *
 * Instruction text is reduced to flags and counts, and skills/plugins to the
 * number of enabled entries. A projection report is included only for the
 * `claude` and `codex` providers; otherwise `projection` is `undefined`.
 *
 * @param provider - The provider the profile is applied to.
 * @param profile - The agent profile to summarize.
 */
function appliedAgentProfileMetadata(provider: string, profile: AgentProfile): Record<string, unknown> {
  let projection: ReturnType<typeof agentProfileProjectionReport> | undefined;
  if (provider === "claude" || provider === "codex") {
    projection = agentProfileProjectionReport({ provider, profile });
  }

  const { instructions, skills, plugins, mcp, hooks } = profile;

  return {
    name: profile.name,
    base: profile.base,
    provider: profile.provider ?? "any",
    relay: profile.relay,
    instructions: {
      repoInstructions: instructions.repoInstructions,
      globalInstructions: instructions.globalInstructions,
      appendCount: instructions.append.length,
      hasSystem: Boolean(instructions.system),
    },
    skills: skills.filter((skill) => skill.enabled).length,
    plugins: plugins.filter((plugin) => plugin.enabled).length,
    mcp: { mode: mcp.mode },
    hooks: { mode: hooks.mode },
    permissions: profile.permissions,
    projection,
  };
}
|
|
1127
|
+
|
|
1128
|
+
/**
 * Describes the context-management capabilities for the configured provider.
 *
 * `inject` is always advertised. `compact` and `clear` are advertised for Codex
 * and for headless Claude. `stats` is included when context statistics are given.
 *
 * @param options - Runner options; `provider` and `headless` are read.
 * @param contextStats - Source and confidence of the context statistics, if any.
 * @returns An object with a `context` section. Because `inject` is always set,
 *   the section is never empty.
 */
function runtimeProviderContextCapabilities(options: RunnerOptions, contextStats?: { source: "api" | "statusline" | "hook" | "estimate"; confidence: "exact" | "reported" | "estimated" }): Pick<ProviderCapabilities, "context"> {
  const context: NonNullable<ProviderCapabilities["context"]> = {};
  if (contextStats) context.stats = contextStats;
  if (options.provider === "codex" || (options.provider === "claude" && options.headless)) {
    context.compact = true;
    context.clear = true;
  }
  context.inject = true;
  // The previous emptiness check could never take its "empty" branch, since
  // `inject` is set unconditionally above; return the section directly.
  return { context };
}
|
|
1138
|
+
|
|
1139
|
+
/**
 * Extracts the provider state attached to the first `provider-turn` work item.
 *
 * @param activeWork - The active work items to inspect.
 * @returns The `metadata.providerState` of the first `provider-turn` item when it
 *   is a non-array object; otherwise `null`. Later `provider-turn` items are not
 *   consulted.
 */
function providerStateFromActiveWork(activeWork: Array<{ kind: string; metadata?: Record<string, unknown> }>): Record<string, unknown> | null {
  for (const item of activeWork) {
    if (item.kind !== "provider-turn") continue;
    const candidate = item.metadata?.providerState;
    if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) return null;
    return candidate as Record<string, unknown>;
  }
  return null;
}
|
|
1144
|
+
|
|
1145
|
+
/**
 * Tells whether a caught value represents an HTTP auth failure.
 *
 * @param error - The caught value.
 * @returns `true` when `error` is an object whose `status` is 401 or 403.
 */
function isHttpAuthError(error: unknown): boolean {
  if (typeof error !== "object" || error === null) return false;
  const { status } = error as { status?: unknown };
  return status === 401 || status === 403;
}
|
|
1149
|
+
|
|
1150
|
+
/**
 * Derives a key that identifies a failure for log rate-limiting.
 *
 * @param error - The caught value.
 * @returns `status:<n>` when `error` is an object with a numeric `status`,
 *   otherwise the string form of `error`.
 */
function httpErrorKey(error: unknown): string {
  if (typeof error === "object" && error !== null) {
    const { status } = error as { status?: unknown };
    if (typeof status === "number") return `status:${status}`;
  }
  return String(error);
}
|
|
1155
|
+
|
|
1156
|
+
/**
 * Makes a string safe to embed in a log file name.
 *
 * @param value - The raw name.
 * @returns `value` with each run of characters outside `a-z A-Z 0-9 _ . -`
 *   replaced by a single underscore, truncated to 180 characters.
 */
function safeLogName(value: string): string {
  const sanitized = value.replace(/[^a-zA-Z0-9_.-]+/g, "_");
  return sanitized.slice(0, 180);
}
|
|
1159
|
+
|
|
1160
|
+
/**
 * Type guard for context state objects.
 *
 * Only the primitive type of four fields is checked: `utilization` must be a
 * number, and `lifecycleState`, `source` and `confidence` must be strings. The
 * values of those fields are not validated.
 *
 * @param value - The value to test.
 */
function isContextState(value: unknown): value is ContextState {
  if (!value) return false;
  if (typeof value !== "object") return false;
  if (Array.isArray(value)) return false;

  const candidate = value as Record<string, unknown>;
  if (typeof candidate.utilization !== "number") return false;
  if (typeof candidate.lifecycleState !== "string") return false;
  if (typeof candidate.source !== "string") return false;
  return typeof candidate.confidence === "string";
}
|