agent-relay-orchestrator 0.10.19 → 0.10.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/src/api.ts +542 -40
- package/src/artifact-proxy.ts +173 -0
- package/src/control.ts +156 -18
- package/src/index.ts +53 -7
- package/src/provider-probe.ts +184 -0
- package/src/recovery.ts +1 -1
- package/src/relay.ts +106 -15
- package/src/self-supervision.ts +82 -0
- package/src/self-upgrade.ts +143 -0
- package/src/spawn.ts +1267 -0
- package/src/version.ts +30 -1
- package/src/workspace-probe.ts +513 -0
- package/src/tmux.ts +0 -298
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join, relative, resolve } from "node:path";
|
|
4
|
+
import type { OrchestratorConfig } from "./config";
|
|
5
|
+
|
|
6
|
+
// Accepted artifact ids: 1 to 160 characters drawn from ASCII letters, digits, ".", "_" and "-".
const SAFE_ARTIFACT_ID = /^[a-zA-Z0-9._-]{1,160}$/;
|
|
7
|
+
// Matches "/api/artifacts/<segment>/content"; capture group 1 is the single path segment in the middle.
const CONTENT_ROUTE = /^\/api\/artifacts\/([^/]+)\/content$/;
|
|
8
|
+
// Response header names that are copied into the on-disk cache metadata when artifact content is cached.
const CACHEABLE_RESPONSE_HEADERS = [
|
|
9
|
+
"content-type",
|
|
10
|
+
"content-disposition",
|
|
11
|
+
"x-artifact-digest",
|
|
12
|
+
"etag",
|
|
13
|
+
"last-modified",
|
|
14
|
+
"cache-control",
|
|
15
|
+
];
|
|
16
|
+
// Request header names copied from the incoming request onto the outgoing relay request.
// The relay token header is handled separately when the forwarded header set is built.
const FORWARDED_REQUEST_HEADERS = [
|
|
17
|
+
"content-type",
|
|
18
|
+
"x-artifact-filename",
|
|
19
|
+
"x-artifact-digest",
|
|
20
|
+
"x-artifact-kind",
|
|
21
|
+
"x-artifact-sensitivity",
|
|
22
|
+
"x-artifact-expires-at",
|
|
23
|
+
];
|
|
24
|
+
// Default allow-list of relay response header names that are passed back to the proxy's caller.
const RESPONSE_HEADERS = [
|
|
25
|
+
"content-type",
|
|
26
|
+
"content-length",
|
|
27
|
+
"content-disposition",
|
|
28
|
+
"x-artifact-digest",
|
|
29
|
+
"etag",
|
|
30
|
+
"last-modified",
|
|
31
|
+
"cache-control",
|
|
32
|
+
];
|
|
33
|
+
|
|
34
|
+
// Shape of the JSON metadata file stored next to each cached artifact's content file.
interface CacheMeta {
|
|
35
|
+
// Response headers to replay on a cache hit, keyed by header name.
headers: Record<string, string>;
|
|
36
|
+
// Date.now() timestamp (epoch milliseconds) taken when the entry was written.
cachedAt: number;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
 * Builds the loopback base URL of the artifact proxy routes.
 *
 * @param config - Only `apiPort` is read.
 * @returns `http://127.0.0.1:<apiPort>/api/artifacts`
 */
export function artifactProxyBaseUrl(config: Pick<OrchestratorConfig, "apiPort">): string {
  const { apiPort } = config;
  const origin = `http://127.0.0.1:${apiPort}`;
  return `${origin}/api/artifacts`;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Resolves the directory that holds cached artifacts.
 *
 * Reads `AGENT_RELAY_ARTIFACT_CACHE_DIR`, falling back to
 * `~/.agent-relay/cache/artifacts` when it is unset or empty. A value of
 * exactly `~`, or one starting with `~/`, is expanded against the current
 * user's home directory; anything else is returned untouched.
 */
function artifactCacheRoot(): string {
  const raw = process.env.AGENT_RELAY_ARTIFACT_CACHE_DIR || "~/.agent-relay/cache/artifacts";
  const isBareHome = raw === "~";
  const isHomeRelative = raw.startsWith("~/");
  if (!isBareHome && !isHomeRelative) {
    return raw;
  }
  // Drop the leading "~/" before joining onto the home directory.
  return isBareHome ? homedir() : join(homedir(), raw.slice(2));
}
|
|
49
|
+
|
|
50
|
+
/**
 * Computes the on-disk locations used to cache one artifact.
 *
 * @param id - Artifact id; must match `SAFE_ARTIFACT_ID`.
 * @param root - Cache root directory (defaults to `artifactCacheRoot()`).
 * @returns The per-artifact directory plus its `content` and `meta.json` files.
 * @throws Error when the id is not a safe id, or when the resolved directory
 *   is the cache root itself or lies outside it.
 */
function cachePaths(id: string, root = artifactCacheRoot()): { dir: string; content: string; meta: string } {
  if (!SAFE_ARTIFACT_ID.test(id)) throw new Error("invalid artifact id");
  const base = resolve(root);
  const dir = resolve(base, id);
  const rel = relative(base, dir);
  // Only a real parent-directory step escapes the root. A plain prefix test on
  // ".." would also reject legitimate ids that merely begin with two dots
  // (for example "..foo"), which resolve to a directory inside the root.
  const climbsOut = rel === ".." || rel.startsWith("../") || rel.startsWith("..\\");
  // "" means the id resolved to the root itself (id "."); a leading "/" means
  // the relative path could not be expressed below the root.
  if (climbsOut || rel.startsWith("/") || rel === "") throw new Error("artifact cache path escapes root");
  return {
    dir,
    content: join(dir, "content"),
    meta: join(dir, "meta.json"),
  };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Builds the header set returned to the proxy's caller.
 *
 * Copies each header listed in `names` whose value in `source` is non-empty,
 * then sets `Access-Control-Allow-Origin: *`.
 *
 * @param source - Headers of the upstream response.
 * @param names - Header names to carry over (defaults to `RESPONSE_HEADERS`).
 */
function responseHeaders(source: Headers, names = RESPONSE_HEADERS): Headers {
  const picked = new Headers();
  names.forEach((name) => {
    const value = source.get(name);
    if (!value) return; // absent or empty: leave it out
    picked.set(name, value);
  });
  picked.set("Access-Control-Allow-Origin", "*");
  return picked;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Builds the headers sent to the relay for a proxied request.
 *
 * Only headers named in `FORWARDED_REQUEST_HEADERS` with a non-empty value
 * are copied. The relay token comes from `config.token` when set, otherwise
 * from the caller's `x-agent-relay-token` header; it is omitted if neither
 * yields a value.
 */
function forwardHeaders(req: Request, config: OrchestratorConfig): Headers {
  const incoming = req.headers;
  const outgoing = new Headers();
  FORWARDED_REQUEST_HEADERS.forEach((name) => {
    const value = incoming.get(name);
    if (value) outgoing.set(name, value);
  });
  const relayToken = config.token || incoming.get("x-agent-relay-token");
  if (relayToken) {
    outgoing.set("X-Agent-Relay-Token", relayToken);
  }
  return outgoing;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Maps an incoming request URL onto the relay.
 *
 * Keeps the request's path and query string and resolves them against
 * `config.relayUrl`.
 */
function relayUrl(req: Request, config: OrchestratorConfig): URL {
  const { pathname, search } = new URL(req.url);
  const pathAndQuery = `${pathname}${search}`;
  return new URL(pathAndQuery, config.relayUrl);
}
|
|
88
|
+
|
|
89
|
+
/**
 * Re-issues the incoming request against the relay.
 *
 * Uses the same method, the filtered header set from `forwardHeaders`, and
 * the request body for every method other than GET and HEAD.
 */
async function relayFetch(req: Request, config: OrchestratorConfig): Promise<Response> {
  const target = relayUrl(req, config);
  const { method } = req;
  const hasNoBody = method === "GET" || method === "HEAD";
  return fetch(target, {
    method,
    headers: forwardHeaders(req, config),
    body: hasNoBody ? undefined : req.body,
  });
}
|
|
96
|
+
|
|
97
|
+
/**
 * Serves artifact content from the local cache, if present.
 *
 * @param id - Artifact id.
 * @returns A 200 response built from the cached content and headers, or
 *   `null` when the id is not cacheable, either cache file is missing, or the
 *   entry cannot be read or parsed (the caller then falls back to the relay).
 */
function cachedContentResponse(id: string): Response | null {
  let paths: ReturnType<typeof cachePaths>;
  try {
    paths = cachePaths(id);
  } catch {
    return null;
  }
  if (!existsSync(paths.content) || !existsSync(paths.meta)) return null;
  try {
    const meta = JSON.parse(readFileSync(paths.meta, "utf8")) as CacheMeta;
    const stat = statSync(paths.content);
    const headers = new Headers(meta.headers);
    headers.set("Content-Length", String(stat.size));
    // Responses relayed live always carry this header; without it a
    // cross-origin client that succeeded on the cache miss would be blocked
    // on the cache hit.
    headers.set("Access-Control-Allow-Origin", "*");
    return new Response(readFileSync(paths.content), { status: 200, headers });
  } catch {
    // Best effort: an unreadable or corrupt entry is treated as a cache miss.
    return null;
  }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Stores artifact bytes and their replayable headers in the cache.
 *
 * Creates the per-artifact directory (mode 0o700) when needed, writes the
 * content file, then writes `meta.json` containing the cacheable headers, a
 * `content-length` derived from the bytes, and the write timestamp.
 *
 * @throws Whatever `cachePaths` or the file-system calls throw.
 */
function writeCachedContent(id: string, bytes: ArrayBuffer, headers: Headers): void {
  const { dir, content, meta } = cachePaths(id);
  mkdirSync(dir, { recursive: true, mode: 0o700 });

  const kept: [string, string][] = [];
  for (const name of CACHEABLE_RESPONSE_HEADERS) {
    const value = headers.get(name);
    if (value) kept.push([name, value]);
  }
  const cachedHeaders: Record<string, string> = Object.fromEntries(kept);
  cachedHeaders["content-length"] = String(bytes.byteLength);

  writeFileSync(content, Buffer.from(bytes));
  const record = { headers: cachedHeaders, cachedAt: Date.now() } satisfies CacheMeta;
  writeFileSync(meta, JSON.stringify(record, null, 2) + "\n");
}
|
|
128
|
+
|
|
129
|
+
/**
 * Handles GET/HEAD for an artifact's content.
 *
 * GET is answered from the local cache when an entry exists. Otherwise the
 * request goes to the relay: HEAD and unsuccessful responses are passed
 * through, while a successful GET body is buffered, cached on a best-effort
 * basis, and returned.
 *
 * @param req - Incoming request.
 * @param config - Orchestrator configuration used to reach the relay.
 * @param id - Already validated artifact id.
 */
async function proxyContent(req: Request, config: OrchestratorConfig, id: string): Promise<Response> {
  if (req.method === "GET") {
    const cached = cachedContentResponse(id);
    if (cached) return cached;
  }

  const relay = await relayFetch(req, config);
  const headers = responseHeaders(relay.headers);
  if (req.method === "HEAD" || !relay.ok) {
    return new Response(req.method === "HEAD" ? null : relay.body, { status: relay.status, headers });
  }

  const bytes = await relay.arrayBuffer();
  // The body is re-sent from the buffered bytes, so advertise their length
  // rather than whatever Content-Length the relay reported.
  headers.set("content-length", String(bytes.byteLength));
  // Cache hits are always replayed with status 200, so only a plain 200 is
  // safe to store.
  if (relay.status === 200) {
    try {
      writeCachedContent(id, bytes, headers);
    } catch (e) {
      // A failed cache write must not fail the request itself.
      const reason = e instanceof Error ? e.message : String(e);
      console.error(`[orchestrator] artifact cache write failed for ${id}: ${reason}`);
    }
  }
  return new Response(bytes, { status: relay.status, headers });
}
|
|
149
|
+
|
|
150
|
+
/** Builds a JSON error response of the form `{ "error": message }`. */
function artifactJsonError(status: number, message: string): Response {
  return new Response(JSON.stringify({ error: message }), { status, headers: { "Content-Type": "application/json" } });
}

/**
 * Decodes a raw URL path segment into an artifact id.
 *
 * @returns The decoded id, or `null` when the segment is missing, contains a
 *   malformed percent-escape, or does not match `SAFE_ARTIFACT_ID`.
 */
function decodeArtifactId(rawSegment: string | undefined): string | null {
  if (!rawSegment) return null;
  try {
    const id = decodeURIComponent(rawSegment);
    return SAFE_ARTIFACT_ID.test(id) ? id : null;
  } catch {
    // decodeURIComponent throws URIError on malformed escapes such as "%E0%A4%A".
    return null;
  }
}

/**
 * Entry point of the artifact proxy.
 *
 * Routes:
 * - `GET|POST /api/artifacts`: passed through to the relay.
 * - `GET|HEAD /api/artifacts/<id>/content`: served via `proxyContent`.
 * - `GET|DELETE /api/artifacts/<id>`: passed through to the relay.
 *
 * An id that cannot be decoded or is not a safe id yields a 400 JSON error;
 * any other path or method yields a 404 JSON error.
 */
export async function proxyArtifactRequest(req: Request, config: OrchestratorConfig): Promise<Response> {
  const url = new URL(req.url);
  if (url.pathname === "/api/artifacts" && (req.method === "GET" || req.method === "POST")) {
    const relay = await relayFetch(req, config);
    return new Response(relay.body, { status: relay.status, headers: responseHeaders(relay.headers) });
  }

  const contentMatch = url.pathname.match(CONTENT_ROUTE);
  if (contentMatch && (req.method === "GET" || req.method === "HEAD")) {
    const id = decodeArtifactId(contentMatch[1]);
    if (id === null) return artifactJsonError(400, "invalid artifact id");
    return proxyContent(req, config, id);
  }

  const artifactMatch = url.pathname.match(/^\/api\/artifacts\/([^/]+)$/);
  if (artifactMatch && (req.method === "GET" || req.method === "DELETE")) {
    // The id is only validated here; the relay receives the original URL path.
    if (decodeArtifactId(artifactMatch[1]) === null) return artifactJsonError(400, "invalid artifact id");
    const relay = await relayFetch(req, config);
    return new Response(relay.body, { status: relay.status, headers: responseHeaders(relay.headers) });
  }

  return artifactJsonError(404, "Not found");
}
|
package/src/control.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import type { OrchestratorConfig } from "./config";
|
|
2
2
|
import type { ManagedAgentReport, RelayClient, RelayCommand } from "./relay";
|
|
3
|
-
import {
|
|
3
|
+
import { handleSelfUpgrade } from "./self-upgrade";
|
|
4
|
+
import { spawnAgent, stopSession, type SpawnOptions } from "./spawn";
|
|
5
|
+
import { cleanupWorkspace, mergeWorkspace, reconcileWorkspace } from "./workspace-probe";
|
|
4
6
|
|
|
5
7
|
interface ControlHandler {
|
|
6
8
|
handleCommand(command: RelayCommand): Promise<boolean>;
|
|
@@ -15,18 +17,7 @@ export function createControlHandler(
|
|
|
15
17
|
let managedAgents: ManagedAgentReport[] = [];
|
|
16
18
|
|
|
17
19
|
async function handleSpawn(ctrl: Record<string, any>): Promise<boolean> {
|
|
18
|
-
const opts
|
|
19
|
-
provider: ctrl.provider || "claude",
|
|
20
|
-
cwd: ctrl.cwd || config.baseDir,
|
|
21
|
-
label: ctrl.label,
|
|
22
|
-
approvalMode: ctrl.approvalMode || "guarded",
|
|
23
|
-
prompt: ctrl.prompt,
|
|
24
|
-
tags: Array.isArray(ctrl.tags) ? ctrl.tags.filter((item): item is string => typeof item === "string") : undefined,
|
|
25
|
-
capabilities: Array.isArray(ctrl.capabilities) ? ctrl.capabilities.filter((item): item is string => typeof item === "string") : undefined,
|
|
26
|
-
providerArgs: Array.isArray(ctrl.providerArgs) ? ctrl.providerArgs.filter((item): item is string => typeof item === "string") : undefined,
|
|
27
|
-
policyName: typeof ctrl.policyName === "string" ? ctrl.policyName : undefined,
|
|
28
|
-
spawnRequestId: typeof ctrl.spawnRequestId === "string" ? ctrl.spawnRequestId : undefined,
|
|
29
|
-
};
|
|
20
|
+
const opts = spawnOptionsFromControl(ctrl, config);
|
|
30
21
|
|
|
31
22
|
try {
|
|
32
23
|
const agent = await spawnAgent(opts, config);
|
|
@@ -40,17 +31,30 @@ export function createControlHandler(
|
|
|
40
31
|
}
|
|
41
32
|
|
|
42
33
|
async function handleShutdown(ctrl: Record<string, any>, restart = false): Promise<Record<string, unknown>> {
|
|
43
|
-
const
|
|
44
|
-
|
|
45
|
-
: managedAgents.find((agent) => agent.agentId === ctrl.agentId || (ctrl.policyName && agent.policyName === ctrl.policyName))?.tmuxSession;
|
|
34
|
+
const current = managedAgentShutdownTarget(managedAgents, ctrl);
|
|
35
|
+
const session = current?.sessionName ?? current?.tmuxSession;
|
|
46
36
|
if (!session) return { stopped: false, wasRunning: false };
|
|
47
|
-
const result = await stopSession(session, config, typeof ctrl.reason === "string" ? ctrl.reason : restart ? "restart" : "shutdown", ctrl.graceful !== false);
|
|
48
|
-
managedAgents = managedAgents.filter((agent) => agent.tmuxSession !== session);
|
|
37
|
+
const result = await stopSession(session, config, typeof ctrl.reason === "string" ? ctrl.reason : restart ? "restart" : "shutdown", ctrl.graceful !== false, shutdownTimeoutMs(ctrl));
|
|
38
|
+
managedAgents = managedAgents.filter((agent) => (agent.sessionName ?? agent.tmuxSession) !== session);
|
|
39
|
+
const restartSpawn = isRecord(ctrl.restartSpawn) ? ctrl.restartSpawn : undefined;
|
|
40
|
+
// A managed restart carries a fresh spawnRequestId in restartSpawn — keep it.
|
|
41
|
+
// Falling back to the live agent's params would reuse the stale id and break
|
|
42
|
+
// relay correlation, so drop it and let spawnAgent assign a new identity.
|
|
43
|
+
const restartSource = (restartSpawn ?? (current ? { ...current, spawnRequestId: undefined } : undefined)) as Record<string, any> | undefined;
|
|
44
|
+
let restarted: ManagedAgentReport | undefined;
|
|
45
|
+
if (restart && restartSource) {
|
|
46
|
+
restarted = await spawnAgent(spawnOptionsFromRestartSource(restartSource, config), config);
|
|
47
|
+
managedAgents.push(restarted);
|
|
48
|
+
console.error(`[orchestrator] Restarted ${restarted.provider} agent: ${restarted.tmuxSession}`);
|
|
49
|
+
}
|
|
49
50
|
return {
|
|
50
51
|
...result,
|
|
51
52
|
restart,
|
|
53
|
+
restarted: Boolean(restarted),
|
|
54
|
+
...(restarted ? { agent: restarted } : {}),
|
|
52
55
|
policyName: ctrl.policyName,
|
|
53
56
|
spawnRequestId: ctrl.spawnRequestId,
|
|
57
|
+
sessionName: session,
|
|
54
58
|
tmuxSession: session,
|
|
55
59
|
};
|
|
56
60
|
}
|
|
@@ -66,6 +70,44 @@ export function createControlHandler(
|
|
|
66
70
|
} else if (command.type === "agent.shutdown" || command.type === "agent.restart") {
|
|
67
71
|
const result = await handleShutdown(command.params, command.type === "agent.restart");
|
|
68
72
|
await relay.updateCommand(command.id, "succeeded", result);
|
|
73
|
+
} else if (command.type === "workspace.cleanup") {
|
|
74
|
+
const result = cleanupWorkspace({
|
|
75
|
+
id: typeof command.params.workspaceId === "string" ? command.params.workspaceId : undefined,
|
|
76
|
+
repoRoot: typeof command.params.repoRoot === "string" ? command.params.repoRoot : undefined,
|
|
77
|
+
worktreePath: typeof command.params.worktreePath === "string" ? command.params.worktreePath : undefined,
|
|
78
|
+
branch: typeof command.params.branch === "string" ? command.params.branch : undefined,
|
|
79
|
+
deleteBranch: command.params.deleteBranch !== false,
|
|
80
|
+
});
|
|
81
|
+
await relay.updateCommand(command.id, "succeeded", result);
|
|
82
|
+
} else if (command.type === "workspace.reconcile") {
|
|
83
|
+
const result = reconcileWorkspace({
|
|
84
|
+
id: typeof command.params.workspaceId === "string" ? command.params.workspaceId : undefined,
|
|
85
|
+
repoRoot: typeof command.params.repoRoot === "string" ? command.params.repoRoot : undefined,
|
|
86
|
+
worktreePath: typeof command.params.worktreePath === "string" ? command.params.worktreePath : undefined,
|
|
87
|
+
branch: typeof command.params.branch === "string" ? command.params.branch : undefined,
|
|
88
|
+
baseRef: typeof command.params.baseRef === "string" ? command.params.baseRef : undefined,
|
|
89
|
+
baseSha: typeof command.params.baseSha === "string" ? command.params.baseSha : undefined,
|
|
90
|
+
});
|
|
91
|
+
await relay.updateCommand(command.id, "succeeded", result);
|
|
92
|
+
} else if (command.type === "workspace.merge") {
|
|
93
|
+
const result = mergeWorkspace({
|
|
94
|
+
id: typeof command.params.workspaceId === "string" ? command.params.workspaceId : undefined,
|
|
95
|
+
repoRoot: typeof command.params.repoRoot === "string" ? command.params.repoRoot : undefined,
|
|
96
|
+
worktreePath: typeof command.params.worktreePath === "string" ? command.params.worktreePath : undefined,
|
|
97
|
+
branch: typeof command.params.branch === "string" ? command.params.branch : undefined,
|
|
98
|
+
baseRef: typeof command.params.baseRef === "string" ? command.params.baseRef : undefined,
|
|
99
|
+
baseSha: typeof command.params.baseSha === "string" ? command.params.baseSha : undefined,
|
|
100
|
+
strategy: command.params.strategy === "pr" || command.params.strategy === "rebase-ff" || command.params.strategy === "auto" ? command.params.strategy : undefined,
|
|
101
|
+
deleteBranch: command.params.deleteBranch !== false,
|
|
102
|
+
prTitle: typeof command.params.prTitle === "string" ? command.params.prTitle : undefined,
|
|
103
|
+
prBody: typeof command.params.prBody === "string" ? command.params.prBody : undefined,
|
|
104
|
+
});
|
|
105
|
+
await relay.updateCommand(command.id, "succeeded", result as unknown as Record<string, unknown>);
|
|
106
|
+
} else if (command.type === "orchestrator.upgrade") {
|
|
107
|
+
// Install + restart ourselves. Intentionally NOT marked "succeeded": the
|
|
108
|
+
// relay settles it by reconciling the version we report after we restart,
|
|
109
|
+
// since the success ack can't survive our own process teardown.
|
|
110
|
+
await handleSelfUpgrade(command, config, relay);
|
|
69
111
|
} else {
|
|
70
112
|
throw new Error(`unsupported orchestrator command: ${command.type}`);
|
|
71
113
|
}
|
|
@@ -87,3 +129,99 @@ export function createControlHandler(
|
|
|
87
129
|
|
|
88
130
|
return { handleCommand, getManagedAgents, setManagedAgents };
|
|
89
131
|
}
|
|
132
|
+
|
|
133
|
+
/**
 * Picks the managed agent a shutdown/restart command refers to.
 *
 * Selectors are tried in priority order and the first one present as a
 * non-empty string decides the result, even when it matches nothing:
 * 1. `sessionName`, else `tmuxSession` (matched against either session field),
 * 2. `agentId`,
 * 3. `spawnRequestId`,
 * 4. `policyName`.
 *
 * @returns The first matching agent, or `undefined` when the deciding
 *   selector matches no agent or no selector was supplied.
 */
export function managedAgentShutdownTarget(agents: ManagedAgentReport[], ctrl: Record<string, any>): ManagedAgentReport | undefined {
  const nonEmptyString = (value: unknown): string | undefined =>
    typeof value === "string" && value ? value : undefined;

  const session = nonEmptyString(ctrl.sessionName) ?? nonEmptyString(ctrl.tmuxSession);
  if (session) {
    return agents.find((agent) => agent.sessionName === session || agent.tmuxSession === session);
  }

  for (const key of ["agentId", "spawnRequestId", "policyName"] as const) {
    const wanted = nonEmptyString(ctrl[key]);
    if (wanted) return agents.find((agent) => agent[key] === wanted);
  }
  return undefined;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Reads the optional shutdown timeout from a control payload.
 *
 * @returns `ctrl.timeoutMs` capped at 60 000 ms when it is a positive safe
 *   integer; otherwise `undefined`.
 */
function shutdownTimeoutMs(ctrl: Record<string, any>): number | undefined {
  const requested = ctrl.timeoutMs;
  if (!Number.isSafeInteger(requested)) return undefined;
  if (!(requested > 0)) return undefined;
  return Math.min(requested, 60_000);
}
|
|
155
|
+
|
|
156
|
+
/**
 * Translates a spawn control payload into `SpawnOptions`.
 *
 * Defaults: provider `"claude"`, cwd `config.baseDir`, approval mode
 * `"guarded"`. `label` and `prompt` are passed through as received; the
 * remaining fields are kept only when they have the expected type and are
 * otherwise `undefined`.
 */
export function spawnOptionsFromControl(ctrl: Record<string, any>, config: OrchestratorConfig): SpawnOptions {
  const optionalString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  const options: SpawnOptions = {
    provider: ctrl.provider || "claude",
    cwd: ctrl.cwd || config.baseDir,
    rig: optionalString(ctrl.rig),
    model: modelFromControl(ctrl),
    effort: optionalString(ctrl.effort),
    profile: optionalString(ctrl.profile),
    workspaceMode: workspaceMode(ctrl.workspaceMode),
    agentProfile: isRecord(ctrl.agentProfile) ? ctrl.agentProfile : undefined,
    label: ctrl.label,
    agentId: optionalString(ctrl.agentId),
    approvalMode: ctrl.approvalMode || "guarded",
    prompt: ctrl.prompt,
    systemPromptAppend: optionalString(ctrl.systemPromptAppend),
    tags: stringArray(ctrl.tags),
    capabilities: stringArray(ctrl.capabilities),
    providerArgs: stringArray(ctrl.providerArgs),
    env: stringRecord(ctrl.env),
    policyName: optionalString(ctrl.policyName),
    spawnRequestId: optionalString(ctrl.spawnRequestId),
    automationId: optionalString(ctrl.automationId),
    automationRunId: optionalString(ctrl.automationRunId),
  };
  return options;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Builds `SpawnOptions` for restarting an agent.
 *
 * `restartSource` is either the `restartSpawn` payload of a restart command
 * or a copy of the current managed-agent record. Every field is type-checked:
 * the provider is `"codex"` only when explicitly so and `"claude"` otherwise,
 * cwd falls back to `config.baseDir`, approval mode falls back to
 * `"guarded"`, and any other field of the wrong type becomes `undefined`.
 */
export function spawnOptionsFromRestartSource(restartSource: Record<string, any>, config: OrchestratorConfig): SpawnOptions {
  return {
    provider: restartSource.provider === "codex" ? "codex" : "claude",
    cwd: typeof restartSource.cwd === "string" ? restartSource.cwd : config.baseDir,
    // Carry the rig across a restart, mirroring spawnOptionsFromControl;
    // otherwise a restarted agent silently loses the rig it was spawned with.
    rig: typeof restartSource.rig === "string" ? restartSource.rig : undefined,
    model: modelFromControl(restartSource),
    effort: typeof restartSource.effort === "string" ? restartSource.effort : undefined,
    profile: typeof restartSource.profile === "string" ? restartSource.profile : undefined,
    workspaceMode: workspaceMode(restartSource.workspaceMode),
    agentProfile: isRecord(restartSource.agentProfile) ? restartSource.agentProfile : undefined,
    label: typeof restartSource.label === "string" ? restartSource.label : undefined,
    agentId: typeof restartSource.agentId === "string" ? restartSource.agentId : undefined,
    approvalMode: typeof restartSource.approvalMode === "string" ? restartSource.approvalMode : "guarded",
    prompt: typeof restartSource.prompt === "string" ? restartSource.prompt : undefined,
    systemPromptAppend: typeof restartSource.systemPromptAppend === "string" ? restartSource.systemPromptAppend : undefined,
    tags: stringArray(restartSource.tags),
    capabilities: stringArray(restartSource.capabilities),
    providerArgs: stringArray(restartSource.providerArgs),
    env: stringRecord(restartSource.env),
    policyName: typeof restartSource.policyName === "string" ? restartSource.policyName : undefined,
    spawnRequestId: typeof restartSource.spawnRequestId === "string" ? restartSource.spawnRequestId : undefined,
    automationId: typeof restartSource.automationId === "string" ? restartSource.automationId : undefined,
    automationRunId: typeof restartSource.automationRunId === "string" ? restartSource.automationRunId : undefined,
  };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Extracts the requested model name from a control payload.
 *
 * `providerModel` takes precedence over `model`; a value counts only when it
 * is a string (an empty string is accepted).
 */
function modelFromControl(ctrl: Record<string, any>): string | undefined {
  if (typeof ctrl.providerModel === "string") {
    return ctrl.providerModel;
  }
  if (typeof ctrl.model === "string") {
    return ctrl.model;
  }
  return undefined;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Narrows an unknown value to a string-to-string map.
 *
 * @returns A new object holding only the entries whose value is a string, or
 *   `undefined` when the input is not a plain (non-array) object or no entry
 *   qualifies.
 */
function stringRecord(value: unknown): Record<string, string> | undefined {
  if (!value || typeof value !== "object" || Array.isArray(value)) return undefined;
  const kept: [string, string][] = [];
  for (const [key, entryValue] of Object.entries(value)) {
    if (typeof entryValue === "string") kept.push([key, entryValue]);
  }
  if (kept.length === 0) return undefined;
  return Object.fromEntries(kept);
}
|
|
216
|
+
|
|
217
|
+
/**
 * Narrows an unknown value to an array of strings.
 *
 * @returns A new array with the non-string items dropped (possibly empty), or
 *   `undefined` when the input is not an array.
 */
function stringArray(value: unknown): string[] | undefined {
  if (!Array.isArray(value)) return undefined;
  const strings: string[] = [];
  for (const item of value) {
    if (typeof item === "string") strings.push(item);
  }
  return strings;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Validates a workspace mode.
 *
 * @returns The value itself when it is `"isolated"`, `"shared"` or
 *   `"inherit"`; otherwise `undefined`.
 */
function workspaceMode(value: unknown): SpawnOptions["workspaceMode"] {
  switch (value) {
    case "isolated":
    case "shared":
    case "inherit":
      return value;
    default:
      return undefined;
  }
}
|
|
224
|
+
|
|
225
|
+
/** Type guard: true for any non-null, non-array value whose `typeof` is `"object"`. */
function isRecord(value: unknown): value is Record<string, any> {
  if (!value) return false;
  if (typeof value !== "object") return false;
  return !Array.isArray(value);
}
|
package/src/index.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
import { loadConfig, initConfigFile } from "./config";
|
|
3
3
|
import { createRelayClient } from "./relay";
|
|
4
|
+
import type { ManagedSessionExitDiagnostics } from "./relay";
|
|
4
5
|
import { createControlHandler } from "./control";
|
|
5
|
-
import {
|
|
6
|
+
import { diagnoseSessionExit, isSessionAlive, refreshManagedAgentReport } from "./spawn";
|
|
6
7
|
import { startApiServer } from "./api";
|
|
7
8
|
import { recoverManagedAgents } from "./recovery";
|
|
9
|
+
import { ProviderProbeCache } from "./provider-probe";
|
|
8
10
|
|
|
9
11
|
const args = process.argv.slice(2);
|
|
10
12
|
|
|
@@ -36,7 +38,8 @@ Config file: ~/.agent-relay/orchestrator.json
|
|
|
36
38
|
}
|
|
37
39
|
|
|
38
40
|
const config = loadConfig();
|
|
39
|
-
const
|
|
41
|
+
const probeCache = new ProviderProbeCache(config);
|
|
42
|
+
const relay = createRelayClient(config, probeCache);
|
|
40
43
|
const control = createControlHandler(config, relay);
|
|
41
44
|
|
|
42
45
|
const POLL_INTERVAL_MS = 3_000;
|
|
@@ -53,7 +56,7 @@ async function startup(): Promise<void> {
|
|
|
53
56
|
console.error(`[orchestrator] env keys: ${Object.keys(config.env).length}`);
|
|
54
57
|
|
|
55
58
|
// Start API server before registration so we can advertise the URL
|
|
56
|
-
apiServer = startApiServer(config);
|
|
59
|
+
apiServer = startApiServer(config, probeCache);
|
|
57
60
|
console.error(`[orchestrator] apiUrl: ${apiServer.url}`);
|
|
58
61
|
|
|
59
62
|
// Register with relay. The server and orchestrator are often restarted
|
|
@@ -108,16 +111,59 @@ async function registerUntilConnected(): Promise<void> {
|
|
|
108
111
|
async function healthCheck(): Promise<void> {
|
|
109
112
|
const agents = control.getManagedAgents();
|
|
110
113
|
let changed = false;
|
|
114
|
+
const exitedAgents: ManagedSessionExitDiagnostics[] = [];
|
|
115
|
+
let remainingAgents = agents;
|
|
111
116
|
for (const agent of agents) {
|
|
112
|
-
const
|
|
117
|
+
const refreshed = refreshManagedAgentReport(agent);
|
|
118
|
+
if (JSON.stringify(refreshed) !== JSON.stringify(agent)) {
|
|
119
|
+
remainingAgents = remainingAgents.map((item) => item.agentId === agent.agentId ? refreshed : item);
|
|
120
|
+
control.setManagedAgents(remainingAgents);
|
|
121
|
+
changed = true;
|
|
122
|
+
}
|
|
123
|
+
const sessionName = refreshed.sessionName ?? refreshed.tmuxSession;
|
|
124
|
+
const alive = isSessionAlive(sessionName);
|
|
113
125
|
if (!alive) {
|
|
114
|
-
|
|
115
|
-
|
|
126
|
+
const diagnostics = diagnoseSessionExit({
|
|
127
|
+
agentId: refreshed.agentId,
|
|
128
|
+
policyName: refreshed.policyName,
|
|
129
|
+
spawnRequestId: refreshed.spawnRequestId,
|
|
130
|
+
tmuxSession: sessionName,
|
|
131
|
+
}) ?? {
|
|
132
|
+
agentId: refreshed.agentId,
|
|
133
|
+
provider: refreshed.provider,
|
|
134
|
+
sessionName,
|
|
135
|
+
tmuxSession: refreshed.tmuxSession,
|
|
136
|
+
cwd: refreshed.cwd,
|
|
137
|
+
label: refreshed.label,
|
|
138
|
+
policyName: refreshed.policyName,
|
|
139
|
+
spawnRequestId: refreshed.spawnRequestId,
|
|
140
|
+
supervisor: refreshed.supervisor ?? "unknown",
|
|
141
|
+
systemdUnit: refreshed.systemdUnit,
|
|
142
|
+
terminalSession: refreshed.terminalSession,
|
|
143
|
+
terminalAvailable: refreshed.terminalAvailable,
|
|
144
|
+
pid: refreshed.pid,
|
|
145
|
+
startedAt: refreshed.startedAt,
|
|
146
|
+
detectedAt: Date.now(),
|
|
147
|
+
runtimeMs: Math.max(0, Date.now() - refreshed.startedAt),
|
|
148
|
+
runnerInfoPresent: false,
|
|
149
|
+
unavailable: ["orchestrator session record unavailable", "stdout/stderr log unavailable"],
|
|
150
|
+
lastError: `managed ${refreshed.provider} session ${sessionName} disappeared; orchestrator session record unavailable`,
|
|
151
|
+
};
|
|
152
|
+
exitedAgents.push(diagnostics);
|
|
153
|
+
console.error(`[orchestrator] Session dead: ${sessionName} — ${diagnostics.lastError}`);
|
|
154
|
+
if (diagnostics.logFile) {
|
|
155
|
+
console.error(`[orchestrator] log: ${diagnostics.logFile} (${diagnostics.logBytes ?? "unknown"} bytes${diagnostics.logEmpty ? ", empty" : ""})`);
|
|
156
|
+
}
|
|
157
|
+
if (diagnostics.logTail?.length) {
|
|
158
|
+
console.error(`[orchestrator] last log line: ${diagnostics.logTail[diagnostics.logTail.length - 1]}`);
|
|
159
|
+
}
|
|
160
|
+
remainingAgents = remainingAgents.filter((a) => (a.sessionName ?? a.tmuxSession) !== sessionName);
|
|
161
|
+
control.setManagedAgents(remainingAgents);
|
|
116
162
|
changed = true;
|
|
117
163
|
}
|
|
118
164
|
}
|
|
119
165
|
if (changed) {
|
|
120
|
-
await relay.updateManagedAgents(control.getManagedAgents());
|
|
166
|
+
await relay.updateManagedAgents(control.getManagedAgents(), exitedAgents);
|
|
121
167
|
}
|
|
122
168
|
}
|
|
123
169
|
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import { accessSync, constants, existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { delimiter, join, resolve } from "node:path";
|
|
4
|
+
import { providerCatalogList, type ProviderCatalogEntry } from "agent-relay-sdk/provider-catalog";
|
|
5
|
+
import type { OrchestratorConfig } from "./config";
|
|
6
|
+
import { VERSION } from "./version";
|
|
7
|
+
|
|
8
|
+
// Outcome of probing a single executable.
interface ProviderProbeDetail {
|
|
9
|
+
// Command as shown to the user (the display form, not necessarily the resolved path).
command: string;
|
|
10
|
+
// Resolved executable path; absent when the executable was not found.
path?: string;
|
|
11
|
+
// Whether the probe succeeded.
ok: boolean;
|
|
12
|
+
// Version text taken from the probe's output, when available.
version?: string;
|
|
13
|
+
// Failure description when the probe did not succeed.
error?: string;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
// Availability report for one provider, combining its CLI probe and its runner probe.
interface ProviderProbeResult {
|
|
17
|
+
// Provider that was probed.
name: "claude" | "codex";
|
|
18
|
+
// True only when both the CLI probe and the runner probe succeeded.
available: boolean;
|
|
19
|
+
// Date.now() timestamp (epoch milliseconds) taken when the result was built.
checkedAt: number;
|
|
20
|
+
// Explanation of why the provider is unavailable; unset when it is available.
reason?: string;
|
|
21
|
+
// Version reported by the CLI probe.
version?: string;
|
|
22
|
+
// Value of this package's VERSION constant.
runnerVersion: string;
|
|
23
|
+
// Detected optional features; omitted when none were detected.
features?: Record<string, boolean>;
|
|
24
|
+
// Detail of the provider CLI probe.
cli: ProviderProbeDetail;
|
|
25
|
+
// Detail of the runner probe.
runner: ProviderProbeDetail;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Cached result of probing every configured provider.
interface ProviderProbeSnapshot {
|
|
29
|
+
// Names of the providers whose probe reported them available.
providers: ("claude" | "codex")[];
|
|
30
|
+
// One probe result per configured provider.
providerStatus: ProviderProbeResult[];
|
|
31
|
+
// Catalog entries restricted to the configured providers.
providerCatalog: ProviderCatalogEntry[];
|
|
32
|
+
// Date.now() timestamp (epoch milliseconds) taken when the snapshot was built; used for the TTL check.
checkedAt: number;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Default time, in milliseconds, for which ProviderProbeCache reuses a snapshot before probing again.
const DEFAULT_TTL_MS = 30_000;
|
|
36
|
+
// Default time limit, in milliseconds, for each individual probe command.
const DEFAULT_TIMEOUT_MS = 2_500;
|
|
37
|
+
|
|
38
|
+
/**
 * Caches the result of probing the configured providers so that frequent
 * callers do not spawn probe processes on every request.
 */
export class ProviderProbeCache {
  private snapshot?: ProviderProbeSnapshot;

  /**
   * @param config - Orchestrator configuration; `providers` lists what to probe.
   * @param ttlMs - How long a snapshot stays fresh, in milliseconds.
   * @param timeoutMs - Time limit handed to each provider probe, in milliseconds.
   */
  constructor(
    private readonly config: OrchestratorConfig,
    private readonly ttlMs = DEFAULT_TTL_MS,
    private readonly timeoutMs = DEFAULT_TIMEOUT_MS,
  ) {}

  /**
   * Returns the current snapshot, probing again when none exists, the cached
   * one is older than the TTL, or `refresh` is true.
   *
   * @param refresh - Force a new probe even if the cached snapshot is fresh.
   */
  async getSnapshot(refresh = false): Promise<ProviderProbeSnapshot> {
    const now = Date.now();
    const cached = this.snapshot;
    if (cached && !refresh) {
      const ageMs = now - cached.checkedAt;
      if (ageMs < this.ttlMs) return cached;
    }

    // All configured providers are probed concurrently.
    const pending = this.config.providers.map((provider) => probeProvider(provider, this.timeoutMs));
    const providerStatus = await Promise.all(pending);

    const providers: ("claude" | "codex")[] = [];
    for (const status of providerStatus) {
      if (status.available) providers.push(status.name);
    }

    const providerCatalog = providerCatalogList().filter((entry) => this.config.providers.includes(entry.provider));
    const fresh: ProviderProbeSnapshot = {
      providers,
      providerStatus,
      providerCatalog,
      checkedAt: Date.now(),
    };
    this.snapshot = fresh;
    return fresh;
  }
}
|
|
63
|
+
|
|
64
|
+
/**
 * Probes one provider: its CLI (`<provider> --version`, skipping paths that
 * `isRelayProviderShim` flags) and its runner.
 *
 * The two probes do not depend on each other, so they run concurrently; the
 * worst case is then one probe timeout rather than two back to back.
 *
 * @param provider - Provider to probe.
 * @param timeoutMs - Time limit applied to each probe command.
 * @returns Availability (both probes must succeed), the CLI's version, this
 *   package's VERSION, any detected features, and both probe details.
 */
export async function probeProvider(provider: "claude" | "codex", timeoutMs = DEFAULT_TIMEOUT_MS): Promise<ProviderProbeResult> {
  const [cli, runner] = await Promise.all([
    probeCommand(provider, ["--version"], timeoutMs, provider, (path) => isRelayProviderShim(path, provider)),
    probeRunner(provider, timeoutMs),
  ]);
  const available = cli.ok && runner.ok;
  const features = probeProviderFeatures(provider);
  return {
    name: provider,
    available,
    checkedAt: Date.now(),
    reason: available ? undefined : unavailableReason(cli, runner),
    version: cli.version,
    runnerVersion: VERSION,
    // Omit the key entirely when nothing was detected.
    ...(Object.keys(features).length > 0 ? { features } : {}),
    cli,
    runner,
  };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Detects optional provider features on this machine.
 *
 * Only `claude` has a detectable feature: `rig` is reported when a
 * `claude-rig` executable resolves or a `.claude-rig` entry exists in the
 * user's home directory.
 *
 * @returns A map of detected features; empty when none apply.
 */
function probeProviderFeatures(provider: "claude" | "codex"): Record<string, boolean> {
  const detected: Record<string, boolean> = {};
  if (provider !== "claude") return detected;

  const rigBinary = resolveExecutable("claude-rig");
  const rigDirPresent = existsSync(join(homedir(), ".claude-rig"));
  if (rigBinary || rigDirPresent) {
    detected.rig = true;
  }
  return detected;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Probes the relay runner for a provider.
 *
 * When a runner entry point exists at `../../runner/src/index.ts` relative to
 * this module, it is launched through `bun run … <provider> --help`; otherwise
 * the `<provider>-relay --help` command is probed instead.
 */
async function probeRunner(provider: "claude" | "codex", timeoutMs: number): Promise<ProviderProbeDetail> {
  const repoLauncher = resolve(import.meta.dir, "../../runner/src/index.ts");

  // No in-repo launcher: fall back to the standalone relay executable.
  if (!existsSync(repoLauncher)) {
    return probeCommand(`${provider}-relay`, ["--help"], timeoutMs);
  }

  const bunPath = resolveExecutable("bun");
  if (bunPath) {
    return probeCommand(bunPath, ["run", repoLauncher, provider, "--help"], timeoutMs, `bun run ${repoLauncher}`);
  }
  return { command: "bun", ok: false, error: "bun executable not found" };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Runs an executable with the given arguments and reports the outcome.
 *
 * The executable is located with `resolveExecutable`; candidates for which
 * `skipPath` returns true are ignored. The process is killed with SIGKILL if it
 * has not finished within `timeoutMs`.
 *
 * @param command - Executable name or path to resolve.
 * @param args - Arguments passed to the executable.
 * @param timeoutMs - Maximum time to wait for the process, in milliseconds.
 * @param displayCommand - Label used in the result and in error messages.
 * @param skipPath - Optional predicate rejecting resolved candidates.
 * @returns Probe detail; `ok` is true only for exit code 0. `version` is the
 *   first non-blank stdout line, `error` the first non-blank stderr line (or
 *   the exit code) on failure. Never rejects for spawn/IO failures — they are
 *   reported through `error`.
 */
async function probeCommand(
  command: string,
  args: string[],
  timeoutMs: number,
  displayCommand = command,
  skipPath?: (path: string) => boolean,
): Promise<ProviderProbeDetail> {
  const path = resolveExecutable(command, skipPath);
  if (!path) return { command: displayCommand, ok: false, error: `${displayCommand} executable not found` };

  let proc: Bun.Subprocess<"ignore", "pipe", "pipe"> | undefined;
  // A cancellable timer is used instead of Bun.sleep so that a probe that
  // finishes quickly does not leave a pending timer behind for `timeoutMs`.
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    proc = Bun.spawn([path, ...args], { stdin: "ignore", stdout: "pipe", stderr: "pipe" });
    const timedOut = new Promise<"timeout">((resolveTimeout) => {
      timer = setTimeout(() => resolveTimeout("timeout"), timeoutMs);
    });
    const output = await Promise.race([processOutput(proc), timedOut]);
    if (output === "timeout") {
      // Best effort: the process may already have exited.
      try { proc.kill("SIGKILL"); } catch {}
      return { command: displayCommand, path, ok: false, error: `probe timed out after ${timeoutMs}ms` };
    }
    const { exitCode, stdout, stderr } = output;
    return {
      command: displayCommand,
      path,
      ok: exitCode === 0,
      version: firstLine(stdout),
      error: exitCode === 0 ? undefined : firstLine(stderr) || `exit code ${exitCode}`,
    };
  } catch (error) {
    // Spawn or stream failure: make sure no child is left running.
    try { proc?.kill("SIGKILL"); } catch {}
    return { command: displayCommand, path, ok: false, error: error instanceof Error ? error.message : String(error) };
  } finally {
    // clearTimeout accepts undefined, so this is safe even if spawn threw.
    clearTimeout(timer);
  }
}
|
|
136
|
+
|
|
137
|
+
/**
 * Collects a subprocess's stdout text, stderr text and exit code.
 *
 * Both streams are drained concurrently with waiting for exit; the returned
 * promise settles once all three are available.
 */
async function processOutput(proc: Bun.Subprocess<"ignore", "pipe", "pipe">): Promise<{ exitCode: number; stdout: string; stderr: string }> {
  const stdoutText = new Response(proc.stdout).text();
  const stderrText = new Response(proc.stderr).text();
  const [stdout, stderr, exitCode] = await Promise.all([stdoutText, stderrText, proc.exited]);
  return { stdout, stderr, exitCode };
}
|
|
145
|
+
|
|
146
|
+
/**
 * Builds the human-readable reason a provider is unavailable.
 *
 * The CLI probe takes precedence over the runner probe. For the first failed
 * probe, its `error` is used when non-empty, otherwise "<command> unavailable".
 * If neither probe failed, a generic message is returned.
 */
function unavailableReason(cli: ProviderProbeDetail, runner: ProviderProbeDetail): string {
  const failed = [cli, runner].find((detail) => !detail.ok);
  if (!failed) {
    return "provider unavailable";
  }
  return failed.error || `${failed.command} unavailable`;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Returns the first line of `value` that is non-empty after trimming, with
 * surrounding whitespace removed, or `undefined` when every line is blank.
 * Lines are separated by `\n` or `\r\n`.
 */
function firstLine(value: string): string | undefined {
  for (const rawLine of value.split(/\r?\n/)) {
    const trimmed = rawLine.trim();
    if (trimmed) return trimmed;
  }
  return undefined;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Resolves a command to the absolute path of an executable.
 *
 * A command containing "/" is treated as a path and resolved directly.
 * Otherwise each non-empty entry of the `PATH` environment variable is tried
 * in order and the first acceptable candidate wins.
 *
 * @param command - Executable name or path.
 * @param skipPath - Optional predicate; candidates for which it returns true
 *   are rejected even if executable.
 * @returns The resolved path, or `undefined` when nothing acceptable is found.
 */
export function resolveExecutable(command: string, skipPath?: (path: string) => boolean): string | undefined {
  const accept = (candidate: string): boolean => isExecutable(candidate) && !skipPath?.(candidate);

  if (command.includes("/")) {
    const explicit = resolve(command);
    return accept(explicit) ? explicit : undefined;
  }

  const searchDirs = (process.env.PATH || "").split(delimiter).filter(Boolean);
  return searchDirs
    .map((dir) => resolve(dir, command))
    .find((candidate) => accept(candidate));
}
|
|
167
|
+
|
|
168
|
+
/**
 * Reports whether the current process has execute permission on `path`.
 * Any failure of the access check (including a missing path) yields `false`.
 */
function isExecutable(path: string): boolean {
  let executable = false;
  try {
    accessSync(path, constants.X_OK);
    executable = true;
  } catch {
    // Access check failed: leave `executable` as false.
  }
  return executable;
}
|
|
176
|
+
|
|
177
|
+
/**
 * Reports whether the file at `path` looks like a relay shim for `provider`,
 * i.e. its first 1024 characters contain "<provider>-relay <provider>".
 * Unreadable files are treated as not being shims.
 *
 * NOTE(review): the whole file is read before slicing, so large binaries are
 * loaded in full just to inspect their head.
 */
function isRelayProviderShim(path: string, provider: "claude" | "codex"): boolean {
  const marker = `${provider}-relay ${provider}`;
  let head: string;
  try {
    head = readFileSync(path, "utf8").slice(0, 1024);
  } catch {
    return false;
  }
  return head.includes(marker);
}
|
package/src/recovery.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { OrchestratorConfig } from "./config";
|
|
2
2
|
import type { ManagedAgentReport, RelayClient } from "./relay";
|
|
3
|
-
import { recoverExistingSessions } from "./
|
|
3
|
+
import { recoverExistingSessions } from "./spawn";
|
|
4
4
|
|
|
5
5
|
interface ManagedAgentControl {
|
|
6
6
|
setManagedAgents(agents: ManagedAgentReport[]): void;
|