agent-relay-orchestrator 0.10.19 → 0.10.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/spawn.ts ADDED
@@ -0,0 +1,1267 @@
1
+ import { chmodSync, closeSync, existsSync, mkdirSync, openSync, readFileSync, renameSync, rmSync, statSync, writeFileSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { isAbsolute, join, relative, resolve } from "node:path";
4
+ import { artifactProxyBaseUrl } from "./artifact-proxy";
5
+ import type { OrchestratorConfig } from "./config";
6
+ import type { ManagedAgentReport, ManagedSessionExitDiagnostics } from "./relay";
7
+ import { resolveSpawnWorkspace } from "./workspace-probe";
8
+ import type { WorkspaceMetadata, WorkspaceMode } from "agent-relay-sdk";
9
+
10
/** Caller-supplied description of one managed agent to launch via `spawnAgent`. */
export interface SpawnOptions {
  /** Provider passed to the runner launcher as its provider argument. */
  provider: "claude" | "codex";
  /** Requested working directory; `spawnAgent` requires it to exist inside the configured base directory. */
  cwd: string;
  /** Forwarded as `--rig` when set. */
  rig?: string;
  /** Forwarded as `--model` when set. */
  model?: string;
  /** Forwarded as `--effort` when set. */
  effort?: string;
  /** Forwarded as `--profile` and exported as `AGENT_RELAY_AGENT_PROFILE` when set. */
  profile?: string;
  /** Exported as `AGENT_RELAY_WORKSPACE_MODE`; `buildEnv` falls back to `"inherit"`. */
  workspaceMode?: WorkspaceMode;
  /** Exported as JSON in `AGENT_RELAY_WORKSPACE_JSON` when set. */
  workspace?: WorkspaceMetadata;
  /** Exported as JSON in `AGENT_RELAY_AGENT_PROFILE_JSON`; string entries of its `env` are merged into the environment. */
  agentProfile?: Record<string, unknown>;
  /** Forwarded as `--label`; `spawnAgent` generates one when it is missing. */
  label?: string;
  /** Forwarded as `--agent-id`; `spawnAgent` derives one when it is missing. */
  agentId?: string;
  /** Forwarded as `--approval`; an empty value falls back to `"guarded"`. */
  approvalMode: string;
  /** Forwarded as `--prompt` when set. */
  prompt?: string;
  /** Forwarded as `--system-prompt-append` when set. */
  systemPromptAppend?: string;
  /** Extra environment variables layered over the orchestrator and profile environment. */
  env?: Record<string, string>;
  /** Forwarded comma-joined as `--tags` and merged into `AGENT_RELAY_TAGS`. */
  tags?: string[];
  /** Forwarded comma-joined as `--caps` and exported as `AGENT_RELAY_CAPS` / `AGENT_RELAY_CAPABILITIES`. */
  capabilities?: string[];
  /** Appended verbatim after a `--` separator. */
  providerArgs?: string[];
  /** Exported as `AGENT_RELAY_POLICY` when set. */
  policyName?: string;
  /** Exported as `AGENT_RELAY_SPAWN_REQUEST_ID`; also preferred as the uniqueness suffix of the session name. */
  spawnRequestId?: string;
  /** Exported as `AGENT_RELAY_AUTOMATION_ID` when set. */
  automationId?: string;
  /** Exported as `AGENT_RELAY_AUTOMATION_RUN_ID` when set. */
  automationRunId?: string;
}
34
+
35
/**
 * Summary view of a managed session.
 * NOTE(review): no consumer is visible in this part of the file; field meanings
 * below are inferred from the parallel `SessionRecord` / report fields — confirm.
 */
interface SessionInfo {
  name: string;
  sessionName: string;
  pid: number;
  alive: boolean;
  /** Either `"process"` or `"systemd"`, mirroring `SessionSupervisor["type"]`. */
  supervisor: SessionSupervisor["type"];
  systemdUnit?: string;
  terminalSession?: string;
  terminalAvailable: boolean;
  logFile: string;
}
46
+
47
/** Result of `createTerminalGuest`: a tmux guest session attached to a managed runner. */
export interface TerminalGuestSession {
  /** Name of the tmux session that was created. */
  session: string;
  mode: "guest";
  /** Provider reported by the runner's attach spec. */
  provider: string;
  running: boolean;
  interactive: boolean;
  /** Epoch milliseconds after which the guest is eligible for cleanup. */
  expiresAt: number;
}
55
+
56
/** Captured state of a terminal session. */
export interface TerminalSnapshot {
  session: string;
  content: string;
  /**
   * Only means the tmux pane still exists — tmux keeps a pane after the process
   * inside it exits.
   */
  running: boolean;
  /**
   * Real liveness of the agent process (pid / systemd unit), so the dashboard
   * can flag an orphaned, stale terminal.
   */
  agentAlive: boolean;
  cols?: number;
  rows?: number;
  cursorX?: number;
  cursorY?: number;
  capturedAt: number;
}
70
+
71
/** One unit of terminal input, tagged by `type` as either a literal or a key. */
type TerminalInputToken =
  | { type: "literal"; value: string }
  | { type: "key"; value: string };
74
+
75
/** Outcome of sending input to a terminal session. */
interface TerminalInputResult {
  session: string;
  running: boolean;
  /** NOTE(review): presumably the number of tokens delivered — confirm against the sender. */
  sent: number;
  capturedAt: number;
}
81
+
82
/** One tracked session as persisted in the orchestrator state file. */
export interface SessionRecord {
  /** Session name; records are keyed and replaced by this value. */
  name: string;
  /** Pid captured when the runner was spawned. */
  pid: number;
  /** How the runner is supervised; an absent value is treated as `{ type: "process" }`. */
  supervisor?: SessionSupervisor;
  provider: string;
  model?: string;
  effort?: string;
  profile?: string;
  workspaceMode?: WorkspaceMode;
  workspace?: WorkspaceMetadata;
  label?: string;
  /** Working directory the runner was started in. */
  cwd: string;
  /** File receiving the runner's stdout/stderr. */
  logFile: string;
  /** File the runner is told to write its info to (via `AGENT_RELAY_RUNNER_INFO_FILE`). */
  runnerInfoFile?: string;
  agentId: string;
  approvalMode: string;
  policyName?: string;
  spawnRequestId?: string;
  automationId?: string;
  automationRunId?: string;
  /** Epoch milliseconds at which the session was recorded. */
  startedAt: number;
}
104
+
105
/** Describes what keeps a runner alive: a plain child process or a systemd user unit. */
interface SessionSupervisor {
  type: "process" | "systemd";
  /** Unit name without the `.service` suffix; set for systemd-supervised runners. */
  unit?: string;
  /** Path of the generated launch script. */
  launchScript?: string;
}
110
+
111
/** Result of starting a runner: its pid and how it is supervised. */
interface SpawnedRunner {
  pid: number;
  supervisor: SessionSupervisor;
}
115
+
116
/** Validated contents of a runner info file, as produced by `readRunnerInfo`. */
interface RunnerInfo {
  agentId: string;
  /** Empty string when the file does not provide one. */
  runnerId: string;
  provider: string;
  /** Base URL of the runner's control endpoint. */
  controlUrl: string;
  tmuxSession?: string;
  tmuxSocket?: string;
  pid?: number;
  startedAt?: number;
}
126
+
127
/** Attach description returned by a runner's `/terminal/attach-spec` endpoint. */
interface TerminalAttachSpec {
  mode: "guest";
  provider: string;
  /** Directory the guest tmux session starts in. */
  cwd: string;
  /** Argument vector; each element is shell-escaped before being handed to tmux. */
  command: string[];
  /** Variables passed to tmux with `-e`. */
  env?: Record<string, string>;
  title?: string;
  /** Requested lifetime in milliseconds. */
  ttlMs?: number;
}
136
+
137
// Per-user on-disk locations, all under ~/.agent-relay.
/** Directory holding one log file per session. */
const LOG_DIR = join(homedir(), ".agent-relay", "logs");
/** JSON file persisting the tracked session records. */
const STATE_FILE = join(homedir(), ".agent-relay", "orchestrator-sessions.json");
/** Directory holding the generated launch scripts. */
const SESSION_DIR = join(homedir(), ".agent-relay", "sessions");
/** Directory holding the runner info files. */
const RUNNER_INFO_DIR = join(homedir(), ".agent-relay", "runners");
/** Default lifetime of a terminal guest session: one hour, in milliseconds. */
const GUEST_TTL_MS = 60 * 60 * 1_000;
/** In-memory registry of guest tmux sessions and their expiry, keyed by session name. */
const terminalGuests = new Map<string, { expiresAt: number }>();
143
+
144
/**
 * Reports whether `path` is `baseDir` itself or lies somewhere beneath it.
 *
 * The comparison is purely lexical: both arguments are resolved against the
 * current working directory and compared as strings; symlinks are not followed.
 *
 * @param path Candidate path, absolute or relative.
 * @param baseDir Directory that must contain `path`.
 * @returns `true` when the relative path from `baseDir` to `path` does not escape upward.
 */
export function isWithinBaseDir(path: string, baseDir: string): boolean {
  const base = resolve(baseDir);
  const target = resolve(path);
  const rel = relative(base, target);
  if (rel === "") return true;
  // On Windows a path on another drive comes back absolute.
  if (isAbsolute(rel)) return false;
  // Only a leading ".." *segment* leaves the base. A plain prefix test would
  // also reject ordinary children whose name merely begins with two dots
  // (e.g. "..cache").
  const [firstSegment] = rel.split(/[\\/]/);
  return firstSegment !== "..";
}
150
+
151
/**
 * Builds the session name `<tmuxPrefix>-<provider>-<label>[-<suffix>]`.
 *
 * The label and the optional unique id are lower-cased, and every run of
 * characters outside `[a-zA-Z0-9._-]` becomes a single dash. Only the last
 * eight characters of the sanitized unique id are kept.
 */
export function sessionName(config: OrchestratorConfig, provider: string, label: string, uniqueId?: string): string {
  const sanitize = (raw: string): string => raw.replace(/[^a-zA-Z0-9._-]+/g, "-").toLowerCase();
  const stem = `${config.tmuxPrefix}-${provider}-${sanitize(label)}`;
  if (!uniqueId) return stem;
  return `${stem}-${sanitize(uniqueId).slice(-8)}`;
}
156
+
157
/**
 * Produces the fallback label used when a spawn request carries none.
 *
 * @param now Timestamp embedded in the label; defaults to the current time.
 */
export function defaultSpawnLabel(now = Date.now()): string {
  return "session-" + `${now}`;
}
160
+
161
/**
 * Assembles the argument vector that launches a headless runner for `opts`.
 *
 * The launcher is the first runner entry point found on disk (the in-repo
 * one, then the installed package), run through Bun; when neither exists the
 * command falls back to a `<provider>-relay` executable. Optional settings
 * become flags only when they are set, and provider arguments are appended
 * after a `--` separator.
 */
export function buildRunnerCommand(opts: SpawnOptions, config: OrchestratorConfig): string[] {
  const bun = process.env.AGENT_RELAY_BUN_BIN
    || (process.platform === "darwin" && existsSync("/opt/homebrew/bin/bun") ? "/opt/homebrew/bin/bun" : "bun");

  // Checked in priority order; the second is only probed when the first is absent.
  const entryPoints = [
    resolve(import.meta.dir, "../../runner/src/index.ts"),
    resolve(import.meta.dir, "../../agent-relay-runner/src/index.ts"),
  ];
  const entryPoint = entryPoints.find((candidate) => existsSync(candidate));

  const args: string[] = entryPoint
    ? [bun, "run", entryPoint, opts.provider]
    : [`${opts.provider}-relay`, opts.provider];
  args.push(
    "--headless",
    "--cwd", opts.cwd,
    "--relay-url", config.relayUrl,
    "--approval", opts.approvalMode || "guarded",
  );

  const optionalFlags: Array<[string, string | undefined]> = [
    ["--rig", opts.rig],
    ["--model", opts.model],
    ["--effort", opts.effort],
    ["--profile", opts.profile],
    ["--label", opts.label],
    ["--agent-id", opts.agentId],
    ["--prompt", opts.prompt],
    ["--system-prompt-append", opts.systemPromptAppend],
  ];
  for (const [flag, value] of optionalFlags) {
    if (value) args.push(flag, value);
  }

  if (opts.tags?.length) args.push("--tags", opts.tags.join(","));
  if (opts.capabilities?.length) args.push("--caps", opts.capabilities.join(","));
  if (opts.providerArgs?.length) args.push("--", ...opts.providerArgs);
  return args;
}
191
+
192
/**
 * Builds the environment for a spawned runner.
 *
 * Layers, lowest precedence first: the orchestrator's own environment, the
 * relay token, `config.env`, string entries of the agent profile's `env`, and
 * `opts.env`. The `AGENT_RELAY_*` variables and `PATH` are written last so
 * they cannot be overridden by those layers.
 *
 * @param logFile Exported as `AGENT_RELAY_LOG_FILE` when given.
 * @param tmuxSession Exported as `AGENT_RELAY_TMUX_SESSION` when given.
 */
export function buildEnv(opts: SpawnOptions & { label: string; agentId: string }, config: OrchestratorConfig, logFile?: string, tmuxSession?: string): Record<string, string> {
  // Per-user tool directories go first; a Set keeps the first occurrence of each entry.
  const userBinDirs = [".local", ".bun", ".npm-global"].map((dir) => join(homedir(), dir, "bin"));
  const inheritedDirs = (process.env.PATH || "").split(":").filter(Boolean);
  const fullPath = [...new Set([...userBinDirs, ...inheritedDirs])].join(":");

  const capabilityList = [...new Set(opts.capabilities ?? [])].join(",");
  const tagList = [...new Set(["headless", "dashboard-spawned", config.hostname, ...(opts.tags ?? [])])].join(",");

  return {
    ...process.env as Record<string, string>,
    ...(config.token ? { AGENT_RELAY_TOKEN: config.token } : {}),
    ...config.env,
    ...agentProfileEnv(opts.agentProfile),
    ...(opts.env || {}),
    PATH: fullPath,
    AGENT_RELAY_URL: config.relayUrl,
    AGENT_RELAY_ORCHESTRATOR_URL: `http://127.0.0.1:${config.apiPort}`,
    AGENT_RELAY_ARTIFACT_URL: artifactProxyBaseUrl(config),
    AGENT_RELAY_APPROVAL: opts.approvalMode || "guarded",
    ...(opts.profile ? { AGENT_RELAY_AGENT_PROFILE: opts.profile } : {}),
    ...(opts.agentProfile ? { AGENT_RELAY_AGENT_PROFILE_JSON: JSON.stringify(opts.agentProfile) } : {}),
    AGENT_RELAY_TAGS: tagList,
    AGENT_RELAY_CAPS: capabilityList,
    AGENT_RELAY_CAPABILITIES: capabilityList,
    AGENT_RELAY_HEADLESS: "1",
    ...(logFile ? { AGENT_RELAY_LOG_FILE: logFile } : {}),
    ...(tmuxSession ? { AGENT_RELAY_TMUX_SESSION: tmuxSession } : {}),
    ...(opts.label ? { AGENT_RELAY_LABEL: opts.label } : {}),
    ...(opts.policyName ? { AGENT_RELAY_POLICY: opts.policyName } : {}),
    ...(opts.spawnRequestId ? { AGENT_RELAY_SPAWN_REQUEST_ID: opts.spawnRequestId } : {}),
    AGENT_RELAY_WORKSPACE_MODE: opts.workspaceMode ?? "inherit",
    ...(opts.workspace ? { AGENT_RELAY_WORKSPACE_JSON: JSON.stringify(opts.workspace) } : {}),
    ...(opts.automationId ? { AGENT_RELAY_AUTOMATION_ID: opts.automationId } : {}),
    ...(opts.automationRunId ? { AGENT_RELAY_AUTOMATION_RUN_ID: opts.automationRunId } : {}),
  };
}
231
+
232
/**
 * Extracts the environment overrides declared under `profile.env`.
 *
 * @returns Only the entries whose value is a string; an empty object when
 *   `env` is missing, not an object, or an array.
 */
function agentProfileEnv(profile: Record<string, unknown> | undefined): Record<string, string> {
  const declared = profile?.env;
  const isPlainObject = !!declared && typeof declared === "object" && !Array.isArray(declared);
  if (!isPlainObject) return {};
  const stringEntries = Object.entries(declared as object).filter(
    (entry): entry is [string, string] => typeof entry[1] === "string",
  );
  return Object.fromEntries(stringEntries);
}
237
+
238
/** Returns the log file location for the session called `name`. */
function logFilePath(name: string): string {
  const fileName = name + ".log";
  return join(LOG_DIR, fileName);
}
241
+
242
/**
 * Returns the runner info file location for the session called `name`.
 * Runs of characters outside `[a-zA-Z0-9_.-]` collapse to one dash, leading
 * and trailing dashes are dropped, and an empty result becomes `runner`.
 */
function runnerInfoPath(name: string): string {
  const collapsed = name.replace(/[^a-zA-Z0-9_.-]+/g, "-");
  const trimmed = collapsed.replace(/^-+|-+$/g, "");
  const stem = trimmed || "runner";
  return join(RUNNER_INFO_DIR, `${stem}.json`);
}
246
+
247
/** Creates the log directory, and any missing parents, if it does not exist yet. */
function ensureLogDir(): void {
  const options = { recursive: true };
  mkdirSync(LOG_DIR, options);
}
250
+
251
/** Creates the launch-script directory if needed, requesting owner-only (0700) permissions. */
function ensureSessionDir(): void {
  const options = { recursive: true, mode: 0o700 };
  mkdirSync(SESSION_DIR, options);
}
254
+
255
/** Creates the runner info directory if needed, requesting owner-only (0700) permissions. */
function ensureRunnerInfoDir(): void {
  const options = { recursive: true, mode: 0o700 };
  mkdirSync(RUNNER_INFO_DIR, options);
}
258
+
259
/**
 * Persists the full list of session records, replacing the previous state file.
 *
 * The JSON is written to a sibling temporary file and then renamed over the
 * real one: a crash mid-write would otherwise leave truncated JSON, which
 * `loadState` reads as "no sessions", losing every tracked session.
 */
function saveState(records: SessionRecord[]): void {
  const stateDir = join(homedir(), ".agent-relay");
  mkdirSync(stateDir, { recursive: true });

  const serialized = JSON.stringify(records, null, 2) + "\n";
  const scratchFile = `${STATE_FILE}.tmp`;
  writeFileSync(scratchFile, serialized);
  renameSync(scratchFile, STATE_FILE);
}
267
+
268
/**
 * Reads the persisted session records.
 *
 * @returns The stored records, or an empty list when the state file is
 *   missing, unreadable, not valid JSON, or does not contain an array.
 */
function loadState(): SessionRecord[] {
  try {
    const parsed: unknown = JSON.parse(readFileSync(STATE_FILE, "utf8"));
    // Valid JSON is not necessarily a record list: `{}` or `null` would make
    // every caller's `.filter(...)` throw, so anything but an array counts as
    // "no sessions", and non-object elements are dropped.
    if (!Array.isArray(parsed)) return [];
    return parsed.filter((entry): entry is SessionRecord => entry !== null && typeof entry === "object");
  } catch {
    // Best effort by design: a missing or corrupt state file means no tracked sessions.
    return [];
  }
}
275
+
276
/** Stores `record`, replacing any existing record that has the same name. */
function addSessionRecord(record: SessionRecord): void {
  const others = loadState().filter((existing) => existing.name !== record.name);
  others.push(record);
  saveState(others);
}
281
+
282
/** Drops every stored record whose name equals `name` and rewrites the state file. */
function removeSessionRecord(name: string): void {
  const remaining = loadState().filter((existing) => existing.name !== name);
  saveState(remaining);
}
285
+
286
// A zombie process still has a PID-table entry, so kill(pid, 0) succeeds even
// though it is dead and unreapable. We never wait() our spawned children, so
// they linger as zombies; treat them as not-alive (Linux-only via /proc).

/**
 * Reports whether the text of a `/proc/<pid>/status` file describes a zombie.
 *
 * @param statusText Full contents of the status file.
 * @returns `true` when the `State:` line starts with `Z`.
 */
export function parseProcStateIsZombie(statusText: string): boolean {
  const match = statusText.match(/^State:\s+(\w)/m);
  return match?.[1] === "Z";
}

/**
 * Reports whether `pid` is a zombie according to `/proc`.
 * Returns `false` when the status file cannot be read (no `/proc`, or the
 * process is gone).
 */
function isZombie(pid: number): boolean {
  try {
    return parseProcStateIsZombie(readFileSync(`/proc/${pid}/status`, "utf8"));
  } catch {
    return false;
  }
}

/**
 * Reports whether `pid` refers to a live, non-zombie process.
 *
 * @param pid Process id to probe.
 * @returns `false` for non-positive or non-integer pids, for pids the
 *   signal-0 probe rejects, and for zombies.
 */
export function isPidAlive(pid: number): boolean {
  // pid 0 and negative pids address process groups, not a single process, so
  // the signal-0 probe would succeed for them and report a bogus pid as alive.
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return !isZombie(pid);
}
310
+
311
/** Returns the record's supervisor, defaulting to a plain process when none is stored. */
function sessionSupervisor(record?: Pick<SessionRecord, "supervisor">): SessionSupervisor {
  const stored = record?.supervisor;
  if (stored !== undefined && stored !== null) return stored;
  return { type: "process" };
}
314
+
315
/**
 * Reports whether the session behind `record` is still running.
 * For a systemd-supervised session with a unit name, the unit's current main
 * pid is probed; otherwise the pid stored in the record is probed.
 */
function isSessionRecordAlive(record: SessionRecord): boolean {
  const { type, unit } = sessionSupervisor(record);
  if (type !== "systemd" || !unit) return isPidAlive(record.pid);
  const mainPid = systemdMainPid(unit);
  return mainPid > 0 && isPidAlive(mainPid);
}
323
+
324
/**
 * Returns the pid currently associated with `record`: the systemd unit's
 * main pid when one is reported, otherwise the pid stored at spawn time.
 */
function currentSessionPid(record: SessionRecord): number {
  const { type, unit } = sessionSupervisor(record);
  const usesSystemd = type === "systemd" && !!unit;
  if (!usesSystemd) return record.pid;
  const mainPid = systemdMainPid(unit as string);
  return mainPid > 0 ? mainPid : record.pid;
}
332
+
333
/**
 * Derives the session and terminal fields of a managed-agent report.
 * The session name doubles as the tmux and terminal session name;
 * `terminalAvailable` is the result of `tmuxHasSession` for that name, using
 * the socket from the runner info file when one is available.
 */
function sessionReportFields(record: Pick<SessionRecord, "name" | "supervisor" | "runnerInfoFile" | "agentId" | "provider">): Pick<ManagedAgentReport, "sessionName" | "tmuxSession" | "supervisor" | "systemdUnit" | "terminalSession" | "terminalAvailable"> {
  const { type, unit } = sessionSupervisor(record);
  const tmuxSocket = readRunnerInfo(record)?.tmuxSocket;
  const terminalAvailable = tmuxHasSession(record.name, tmuxSocket);
  const systemdFields = type === "systemd" && unit ? { systemdUnit: unit } : {};
  return {
    sessionName: record.name,
    tmuxSession: record.name,
    supervisor: type,
    ...systemdFields,
    terminalSession: record.name,
    terminalAvailable,
  };
}
345
+
346
/**
 * Launches a managed agent runner and records it in the orchestrator state.
 *
 * Steps: derive label, agent id and session name; validate the requested cwd;
 * resolve the workspace (which may change the effective cwd); truncate the
 * session log; start the runner; persist a session record; return a report.
 *
 * @param opts Spawn request; `label` and `agentId` are generated when missing.
 * @param config Orchestrator configuration.
 * @returns Report describing the spawned session.
 * @throws Error when `opts.cwd` does not exist or lies outside `config.baseDir`;
 *   errors from workspace resolution or runner launch propagate.
 */
export async function spawnAgent(
  opts: SpawnOptions,
  config: OrchestratorConfig,
): Promise<ManagedAgentReport> {
  const label = opts.label || defaultSpawnLabel();
  const agentId = opts.agentId || managedAgentId(config, opts.provider, label);
  const name = sessionName(config, opts.provider, label, opts.spawnRequestId ?? agentId);

  if (!existsSync(opts.cwd)) {
    throw new Error(`cwd does not exist: ${opts.cwd}`);
  }
  if (!isWithinBaseDir(opts.cwd, config.baseDir)) {
    throw new Error(`cwd must be within base directory: ${config.baseDir}`);
  }

  const resolvedWorkspace = await resolveSpawnWorkspace({
    ...opts,
    label,
    workspaceRoot: join(resolve(config.baseDir), ".agent-relay", "workspaces"),
  });
  const spawnOpts = { ...opts, label, agentId, cwd: resolvedWorkspace.cwd, workspace: resolvedWorkspace.workspace };

  const command = buildRunnerCommand(spawnOpts, config);

  ensureLogDir();
  ensureRunnerInfoDir();
  const logFile = logFilePath(name);
  const runnerInfoFile = runnerInfoPath(name);
  // Remove any info file left by a previous session of the same name so stale
  // contents are never read for the new runner.
  rmSync(runnerInfoFile, { force: true });
  const env = buildEnv({ ...spawnOpts, env: { ...(spawnOpts.env ?? {}), AGENT_RELAY_RUNNER_INFO_FILE: runnerInfoFile } }, config, logFile, name);

  // Start from an empty log; the runner itself appends to it. The descriptor
  // is closed immediately instead of being held open across the logging below.
  closeSync(openSync(logFile, "w"));

  console.error(`[orchestrator] Spawning ${opts.provider} agent: ${name}`);
  // Log the resolved directory: workspace resolution may have moved the runner
  // away from the requested cwd.
  console.error(`[orchestrator] cwd: ${spawnOpts.cwd}`);
  console.error(`[orchestrator] command: ${command.join(" ")}`);
  console.error(`[orchestrator] log: ${logFile}`);

  const runner = spawnRunner(name, command, spawnOpts.cwd, env, logFile);

  // One timestamp for both the persisted record and the returned report, so
  // the two never disagree about when the session started.
  const startedAt = Date.now();

  addSessionRecord({
    name,
    pid: runner.pid,
    supervisor: runner.supervisor,
    provider: spawnOpts.provider,
    model: spawnOpts.model,
    effort: spawnOpts.effort,
    profile: spawnOpts.profile,
    workspaceMode: spawnOpts.workspaceMode,
    workspace: spawnOpts.workspace,
    label,
    cwd: spawnOpts.cwd,
    logFile,
    runnerInfoFile,
    agentId,
    approvalMode: spawnOpts.approvalMode,
    policyName: spawnOpts.policyName,
    spawnRequestId: spawnOpts.spawnRequestId,
    automationId: spawnOpts.automationId,
    automationRunId: spawnOpts.automationRunId,
    startedAt,
  });

  return {
    agentId,
    provider: spawnOpts.provider,
    model: spawnOpts.model,
    effort: spawnOpts.effort,
    profile: spawnOpts.profile,
    workspaceMode: spawnOpts.workspaceMode,
    workspace: spawnOpts.workspace,
    ...sessionReportFields({ name, supervisor: runner.supervisor, runnerInfoFile, agentId, provider: spawnOpts.provider }),
    cwd: spawnOpts.cwd,
    label,
    approvalMode: spawnOpts.approvalMode || "guarded",
    policyName: spawnOpts.policyName,
    spawnRequestId: spawnOpts.spawnRequestId,
    automationRunId: spawnOpts.automationRunId,
    pid: runner.pid,
    startedAt,
  };
}
429
+
430
/**
 * Creates a detached tmux "guest" session that runs the attach command
 * published by a live managed runner.
 *
 * @param input Selector for the managed session (see `selectSessionRecord`).
 * @param config Orchestrator configuration.
 * @returns Descriptor of the new guest session, including its expiry time.
 * @throws Error when no live session matches, the runner exposes no control
 *   URL, the attach spec is invalid, or tmux fails to create the session.
 */
export async function createTerminalGuest(
  input: { agentId?: string; policyName?: string; spawnRequestId?: string; tmuxSession?: string },
  config: OrchestratorConfig,
): Promise<TerminalGuestSession> {
  cleanupExpiredTerminalGuests();
  const record = findSessionRecord(input);
  if (!record || !isSessionRecordAlive(record)) throw new Error("managed runner session not found");
  const runner = readRunnerInfo(record);
  if (!runner?.controlUrl) throw new Error("runner control URL is unavailable; restart the agent to enable terminal attach");
  const spec = await fetchTerminalAttachSpec(runner.controlUrl);
  validateAttachSpec(spec, config);
  const session = guestSessionName(config, spec.provider, record.agentId);
  killTmuxSession(session);

  // `ttlMs` arrives from the runner as unvalidated JSON. A non-numeric value
  // would turn the expiry into NaN, and a NaN expiry never compares as "still
  // valid", so the guest would be killed on the next cleanup pass. Fall back
  // to the default lifetime, then clamp to [1 minute, 4x default].
  const requestedTtl = typeof spec.ttlMs === "number" && !Number.isNaN(spec.ttlMs) ? spec.ttlMs : GUEST_TTL_MS;
  const expiresAt = Date.now() + Math.min(Math.max(requestedTtl, 60_000), 4 * GUEST_TTL_MS);

  const shellCmd = spec.command.map(shellEscape).join(" ");
  const tmuxArgs = ["new-session", "-d", "-s", session, "-x", "200", "-y", "50"];
  for (const [key, value] of Object.entries(spec.env ?? {}).sort(([a], [b]) => a.localeCompare(b))) {
    // Entries whose name is not a valid shell identifier are skipped.
    if (/^[A-Za-z_][A-Za-z0-9_]*$/.test(key)) tmuxArgs.push("-e", `${key}=${value}`);
  }
  tmuxArgs.push("-c", spec.cwd, shellCmd);
  const result = Bun.spawnSync(["tmux", ...tmuxArgs], {
    stdin: "ignore",
    stdout: "pipe",
    stderr: "pipe",
  });
  if (result.exitCode !== 0) {
    const stderr = result.stderr.toString().trim();
    throw new Error(stderr || `tmux guest creation failed with exit code ${result.exitCode}`);
  }
  terminalGuests.set(session, { expiresAt });
  return { session, mode: "guest", provider: spec.provider, running: true, interactive: true, expiresAt };
}
462
+
463
/**
 * Stops a guest terminal session and forgets its expiry entry.
 *
 * @returns `stopped` is `true` only when the tmux session existed and was killed.
 * @throws Error when `session` does not carry the guest-session prefix.
 */
export function stopTerminalGuest(session: string, config: OrchestratorConfig): { session: string; stopped: boolean } {
  if (!isGuestSessionName(session, config)) {
    throw new Error("terminal session is not a guest session");
  }
  const wasRunning = tmuxHasSession(session);
  if (wasRunning) {
    killTmuxSession(session);
  }
  terminalGuests.delete(session);
  return { session, stopped: wasRunning };
}
470
+
471
/**
 * Picks the session record that best matches `input`.
 *
 * Selector precedence: `tmuxSession` (exact session name), then
 * `spawnRequestId`, then `agentId` (both optionally narrowed by
 * `policyName`), then `policyName` alone.
 *
 * A respawn can leave several records for one agent, request or policy. The
 * live session is the most recent one, so every non-name lookup returns the
 * match with the highest `startedAt` rather than the first match (which is
 * the stale, already-replaced session). The earliest record wins a tie.
 *
 * @returns The selected record, or `undefined` when nothing matches or no
 *   selector is given.
 */
export function selectSessionRecord(records: SessionRecord[], input: { agentId?: string; policyName?: string; spawnRequestId?: string; tmuxSession?: string }): SessionRecord | undefined {
  if (input.tmuxSession) return records.find((record) => record.name === input.tmuxSession);

  const { agentId, policyName, spawnRequestId } = input;

  // Returns the most recently started record among those accepted by `matches`.
  const latestMatching = (matches: (record: SessionRecord) => boolean): SessionRecord | undefined =>
    records
      .filter(matches)
      .reduce<SessionRecord | undefined>((latest, record) => (
        !latest || record.startedAt > latest.startedAt ? record : latest
      ), undefined);

  const policyMatches = (record: SessionRecord): boolean =>
    !policyName || record.policyName === policyName;

  if (spawnRequestId) {
    return latestMatching((record) => record.spawnRequestId === spawnRequestId && policyMatches(record));
  }
  if (agentId) {
    return latestMatching((record) => record.agentId === agentId && policyMatches(record));
  }
  if (policyName) {
    return latestMatching((record) => record.policyName === policyName);
  }
  return undefined;
}
501
+
502
/** Looks up a session in the persisted state using the rules of `selectSessionRecord`. */
function findSessionRecord(input: { agentId?: string; policyName?: string; spawnRequestId?: string; tmuxSession?: string }): SessionRecord | undefined {
  const persisted = loadState();
  return selectSessionRecord(persisted, input);
}
505
+
506
/**
 * Reads and validates the runner info file referenced by `record`.
 *
 * The file is only trusted when its `controlUrl` is a plain-HTTP URL whose
 * host really is 127.0.0.1. Missing or mistyped optional fields fall back to
 * the record's values (`agentId`, `provider`), an empty string (`runnerId`),
 * or `undefined`.
 *
 * @returns The normalized runner info, or `null` when the record has no info
 *   file, the file is unreadable or not a JSON object, or the control URL is
 *   rejected.
 */
function readRunnerInfo(record: Pick<SessionRecord, "runnerInfoFile" | "agentId" | "provider">): RunnerInfo | null {
  if (!record.runnerInfoFile) return null;
  try {
    const parsed: unknown = JSON.parse(readFileSync(record.runnerInfoFile, "utf8"));
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null;
    const info = parsed as Record<string, unknown>;
    const controlUrl = info.controlUrl;
    if (typeof controlUrl !== "string" || !controlUrl.startsWith("http://127.0.0.1:")) return null;
    // The prefix alone is not proof of a loopback target:
    // "http://127.0.0.1:80@evil.example/" starts with it, yet its host is
    // evil.example (the prefix is parsed as userinfo). Check the parsed URL.
    // A malformed URL makes the constructor throw, which the catch maps to null.
    const url = new URL(controlUrl);
    if (url.protocol !== "http:" || url.hostname !== "127.0.0.1" || url.username !== "" || url.password !== "") return null;
    return {
      agentId: typeof info.agentId === "string" ? info.agentId : record.agentId,
      runnerId: typeof info.runnerId === "string" ? info.runnerId : "",
      provider: typeof info.provider === "string" ? info.provider : record.provider,
      controlUrl,
      tmuxSession: typeof info.tmuxSession === "string" ? info.tmuxSession : undefined,
      tmuxSocket: typeof info.tmuxSocket === "string" ? info.tmuxSocket : undefined,
      pid: typeof info.pid === "number" ? info.pid : undefined,
      startedAt: typeof info.startedAt === "number" ? info.startedAt : undefined,
    };
  } catch {
    // Unreadable file, invalid JSON or malformed URL: treat as "no runner info".
    return null;
  }
}
527
+
528
/**
 * Requests the terminal attach spec from a runner's control endpoint.
 *
 * The request is aborted after five seconds. A body that is not valid JSON is
 * treated as `null`.
 *
 * @throws Error carrying the runner's `error` string (or the HTTP status) on
 *   a non-OK response, and when the body is not a JSON object.
 */
async function fetchTerminalAttachSpec(controlUrl: string): Promise<TerminalAttachSpec> {
  const response = await fetch(`${controlUrl}/terminal/attach-spec`, { signal: AbortSignal.timeout(5_000) });
  const payload = await response.json().catch(() => null) as unknown;
  const isJsonObject = !!payload && typeof payload === "object" && !Array.isArray(payload);

  if (!response.ok) {
    const reported = isJsonObject ? (payload as { error?: unknown }).error : undefined;
    if (typeof reported === "string") throw new Error(reported);
    throw new Error(`runner attach-spec failed with ${response.status}`);
  }
  if (!isJsonObject) {
    throw new Error("runner attach-spec response must be an object");
  }
  return payload as TerminalAttachSpec;
}
540
+
541
/**
 * Validates an attach spec received from a runner before it is acted upon.
 *
 * Checks, in order: mode is `guest`; provider is a non-blank string; cwd is a
 * string inside the base directory; command is a non-empty array of non-empty
 * strings; env, when present, is an object whose values are all strings.
 *
 * @throws Error naming the first check that fails.
 */
function validateAttachSpec(spec: TerminalAttachSpec, config: OrchestratorConfig): void {
  if (spec.mode !== "guest") {
    throw new Error("runner attach-spec mode must be guest");
  }

  const hasProvider = typeof spec.provider === "string" && !!spec.provider.trim();
  if (!hasProvider) {
    throw new Error("runner attach-spec provider required");
  }

  const cwdAllowed = typeof spec.cwd === "string" && isWithinBaseDir(spec.cwd, config.baseDir);
  if (!cwdAllowed) {
    throw new Error("runner attach-spec cwd must be within base directory");
  }

  const commandValid = Array.isArray(spec.command)
    && spec.command.length > 0
    && spec.command.every((item) => typeof item === "string" && !!item);
  if (!commandValid) {
    throw new Error("runner attach-spec command must be a non-empty string array");
  }

  if (spec.env !== undefined) {
    const env: unknown = spec.env;
    const envValid = !!env
      && typeof env === "object"
      && !Array.isArray(env)
      && Object.values(env as object).every((value) => typeof value === "string");
    if (!envValid) {
      throw new Error("runner attach-spec env must be a string record");
    }
  }
}
552
+
553
/**
 * Generates a fresh guest session name of the form
 * `<tmuxPrefix>-guest-<provider>-<agent>-<8 characters of a random UUID>`.
 *
 * Provider and agent id are lower-cased with runs of characters outside
 * `[a-zA-Z0-9._-]` collapsed to a dash; the agent part is capped at 48
 * characters, and empty parts become `provider` / `agent`.
 */
function guestSessionName(config: OrchestratorConfig, provider: string, agentId: string): string {
  const sanitize = (raw: string): string => raw.replace(/[^a-zA-Z0-9._-]+/g, "-").toLowerCase();
  const providerPart = sanitize(provider) || "provider";
  const agentPart = sanitize(agentId).slice(0, 48) || "agent";
  const nonce = crypto.randomUUID().slice(0, 8);
  return `${config.tmuxPrefix}-guest-${providerPart}-${agentPart}-${nonce}`;
}
558
+
559
/** Reports whether `session` carries the `<tmuxPrefix>-guest-` prefix used for guest sessions. */
function isGuestSessionName(session: string, config: OrchestratorConfig): boolean {
  const guestPrefix = `${config.tmuxPrefix}-guest-`;
  return session.startsWith(guestPrefix);
}
562
+
563
/** Kills and forgets every registered guest session whose expiry is not in the future. */
function cleanupExpiredTerminalGuests(): void {
  const now = Date.now();
  for (const [session, { expiresAt }] of terminalGuests) {
    const stillValid = expiresAt > now;
    if (stillValid) continue;
    killTmuxSession(session);
    terminalGuests.delete(session);
  }
}
571
+
572
/**
 * Asks tmux to kill `session`.
 * The exit code and all output are ignored.
 */
function killTmuxSession(session: string): void {
  const argv = ["tmux", "kill-session", "-t", session];
  Bun.spawnSync(argv, { stdin: "ignore", stdout: "ignore", stderr: "ignore" });
}
579
+
580
/**
 * Starts the runner for session `name`, preferring a systemd user unit.
 *
 * If systemd supervision is enabled but starting the unit fails, the failure
 * is logged and the runner is started as a plain child process instead. In
 * that mode a launch script is written (mode 0700) and executed with stdout
 * and stderr appended to `logFile`.
 *
 * @returns The runner's pid and a description of its supervisor.
 */
function spawnRunner(name: string, command: string[], cwd: string, env: Record<string, string>, logFile: string): SpawnedRunner {
  if (shouldUseSystemdSupervisor()) {
    try {
      return spawnSystemdRunner(name, command, cwd, env, logFile);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      console.error(`[orchestrator] systemd runner supervisor unavailable for ${name}: ${reason}`);
      console.error("[orchestrator] Falling back to process child; this agent will not survive orchestrator service restart.");
    }
  }

  const scriptPath = launchScriptPath(name);
  ensureSessionDir();
  const scriptBody = buildLaunchScript(command, cwd, env);
  writeFileSync(scriptPath, scriptBody, { mode: 0o700 });
  // The write-time mode only applies when the file is created; chmod also
  // covers a script left over from an earlier run.
  chmodSync(scriptPath, 0o700);

  const logFd = openSync(logFile, "a");
  try {
    const child = Bun.spawn([scriptPath], {
      cwd,
      env,
      stdin: "ignore",
      stdout: logFd,
      stderr: logFd,
    });
    return { pid: child.pid, supervisor: { type: "process", launchScript: scriptPath } };
  } finally {
    // This descriptor is no longer needed once the spawn call has returned.
    closeSync(logFd);
  }
}
609
+
610
/**
 * Decides whether runners should be supervised by systemd user units.
 *
 * Only considered on Linux. `AGENT_RELAY_DISABLE_SYSTEMD_SUPERVISOR=1` turns
 * it off and `AGENT_RELAY_FORCE_SYSTEMD_SUPERVISOR=1` turns it on without
 * probing (disable wins when both are set). Otherwise the decision is
 * whether `systemctl --user show-environment` exits successfully.
 *
 * @returns `true` when systemd supervision should be attempted.
 */
function shouldUseSystemdSupervisor(): boolean {
  if (process.env.AGENT_RELAY_DISABLE_SYSTEMD_SUPERVISOR === "1") return false;
  if (process.platform !== "linux") return false;
  if (process.env.AGENT_RELAY_FORCE_SYSTEMD_SUPERVISOR === "1") return true;
  try {
    const result = Bun.spawnSync(["systemctl", "--user", "show-environment"], {
      stdin: "ignore",
      stdout: "ignore",
      stderr: "ignore",
    });
    return result.exitCode === 0;
  } catch {
    // The probe itself can fail to launch (for example when no `systemctl`
    // binary exists). That means "no systemd"; it must not abort the spawn,
    // because the caller invokes this outside its own try/catch.
    return false;
  }
}
621
+
622
/**
 * Starts the runner as a transient systemd user unit.
 *
 * Writes the launch script (mode 0700), stops any unit of the same name,
 * then runs the script via `systemd-run --user` with output appended to
 * `logFile`, and waits up to two seconds for the unit's main pid.
 *
 * @returns The unit's main pid and a systemd supervisor description.
 * @throws Error when `systemd-run` exits non-zero or no live main pid shows
 *   up in time.
 */
function spawnSystemdRunner(name: string, command: string[], cwd: string, env: Record<string, string>, logFile: string): SpawnedRunner {
  const unit = systemdUnitName(name);
  const scriptPath = launchScriptPath(name);
  ensureSessionDir();
  writeFileSync(scriptPath, buildLaunchScript(command, cwd, env), { mode: 0o700 });
  chmodSync(scriptPath, 0o700);

  // Stop a unit of the same name first; the outcome is ignored.
  const quiet = { stdin: "ignore", stdout: "ignore", stderr: "ignore" } as const;
  Bun.spawnSync(["systemctl", "--user", "stop", `${unit}.service`], quiet);

  const systemdRunArgv = [
    "systemd-run",
    "--user",
    `--unit=${unit}`,
    "--collect",
    "--property=KillMode=control-group",
    `--property=StandardOutput=append:${logFile}`,
    `--property=StandardError=append:${logFile}`,
    scriptPath,
  ];
  const started = Bun.spawnSync(systemdRunArgv, {
    stdin: "ignore",
    stdout: "pipe",
    stderr: "pipe",
  });
  if (started.exitCode !== 0) {
    const detail = started.stderr.toString().trim();
    throw new Error(detail || `systemd-run failed with exit code ${started.exitCode}`);
  }

  const mainPid = waitForSystemdMainPid(unit, 2_000);
  if (!mainPid) {
    throw new Error(`systemd unit ${unit}.service started without a MainPID`);
  }
  return { pid: mainPid, supervisor: { type: "systemd", unit, launchScript: scriptPath } };
}
658
+
659
/**
 * Derives the systemd unit name (without `.service`) for a session.
 *
 * Runs of characters outside `[a-zA-Z0-9_.-]` collapse to one dash, leading
 * and trailing dashes are dropped, an empty result becomes `agent`, and the
 * prefixed name is cut to at most 180 characters.
 */
export function systemdUnitName(session: string): string {
  const collapsed = session.replace(/[^a-zA-Z0-9_.-]+/g, "-");
  const trimmed = collapsed.replace(/^-+|-+$/g, "");
  const prefixed = "agent-relay-managed-" + (trimmed || "agent");
  return prefixed.slice(0, 180);
}
663
+
664
/**
 * Returns the launch script location for a session.
 * The file stem is sanitized like the systemd unit name: runs of characters
 * outside `[a-zA-Z0-9_.-]` collapse to a dash, edge dashes are dropped, and
 * an empty result becomes `agent`.
 */
function launchScriptPath(session: string): string {
  const collapsed = session.replace(/[^a-zA-Z0-9_.-]+/g, "-");
  const trimmed = collapsed.replace(/^-+|-+$/g, "");
  const stem = trimmed || "agent";
  return join(SESSION_DIR, `${stem}.sh`);
}
668
+
669
/**
 * Renders the bash script that starts a runner.
 *
 * The script exports each environment variable whose name is a valid shell
 * identifier (sorted by name, values shell-escaped), changes into `cwd`, and
 * `exec`s the escaped command. It ends with a trailing newline.
 */
export function buildLaunchScript(command: string[], cwd: string, env: Record<string, string>): string {
  const identifier = /^[A-Za-z_][A-Za-z0-9_]*$/;
  const exportLines = Object.entries(env)
    .filter(([key, value]) => identifier.test(key) && value !== undefined)
    .sort(([left], [right]) => left.localeCompare(right))
    .map(([key, value]) => `export ${key}=${shellEscape(String(value))}`);

  const escapedCommand = command.map(shellEscape).join(" ");
  const lines = [
    "#!/usr/bin/env bash",
    "set -euo pipefail",
    ...exportLines,
    `cd ${shellEscape(cwd)}`,
    `exec ${escapedCommand}`,
  ];
  return `${lines.join("\n")}\n`;
}
683
+
684
/**
 * Polls every 50 ms until the unit reports a live main pid or the timeout
 * elapses. Blocks the calling thread while waiting.
 *
 * @returns The main pid, or `0` when none appeared within `timeoutMs`.
 */
function waitForSystemdMainPid(unit: string, timeoutMs: number): number {
  for (const deadline = Date.now() + timeoutMs; Date.now() < deadline; Bun.sleepSync(50)) {
    const candidate = systemdMainPid(unit);
    if (candidate > 0 && isPidAlive(candidate)) return candidate;
  }
  return 0;
}
693
+
694
/**
 * Queries the `MainPID` property of `<unit>.service` from the user manager.
 *
 * @returns The reported pid, or `0` when `systemctl` exits non-zero or the
 *   output is not a finite number.
 */
function systemdMainPid(unit: string): number {
  const argv = ["systemctl", "--user", "show", `${unit}.service`, "-p", "MainPID", "--value"];
  const { exitCode, stdout } = Bun.spawnSync(argv, {
    stdin: "ignore",
    stdout: "pipe",
    stderr: "ignore",
  });
  if (exitCode !== 0) return 0;
  const reported = Number(stdout.toString().trim());
  if (!Number.isFinite(reported)) return 0;
  return reported;
}
704
+
705
/**
 * Collects state, result and exit properties of `<unit>.service` for exit
 * diagnostics.
 *
 * @returns The properties that were reported (empty values are omitted), or
 *   an object with `unavailable` set when `systemctl show` exits non-zero.
 */
function systemdUnitDiagnostics(unit: string): NonNullable<ManagedSessionExitDiagnostics["systemd"]> {
  const wanted = ["ActiveState", "SubState", "Result", "ExecMainCode", "ExecMainStatus", "MainPID"];
  const argv = ["systemctl", "--user", "show", `${unit}.service`, ...wanted.flatMap((prop) => ["-p", prop])];
  const shown = Bun.spawnSync(argv, {
    stdin: "ignore",
    stdout: "pipe",
    stderr: "pipe",
  });
  if (shown.exitCode !== 0) {
    const reason = shown.stderr.toString().trim() || `systemctl show exited with ${shown.exitCode}`;
    return { unit, unavailable: reason };
  }

  // Output is one KEY=VALUE pair per line; lines without a key are skipped.
  const props = new Map<string, string>();
  for (const line of shown.stdout.toString().split("\n")) {
    const separator = line.indexOf("=");
    if (separator > 0) {
      props.set(line.slice(0, separator), line.slice(separator + 1));
    }
  }

  const text = (key: string): string | undefined => props.get(key) || undefined;
  const mainPid = Number(props.get("MainPID"));
  return {
    unit,
    activeState: text("ActiveState"),
    subState: text("SubState"),
    result: text("Result"),
    execMainCode: text("ExecMainCode"),
    execMainStatus: text("ExecMainStatus"),
    mainPid: Number.isFinite(mainPid) && mainPid > 0 ? mainPid : undefined,
  };
}
742
+
743
/**
 * Summarizes a session log for exit diagnostics.
 *
 * @returns Size, emptiness flag and the last 20 entries produced by
 *   `logLines`; or only `logUnavailable` with the error message when the file
 *   cannot be inspected.
 */
function logFileDiagnostics(logFile: string): Pick<ManagedSessionExitDiagnostics, "logBytes" | "logEmpty" | "logTail"> & { logUnavailable?: string } {
  try {
    const { size } = statSync(logFile);
    if (size === 0) {
      return { logBytes: 0, logEmpty: true, logTail: [] };
    }
    const tail = logLines(readFileSync(logFile, "utf8")).slice(-20);
    return { logBytes: size, logEmpty: false, logTail: tail };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return { logUnavailable: reason };
  }
}
759
+
760
/**
 * Builds a one-line, human-readable summary of why a managed session ended.
 *
 * Clauses are joined with `"; "`: the exit headline with the runtime rounded
 * to whole seconds, the systemd status (or why it is unavailable), the state
 * of the stdout/stderr log, and whether runner info was written.
 */
function describeSessionExit(record: SessionRecord, diagnostics: Omit<ManagedSessionExitDiagnostics, "lastError">): string {
  const seconds = Math.max(0, Math.round(diagnostics.runtimeMs / 1000));
  const clauses = [`managed ${record.provider} session ${record.name} exited after ${seconds}s`];
  const slashJoined = (values: unknown[]): string => values.filter(Boolean).join("/") || "unknown";

  const { systemd } = diagnostics;
  if (systemd?.unavailable) {
    clauses.push(`systemd status unavailable: ${systemd.unavailable}`);
  } else if (systemd) {
    const state = slashJoined([systemd.activeState, systemd.subState]);
    const result = systemd.result || "unknown";
    const exit = slashJoined([systemd.execMainCode, systemd.execMainStatus]);
    clauses.push(`systemd ${systemd.unit}.service state=${state} result=${result} exit=${exit}`);
  }

  if (diagnostics.logEmpty) {
    clauses.push("stdout/stderr log is empty");
  } else if (diagnostics.logBytes === undefined) {
    clauses.push("stdout/stderr log unavailable");
  }

  if (!diagnostics.runnerInfoPresent) {
    clauses.push("runner info was not written");
  }
  return clauses.join("; ");
}
779
+
780
/**
 * Gathers exit diagnostics for the managed session matching `input`.
 *
 * Looks the session record up via `findSessionRecord`, then snapshots the
 * supervisor, current pid, terminal availability, log file state, runner-info
 * presence and (for systemd-supervised sessions with a unit) the unit status.
 *
 * @param input Identifiers used to locate the session record.
 * @returns The diagnostics, including a `lastError` summary produced by
 *   `describeSessionExit`, or `null` when no record matches.
 */
export function diagnoseSessionExit(input: { agentId?: string; policyName?: string; spawnRequestId?: string; tmuxSession?: string }): ManagedSessionExitDiagnostics | null {
  const record = findSessionRecord(input);
  if (!record) return null;

  const detectedAt = Date.now();
  const supervisor = sessionSupervisor(record);
  const currentPid = currentSessionPid(record);
  const runnerInfo = readRunnerInfo(record);
  const terminalAvailable = tmuxHasSession(record.name, runnerInfo?.tmuxSocket);
  const log = logFileDiagnostics(record.logFile);

  let runnerInfoPresent = false;
  if (record.runnerInfoFile) runnerInfoPresent = existsSync(record.runnerInfoFile);

  // Human-readable list of the evidence that could not be collected.
  const unavailable: string[] = [];
  if (log.logUnavailable) unavailable.push(`stdout/stderr log unavailable: ${log.logUnavailable}`);
  if (log.logEmpty) unavailable.push("stdout/stderr log empty");
  if (!runnerInfoPresent) unavailable.push("runner info unavailable");

  // Only systemd-supervised sessions with a known unit get unit-level fields.
  const systemdUnit = supervisor.type === "systemd" && supervisor.unit ? supervisor.unit : undefined;

  const base: Omit<ManagedSessionExitDiagnostics, "lastError"> = {
    agentId: record.agentId,
    provider: record.provider as "claude" | "codex",
    workspaceMode: record.workspaceMode,
    workspace: record.workspace ?? (record.workspaceMode ? { mode: "shared", requestedMode: record.workspaceMode } : undefined),
    sessionName: record.name,
    tmuxSession: record.name,
    cwd: record.cwd,
    label: record.label,
    policyName: record.policyName,
    spawnRequestId: record.spawnRequestId,
    automationRunId: record.automationRunId,
    supervisor: supervisor.type,
    ...(systemdUnit ? { systemdUnit } : {}),
    terminalSession: record.name,
    terminalAvailable,
    pid: record.pid,
    currentPid,
    startedAt: record.startedAt,
    detectedAt,
    runtimeMs: Math.max(0, detectedAt - record.startedAt),
    logFile: record.logFile,
    logBytes: log.logBytes,
    logEmpty: log.logEmpty,
    logTail: log.logTail,
    runnerInfoFile: record.runnerInfoFile,
    runnerInfoPresent,
    ...(systemdUnit ? { systemd: systemdUnitDiagnostics(systemdUnit) } : {}),
    ...(unavailable.length ? { unavailable } : {}),
  };

  const lastError = describeSessionExit(record, base);
  return { ...base, lastError };
}
829
+
830
/**
 * Runs `systemctl --user stop <unit>.service` synchronously.
 * All stdio is ignored and the exit code is not inspected.
 *
 * @param unit Unit name without the `.service` suffix.
 */
function stopSystemdUnit(unit: string): void {
  const argv = ["systemctl", "--user", "stop", `${unit}.service`];
  const silent = { stdin: "ignore", stdout: "ignore", stderr: "ignore" } as const;
  Bun.spawnSync(argv, silent);
}
837
+
838
/**
 * Runs `systemctl --user kill --kill-whom=all --signal=SIGKILL <unit>.service`
 * synchronously. All stdio is ignored and the exit code is not inspected.
 *
 * @param unit Unit name without the `.service` suffix.
 */
function killSystemdUnit(unit: string): void {
  const argv = ["systemctl", "--user", "kill", "--kill-whom=all", "--signal=SIGKILL", `${unit}.service`];
  const silent = { stdin: "ignore", stdout: "ignore", stderr: "ignore" } as const;
  Bun.spawnSync(argv, silent);
}
845
+
846
/**
 * Releases what a supervisor left behind: stops the systemd unit (when the
 * supervisor is systemd-based and names one) and removes the launch script
 * if one is recorded. A missing script is not an error (`force: true`).
 */
function cleanupSupervisor(supervisor: SessionSupervisor): void {
  if (supervisor.type === "systemd" && supervisor.unit) {
    stopSystemdUnit(supervisor.unit);
  }
  const script = supervisor.launchScript;
  if (!script) return;
  rmSync(script, { force: true });
}
850
+
851
/**
 * Cleans up everything tied to a session record: its supervisor (via
 * `cleanupSupervisor`) and, when recorded, the runner-info file.
 */
function cleanupSessionRecord(record: SessionRecord): void {
  const supervisor = sessionSupervisor(record);
  cleanupSupervisor(supervisor);

  const infoFile = record.runnerInfoFile;
  if (infoFile) {
    rmSync(infoFile, { force: true });
  }
}
855
+
856
/**
 * Stops a managed session and removes its bookkeeping once it is gone.
 *
 * Systemd-supervised sessions are stopped through `systemctl stop`, escalating
 * to a unit-wide SIGKILL if the session is still alive after the graceful
 * timeout. Other sessions receive SIGTERM (only when `graceful`) and then
 * SIGKILL. In both paths the record is kept whenever the session is still
 * alive afterwards, so a later call can retry.
 *
 * @param name Session name; must start with `${config.tmuxPrefix}-`.
 * @param config Orchestrator configuration supplying the managed prefix.
 * @param reason Free-text reason, only used in the log line.
 * @param graceful When false, the pid path skips SIGTERM and the wait budget shrinks.
 * @param timeoutMs Optional graceful wait budget, normalised by `sessionStopTimeoutMs`.
 * @returns `stopped` tells whether the session was brought down by this call;
 *   `wasRunning` tells whether it was alive when the call started.
 * @throws Error when `name` does not carry the orchestrator's prefix.
 */
export async function stopSession(name: string, config: OrchestratorConfig, reason: string, graceful = true, timeoutMs?: number): Promise<{ stopped: boolean; wasRunning: boolean }> {
  if (!name.startsWith(`${config.tmuxPrefix}-`)) throw new Error("session is not managed by this orchestrator");

  const records = loadState();
  const record = records.find((r) => r.name === name);
  if (!record || !isSessionRecordAlive(record)) {
    // Nothing is running: drop leftovers so the stale record does not linger.
    if (record) cleanupSessionRecord(record);
    removeSessionRecord(name);
    return { stopped: false, wasRunning: false };
  }

  const pid = currentSessionPid(record);
  console.error(`[orchestrator] Stopping session ${name} (pid ${pid}): ${reason}`);

  // Polls `stillAlive` every `pollMs` until it turns false or `budgetMs` has elapsed.
  const waitForExit = async (stillAlive: () => boolean, budgetMs: number, pollMs: number): Promise<void> => {
    const deadline = Date.now() + budgetMs;
    while (Date.now() < deadline && stillAlive()) {
      await Bun.sleep(pollMs);
    }
  };
  // Shared success epilogue: remove supervisor artefacts and the state record.
  const forget = (): { stopped: boolean; wasRunning: boolean } => {
    cleanupSessionRecord(record);
    removeSessionRecord(name);
    return { stopped: true, wasRunning: true };
  };

  const supervisor = sessionSupervisor(record);
  const gracefulTimeoutMs = sessionStopTimeoutMs(graceful, timeoutMs);
  if (supervisor.type === "systemd" && supervisor.unit) {
    const unit = supervisor.unit;
    const unitAlive = (): boolean => isSessionRecordAlive(record);
    stopSystemdUnit(unit);
    await waitForExit(unitAlive, gracefulTimeoutMs, 200);
    if (unitAlive()) {
      killSystemdUnit(unit);
      await waitForExit(unitAlive, 2_000, 100);
    }
    if (unitAlive()) {
      // Mirror the pid path: leave a trace explaining why the record is kept.
      console.error(`[orchestrator] Session ${name} (systemd unit ${unit}) survived SIGKILL; keeping record for retry`);
      return { stopped: false, wasRunning: true };
    }
    return forget();
  }

  // A pid <= 0 addresses a whole process group (pid 0 is the caller's own),
  // so never hand such a value to process.kill.
  if (!Number.isInteger(pid) || pid <= 0) {
    console.error(`[orchestrator] Session ${name} has no valid pid (${pid}); refusing to signal, keeping record for retry`);
    return { stopped: false, wasRunning: true };
  }

  const pidAlive = (): boolean => isPidAlive(pid);
  if (graceful) {
    try {
      process.kill(pid, "SIGTERM");
    } catch {
      // Best effort: the liveness poll below decides what happens next.
    }
    await waitForExit(pidAlive, gracefulTimeoutMs, 200);
  }

  if (pidAlive()) {
    try {
      process.kill(pid, "SIGKILL");
    } catch {
      // Best effort: the final liveness check below reports the outcome.
    }
    await waitForExit(pidAlive, 2_000, 100);
  }

  // Never report success while the process is still alive: deleting the session
  // record here would orphan a running process with no handle to stop it again.
  if (pidAlive()) {
    console.error(`[orchestrator] Session ${name} (pid ${pid}) survived SIGKILL; keeping record for retry`);
    return { stopped: false, wasRunning: true };
  }

  return forget();
}
918
+
919
/**
 * Resolves how long a stop request may wait for the session to exit.
 *
 * @param graceful When false the result is always 2000 ms.
 * @param timeoutMs Caller-supplied budget; honoured only when it is a positive
 *   safe integer, and capped at 60000 ms.
 * @returns The wait budget in milliseconds (10000 when no usable budget is given).
 */
function sessionStopTimeoutMs(graceful: boolean, timeoutMs?: number): number {
  if (!graceful) return 2_000;
  const usable = typeof timeoutMs === "number" && Number.isSafeInteger(timeoutMs) && timeoutMs > 0;
  return usable ? Math.min(timeoutMs, 60_000) : 10_000;
}
924
+
925
/**
 * Returns the tail of a managed session's log file.
 *
 * @param name Session name; must start with `${config.tmuxPrefix}-`.
 * @param config Orchestrator configuration supplying the managed prefix.
 * @param lines Number of trailing lines wanted; truncated to an integer and
 *   clamped to 1..1000. A value that is not a number falls back to 100.
 * @param options `raw: true` asks `logLines` to skip sanitisation.
 * @returns The session name, the selected lines (empty when the log cannot be
 *   read) and whether the recorded session is currently alive.
 * @throws Error when `name` does not carry the orchestrator's prefix.
 */
export function captureSession(
  name: string,
  config: OrchestratorConfig,
  lines = 100,
  options: { raw?: boolean } = {},
): { session: string; lines: string[]; running: boolean } {
  if (!name.startsWith(`${config.tmuxPrefix}-`)) throw new Error("session is not managed by this orchestrator");

  const records = loadState();
  const record = records.find((r) => r.name === name);
  const logFile = record?.logFile ?? logFilePath(name);
  const running = record ? isSessionRecordAlive(record) : false;

  let content: string;
  try {
    content = readFileSync(logFile, "utf8");
  } catch {
    // An unreadable or missing log is reported as "no output", not as an error.
    return { session: name, lines: [], running };
  }

  const allLines = logLines(content, !options.raw);
  // NaN passes straight through Math.max/Math.min, and slice(-NaN) would return
  // the whole log, defeating the 1000-line cap; fall back to the default instead.
  const requested = Math.trunc(Number(lines));
  const safeLines = Math.min(Math.max(Number.isNaN(requested) ? 100 : requested, 1), 1000);
  return {
    session: name,
    lines: allLines.slice(-safeLines),
    running,
  };
}
953
+
954
/**
 * Takes a snapshot of a managed session's tmux pane.
 *
 * @param name Session name; must start with `${config.tmuxPrefix}-`.
 * @param config Orchestrator configuration supplying the managed prefix.
 * @returns When tmux has no such session: empty content with `running: false`.
 *   Otherwise the pane text from `tmux capture-pane -p -e -S -1000`, together
 *   with whatever pane size and cursor position could be determined.
 * @throws Error when the name is not managed, or when `capture-pane` exits
 *   non-zero (the message is tmux's stderr when it printed any).
 */
export function captureTerminal(name: string, config: OrchestratorConfig): TerminalSnapshot {
  const managedPrefix = `${config.tmuxPrefix}-`;
  if (!name.startsWith(managedPrefix)) throw new Error("session is not managed by this orchestrator");

  const agentAlive = isSessionAlive(name);
  const socketName = tmuxSocketForSession(name);
  if (!tmuxHasSession(name, socketName)) {
    return { session: name, content: "", running: false, agentAlive, capturedAt: Date.now() };
  }

  const dimensions = tmuxPaneSize(name, socketName);
  const cursorPosition = tmuxCursorPos(name, socketName);
  const captureArgv = tmuxCommand(socketName, "capture-pane", "-p", "-e", "-S", "-1000", "-t", name);
  const capture = Bun.spawnSync(captureArgv, { stdin: "ignore", stdout: "pipe", stderr: "pipe" });
  if (capture.exitCode !== 0) {
    const detail = capture.stderr.toString().trim();
    throw new Error(detail || `tmux capture-pane failed with exit code ${capture.exitCode}`);
  }

  const content = capture.stdout.toString();
  return {
    session: name,
    content,
    running: true,
    agentAlive,
    ...dimensions,
    ...cursorPosition,
    capturedAt: Date.now(),
  };
}
986
+
987
/**
 * Splits raw terminal input into tokens: runs of printable text become
 * `literal` tokens, recognised control input becomes `key` tokens.
 *
 * Recognised keys: the CSI sequences for Up/Down/Right/Left/Home/End/Delete,
 * CR and LF (each one `Enter`), Tab, Ctrl-C (`C-c`), DEL and BS (`BSpace`) and
 * a lone ESC (`Escape`). Any other code unit below space is dropped without
 * ending the current literal run.
 *
 * @param data Raw input string.
 * @returns Tokens in input order.
 */
export function terminalInputTokens(data: string): TerminalInputToken[] {
  const csiKeys: ReadonlyArray<readonly [string, string]> = [
    ["\x1b[A", "Up"],
    ["\x1b[B", "Down"],
    ["\x1b[C", "Right"],
    ["\x1b[D", "Left"],
    ["\x1b[H", "Home"],
    ["\x1b[F", "End"],
    ["\x1b[3~", "Delete"],
  ];
  const singleCharKeys = new Map<string, string>([
    ["\r", "Enter"],
    ["\n", "Enter"],
    ["\t", "Tab"],
    ["\u0003", "C-c"],
    ["\u007f", "BSpace"],
    ["\b", "BSpace"],
    ["\x1b", "Escape"],
  ]);

  const out: TerminalInputToken[] = [];
  let pendingText = "";
  // Emits a key token, first closing any literal run collected so far.
  const emitKey = (key: string): void => {
    if (pendingText) {
      out.push({ type: "literal", value: pendingText });
      pendingText = "";
    }
    out.push({ type: "key", value: key });
  };

  let cursor = 0;
  while (cursor < data.length) {
    // Multi-character escape sequences win over the lone-ESC mapping.
    let consumed = 0;
    for (const [sequence, key] of csiKeys) {
      if (data.startsWith(sequence, cursor)) {
        emitKey(key);
        consumed = sequence.length;
        break;
      }
    }
    if (consumed > 0) {
      cursor += consumed;
      continue;
    }

    const unit = data.charAt(cursor);
    const mappedKey = singleCharKeys.get(unit);
    if (mappedKey !== undefined) {
      emitKey(mappedKey);
    } else if (unit >= " ") {
      pendingText += unit;
    }
    cursor += 1;
  }

  if (pendingText) out.push({ type: "literal", value: pendingText });
  return out;
}
1039
+
1040
/**
 * Types input into a managed session's tmux pane.
 *
 * The input is tokenised by `terminalInputTokens`; each token is delivered
 * with its own `tmux send-keys` call (literal text with `-l`, keys by name).
 *
 * @param name Session name; must start with `${config.tmuxPrefix}-`.
 * @param config Orchestrator configuration supplying the managed prefix.
 * @param input Request body; must be an object whose `data` is a string of at
 *   most 4096 characters.
 * @returns The session name, `running: true`, the number of tokens sent and a timestamp.
 * @throws Error when the session is not managed or not running, when the body
 *   is malformed, or when a `send-keys` call exits non-zero.
 */
export function sendTerminalInput(name: string, config: OrchestratorConfig, input: unknown): TerminalInputResult {
  if (!name.startsWith(`${config.tmuxPrefix}-`)) throw new Error("session is not managed by this orchestrator");
  const socketName = tmuxSocketForSession(name);
  if (!tmuxHasSession(name, socketName)) throw new Error("terminal session is not running");
  if (!input || typeof input !== "object" || Array.isArray(input)) throw new Error("terminal input body must be an object");

  const data = (input as { data?: unknown }).data;
  if (typeof data !== "string") throw new Error("terminal input data must be a string");
  if (data.length > 4096) throw new Error("terminal input exceeds 4096 characters");

  // Builds the argv for literal text so tmux cannot reinterpret user input:
  //  - "--" ends option parsing, otherwise text such as "-v" is read as flags;
  //  - an argument ending in ";" is taken as a command separator on the tmux
  //    command line, so the trailing ";" is escaped as "\;" (tmux strips the
  //    backslash again before the text reaches the pane).
  const literalArgs = (text: string): string[] => {
    const guarded = text.endsWith(";") ? `${text.slice(0, -1)}\\;` : text;
    return tmuxCommand(socketName, "send-keys", "-t", name, "-l", "--", guarded);
  };

  const tokens = terminalInputTokens(data);
  for (const token of tokens) {
    const args = token.type === "literal"
      ? literalArgs(token.value)
      : tmuxCommand(socketName, "send-keys", "-t", name, token.value);
    const result = Bun.spawnSync(args, {
      stdin: "ignore",
      stdout: "pipe",
      stderr: "pipe",
    });
    if (result.exitCode !== 0) {
      const stderr = result.stderr.toString().trim();
      throw new Error(stderr || `tmux send-keys failed with exit code ${result.exitCode}`);
    }
  }

  return {
    session: name,
    running: true,
    sent: tokens.length,
    capturedAt: Date.now(),
  };
}
1073
+
1074
/**
 * Resizes a managed session's tmux window.
 *
 * @param name Session name; must start with `${config.tmuxPrefix}-`.
 * @param config Orchestrator configuration supplying the managed prefix.
 * @param input Request body; must be an object with numeric `cols` (10-500)
 *   and `rows` (5-200). Values are rounded to integers before being applied.
 * @returns The session name and the applied (rounded) size.
 * @throws Error when the session is not managed or not running, when the body
 *   is malformed or out of range, or when `tmux resize-window` exits non-zero.
 */
export function resizeTerminal(name: string, config: OrchestratorConfig, input: unknown): { session: string; cols: number; rows: number } {
  if (!name.startsWith(`${config.tmuxPrefix}-`)) throw new Error("session is not managed by this orchestrator");
  const socketName = tmuxSocketForSession(name);
  if (!tmuxHasSession(name, socketName)) throw new Error("terminal session is not running");
  if (!input || typeof input !== "object" || Array.isArray(input)) throw new Error("resize body must be an object");

  const cols = (input as { cols?: unknown }).cols;
  const rows = (input as { rows?: unknown }).rows;
  // NaN has typeof "number" and fails every range comparison below, so without
  // this check it would slip through and reach tmux as the string "NaN".
  if (typeof cols !== "number" || typeof rows !== "number" || Number.isNaN(cols) || Number.isNaN(rows)) {
    throw new Error("cols and rows must be numbers");
  }
  if (cols < 10 || cols > 500 || rows < 5 || rows > 200) throw new Error("cols must be 10-500, rows must be 5-200");

  const clamped = { cols: Math.round(cols), rows: Math.round(rows) };
  const result = Bun.spawnSync(tmuxCommand(socketName, "resize-window", "-t", name, "-x", String(clamped.cols), "-y", String(clamped.rows)), {
    stdin: "ignore",
    stdout: "pipe",
    stderr: "pipe",
  });
  if (result.exitCode !== 0) {
    const stderr = result.stderr.toString().trim();
    throw new Error(stderr || `tmux resize-window failed with exit code ${result.exitCode}`);
  }

  return { session: name, ...clamped };
}
1098
+
1099
/**
 * Looks up the tmux socket name recorded in the runner info of the session
 * called `name`.
 *
 * @returns The socket name, or `undefined` when there is no such session
 *   record or its runner info carries none.
 */
function tmuxSocketForSession(name: string): string | undefined {
  for (const candidate of loadState()) {
    if (candidate.name !== name) continue;
    return readRunnerInfo(candidate)?.tmuxSocket;
  }
  return undefined;
}
1103
+
1104
/**
 * Builds a tmux argv, selecting the server socket with `-L` when a
 * non-empty socket name is given.
 *
 * @param socketName Optional tmux socket name.
 * @param args Arguments appended after the optional socket selection.
 */
function tmuxCommand(socketName: string | undefined, ...args: string[]): string[] {
  const argv = ["tmux"];
  if (socketName) argv.push("-L", socketName);
  argv.push(...args);
  return argv;
}
1107
+
1108
/**
 * Reports whether `tmux has-session -t <name>` exits with status 0 on the
 * given socket (or the default one when none is given).
 */
function tmuxHasSession(name: string, socketName?: string): boolean {
  const probe = tmuxCommand(socketName, "has-session", "-t", name);
  const { exitCode } = Bun.spawnSync(probe, {
    stdin: "ignore",
    stdout: "ignore",
    stderr: "ignore",
  });
  return exitCode === 0;
}
1116
+
1117
/**
 * Queries tmux for the pane's width and height.
 *
 * @returns `cols` and/or `rows` for each dimension that parsed to a positive
 *   finite number; an empty object when the tmux call exits non-zero.
 */
function tmuxPaneSize(name: string, socketName?: string): { cols?: number; rows?: number } {
  const query = tmuxCommand(socketName, "display-message", "-p", "-t", name, "#{pane_width} #{pane_height}");
  const probe = Bun.spawnSync(query, {
    stdin: "ignore",
    stdout: "pipe",
    stderr: "ignore",
  });
  if (probe.exitCode !== 0) return {};

  const fields = probe.stdout.toString().trim().split(/\s+/, 2);
  const width = Number(fields[0]);
  const height = Number(fields[1]);
  const size: { cols?: number; rows?: number } = {};
  if (Number.isFinite(width) && width > 0) size.cols = width;
  if (Number.isFinite(height) && height > 0) size.rows = height;
  return size;
}
1132
+
1133
/**
 * Queries tmux for the pane's cursor position.
 *
 * @returns `cursorX` and/or `cursorY` for each coordinate that tmux printed as
 *   a plain non-negative integer; an empty object when the tmux call exits
 *   non-zero or prints nothing usable.
 */
function tmuxCursorPos(name: string, socketName?: string): { cursorX?: number; cursorY?: number } {
  const result = Bun.spawnSync(tmuxCommand(socketName, "display-message", "-p", "-t", name, "#{cursor_x} #{cursor_y}"), {
    stdin: "ignore",
    stdout: "pipe",
    stderr: "ignore",
  });
  if (result.exitCode !== 0) return {};
  const [xRaw, yRaw] = result.stdout.toString().trim().split(/\s+/, 2);
  // Number("") is 0, so empty output used to be reported as a real cursor at
  // column 0; accept digit-only fields and treat anything else as unknown.
  const coordinate = (raw: string | undefined): number | undefined =>
    raw !== undefined && /^\d+$/.test(raw) ? Number(raw) : undefined;
  const cursorX = coordinate(xRaw);
  const cursorY = coordinate(yRaw);
  return {
    ...(cursorX !== undefined ? { cursorX } : {}),
    ...(cursorY !== undefined ? { cursorY } : {}),
  };
}
1148
+
1149
/**
 * Splits log text into non-blank lines with trailing whitespace removed.
 *
 * @param content Raw log text.
 * @param sanitize When true (default) the text first goes through
 *   `sanitizeLogText`; when false only CRLF and CR are normalised to LF.
 * @returns The remaining lines, in order.
 */
export function logLines(content: string, sanitize = true): string[] {
  const normalized = sanitize
    ? sanitizeLogText(content)
    : content.replace(/\r\n/g, "\n").replace(/\r/g, "\n");

  const kept: string[] = [];
  for (const rawLine of normalized.split("\n")) {
    // Whitespace-only lines are dropped entirely.
    if (rawLine.trim().length === 0) continue;
    kept.push(rawLine.trimEnd());
  }
  return kept;
}
1156
+
1157
/**
 * Strips terminal control output from log text so it reads as plain text.
 *
 * The passes run in a fixed order: OSC strings, DCS/SOS/PM/APC strings,
 * cursor-forward (rewritten to up to 120 spaces), remaining CSI sequences,
 * charset/two-character escapes, other ESC sequences, 8-bit CSI, any leftover
 * ESC with its following character, CRLF/CR to LF, and finally the remaining
 * control characters (tab and newline are kept).
 */
export function sanitizeLogText(content: string): string {
  let text = content;

  // Operating-system commands, terminated by BEL or ST.
  text = text.replace(/\x1B\][^\x07\x1B]*(?:\x07|\x1B\\)/g, "");
  // Device-control and similar string sequences, terminated by ST.
  text = text.replace(/\x1B[PX^_][\s\S]*?\x1B\\/g, "");
  // Cursor-forward becomes spaces so column alignment roughly survives.
  text = text.replace(/\x1B\[(\d*)C/g, (_match, count: string) => " ".repeat(Math.min(Number(count || "1"), 120)));
  // Every other CSI sequence is dropped.
  text = text.replace(/\x1B\[[0-?]*[ -/]*[@-~]/g, "");
  text = text.replace(/\x1B[()#%*+\-.\/ ][ -~]/g, "");
  text = text.replace(/\x1B[ -/]*[@-~]/g, "");
  text = text.replace(/\x9B[0-?]*[ -/]*[@-~]/g, "");
  // Whatever escape is left goes away together with the character after it.
  text = text.replace(/\x1B.?/g, "");
  // Normalise line endings before removing the remaining control characters.
  text = text.replace(/\r\n/g, "\n");
  text = text.replace(/\r/g, "\n");
  text = text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
  return text;
}
1171
+
1172
/**
 * Lists the recorded sessions whose name starts with `${prefix}-`, with a
 * freshly probed pid, liveness and terminal availability for each.
 *
 * @param prefix Session-name prefix (without the trailing dash).
 * @returns One `SessionInfo` per matching record, in state order.
 */
export function listSessions(prefix: string): SessionInfo[] {
  const wantedPrefix = `${prefix}-`;
  const sessions: SessionInfo[] = [];

  for (const entry of loadState()) {
    if (!entry.name.startsWith(wantedPrefix)) continue;

    const supervisor = sessionSupervisor(entry);
    const pid = currentSessionPid(entry);
    const alive = isSessionRecordAlive(entry);
    // The unit name is only exposed for systemd-supervised sessions.
    const unitField = supervisor.type === "systemd" && supervisor.unit ? { systemdUnit: supervisor.unit } : {};
    const terminalAvailable = tmuxHasSession(entry.name, readRunnerInfo(entry)?.tmuxSocket);

    sessions.push({
      name: entry.name,
      sessionName: entry.name,
      pid,
      alive,
      supervisor: supervisor.type,
      ...unitField,
      terminalSession: entry.name,
      terminalAvailable,
      logFile: entry.logFile,
    });
  }

  return sessions;
}
1190
+
1191
/**
 * Reports whether the recorded session called `name` is alive according to
 * `isSessionRecordAlive`. An unknown name yields `false`.
 */
export function isSessionAlive(name: string): boolean {
  for (const candidate of loadState()) {
    // Only the first record with a matching name is consulted.
    if (candidate.name === name) return isSessionRecordAlive(candidate);
  }
  return false;
}
1195
+
1196
/**
 * Refreshes a managed-agent report from the matching session record.
 *
 * The record is located by session name (preferring `sessionName` over
 * `tmuxSession`), agent id, policy name and spawn request id. When one is
 * found, the report's workspace fields and pid are replaced, and the fields
 * from `sessionReportFields` are applied last.
 *
 * @returns The refreshed copy, or `agent` itself when no record matches.
 */
export function refreshManagedAgentReport(agent: ManagedAgentReport): ManagedAgentReport {
  const lookup = {
    tmuxSession: agent.sessionName ?? agent.tmuxSession,
    agentId: agent.agentId,
    policyName: agent.policyName,
    spawnRequestId: agent.spawnRequestId,
  };
  const record = findSessionRecord(lookup);
  if (!record) {
    return agent;
  }

  const refreshed: ManagedAgentReport = {
    ...agent,
    workspaceMode: record.workspaceMode,
    // Prefer recorded metadata, then the report's own, then a synthesised "shared" entry.
    workspace: record.workspace ?? agent.workspace ?? (record.workspaceMode ? { mode: "shared", requestedMode: record.workspaceMode } : undefined),
    pid: currentSessionPid(record),
    ...sessionReportFields(record),
  };
  return refreshed;
}
1212
+
1213
/**
 * Reconciles persisted session records with reality at orchestrator start-up.
 *
 * Records carrying this orchestrator's prefix are checked one by one: dead
 * sessions are cleaned up and dropped, live ones get their pid refreshed and
 * are reported back as managed agents. Records that were not inspected are
 * written back untouched.
 *
 * @param config Orchestrator configuration supplying the managed prefix.
 * @returns One report per session that is still alive.
 */
export async function recoverExistingSessions(
  config: OrchestratorConfig,
): Promise<ManagedAgentReport[]> {
  const records = loadState().filter((r) => r.name.startsWith(`${config.tmuxPrefix}-`));
  const managed: ManagedAgentReport[] = [];
  const alive: SessionRecord[] = [];

  for (const record of records) {
    if (!isSessionRecordAlive(record)) {
      console.error(`[orchestrator] Stale session: ${record.name} (pid ${record.pid} dead) — removing`);
      cleanupSessionRecord(record);
      continue;
    }

    const pid = currentSessionPid(record);
    const updatedRecord = { ...record, pid };
    alive.push(updatedRecord);
    managed.push({
      agentId: record.agentId,
      provider: record.provider as "claude" | "codex",
      workspaceMode: record.workspaceMode,
      workspace: record.workspace ?? (record.workspaceMode ? { mode: "shared", requestedMode: record.workspaceMode } : undefined),
      ...sessionReportFields(updatedRecord),
      cwd: record.cwd,
      label: record.label,
      approvalMode: record.approvalMode || "guarded",
      policyName: record.policyName,
      spawnRequestId: record.spawnRequestId,
      automationRunId: record.automationRunId,
      pid,
      startedAt: record.startedAt,
    });

    // Log the refreshed pid: it is the one stored and reported, and it can
    // differ from the pid that was persisted in the record.
    console.error(`[orchestrator] Recovered existing session: ${record.name} (pid ${pid})`);
  }

  // Merge rather than overwrite: only replace the records this recovery actually
  // inspected, so a session added concurrently (or owned by another prefix) is
  // not erased by writing back a pre-filtered snapshot.
  const processedNames = new Set(records.map((r) => r.name));
  const untouched = loadState().filter((r) => !processedNames.has(r.name));
  saveState([...untouched, ...alive]);
  return managed;
}
1257
+
1258
/**
 * Builds an agent id of the form `<host>-<provider>-<label>-<8 chars>`.
 *
 * Host and label are lower-cased, with every run of characters outside
 * `[a-zA-Z0-9._-]` collapsed into a single dash. The suffix is the first
 * eight characters of a freshly generated UUID.
 */
function managedAgentId(config: OrchestratorConfig, provider: string, label: string): string {
  const slug = (value: string): string => value.replace(/[^a-zA-Z0-9._-]+/g, "-").toLowerCase();
  const hostPart = slug(config.hostname);
  const labelPart = slug(label);
  const suffix = crypto.randomUUID().slice(0, 8);
  return [hostPart, provider, labelPart, suffix].join("-");
}
1263
+
1264
/**
 * Quotes a string for use as a single POSIX-shell word.
 *
 * Strings made only of `[a-zA-Z0-9._\-/:=@]` (and not empty) are returned
 * unchanged. Anything else is wrapped in single quotes, with each embedded
 * single quote rewritten as `'\''`.
 */
export function shellEscape(s: string): string {
  const isPlainWord = /^[a-zA-Z0-9._\-/:=@]+$/.test(s);
  if (isPlainWord) {
    return s;
  }
  const body = s.split("'").join("'\\''");
  return "'" + body + "'";
}