agent-relay-orchestrator 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api.ts +6 -4
- package/src/index.ts +20 -1
- package/src/spawn.ts +168 -13
package/package.json
CHANGED
package/src/api.ts
CHANGED
|
@@ -6,7 +6,7 @@ import { proxyArtifactRequest } from "./artifact-proxy";
|
|
|
6
6
|
import type { OrchestratorConfig } from "./config";
|
|
7
7
|
import type { ProviderProbeCache } from "./provider-probe";
|
|
8
8
|
import type { RelayClient } from "./relay";
|
|
9
|
-
import { captureSession, captureSessionMirror, captureTerminal, createTerminalGuest, listSessions, sendTerminalInput, resizeTerminal, stopTerminalGuest } from "./spawn";
|
|
9
|
+
import { captureSession, captureSessionMirror, captureTerminal, createTerminalGuest, listSessions, sendTerminalInput, resizeTerminal, stopTerminalGuest, validateTerminalInputData, validateTerminalResize } from "./spawn";
|
|
10
10
|
import { acquireTerminalStream, type TerminalStreamHandle, type TerminalStreamSubscriber } from "./terminal-stream";
|
|
11
11
|
import { VERSION, runtimeMetadata } from "./version";
|
|
12
12
|
import { previewWorkspaceMerge, probeWorkspace, workspaceDiff, workspaceGitState } from "./workspace-probe";
|
|
@@ -751,11 +751,13 @@ function handleTerminalSocketMessage(ws: TerminalSocket, data: string | Buffer):
|
|
|
751
751
|
const frame = payload as Record<string, unknown>;
|
|
752
752
|
try {
|
|
753
753
|
if (frame.type === "input") {
|
|
754
|
-
|
|
754
|
+
// Same envelope as the HTTP input route (#143): type + 4096-char cap. Invalid
|
|
755
|
+
// frames throw → caught below → terminal error frame, tmux untouched.
|
|
756
|
+
const text = validateTerminalInputData(frame);
|
|
755
757
|
if (text) ws.data.stream?.write(new TextEncoder().encode(text));
|
|
756
758
|
} else if (frame.type === "resize") {
|
|
757
|
-
|
|
758
|
-
const rows =
|
|
759
|
+
// Same bounds as the HTTP resize route (#143): cols 10-500, rows 5-200.
|
|
760
|
+
const { cols, rows } = validateTerminalResize(frame);
|
|
759
761
|
// First resize sizes the pane and triggers the (size-matched) backfill;
|
|
760
762
|
// later ones just reflow the live stream.
|
|
761
763
|
if (!ws.data.synced) {
|
package/src/index.ts
CHANGED
|
@@ -3,7 +3,7 @@ import { loadConfig, initConfigFile } from "./config";
|
|
|
3
3
|
import { createRelayClient } from "./relay";
|
|
4
4
|
import type { ManagedSessionExitDiagnostics } from "./relay";
|
|
5
5
|
import { createControlHandler } from "./control";
|
|
6
|
-
import { diagnoseSessionExit, isSessionAlive, refreshManagedAgentReport } from "./spawn";
|
|
6
|
+
import { diagnoseSessionExit, hydrateTerminalGuests, isSessionAlive, reapTerminalGuests, refreshManagedAgentReport } from "./spawn";
|
|
7
7
|
import { startApiServer } from "./api";
|
|
8
8
|
import { recoverManagedAgents } from "./recovery";
|
|
9
9
|
import { ProviderProbeCache } from "./provider-probe";
|
|
@@ -51,8 +51,10 @@ const control = createControlHandler(config, relay);
|
|
|
51
51
|
|
|
52
52
|
const POLL_INTERVAL_MS = 3_000;
|
|
53
53
|
const REGISTER_RETRY_MS = 5_000;
|
|
54
|
+
const GUEST_REAP_INTERVAL_MS = 60_000;
|
|
54
55
|
let pollTimer: Timer | null = null;
|
|
55
56
|
let healthCheckTimer: Timer | null = null;
|
|
57
|
+
let guestReaperTimer: Timer | null = null;
|
|
56
58
|
let apiServer: { stop(): void; url: string } | null = null;
|
|
57
59
|
|
|
58
60
|
async function startup(): Promise<void> {
|
|
@@ -75,12 +77,28 @@ async function startup(): Promise<void> {
|
|
|
75
77
|
// Recover existing tmux sessions
|
|
76
78
|
await recoverManagedAgents(config, control, relay);
|
|
77
79
|
|
|
80
|
+
// Restore guest-terminal TTLs persisted before the last restart, then reap any
|
|
81
|
+
// that expired (or were orphaned) while the orchestrator was down (#144).
|
|
82
|
+
hydrateTerminalGuests();
|
|
83
|
+
const reaped = reapTerminalGuests(config);
|
|
84
|
+
if (reaped.length > 0) console.error(`[orchestrator] Reaped ${reaped.length} expired guest terminal(s)`);
|
|
85
|
+
|
|
78
86
|
// Start polling for command requests
|
|
79
87
|
startPolling();
|
|
80
88
|
|
|
81
89
|
// Periodic health check — remove dead sessions
|
|
82
90
|
healthCheckTimer = setInterval(healthCheck, 60_000);
|
|
83
91
|
|
|
92
|
+
// Periodic guest-terminal reaper — enforces guest TTL without requiring a new
|
|
93
|
+
// guest creation to trigger cleanup (#144).
|
|
94
|
+
guestReaperTimer = setInterval(() => {
|
|
95
|
+
try {
|
|
96
|
+
reapTerminalGuests(config);
|
|
97
|
+
} catch (err) {
|
|
98
|
+
console.error(`[orchestrator] Guest reap error: ${err}`);
|
|
99
|
+
}
|
|
100
|
+
}, GUEST_REAP_INTERVAL_MS);
|
|
101
|
+
|
|
84
102
|
console.error("[orchestrator] Ready. Polling for command requests...");
|
|
85
103
|
}
|
|
86
104
|
|
|
@@ -178,6 +196,7 @@ async function shutdown(): Promise<void> {
|
|
|
178
196
|
console.error("[orchestrator] Shutting down...");
|
|
179
197
|
if (pollTimer) clearInterval(pollTimer);
|
|
180
198
|
if (healthCheckTimer) clearInterval(healthCheckTimer);
|
|
199
|
+
if (guestReaperTimer) clearInterval(guestReaperTimer);
|
|
181
200
|
if (apiServer) apiServer.stop();
|
|
182
201
|
relay.stopHeartbeatLoop();
|
|
183
202
|
process.exit(0);
|
package/src/spawn.ts
CHANGED
|
@@ -146,7 +146,9 @@ const STATE_FILE = join(homedir(), ".agent-relay", "orchestrator-sessions.json")
|
|
|
146
146
|
const SESSION_DIR = join(homedir(), ".agent-relay", "sessions");
|
|
147
147
|
const RUNNER_INFO_DIR = join(homedir(), ".agent-relay", "runners");
|
|
148
148
|
const GUEST_TTL_MS = 60 * 60 * 1000;
|
|
149
|
+
const GUEST_STATE_FILE = join(homedir(), ".agent-relay", "orchestrator-guests.json");
|
|
149
150
|
const terminalGuests = new Map<string, { expiresAt: number }>();
|
|
151
|
+
let guestStateHydrated = false;
|
|
150
152
|
|
|
151
153
|
export function isWithinBaseDir(path: string, baseDir: string): boolean {
|
|
152
154
|
const base = resolve(baseDir);
|
|
@@ -444,6 +446,7 @@ export async function createTerminalGuest(
|
|
|
444
446
|
throw new Error(stderr || `tmux guest creation failed with exit code ${result.exitCode}`);
|
|
445
447
|
}
|
|
446
448
|
terminalGuests.set(session, { expiresAt });
|
|
449
|
+
saveGuestState();
|
|
447
450
|
return { session, mode: "guest", provider: spec.provider, running: true, interactive: true, expiresAt };
|
|
448
451
|
}
|
|
449
452
|
|
|
@@ -452,6 +455,7 @@ export function stopTerminalGuest(session: string, config: OrchestratorConfig):
|
|
|
452
455
|
const running = tmuxHasSession(session);
|
|
453
456
|
if (running) killTmuxSession(session);
|
|
454
457
|
terminalGuests.delete(session);
|
|
458
|
+
saveGuestState();
|
|
455
459
|
return { session, stopped: running };
|
|
456
460
|
}
|
|
457
461
|
|
|
@@ -547,13 +551,147 @@ function isGuestSessionName(session: string, config: OrchestratorConfig): boolea
|
|
|
547
551
|
return session.startsWith(`${config.tmuxPrefix}-guest-`);
|
|
548
552
|
}
|
|
549
553
|
|
|
554
|
+
/** One guest-terminal entry in the shape written to (and read back from) the guest state file. */
interface GuestRecord {
  /** tmux session name of the guest terminal; doubles as the registry key. */
  session: string;

  /** Instant (epoch milliseconds, compared against `Date.now()`) at which the guest's TTL elapses. */
  expiresAt: number;
}
|
|
558
|
+
|
|
559
|
+
/** A guest tmux session that currently exists, as reported by `tmux list-sessions`. */
interface LiveGuestSession {
  /** tmux session name. */
  session: string;

  /**
   * Session creation time in epoch milliseconds. The tmux listing helper stores 0
   * here when the reported creation time is not a finite number, which makes the
   * session look maximally old to the age-based fallback.
   */
  createdAtMs: number;
}
|
|
563
|
+
|
|
564
|
+
/**
 * Flatten the in-memory guest registry to a persistable, deterministic list.
 *
 * Records are ordered by session name using plain code-unit comparison instead
 * of `localeCompare`, so the result is identical regardless of the runtime's
 * locale or ICU data.
 *
 * @param guests Registry keyed by tmux session name.
 * @returns A new array of `{ session, expiresAt }` records sorted by session name.
 */
export function serializeGuests(guests: Map<string, { expiresAt: number }>): GuestRecord[] {
  const records: GuestRecord[] = [];
  guests.forEach(({ expiresAt }, session) => {
    records.push({ session, expiresAt });
  });
  // Map keys are unique, so the comparator never needs to break ties.
  return records.sort((a, b) => (a.session < b.session ? -1 : a.session > b.session ? 1 : 0));
}
|
|
570
|
+
|
|
571
|
+
/**
 * Tolerant inverse of serializeGuests: rebuilds a guest registry from parsed JSON.
 *
 * Never throws. Anything that is not an array yields an empty map, and any entry
 * that is not an object with a non-empty string `session` and a finite numeric
 * `expiresAt` is dropped. When a session name appears more than once, the last
 * valid entry wins.
 *
 * @param raw Untrusted, already-parsed value (typically the output of `JSON.parse`).
 * @returns A map from session name to `{ expiresAt }`.
 */
export function deserializeGuests(raw: unknown): Map<string, { expiresAt: number }> {
  const guests = new Map<string, { expiresAt: number }>();
  const candidates: unknown[] = Array.isArray(raw) ? raw : [];

  candidates.forEach((candidate) => {
    if (typeof candidate !== "object" || candidate === null) return;

    const fields = candidate as Record<string, unknown>;
    const name = fields.session;
    const expiry = fields.expiresAt;

    if (typeof name !== "string" || name === "") return;
    if (typeof expiry !== "number" || !Number.isFinite(expiry)) return;

    guests.set(name, { expiresAt: expiry });
  });

  return guests;
}
|
|
584
|
+
|
|
585
|
+
/**
 * Persist the in-memory guest registry to GUEST_STATE_FILE.
 *
 * The JSON is first written to a `.tmp` sibling and then renamed over the real
 * file, so the state file is swapped in one step rather than rewritten in place.
 *
 * Best-effort: this function never throws. A failure is logged (instead of being
 * swallowed silently) so an unwritable state directory is visible to operators.
 */
function saveGuestState(): void {
  try {
    mkdirSync(join(homedir(), ".agent-relay"), { recursive: true });
    const tmp = `${GUEST_STATE_FILE}.tmp`;
    writeFileSync(tmp, JSON.stringify(serializeGuests(terminalGuests), null, 2) + "\n");
    renameSync(tmp, GUEST_STATE_FILE);
  } catch (err) {
    // Persistence is best-effort: a write failure must never break guest creation.
    // The periodic reaper's tmux age-based fallback still bounds orphan lifetime.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[orchestrator] Failed to persist guest terminal state: ${reason}`);
  }
}
|
|
596
|
+
|
|
597
|
+
/**
 * Load persisted guest TTLs from GUEST_STATE_FILE into the in-memory registry so
 * they survive an orchestrator restart. Intended to be called once at boot,
 * before the first reap; later calls are no-ops because the hydrated flag is set
 * on the first call, whether or not the load succeeds.
 *
 * Entries already present in memory are kept as-is; persisted values only fill
 * in sessions the registry does not know about.
 */
export function hydrateTerminalGuests(): void {
  if (guestStateHydrated) return;
  guestStateHydrated = true;

  let persisted: Map<string, { expiresAt: number }>;
  try {
    const text = readFileSync(GUEST_STATE_FILE, "utf8");
    persisted = deserializeGuests(JSON.parse(text));
  } catch {
    // No persisted state (first boot or unreadable) — the age-based fallback in
    // reapTerminalGuests still cleans any orphaned guest tmux sessions.
    return;
  }

  persisted.forEach((value, session) => {
    if (!terminalGuests.has(session)) terminalGuests.set(session, value);
  });
}
|
|
614
|
+
|
|
615
|
+
/**
 * List live `<prefix>-guest-*` tmux sessions together with their creation time.
 *
 * Runs `tmux list-sessions` with a `name<TAB>created` format and keeps only rows
 * whose name passes isGuestSessionName. A non-zero tmux exit (no server, no
 * sessions) yields an empty list. tmux's creation time is multiplied by 1000
 * (treated as seconds) to get milliseconds; a value that is not a finite number
 * is recorded as 0.
 *
 * @param config Orchestrator configuration, forwarded to the guest-name check.
 * @returns One record per live guest session, in tmux's output order.
 */
function listGuestTmuxSessions(config: OrchestratorConfig): LiveGuestSession[] {
  const proc = Bun.spawnSync(["tmux", "list-sessions", "-F", "#{session_name}\t#{session_created}"], {
    stdin: "ignore",
    stdout: "pipe",
    stderr: "ignore",
  });
  if (proc.exitCode !== 0) return []; // no tmux server / no sessions

  const guests: LiveGuestSession[] = [];
  proc.stdout
    .toString()
    .split("\n")
    .forEach((row) => {
      const sep = row.indexOf("\t");
      if (sep < 0) return; // blank or malformed row
      const name = row.slice(0, sep);
      if (!isGuestSessionName(name, config)) return;
      const created = Number(row.slice(sep + 1).trim());
      guests.push({ session: name, createdAtMs: Number.isFinite(created) ? created * 1000 : 0 });
    });
  return guests;
}
|
|
634
|
+
|
|
635
|
+
/**
 * Pick the live guest sessions that should be reaped. Pure (no tmux, no fs) so
 * the TTL policy can be unit-tested in isolation.
 *
 * Policy, applied to each live session:
 *  - tracked: reap once its recorded `expiresAt` is at or before `now`;
 *  - untracked (metadata lost across a restart): reap once its age,
 *    `now - createdAtMs`, reaches `fallbackTtlMs`.
 *
 * Tracked sessions that are not in `liveGuests` are never returned.
 *
 * @param tracked Registry of known guests keyed by session name.
 * @param liveGuests Guest sessions that currently exist in tmux.
 * @param now Current time in epoch milliseconds.
 * @param fallbackTtlMs Maximum age for untracked sessions; defaults to GUEST_TTL_MS.
 * @returns Session names to reap, de-duplicated, in `liveGuests` order.
 */
export function selectExpiredGuests(
  tracked: Map<string, { expiresAt: number }>,
  liveGuests: LiveGuestSession[],
  now: number,
  fallbackTtlMs = GUEST_TTL_MS,
): string[] {
  const isExpired = ({ session, createdAtMs }: LiveGuestSession): boolean => {
    const known = tracked.get(session);
    return known ? known.expiresAt <= now : now - createdAtMs >= fallbackTtlMs;
  };

  const names = liveGuests.filter(isExpired).map((guest) => guest.session);
  return Array.from(new Set(names));
}
|
|
658
|
+
|
|
659
|
+
/**
 * Enforce guest TTLs without waiting for a new guest to be created: kill every
 * live guest tmux session selected by selectExpiredGuests, then drop registry
 * entries whose tmux session no longer exists (for example killed by hand) so
 * the registry cannot grow without bound. The registry is persisted only when
 * something was reaped or pruned.
 *
 * @param config Orchestrator configuration used to find guest tmux sessions.
 * @param now Current time in epoch milliseconds; overridable for tests.
 * @returns Names of the sessions that were killed by this call.
 */
export function reapTerminalGuests(config: OrchestratorConfig, now = Date.now()): string[] {
  const liveGuests = listGuestTmuxSessions(config);

  const expired = selectExpiredGuests(terminalGuests, liveGuests, now);
  expired.forEach((name) => {
    killTmuxSession(name);
    terminalGuests.delete(name);
  });

  // Anything still tracked but absent from the tmux snapshot is stale metadata.
  const alive = new Set(liveGuests.map((guest) => guest.session));
  const stale = [...terminalGuests.keys()].filter((name) => !alive.has(name));
  stale.forEach((name) => terminalGuests.delete(name));

  if (expired.length > 0 || stale.length > 0) saveGuestState();
  return expired;
}
|
|
684
|
+
|
|
550
685
|
/**
 * Kill and forget every tracked guest whose recorded expiry is not in the
 * future, then persist the registry if anything was removed. Unlike
 * reapTerminalGuests this looks only at the in-memory registry and does not
 * query tmux for live sessions.
 */
function cleanupExpiredTerminalGuests(): void {
  const cutoff = Date.now();

  // Snapshot the expired names first; the predicate mirrors "not strictly later than now".
  const expired = [...terminalGuests.entries()]
    .filter(([, guest]) => !(guest.expiresAt > cutoff))
    .map(([session]) => session);
  if (expired.length === 0) return;

  for (const session of expired) {
    killTmuxSession(session);
    terminalGuests.delete(session);
  }
  saveGuestState();
}
|
|
558
696
|
|
|
559
697
|
function killTmuxSession(session: string): void {
|
|
@@ -1088,15 +1226,39 @@ export function terminalInputTokens(data: string): TerminalInputToken[] {
|
|
|
1088
1226
|
return tokens;
|
|
1089
1227
|
}
|
|
1090
1228
|
|
|
1229
|
+
// Validation contract shared by the HTTP terminal routes and the websocket terminal
// frames (orchestrator/src/api.ts). Both transports MUST enforce the same envelope,
// so these validators are the single source of truth (see #143). They are pure
// (no tmux access), which keeps them safe to unit-test.
const TERMINAL_INPUT_MAX = 4096;

/**
 * Validate a terminal-input envelope and return its text payload.
 *
 * @param input Untrusted request body or websocket frame; must be a non-array
 *   object whose `data` property is a string.
 * @returns The `data` string, unchanged (an empty string is allowed).
 * @throws Error if `input` is not a non-array object, if `data` is not a string,
 *   or if `data.length` exceeds TERMINAL_INPUT_MAX.
 */
export function validateTerminalInputData(input: unknown): string {
  const isPlainObject = typeof input === "object" && input !== null && !Array.isArray(input);
  if (!isPlainObject) throw new Error("terminal input body must be an object");

  const { data } = input as { data?: unknown };
  if (typeof data !== "string") throw new Error("terminal input data must be a string");
  if (data.length > TERMINAL_INPUT_MAX) throw new Error(`terminal input exceeds ${TERMINAL_INPUT_MAX} characters`);

  return data;
}
|
|
1241
|
+
|
|
1242
|
+
/**
 * Validate a terminal-resize envelope and return integer dimensions.
 *
 * @param input Untrusted request body or websocket frame; must be a non-array
 *   object with numeric `cols` and `rows`.
 * @returns `cols` and `rows` rounded to the nearest integer.
 * @throws Error if `input` is not a non-array object, if either dimension is not
 *   a finite number, or if `cols` is outside 10-500 or `rows` outside 5-200
 *   (bounds are checked before rounding).
 */
export function validateTerminalResize(input: unknown): { cols: number; rows: number } {
  if (typeof input !== "object" || input === null || Array.isArray(input)) {
    throw new Error("resize body must be an object");
  }

  const { cols, rows } = input as { cols?: unknown; rows?: unknown };

  // A plain typeof check is not enough: NaN is a number and fails every range
  // comparison, so it would slip past the bounds check below. Require finiteness.
  const isFiniteNumber = (value: unknown): value is number =>
    typeof value === "number" && Number.isFinite(value);
  if (!isFiniteNumber(cols) || !isFiniteNumber(rows)) {
    throw new Error("cols and rows must be numbers");
  }

  const colsInRange = cols >= 10 && cols <= 500;
  const rowsInRange = rows >= 5 && rows <= 200;
  if (!(colsInRange && rowsInRange)) throw new Error("cols must be 10-500, rows must be 5-200");

  return { cols: Math.round(cols), rows: Math.round(rows) };
}
|
|
1256
|
+
|
|
1091
1257
|
export function sendTerminalInput(name: string, config: OrchestratorConfig, input: unknown): TerminalInputResult {
|
|
1092
1258
|
if (!name.startsWith(`${config.tmuxPrefix}-`)) throw new Error("session is not managed by this orchestrator");
|
|
1093
1259
|
const socketName = tmuxSocketForSession(name);
|
|
1094
1260
|
if (!tmuxHasSession(name, socketName)) throw new Error("terminal session is not running");
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
const data = (input as { data?: unknown }).data;
|
|
1098
|
-
if (typeof data !== "string") throw new Error("terminal input data must be a string");
|
|
1099
|
-
if (data.length > 4096) throw new Error("terminal input exceeds 4096 characters");
|
|
1261
|
+
const data = validateTerminalInputData(input);
|
|
1100
1262
|
|
|
1101
1263
|
const tokens = terminalInputTokens(data);
|
|
1102
1264
|
for (const token of tokens) {
|
|
@@ -1126,14 +1288,7 @@ export function resizeTerminal(name: string, config: OrchestratorConfig, input:
|
|
|
1126
1288
|
if (!name.startsWith(`${config.tmuxPrefix}-`)) throw new Error("session is not managed by this orchestrator");
|
|
1127
1289
|
const socketName = tmuxSocketForSession(name);
|
|
1128
1290
|
if (!tmuxHasSession(name, socketName)) throw new Error("terminal session is not running");
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
const cols = (input as { cols?: unknown }).cols;
|
|
1132
|
-
const rows = (input as { rows?: unknown }).rows;
|
|
1133
|
-
if (typeof cols !== "number" || typeof rows !== "number") throw new Error("cols and rows must be numbers");
|
|
1134
|
-
if (cols < 10 || cols > 500 || rows < 5 || rows > 200) throw new Error("cols must be 10-500, rows must be 5-200");
|
|
1135
|
-
|
|
1136
|
-
const clamped = { cols: Math.round(cols), rows: Math.round(rows) };
|
|
1291
|
+
const clamped = validateTerminalResize(input);
|
|
1137
1292
|
const result = Bun.spawnSync(tmuxCommand(socketName, "resize-window", "-t", name, "-x", String(clamped.cols), "-y", String(clamped.rows)), {
|
|
1138
1293
|
stdin: "ignore",
|
|
1139
1294
|
stdout: "pipe",
|