agent-relay-orchestrator 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-orchestrator",
3
- "version": "0.23.0",
3
+ "version": "0.25.0",
4
4
  "description": "Agent Relay orchestrator — manages agent lifecycle across hosts",
5
5
  "type": "module",
6
6
  "bin": {
@@ -16,7 +16,7 @@
16
16
  "test": "bun test"
17
17
  },
18
18
  "dependencies": {
19
- "agent-relay-sdk": "0.2.13"
19
+ "agent-relay-sdk": "0.2.14"
20
20
  },
21
21
  "devDependencies": {
22
22
  "@types/bun": "latest",
package/src/api.ts CHANGED
@@ -6,7 +6,7 @@ import { proxyArtifactRequest } from "./artifact-proxy";
6
6
  import type { OrchestratorConfig } from "./config";
7
7
  import type { ProviderProbeCache } from "./provider-probe";
8
8
  import type { RelayClient } from "./relay";
9
- import { captureSession, captureSessionMirror, captureTerminal, createTerminalGuest, listSessions, sendTerminalInput, resizeTerminal, stopTerminalGuest } from "./spawn";
9
+ import { captureSession, captureSessionMirror, captureTerminal, createTerminalGuest, listSessions, sendTerminalInput, resizeTerminal, stopTerminalGuest, validateTerminalInputData, validateTerminalResize } from "./spawn";
10
10
  import { acquireTerminalStream, type TerminalStreamHandle, type TerminalStreamSubscriber } from "./terminal-stream";
11
11
  import { VERSION, runtimeMetadata } from "./version";
12
12
  import { previewWorkspaceMerge, probeWorkspace, workspaceDiff, workspaceGitState } from "./workspace-probe";
@@ -751,11 +751,13 @@ function handleTerminalSocketMessage(ws: TerminalSocket, data: string | Buffer):
751
751
  const frame = payload as Record<string, unknown>;
752
752
  try {
753
753
  if (frame.type === "input") {
754
- const text = typeof frame.data === "string" ? frame.data : "";
754
+ // Same envelope as the HTTP input route (#143): type + 4096-char cap. Invalid
755
+ // frames throw → caught below → terminal error frame, tmux untouched.
756
+ const text = validateTerminalInputData(frame);
755
757
  if (text) ws.data.stream?.write(new TextEncoder().encode(text));
756
758
  } else if (frame.type === "resize") {
757
- const cols = Number(frame.cols);
758
- const rows = Number(frame.rows);
759
+ // Same bounds as the HTTP resize route (#143): cols 10-500, rows 5-200.
760
+ const { cols, rows } = validateTerminalResize(frame);
759
761
  // First resize sizes the pane and triggers the (size-matched) backfill;
760
762
  // later ones just reflow the live stream.
761
763
  if (!ws.data.synced) {
package/src/index.ts CHANGED
@@ -3,7 +3,7 @@ import { loadConfig, initConfigFile } from "./config";
3
3
  import { createRelayClient } from "./relay";
4
4
  import type { ManagedSessionExitDiagnostics } from "./relay";
5
5
  import { createControlHandler } from "./control";
6
- import { diagnoseSessionExit, isSessionAlive, refreshManagedAgentReport } from "./spawn";
6
+ import { diagnoseSessionExit, hydrateTerminalGuests, isSessionAlive, reapTerminalGuests, refreshManagedAgentReport } from "./spawn";
7
7
  import { startApiServer } from "./api";
8
8
  import { recoverManagedAgents } from "./recovery";
9
9
  import { ProviderProbeCache } from "./provider-probe";
@@ -51,8 +51,10 @@ const control = createControlHandler(config, relay);
51
51
 
52
52
  const POLL_INTERVAL_MS = 3_000;
53
53
  const REGISTER_RETRY_MS = 5_000;
54
+ const GUEST_REAP_INTERVAL_MS = 60_000;
54
55
  let pollTimer: Timer | null = null;
55
56
  let healthCheckTimer: Timer | null = null;
57
+ let guestReaperTimer: Timer | null = null;
56
58
  let apiServer: { stop(): void; url: string } | null = null;
57
59
 
58
60
  async function startup(): Promise<void> {
@@ -75,12 +77,28 @@ async function startup(): Promise<void> {
75
77
  // Recover existing tmux sessions
76
78
  await recoverManagedAgents(config, control, relay);
77
79
 
80
+ // Restore guest-terminal TTLs persisted before the last restart, then reap any
81
+ // that expired (or were orphaned) while the orchestrator was down (#144).
82
+ hydrateTerminalGuests();
83
+ const reaped = reapTerminalGuests(config);
84
+ if (reaped.length > 0) console.error(`[orchestrator] Reaped ${reaped.length} expired guest terminal(s)`);
85
+
78
86
  // Start polling for command requests
79
87
  startPolling();
80
88
 
81
89
  // Periodic health check — remove dead sessions
82
90
  healthCheckTimer = setInterval(healthCheck, 60_000);
83
91
 
92
+ // Periodic guest-terminal reaper — enforces guest TTL without requiring a new
93
+ // guest creation to trigger cleanup (#144).
94
+ guestReaperTimer = setInterval(() => {
95
+ try {
96
+ reapTerminalGuests(config);
97
+ } catch (err) {
98
+ console.error(`[orchestrator] Guest reap error: ${err}`);
99
+ }
100
+ }, GUEST_REAP_INTERVAL_MS);
101
+
84
102
  console.error("[orchestrator] Ready. Polling for command requests...");
85
103
  }
86
104
 
@@ -178,6 +196,7 @@ async function shutdown(): Promise<void> {
178
196
  console.error("[orchestrator] Shutting down...");
179
197
  if (pollTimer) clearInterval(pollTimer);
180
198
  if (healthCheckTimer) clearInterval(healthCheckTimer);
199
+ if (guestReaperTimer) clearInterval(guestReaperTimer);
181
200
  if (apiServer) apiServer.stop();
182
201
  relay.stopHeartbeatLoop();
183
202
  process.exit(0);
package/src/spawn.ts CHANGED
@@ -146,7 +146,9 @@ const STATE_FILE = join(homedir(), ".agent-relay", "orchestrator-sessions.json")
146
146
  const SESSION_DIR = join(homedir(), ".agent-relay", "sessions");
147
147
  const RUNNER_INFO_DIR = join(homedir(), ".agent-relay", "runners");
148
148
  const GUEST_TTL_MS = 60 * 60 * 1000;
149
+ const GUEST_STATE_FILE = join(homedir(), ".agent-relay", "orchestrator-guests.json");
149
150
  const terminalGuests = new Map<string, { expiresAt: number }>();
151
+ let guestStateHydrated = false;
150
152
 
151
153
  export function isWithinBaseDir(path: string, baseDir: string): boolean {
152
154
  const base = resolve(baseDir);
@@ -444,6 +446,7 @@ export async function createTerminalGuest(
444
446
  throw new Error(stderr || `tmux guest creation failed with exit code ${result.exitCode}`);
445
447
  }
446
448
  terminalGuests.set(session, { expiresAt });
449
+ saveGuestState();
447
450
  return { session, mode: "guest", provider: spec.provider, running: true, interactive: true, expiresAt };
448
451
  }
449
452
 
@@ -452,6 +455,7 @@ export function stopTerminalGuest(session: string, config: OrchestratorConfig):
452
455
  const running = tmuxHasSession(session);
453
456
  if (running) killTmuxSession(session);
454
457
  terminalGuests.delete(session);
458
+ saveGuestState();
455
459
  return { session, stopped: running };
456
460
  }
457
461
 
@@ -547,13 +551,147 @@ function isGuestSessionName(session: string, config: OrchestratorConfig): boolea
547
551
  return session.startsWith(`${config.tmuxPrefix}-guest-`);
548
552
  }
549
553
 
554
+ interface GuestRecord {
555
+ session: string;
556
+ expiresAt: number;
557
+ }
558
+
559
+ interface LiveGuestSession {
560
+ session: string;
561
+ createdAtMs: number;
562
+ }
563
+
564
+ /** Flatten the in-memory guest registry to a persistable, deterministic list. */
565
+ export function serializeGuests(guests: Map<string, { expiresAt: number }>): GuestRecord[] {
566
+ return [...guests.entries()]
567
+ .map(([session, { expiresAt }]) => ({ session, expiresAt }))
568
+ .sort((a, b) => a.session.localeCompare(b.session));
569
+ }
570
+
571
+ /** Tolerant inverse of serializeGuests — drops malformed entries instead of throwing. */
572
+ export function deserializeGuests(raw: unknown): Map<string, { expiresAt: number }> {
573
+ const map = new Map<string, { expiresAt: number }>();
574
+ if (!Array.isArray(raw)) return map;
575
+ for (const entry of raw) {
576
+ if (!entry || typeof entry !== "object") continue;
577
+ const { session, expiresAt } = entry as Record<string, unknown>;
578
+ if (typeof session === "string" && session && typeof expiresAt === "number" && Number.isFinite(expiresAt)) {
579
+ map.set(session, { expiresAt });
580
+ }
581
+ }
582
+ return map;
583
+ }
584
+
585
+ function saveGuestState(): void {
586
+ try {
587
+ mkdirSync(join(homedir(), ".agent-relay"), { recursive: true });
588
+ const tmp = `${GUEST_STATE_FILE}.tmp`;
589
+ writeFileSync(tmp, JSON.stringify(serializeGuests(terminalGuests), null, 2) + "\n");
590
+ renameSync(tmp, GUEST_STATE_FILE);
591
+ } catch {
592
+ // Persistence is best-effort: a write failure must never break guest creation.
593
+ // The periodic reaper's tmux age-based fallback still bounds orphan lifetime.
594
+ }
595
+ }
596
+
597
+ /**
598
+ * Rehydrate the in-memory guest registry from disk so guest TTLs survive an
599
+ * orchestrator restart. Call once at boot before the first reap.
600
+ */
601
+ export function hydrateTerminalGuests(): void {
602
+ if (guestStateHydrated) return;
603
+ guestStateHydrated = true;
604
+ try {
605
+ const persisted = deserializeGuests(JSON.parse(readFileSync(GUEST_STATE_FILE, "utf8")));
606
+ for (const [session, value] of persisted) {
607
+ if (!terminalGuests.has(session)) terminalGuests.set(session, value);
608
+ }
609
+ } catch {
610
+ // No persisted state (first boot or unreadable) — the age-based fallback in
611
+ // reapTerminalGuests still cleans any orphaned guest tmux sessions.
612
+ }
613
+ }
614
+
615
+ /** Live `<prefix>-guest-*` tmux sessions with their creation time (ms). */
616
+ function listGuestTmuxSessions(config: OrchestratorConfig): LiveGuestSession[] {
617
+ const result = Bun.spawnSync(["tmux", "list-sessions", "-F", "#{session_name}\t#{session_created}"], {
618
+ stdin: "ignore",
619
+ stdout: "pipe",
620
+ stderr: "ignore",
621
+ });
622
+ if (result.exitCode !== 0) return []; // no tmux server / no sessions
623
+ const sessions: LiveGuestSession[] = [];
624
+ for (const line of result.stdout.toString().split("\n")) {
625
+ const tab = line.indexOf("\t");
626
+ if (tab < 0) continue;
627
+ const session = line.slice(0, tab);
628
+ if (!isGuestSessionName(session, config)) continue;
629
+ const createdSec = Number(line.slice(tab + 1).trim());
630
+ sessions.push({ session, createdAtMs: Number.isFinite(createdSec) ? createdSec * 1000 : 0 });
631
+ }
632
+ return sessions;
633
+ }
634
+
635
+ /**
636
+ * Decide which live guest sessions to reap. Pure so the TTL policy is testable
637
+ * without tmux or fs:
638
+ * - tracked + past its recorded expiry → reap
639
+ * - untracked (metadata lost across a restart) + older than the fallback TTL → reap
640
+ */
641
+ export function selectExpiredGuests(
642
+ tracked: Map<string, { expiresAt: number }>,
643
+ liveGuests: LiveGuestSession[],
644
+ now: number,
645
+ fallbackTtlMs = GUEST_TTL_MS,
646
+ ): string[] {
647
+ const toReap = new Set<string>();
648
+ for (const { session, createdAtMs } of liveGuests) {
649
+ const record = tracked.get(session);
650
+ if (record) {
651
+ if (record.expiresAt <= now) toReap.add(session);
652
+ } else if (now - createdAtMs >= fallbackTtlMs) {
653
+ toReap.add(session);
654
+ }
655
+ }
656
+ return [...toReap];
657
+ }
658
+
659
+ /**
660
+ * Kill guest tmux sessions whose TTL has elapsed, independent of any new guest
661
+ * creation, and prune tracked entries whose tmux session is already gone. Runs
662
+ * at boot and on a periodic timer (see orchestrator index).
663
+ */
664
+ export function reapTerminalGuests(config: OrchestratorConfig, now = Date.now()): string[] {
665
+ const live = listGuestTmuxSessions(config);
666
+ const liveNames = new Set(live.map((g) => g.session));
667
+ const reaped = selectExpiredGuests(terminalGuests, live, now);
668
+ for (const session of reaped) {
669
+ killTmuxSession(session);
670
+ terminalGuests.delete(session);
671
+ }
672
+ // Drop tracked guests with no live tmux session (manually killed, or reaped
673
+ // above) so the registry can't grow without bound.
674
+ let pruned = false;
675
+ for (const session of [...terminalGuests.keys()]) {
676
+ if (!liveNames.has(session)) {
677
+ terminalGuests.delete(session);
678
+ pruned = true;
679
+ }
680
+ }
681
+ if (reaped.length || pruned) saveGuestState();
682
+ return reaped;
683
+ }
684
+
550
685
  function cleanupExpiredTerminalGuests(): void {
551
686
  const now = Date.now();
687
+ let changed = false;
552
688
  for (const [session, guest] of terminalGuests.entries()) {
553
689
  if (guest.expiresAt > now) continue;
554
690
  killTmuxSession(session);
555
691
  terminalGuests.delete(session);
692
+ changed = true;
556
693
  }
694
+ if (changed) saveGuestState();
557
695
  }
558
696
 
559
697
  function killTmuxSession(session: string): void {
@@ -1088,15 +1226,39 @@ export function terminalInputTokens(data: string): TerminalInputToken[] {
1088
1226
  return tokens;
1089
1227
  }
1090
1228
 
1229
+ // Validation contract shared by the HTTP terminal routes and the websocket terminal
1230
+ // frames (orchestrator/src/api.ts). Both transports MUST enforce the same envelope —
1231
+ // keep these the single source of truth (see #143). Pure: no tmux, safe to unit-test.
1232
+ const TERMINAL_INPUT_MAX = 4096;
1233
+
1234
+ export function validateTerminalInputData(input: unknown): string {
1235
+ if (!input || typeof input !== "object" || Array.isArray(input)) throw new Error("terminal input body must be an object");
1236
+ const data = (input as { data?: unknown }).data;
1237
+ if (typeof data !== "string") throw new Error("terminal input data must be a string");
1238
+ if (data.length > TERMINAL_INPUT_MAX) throw new Error(`terminal input exceeds ${TERMINAL_INPUT_MAX} characters`);
1239
+ return data;
1240
+ }
1241
+
1242
+ export function validateTerminalResize(input: unknown): { cols: number; rows: number } {
1243
+ if (!input || typeof input !== "object" || Array.isArray(input)) throw new Error("resize body must be an object");
1244
+ const cols = (input as { cols?: unknown }).cols;
1245
+ const rows = (input as { rows?: unknown }).rows;
1246
+ // typeof narrows for the bounds comparison; Number.isFinite additionally rejects
1247
+ // NaN/Infinity — without it NaN slips past the bounds check below (every NaN
1248
+ // comparison is false), the exact malformed-resize frame the websocket path used to
1249
+ // forward via Number(frame.cols).
1250
+ if (typeof cols !== "number" || typeof rows !== "number" || !Number.isFinite(cols) || !Number.isFinite(rows)) {
1251
+ throw new Error("cols and rows must be numbers");
1252
+ }
1253
+ if (cols < 10 || cols > 500 || rows < 5 || rows > 200) throw new Error("cols must be 10-500, rows must be 5-200");
1254
+ return { cols: Math.round(cols), rows: Math.round(rows) };
1255
+ }
1256
+
1091
1257
  export function sendTerminalInput(name: string, config: OrchestratorConfig, input: unknown): TerminalInputResult {
1092
1258
  if (!name.startsWith(`${config.tmuxPrefix}-`)) throw new Error("session is not managed by this orchestrator");
1093
1259
  const socketName = tmuxSocketForSession(name);
1094
1260
  if (!tmuxHasSession(name, socketName)) throw new Error("terminal session is not running");
1095
- if (!input || typeof input !== "object" || Array.isArray(input)) throw new Error("terminal input body must be an object");
1096
-
1097
- const data = (input as { data?: unknown }).data;
1098
- if (typeof data !== "string") throw new Error("terminal input data must be a string");
1099
- if (data.length > 4096) throw new Error("terminal input exceeds 4096 characters");
1261
+ const data = validateTerminalInputData(input);
1100
1262
 
1101
1263
  const tokens = terminalInputTokens(data);
1102
1264
  for (const token of tokens) {
@@ -1126,14 +1288,7 @@ export function resizeTerminal(name: string, config: OrchestratorConfig, input:
1126
1288
  if (!name.startsWith(`${config.tmuxPrefix}-`)) throw new Error("session is not managed by this orchestrator");
1127
1289
  const socketName = tmuxSocketForSession(name);
1128
1290
  if (!tmuxHasSession(name, socketName)) throw new Error("terminal session is not running");
1129
- if (!input || typeof input !== "object" || Array.isArray(input)) throw new Error("resize body must be an object");
1130
-
1131
- const cols = (input as { cols?: unknown }).cols;
1132
- const rows = (input as { rows?: unknown }).rows;
1133
- if (typeof cols !== "number" || typeof rows !== "number") throw new Error("cols and rows must be numbers");
1134
- if (cols < 10 || cols > 500 || rows < 5 || rows > 200) throw new Error("cols must be 10-500, rows must be 5-200");
1135
-
1136
- const clamped = { cols: Math.round(cols), rows: Math.round(rows) };
1291
+ const clamped = validateTerminalResize(input);
1137
1292
  const result = Bun.spawnSync(tmuxCommand(socketName, "resize-window", "-t", name, "-x", String(clamped.cols), "-y", String(clamped.rows)), {
1138
1293
  stdin: "ignore",
1139
1294
  stdout: "pipe",
@@ -922,6 +922,9 @@ function mergeRebaseFf(
922
922
  }
923
923
  }
924
924
  const headSha = git(["rev-parse", "HEAD"], worktreePath).stdout;
925
+ // Subject of the landed commit for the relay's branch.landed notice (#239). Best-effort:
926
+ // an empty/failed read just omits it from the message body.
927
+ const landedSubject = git(["log", "-1", "--format=%s", headSha], worktreePath).stdout || undefined;
925
928
 
926
929
  // Advance base to the rebased branch. If base is checked out somewhere, do a
927
930
  // real ff-only merge there so its working tree stays consistent; otherwise
@@ -964,15 +967,15 @@ function mergeRebaseFf(
964
967
  // continues from a buildable state (issue #51). No-op when nothing is stale.
965
968
  const depsRefresh = refreshWorkspaceDeps(repoRoot, worktreePath);
966
969
  const reportDeps = depsRefresh.refreshed || depsRefresh.stale || depsRefresh.error;
967
- return head({ merged: true, status: "active", mergedSha: headSha, worktreeRemoved: false, branch: fresh, newBranch: fresh, branchDeleted: oldDeleted, pushed, ...(reportDeps ? { depsRefresh } : {}), error: undefined });
970
+ return head({ merged: true, status: "active", mergedSha: headSha, subject: landedSubject, worktreeRemoved: false, branch: fresh, newBranch: fresh, branchDeleted: oldDeleted, pushed, ...(reportDeps ? { depsRefresh } : {}), error: undefined });
968
971
  }
969
972
  // Recycle failed — keep the existing branch. Still landed, still active.
970
- return head({ merged: true, status: "active", mergedSha: headSha, worktreeRemoved: false, branchDeleted: false, pushed, error: undefined });
973
+ return head({ merged: true, status: "active", mergedSha: headSha, subject: landedSubject, worktreeRemoved: false, branchDeleted: false, pushed, error: undefined });
971
974
  }
972
975
  const removed = git(["worktree", "remove", "--force", worktreePath], repoRoot);
973
976
  const worktreeRemoved = removed.ok;
974
977
  const branchDeleted = worktreeRemoved ? git(["branch", "-D", branch], repoRoot).ok : false;
975
- return head({ merged: true, status: "merged", mergedSha: headSha, worktreeRemoved, branchDeleted, pushed, error: undefined });
978
+ return head({ merged: true, status: "merged", mergedSha: headSha, subject: landedSubject, worktreeRemoved, branchDeleted, pushed, error: undefined });
976
979
  }
977
980
 
978
981
  async function availableBranch(repoRoot: string, base: string): Promise<string> {