agent-relay-orchestrator 0.118.0 → 0.118.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-relay-orchestrator",
3
- "version": "0.118.0",
3
+ "version": "0.118.1",
4
4
  "description": "Agent Relay orchestrator — manages agent lifecycle across hosts",
5
5
  "type": "module",
6
6
  "bin": {
@@ -17,7 +17,7 @@
17
17
  },
18
18
  "dependencies": {
19
19
  "agent-relay-providers": "0.104.1",
20
- "agent-relay-sdk": "0.2.100",
20
+ "agent-relay-sdk": "0.2.101",
21
21
  "callmux": "0.23.0"
22
22
  },
23
23
  "devDependencies": {
@@ -8,14 +8,20 @@ interface CommandPollerOptions {
8
8
  relay: Pick<RelayClient, "connected" | "pollCommands">;
9
9
  control: CommandPollerControl;
10
10
  log?: (message: string) => void;
11
+ intervalMs?: number;
12
+ errorBackoffMs?: number;
11
13
  }
12
14
 
13
- export function createCommandPoller({ relay, control, log = console.error }: CommandPollerOptions) {
15
+ export function createCommandPoller({ relay, control, log = console.error, intervalMs = 3_000, errorBackoffMs = 3_000 }: CommandPollerOptions) {
14
16
  let inFlight = false;
17
+ let stopped = true;
18
+ let timer: ReturnType<typeof setTimeout> | undefined;
19
+ let lastTickErrored = false;
15
20
 
16
21
  async function tick(): Promise<boolean> {
17
22
  if (!relay.connected || inFlight) return false;
18
23
  inFlight = true;
24
+ lastTickErrored = false;
19
25
  try {
20
26
  const commands = await relay.pollCommands();
21
27
  if (commands.length > 0) {
@@ -27,6 +33,7 @@ export function createCommandPoller({ relay, control, log = console.error }: Com
27
33
  }
28
34
  return true;
29
35
  } catch (err) {
36
+ lastTickErrored = true;
30
37
  log(`[orchestrator] Poll error: ${err}`);
31
38
  return false;
32
39
  } finally {
@@ -34,8 +41,44 @@ export function createCommandPoller({ relay, control, log = console.error }: Com
34
41
  }
35
42
  }
36
43
 
44
/**
 * Arm the poll timer so that `runCycle` fires once after `delayMs`.
 *
 * Does nothing while the poller is stopped. At most one timer is ever pending:
 * `stop()` followed by `start()` while a cycle is still awaiting `tick()` makes
 * both `start()` and that cycle's `finally` call `schedule`. Without clearing the
 * previous handle the first timer would be orphaned, producing two independent
 * poll chains of which `stop()` could only ever cancel one.
 *
 * @param delayMs Milliseconds to wait before the next poll cycle.
 */
function schedule(delayMs: number): void {
  if (stopped) return;
  // Replace, never stack: drop whatever timer is still pending.
  if (timer) clearTimeout(timer);
  timer = setTimeout(() => {
    timer = undefined;
    void runCycle();
  }, delayMs);
  // Do not let the poll timer alone keep the process alive.
  timer.unref?.();
}
52
+
53
/**
 * Run one poll cycle and, unless the poller was stopped meanwhile, queue the next.
 *
 * A cycle counts as failed when `tick()` flagged an error via `lastTickErrored`
 * or when `tick()` itself threw; failed cycles wait `errorBackoffMs`, all others
 * wait `intervalMs`.
 */
async function runCycle(): Promise<void> {
  let failed = false;
  try {
    await tick();
    failed = lastTickErrored;
  } catch (err) {
    failed = true;
    log(`[orchestrator] Poll loop error: ${err}`);
  } finally {
    // Reschedule from `finally` so the loop survives even a throwing logger.
    if (!stopped) {
      const nextDelayMs = failed ? errorBackoffMs : intervalMs;
      schedule(nextDelayMs);
    }
  }
}
65
+
66
/** Begin polling with an immediate first cycle; a no-op when already running. */
function start(): void {
  const alreadyRunning = !stopped;
  if (alreadyRunning) return;
  stopped = false;
  schedule(0);
}
71
+
72
/** Halt polling: mark the poller stopped and cancel the pending timer, if any. */
function stop(): void {
  const pending = timer;
  stopped = true;
  timer = undefined;
  if (pending) clearTimeout(pending);
}
77
+
37
78
  return {
38
79
  tick,
80
+ start,
81
+ stop,
39
82
  get inFlight() {
40
83
  return inFlight;
41
84
  },
package/src/control.ts CHANGED
@@ -33,16 +33,10 @@ export function createControlHandler(
33
33
 
34
34
  async function handleSpawn(ctrl: Record<string, any>): Promise<boolean> {
35
35
  const opts = spawnOptionsFromControl(ctrl, config);
36
-
37
- try {
38
- const agent = await spawnAgent(opts, config);
39
- managedAgents.push(agent);
40
- console.error(`[orchestrator] Spawned ${opts.provider} agent: ${agent.tmuxSession}`);
41
- return true;
42
- } catch (err) {
43
- console.error(`[orchestrator] Spawn failed: ${err}`);
44
- return false;
45
- }
36
+ const agent = await spawnAgent(opts, config);
37
+ managedAgents.push(agent);
38
+ console.error(`[orchestrator] Spawned ${opts.provider} agent: ${agent.tmuxSession}`);
39
+ return true;
46
40
  }
47
41
 
48
42
  async function handleShutdown(ctrl: Record<string, any>, restart = false): Promise<Record<string, unknown>> {
@@ -124,7 +118,7 @@ export function createControlHandler(
124
118
  });
125
119
  await relay.updateCommand(command.id, "succeeded", result);
126
120
  } else if (command.type === "workspace.merge") {
127
- const result = mergeWorkspace({
121
+ const result = await mergeWorkspace({
128
122
  id: typeof command.params.workspaceId === "string" ? command.params.workspaceId : undefined,
129
123
  repoRoot: typeof command.params.repoRoot === "string" ? command.params.repoRoot : undefined,
130
124
  worktreePath: typeof command.params.worktreePath === "string" ? command.params.worktreePath : undefined,
@@ -297,6 +291,7 @@ function spawnOptionsFromRecord(source: Record<string, any>, config: Orchestrato
297
291
  automationRunId: typeof source.automationRunId === "string" ? source.automationRunId : undefined,
298
292
  requestedVia: typeof source.requestedVia === "string" ? source.requestedVia : undefined,
299
293
  resumeWorkspace: parseResumeWorkspace(source.resumeWorkspace),
294
+ acquisition: parseProjectAcquisition(source.acquisition),
300
295
  };
301
296
  }
302
297
 
@@ -331,3 +326,22 @@ function parseResumeWorkspace(value: unknown): import("./workspace-probe/types")
331
326
  baseSha: typeof value.baseSha === "string" ? value.baseSha : undefined,
332
327
  };
333
328
  }
329
+
330
/**
 * Validate an untyped `acquisition` payload and narrow it to a manifest.
 *
 * Only `mode: "project-root"` with `sync: "ff-only"` is accepted, and
 * `projectId`, `rootPath`, `cwd` and `remoteUrl` must all be non-empty strings.
 * `ref` is carried over only when it is a string.
 *
 * @returns The parsed manifest, or `undefined` when the payload is not usable.
 */
function parseProjectAcquisition(value: unknown): import("./spawn/types").ProjectAcquisitionManifest | undefined {
  if (!isRecord(value)) return undefined;

  const supported = value.mode === "project-root" && value.sync === "ff-only";
  if (!supported) return undefined;

  const asString = (field: unknown): string | undefined => (typeof field === "string" ? field : undefined);

  const projectId = asString(value.projectId);
  const rootPath = asString(value.rootPath);
  const cwd = asString(value.cwd);
  const remoteUrl = asString(value.remoteUrl);
  // Empty strings are rejected along with missing / non-string fields.
  if (!(projectId && rootPath && cwd && remoteUrl)) return undefined;

  const ref = asString(value.ref);
  return { mode: "project-root", projectId, rootPath, cwd, remoteUrl, ref, sync: "ff-only" };
}
package/src/index.ts CHANGED
@@ -60,7 +60,7 @@ const sharedCallmux = new SharedCallmuxSupervisor(config);
60
60
  const POLL_INTERVAL_MS = 3_000;
61
61
  const REGISTER_RETRY_MS = 5_000;
62
62
  const GUEST_REAP_INTERVAL_MS = 60_000;
63
- let pollTimer: Timer | null = null;
63
+ let commandPoller: ReturnType<typeof createCommandPoller> | null = null;
64
64
  let healthCheckTimer: Timer | null = null;
65
65
  let guestReaperTimer: Timer | null = null;
66
66
  let apiServer: { stop(): void; url: string } | null = null;
@@ -124,10 +124,8 @@ async function startup(): Promise<void> {
124
124
  }
125
125
 
126
126
  function startPolling(): void {
127
- const commandPoller = createCommandPoller({ relay, control });
128
- pollTimer = setInterval(async () => {
129
- await commandPoller.tick();
130
- }, POLL_INTERVAL_MS);
127
+ commandPoller = createCommandPoller({ relay, control, intervalMs: POLL_INTERVAL_MS, errorBackoffMs: POLL_INTERVAL_MS });
128
+ commandPoller.start();
131
129
  }
132
130
 
133
131
  async function registerUntilConnected(): Promise<void> {
@@ -208,7 +206,7 @@ async function healthCheck(): Promise<void> {
208
206
 
209
207
  async function shutdown(): Promise<void> {
210
208
  console.error("[orchestrator] Shutting down...");
211
- if (pollTimer) clearInterval(pollTimer);
209
+ commandPoller?.stop();
212
210
  if (healthCheckTimer) clearInterval(healthCheckTimer);
213
211
  if (guestReaperTimer) clearInterval(guestReaperTimer);
214
212
  if (apiServer) apiServer.stop();
@@ -0,0 +1,202 @@
1
+ import { createHash } from "node:crypto";
2
+ import { existsSync, mkdirSync, renameSync, rmSync, statSync, writeFileSync } from "node:fs";
3
+ import { basename, dirname, join, resolve } from "node:path";
4
+ import { errMessage, isPathWithinBase } from "agent-relay-sdk";
5
+ import { git, requireGit } from "../git";
6
+ import type { ProjectAcquisitionManifest } from "./types";
7
+
8
/** Outcome of applying a project acquisition manifest on this host. */
export interface ProjectAcquisitionResult {
  /** Whether the manifest was applied. */
  applied: boolean;
  /** What was done to the project root: fresh clone, fast-forward sync, or nothing. */
  action: "cloned" | "synced" | "noop";
  /** Project root the manifest was applied to. */
  rootPath: string;
  /** Remote URL the project root tracks as `origin`. */
  remoteUrl: string;
  /** Requested ref, present only when the manifest named one. */
  ref?: string;
  /** Commit checked out at the project root afterwards, when it could be read. */
  headSha?: string;
}
16
+
17
/** Acquisitions currently running in this process, keyed by resolved project root. */
const inFlight: Map<string, Promise<ProjectAcquisitionResult>> = new Map();
/** Give up waiting for the on-disk acquisition lock after five minutes. */
const LOCK_TIMEOUT_MS = 300_000;
/** Delay between attempts to take the on-disk acquisition lock. */
const LOCK_POLL_MS = 100;
20
+
21
/**
 * Clone or fast-forward the project root described by `manifest`.
 *
 * Calls for the same resolved root that arrive while one is already running in
 * this process share that run's promise instead of starting another. The work
 * itself runs under the on-disk acquisition lock.
 *
 * @param manifest Acquisition manifest; `undefined` means nothing to do.
 * @param baseDir Orchestrator base directory the root must live under.
 * @returns The acquisition result, or `undefined` when no manifest was given.
 */
export async function applyProjectAcquisitionManifest(
  manifest: ProjectAcquisitionManifest | undefined,
  baseDir: string,
): Promise<ProjectAcquisitionResult | undefined> {
  if (!manifest) return undefined;

  const key = resolve(manifest.rootPath);
  const pending = inFlight.get(key);
  if (pending) return pending;

  const task = withAcquisitionLock(key, baseDir, () => applyManifestLocked(manifest, baseDir));
  inFlight.set(key, task);
  try {
    return await task;
  } finally {
    // Only drop the entry if it is still ours.
    if (inFlight.get(key) === task) inFlight.delete(key);
  }
}
37
+
38
/**
 * Run `fn` while holding an on-disk lock for `rootPath`.
 *
 * The lock is a directory under `<baseDir>/.agent-relay/locks`; creating it
 * succeeds for exactly one contender at a time. While another holder has it
 * (`EEXIST`) this polls every `LOCK_POLL_MS` until `LOCK_TIMEOUT_MS` elapses.
 * Any other failure to create the lock (permissions, missing parent, full disk)
 * is not retryable and is reported immediately rather than after the timeout.
 *
 * The `owner` file is written inside the guarded region, so a failed write
 * releases the lock instead of leaving this process spinning on its own lock.
 *
 * @param rootPath Resolved project root the lock protects.
 * @param baseDir Orchestrator base directory that hosts the lock directory.
 * @param fn Synchronous work to perform while the lock is held.
 * @returns Whatever `fn` returns.
 * @throws Error when the lock cannot be created or is not obtained in time;
 *   errors thrown by `fn` propagate after the lock is released.
 */
async function withAcquisitionLock(rootPath: string, baseDir: string, fn: () => ProjectAcquisitionResult): Promise<ProjectAcquisitionResult> {
  const lockDir = join(resolve(baseDir), ".agent-relay", "locks", `acquire-${hash(rootPath)}.lock`);
  mkdirSync(dirname(lockDir), { recursive: true });
  const started = Date.now();
  for (;;) {
    try {
      mkdirSync(lockDir);
      break;
    } catch (error) {
      // Only "someone else holds it" is worth waiting for.
      if ((error as { code?: string }).code !== "EEXIST") {
        throw new Error(`repo acquisition lock failed for ${rootPath}: ${errMessage(error)}`);
      }
      if (Date.now() - started > LOCK_TIMEOUT_MS) {
        throw new Error(`repo acquisition lock timed out for ${rootPath}: ${errMessage(error)}`);
      }
      await new Promise<void>((wake) => setTimeout(wake, LOCK_POLL_MS));
    }
  }
  try {
    // Record the holder for anyone inspecting the lock directory.
    writeFileSync(join(lockDir, "owner"), `${process.pid}\n${rootPath}\n`);
    return fn();
  } finally {
    rmSync(lockDir, { recursive: true, force: true });
  }
}
60
+
61
/**
 * Apply a manifest: validate it, clone the root when it is missing, then sync it.
 *
 * The reported action is `"cloned"` whenever a clone happened, otherwise
 * whatever the sync step reported.
 *
 * @throws Error when validation, clone, or sync fails.
 */
function applyManifestLocked(manifest: ProjectAcquisitionManifest, baseDir: string): ProjectAcquisitionResult {
  validateManifest(manifest, baseDir);
  const rootPath = resolve(manifest.rootPath);
  const remoteUrl = manifest.remoteUrl.trim();

  const needsClone = !existsSync(rootPath);
  if (needsClone) cloneRoot(manifest, baseDir);

  // A fresh clone is still run through the sync step.
  const syncAction = syncRoot(manifest);
  const action: ProjectAcquisitionResult["action"] = needsClone ? "cloned" : syncAction;

  const result: ProjectAcquisitionResult = { applied: true, action, rootPath, remoteUrl };
  if (manifest.ref) result.ref = manifest.ref;
  const headSha = git(["rev-parse", "HEAD"], rootPath).stdout;
  result.headSha = headSha || undefined;
  return result;
}
81
+
82
/**
 * Reject manifests that name no remote or that point outside the allowed tree.
 *
 * The root must sit strictly inside `baseDir` (never `baseDir` itself) and the
 * working directory must sit inside the root.
 *
 * @throws Error describing the first rule that is violated.
 */
function validateManifest(manifest: ProjectAcquisitionManifest, baseDir: string): void {
  const rootPath = resolve(manifest.rootPath);

  if (manifest.remoteUrl.trim().length === 0) {
    throw new Error("project acquisition remoteUrl is required");
  }

  const rootIsInsideBase = isPathWithinBase(rootPath, baseDir) && rootPath !== resolve(baseDir);
  if (!rootIsInsideBase) {
    throw new Error(`project acquisition rootPath must be within orchestrator baseDir: ${baseDir}`);
  }

  const cwdIsInsideRoot = isPathWithinBase(resolve(manifest.cwd), rootPath);
  if (!cwdIsInsideRoot) {
    throw new Error(`project acquisition cwd must be within rootPath: ${manifest.cwd}`);
  }
}
92
+
93
/**
 * Clone the manifest's remote into `rootPath`.
 *
 * The clone lands in a temporary sibling directory first and is renamed into
 * place only once git succeeded, so a failed clone never leaves a partial
 * checkout at `rootPath`.
 *
 * The remote URL arrives in a control payload, so it is passed after `--` to
 * keep a value starting with `-` from being parsed as a git option. The ref is
 * trimmed, and a whitespace-only ref is treated as absent, matching how the
 * sync step reads it.
 *
 * @throws Error when the parent directory escapes `baseDir`, when `git clone`
 *   fails, or when the clone cannot be moved into place.
 */
function cloneRoot(manifest: ProjectAcquisitionManifest, baseDir: string): void {
  const rootPath = resolve(manifest.rootPath);
  const parent = dirname(rootPath);
  if (!isPathWithinBase(parent, baseDir)) throw new Error(`project acquisition parent must be within orchestrator baseDir: ${parent}`);
  mkdirSync(parent, { recursive: true });
  const tmp = join(parent, `.${basename(rootPath)}.agent-relay-clone-${process.pid}-${Date.now()}`);
  // Clear leftovers from an earlier attempt that used the same name.
  rmSync(tmp, { recursive: true, force: true });
  const ref = manifest.ref?.trim();
  const args = ["clone", "--origin", "origin"];
  if (ref) args.push("--branch", ref);
  // `--` ends option parsing: everything after it is a positional argument.
  args.push("--", manifest.remoteUrl.trim(), tmp);
  const cloned = runGit(args, parent);
  if (!cloned.ok) {
    rmSync(tmp, { recursive: true, force: true });
    throw new Error(`git clone failed for ${rootPath}: ${cloned.stderr || cloned.stdout}`);
  }
  try {
    renameSync(tmp, rootPath);
  } catch (error) {
    rmSync(tmp, { recursive: true, force: true });
    throw new Error(`failed to install cloned repo at ${rootPath}: ${errMessage(error)}`);
  }
}
115
+
116
/**
 * Bring an existing project root up to date without ever rewriting history.
 *
 * Steps, in order: confirm the root is a git checkout tracking the expected
 * origin, refuse if the working tree is dirty, fetch, check out the sync
 * target, and fast-forward to it when the checkout is strictly behind.
 *
 * @returns `"synced"` when a fast-forward happened, `"noop"` otherwise.
 * @throws Error on a dirty tree, divergence from the target, or a failed git step.
 */
function syncRoot(manifest: ProjectAcquisitionManifest): "synced" | "noop" {
  const rootPath = resolve(manifest.rootPath);
  assertExistingGitRoot(rootPath);
  assertRemote(rootPath, manifest.remoteUrl.trim());

  const worktreeStatus = git(["status", "--porcelain"], rootPath);
  if (!worktreeStatus.ok) {
    throw new Error(`git status failed for ${rootPath}: ${worktreeStatus.stderr}`);
  }
  const hasLocalChanges = worktreeStatus.stdout.trim().length > 0;
  if (hasLocalChanges) {
    throw new Error(`project root ${rootPath} has local changes; refusing ff-only sync before spawn`);
  }

  const fetched = git(["fetch", "--prune", "origin"], rootPath);
  if (!fetched.ok) {
    throw new Error(`git fetch failed for ${rootPath}: ${fetched.stderr || fetched.stdout}`);
  }

  const target = checkoutSyncTarget(rootPath, manifest.ref);
  if (!target) return "noop";

  const localSha = requireGit(["rev-parse", "HEAD"], rootPath);
  const targetSha = requireGit(["rev-parse", target], rootPath);
  if (localSha === targetSha) return "noop";

  // HEAD must be an ancestor of the target for a fast-forward to be possible.
  const canFastForward = git(["merge-base", "--is-ancestor", "HEAD", target], rootPath).ok;
  if (!canFastForward) {
    throw new Error(`project root ${rootPath} has diverged from ${target}; refusing non-fast-forward sync`);
  }

  const merged = git(["merge", "--ff-only", target], rootPath);
  if (!merged.ok) {
    throw new Error(`git ff-only sync failed for ${rootPath}: ${merged.stderr || merged.stdout}`);
  }
  return "synced";
}
137
+
138
/**
 * Ensure `rootPath` exists, is a directory, and is the top level of a git checkout.
 *
 * @throws Error naming which of those three conditions failed.
 */
function assertExistingGitRoot(rootPath: string): void {
  const info = (() => {
    try {
      return statSync(rootPath);
    } catch (error) {
      throw new Error(`project root does not exist after acquisition: ${rootPath}: ${errMessage(error)}`);
    }
  })();
  if (!info.isDirectory()) {
    throw new Error(`project root exists but is not a directory: ${rootPath}`);
  }

  const toplevel = git(["rev-parse", "--show-toplevel"], rootPath);
  // NOTE(review): both sides are compared as resolve()d strings; if git reports a
  // symlink-resolved path this may reject a root reached through a symlink — confirm.
  const isCheckoutRoot = toplevel.ok && resolve(toplevel.stdout) === rootPath;
  if (!isCheckoutRoot) {
    throw new Error(`project root exists but is not a git checkout root: ${rootPath}`);
  }
}
149
+
150
/**
 * Ensure the checkout's `origin` remote is exactly `remoteUrl`.
 *
 * @throws Error when there is no `origin` or its URL differs.
 */
function assertRemote(rootPath: string, remoteUrl: string): void {
  const origin = git(["remote", "get-url", "origin"], rootPath);
  const hasOrigin = origin.ok && Boolean(origin.stdout);
  if (!hasOrigin) {
    throw new Error(`project root ${rootPath} has no origin remote`);
  }

  const actualUrl = origin.stdout.trim();
  if (actualUrl === remoteUrl) return;
  throw new Error(`project root ${rootPath} origin mismatch: expected ${remoteUrl}, found ${actualUrl}`);
}
157
+
158
/**
 * Check out what the manifest asks for and report what to fast-forward to.
 *
 * With a ref:
 *  - if `origin/<ref>` exists, check out the local branch (creating it from
 *    `origin/<ref>` when missing) and return `origin/<ref>`;
 *  - otherwise, if the ref resolves to anything at all, check it out and return
 *    `undefined`, meaning there is nothing to fast-forward to;
 *  - otherwise fail.
 *
 * Without a ref nothing is checked out: the current branch's upstream is
 * returned, falling back to `origin/HEAD`.
 *
 * @returns The ref to fast-forward to, or `undefined` when no sync applies.
 * @throws Error when a checkout fails or no target can be determined.
 */
function checkoutSyncTarget(rootPath: string, ref: string | undefined): string | undefined {
  const name = ref?.trim();

  if (name) {
    const checkout = (args: string[]): void => {
      const checked = git(["checkout", ...args], rootPath);
      if (!checked.ok) throw new Error(`git checkout ${name} failed for ${rootPath}: ${checked.stderr || checked.stdout}`);
    };
    const refExists = (fullRef: string): boolean => git(["show-ref", "--verify", "--quiet", fullRef], rootPath).ok;

    if (refExists(`refs/remotes/origin/${name}`)) {
      const hasLocalBranch = refExists(`refs/heads/${name}`);
      checkout(hasLocalBranch ? [name] : ["-B", name, `origin/${name}`]);
      return `origin/${name}`;
    }

    const resolvesLocally = git(["rev-parse", "--verify", name], rootPath).ok;
    if (!resolvesLocally) {
      throw new Error(`project acquisition ref "${name}" not found on origin for ${rootPath}`);
    }
    checkout([name]);
    return undefined;
  }

  const upstream = git(["rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}"], rootPath);
  if (upstream.ok && upstream.stdout) return upstream.stdout;

  const originHead = git(["symbolic-ref", "--short", "refs/remotes/origin/HEAD"], rootPath);
  if (originHead.ok && originHead.stdout) return originHead.stdout;

  throw new Error(`project root ${rootPath} has no upstream or origin/HEAD for ff-only sync`);
}
185
+
186
/**
 * Run `git` synchronously in `cwd` with stdin closed and both outputs captured.
 *
 * @returns `ok` (exit code was 0) plus trimmed stdout and stderr.
 */
function runGit(args: string[], cwd: string): { ok: boolean; stdout: string; stderr: string } {
  const command = ["git", ...args];
  const { exitCode, stdout, stderr } = Bun.spawnSync(command, {
    cwd,
    stdin: "ignore",
    stdout: "pipe",
    stderr: "pipe",
  });
  const ok = exitCode === 0;
  return { ok, stdout: stdout.toString().trim(), stderr: stderr.toString().trim() };
}
199
+
200
/** Short, stable identifier for a path: first 16 hex characters of the SHA-1 of its resolved form. */
function hash(value: string): string {
  const digest = createHash("sha1");
  digest.update(resolve(value));
  return digest.digest("hex").substring(0, 16);
}
@@ -1,4 +1,5 @@
1
1
  export * from "./command";
2
+ export * from "./acquisition";
2
3
  export * from "./guests";
3
4
  export * from "./log-utils";
4
5
  export * from "./runtime";
@@ -2,6 +2,7 @@ import { closeSync, existsSync, openSync, rmSync } from "node:fs";
2
2
  import type { OrchestratorConfig } from "../config";
3
3
  import { resolveSpawnWorkspace, workspacesRoot } from "../workspace-probe";
4
4
  import type { ManagedAgentReport } from "../relay";
5
+ import { applyProjectAcquisitionManifest } from "./acquisition";
5
6
  import { buildEnv, buildRunnerCommand, defaultSpawnLabel, isWithinBaseDir, sessionName } from "./command";
6
7
  import { addSessionRecord, currentSessionPid, findSessionRecord, ensureLogDir, ensureRunnerInfoDir, logFilePath, runnerInfoPath, sessionRecordLiveness, sessionReportFields } from "./runtime";
7
8
  import { managedAgentId } from "./sessions";
@@ -10,6 +11,7 @@ import type { SpawnOptions } from "./types";
10
11
 
11
12
  interface SpawnAgentDeps {
12
13
  resolveSpawnWorkspace: typeof resolveSpawnWorkspace;
14
+ applyProjectAcquisitionManifest: typeof applyProjectAcquisitionManifest;
13
15
  spawnRunner: typeof spawnRunner;
14
16
  addSessionRecord: typeof addSessionRecord;
15
17
  findSessionRecord: typeof findSessionRecord;
@@ -24,6 +26,7 @@ interface SpawnAgentDeps {
24
26
 
25
27
  const defaultSpawnAgentDeps: SpawnAgentDeps = {
26
28
  resolveSpawnWorkspace,
29
+ applyProjectAcquisitionManifest,
27
30
  spawnRunner,
28
31
  addSessionRecord,
29
32
  findSessionRecord,
@@ -46,12 +49,13 @@ export async function spawnAgent(
46
49
  const agentId = opts.agentId || managedAgentId(config, opts.provider, label);
47
50
  const name = sessionName(config, opts.provider, label, opts.spawnRequestId ?? agentId);
48
51
 
49
- if (!existsSync(opts.cwd)) {
50
- throw new Error(`cwd does not exist: ${opts.cwd}`);
51
- }
52
52
  if (!isWithinBaseDir(opts.cwd, config.baseDir)) {
53
53
  throw new Error(`cwd must be within base directory: ${config.baseDir}`);
54
54
  }
55
+ await d.applyProjectAcquisitionManifest(opts.acquisition, config.baseDir);
56
+ if (!existsSync(opts.cwd)) {
57
+ throw new Error(`cwd does not exist: ${opts.cwd}`);
58
+ }
55
59
  const existing = existingSpawnSession(opts, d);
56
60
  if (existing) return existing;
57
61
 
@@ -2,6 +2,16 @@ import type { OrchestratorConfig } from "../config";
2
2
  import type { AgentLifecycle, SpawnProvider, WorkspaceMetadata, WorkspaceMode } from "agent-relay-sdk";
3
3
  import type { ResumeWorkspaceTarget } from "../workspace-probe/types";
4
4
 
5
/** Instructions for making a project checkout available on the host before an agent is spawned. */
export interface ProjectAcquisitionManifest {
  /** Acquisition strategy; only whole-project-root checkouts are defined. */
  mode: "project-root";
  /** Identifier of the project being acquired. */
  projectId: string;
  /** Directory that holds (or will hold) the checkout. */
  rootPath: string;
  /** Working directory the agent is meant to run in. */
  cwd: string;
  /** Git remote the checkout is cloned from and tracks as `origin`. */
  remoteUrl: string;
  /** Optional ref to check out. */
  ref?: string;
  /** Sync policy; only fast-forward-only updates are defined. */
  sync: "ff-only";
}
14
+
5
15
  export interface SpawnOptions {
6
16
  provider: SpawnProvider;
7
17
  cwd: string;
@@ -33,6 +43,8 @@ export interface SpawnOptions {
33
43
  requestedVia?: string;
34
44
  /** #635 — attach to or branch off an existing worktree instead of creating a fresh one. */
35
45
  resumeWorkspace?: ResumeWorkspaceTarget;
46
+ /** #410 — lazy clone/sync manifest, applied on the host before worktree prep. */
47
+ acquisition?: ProjectAcquisitionManifest;
36
48
  }
37
49
 
38
50
  export interface SessionInfo {
@@ -13,6 +13,8 @@ import { loadRepoLandGates } from "agent-relay-sdk/land-gates";
13
13
  * are meant to be a FAST high-signal subset, not the full suite — keep the ceiling
14
14
  * generous but bounded so a hung gate can't wedge the per-repo merge lease forever. */
15
15
  const DEFAULT_GATE_TIMEOUT_MS = 5 * 60 * 1000;
16
+ const TIMEOUT_KILL_GRACE_MS = 1_000;
17
+ const OUTPUT_CANCEL_GRACE_MS = 50;
16
18
  /** Cap on the full output streamed to the relay artifact (the notification only ever
17
19
  * carries the tail). Errors usually surface at the END, so we keep the tail on overflow. */
18
20
  const MAX_FULL_OUTPUT_BYTES = 256 * 1024;
@@ -31,26 +33,88 @@ function combineOutput(stdout: string, stderr: string): string {
31
33
  return stdout || stderr;
32
34
  }
33
35
 
36
/** Promise that resolves (with no value) once `ms` milliseconds have passed. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
39
+
40
/** Handle on the background capture of one process output pipe. */
interface OutputCapture {
  /** Settles once capture has finished; never rejects. */
  done: Promise<void>;
  /** Text decoded so far. */
  text(): string;
  /** Stop capturing as soon as possible. */
  cancel(): void;
}

/**
 * Start draining `pipe` in the background, decoding it as streamed UTF-8.
 *
 * Anything that is not a `ReadableStream` yields an inert capture with empty
 * text. Read failures are swallowed: the text gathered so far stays available
 * and `done` still settles.
 *
 * @param pipe A process stdout/stderr value; only `ReadableStream`s are read.
 */
function captureProcessOutput(pipe: unknown): OutputCapture {
  if (!(pipe instanceof ReadableStream)) {
    return { done: Promise.resolve(), text: () => "", cancel: () => {} };
  }

  const reader = pipe.getReader();
  const utf8 = new TextDecoder();
  let collected = "";
  let cancelRequested = false;

  const pump = async (): Promise<void> => {
    try {
      for (;;) {
        if (cancelRequested) break;
        const { done: finished, value } = await reader.read();
        if (finished) break;
        collected += utf8.decode(value, { stream: true });
      }
      // Flush whatever the streaming decoder is still holding back.
      collected += utf8.decode();
    } catch {
      // A timeout cancels the reader intentionally so pipe EOF cannot hold up the gate result.
    } finally {
      try { reader.releaseLock(); } catch {}
    }
  };

  return {
    done: pump(),
    text: () => collected,
    cancel: () => {
      cancelRequested = true;
      void reader.cancel().catch(() => {});
    },
  };
}
79
+
80
/**
 * Deliver `signal` to a gate process, preferring its whole process group.
 *
 * With a usable pid the signal goes to the negated pid. If that throws anything
 * other than `ESRCH`, the process itself is signalled instead. Without a usable
 * pid the process is signalled directly. Failures of the direct signal are ignored.
 */
function signalGateProcessGroup(proc: ReturnType<typeof Bun.spawn>, signal: NodeJS.Signals): void {
  const signalProcessOnly = (): void => {
    try { proc.kill(signal); } catch {}
  };

  const pid = proc.pid;
  const hasUsablePid = typeof pid === "number" && pid > 0;
  if (!hasUsablePid) {
    signalProcessOnly();
    return;
  }

  try {
    process.kill(-pid, signal);
  } catch (err) {
    const missingGroup = (err as { code?: string }).code === "ESRCH";
    if (!missingGroup) signalProcessOnly();
  }
}
94
+
34
95
  /** Run a single gate and capture its outcome. Never throws — a spawn failure (e.g.
35
96
  * the command can't launch) is reported as a non-passing result so the caller can
36
97
  * decide block-vs-warn from the gate's `optional` flag. */
37
- export function runOneLandGate(worktreePath: string, gate: LandGate): LandGateRunResult {
98
+ export async function runOneLandGate(worktreePath: string, gate: LandGate): Promise<LandGateRunResult> {
38
99
  const cwd = gate.cwd ? resolve(worktreePath, gate.cwd) : worktreePath;
39
100
  const timeoutMs = gate.timeoutMs ?? DEFAULT_GATE_TIMEOUT_MS;
40
101
  const started = Date.now();
41
102
  const base = { name: gate.name, command: gate.command, optional: gate.optional === true } as const;
42
103
 
43
- let proc: ReturnType<typeof Bun.spawnSync>;
104
+ let proc: ReturnType<typeof Bun.spawn>;
105
+ let timedOut = false;
106
+ let timeout: ReturnType<typeof setTimeout> | undefined;
107
+ let killTimeout: ReturnType<typeof setTimeout> | undefined;
44
108
  try {
45
109
  // A login shell so PATH (bun, node, project bins) resolves like the worker's own
46
110
  // environment; `env: process.env` makes runtime env mutations visible to the child.
47
- proc = Bun.spawnSync(["bash", "-lc", gate.command], {
111
+ proc = Bun.spawn(["bash", "-lc", gate.command], {
48
112
  cwd,
49
113
  env: process.env,
50
114
  stdin: "ignore",
51
115
  stdout: "pipe",
52
116
  stderr: "pipe",
53
- timeout: timeoutMs,
117
+ detached: true,
54
118
  });
55
119
  } catch (err) {
56
120
  const durationMs = Date.now() - started;
@@ -58,12 +122,47 @@ export function runOneLandGate(worktreePath: string, gate: LandGate): LandGateRu
58
122
  return { ...base, passed: false, exitCode: null, timedOut: false, durationMs, outputTail: output, output };
59
123
  }
60
124
 
125
+ const stdoutCapture = captureProcessOutput(proc.stdout);
126
+ const stderrCapture = captureProcessOutput(proc.stderr);
127
+ let timeoutTriggeredResolve: () => void = () => {};
128
+ const timeoutTriggered = new Promise<void>((resolve) => { timeoutTriggeredResolve = resolve; });
129
+ timeout = setTimeout(() => {
130
+ timedOut = true;
131
+ timeoutTriggeredResolve();
132
+ signalGateProcessGroup(proc, "SIGTERM");
133
+ stdoutCapture.cancel();
134
+ stderrCapture.cancel();
135
+ killTimeout = setTimeout(() => {
136
+ signalGateProcessGroup(proc, "SIGKILL");
137
+ stdoutCapture.cancel();
138
+ stderrCapture.cancel();
139
+ }, TIMEOUT_KILL_GRACE_MS);
140
+ killTimeout.unref?.();
141
+ }, timeoutMs);
142
+ timeout.unref?.();
143
+
144
+ const exitCodeRaw = await Promise.race([
145
+ proc.exited,
146
+ timeoutTriggered.then(async () => {
147
+ await Promise.race([proc.exited.then(() => undefined), sleep(TIMEOUT_KILL_GRACE_MS + OUTPUT_CANCEL_GRACE_MS)]);
148
+ return null;
149
+ }),
150
+ ]).finally(() => {
151
+ if (timeout) clearTimeout(timeout);
152
+ if (killTimeout) clearTimeout(killTimeout);
153
+ });
154
+ if (timedOut) {
155
+ stdoutCapture.cancel();
156
+ stderrCapture.cancel();
157
+ await Promise.race([Promise.allSettled([stdoutCapture.done, stderrCapture.done]), sleep(OUTPUT_CANCEL_GRACE_MS)]);
158
+ } else {
159
+ await Promise.all([stdoutCapture.done, stderrCapture.done]);
160
+ }
61
161
  const durationMs = Date.now() - started;
62
- const stdout = proc.stdout ? proc.stdout.toString() : "";
63
- const stderr = proc.stderr ? proc.stderr.toString() : "";
162
+ const stdout = stdoutCapture.text();
163
+ const stderr = stderrCapture.text();
64
164
  let combined = combineOutput(stdout, stderr);
65
- const exitCode = typeof proc.exitCode === "number" ? proc.exitCode : null;
66
- const timedOut = proc.exitedDueToTimeout === true;
165
+ const exitCode = timedOut ? null : typeof exitCodeRaw === "number" ? exitCodeRaw : null;
67
166
  const passed = exitCode === 0;
68
167
  if (timedOut) combined = `${combined}\n[land-gate] timed out after ${timeoutMs}ms`.trimStart();
69
168
  if (!combined) combined = passed ? "(gate produced no output)" : "(gate produced no output)";
@@ -100,7 +199,7 @@ export interface LandGatesResult {
100
199
  * (surfaced as a synthetic required gate) rather than an unhandled throw that would
101
200
  * crash the merge command — the worker fixes the config and re-lands.
102
201
  */
103
- export function runLandGates(worktreePath: string): LandGatesResult {
202
+ export async function runLandGates(worktreePath: string): Promise<LandGatesResult> {
104
203
  const warnings: LandGateRunResult[] = [];
105
204
  let gates: LandGate[];
106
205
  try {
@@ -116,7 +215,7 @@ export function runLandGates(worktreePath: string): LandGatesResult {
116
215
  if (gates.length === 0) return { ran: 0, warnings };
117
216
 
118
217
  for (const gate of gates) {
119
- const result = runOneLandGate(worktreePath, gate);
218
+ const result = await runOneLandGate(worktreePath, gate);
120
219
  if (result.passed) continue;
121
220
  if (result.optional) { warnings.push(result); continue; }
122
221
  // First required failure blocks the land — don't run the rest (the worker fixes
@@ -346,7 +346,7 @@ function validPreviewStrategy(strategy: string | null): "pr" | "rebase-ff" | "au
346
346
  * Refuses on a dirty worktree, predicted conflicts, or nothing to merge. Never
347
347
  * destroys work on uncertainty.
348
348
  */
349
- export function mergeWorkspace(input: WorkspaceMergeInput): WorkspaceMergeResult {
349
+ export async function mergeWorkspace(input: WorkspaceMergeInput): Promise<WorkspaceMergeResult> {
350
350
  if (!input.worktreePath) return { strategy: "rebase-ff", merged: false, status: "review_requested", error: "worktreePath required", workspaceId: input.id };
351
351
  const worktreePath = resolve(input.worktreePath);
352
352
  const repoRoot = input.repoRoot ? resolve(input.repoRoot) : worktreePath;
@@ -391,7 +391,7 @@ export function mergeWorkspace(input: WorkspaceMergeInput): WorkspaceMergeResult
391
391
  if (preview.conflict) return head({ conflict: true, status: "conflict", error: "merge would conflict with base" });
392
392
 
393
393
  if (strategy === "pr") return mergePr(input, worktreePath, branch, preview, head);
394
- return mergeRebaseFf(input, worktreePath, repoRoot, branch, preview, head);
394
+ return await mergeRebaseFf(input, worktreePath, repoRoot, branch, preview, head);
395
395
  }
396
396
 
397
397
  /**
@@ -576,15 +576,15 @@ function recordNoFfMerge(
576
576
  * Returns the gate outcome, or an `abort` describing a merge result the caller must return early
577
577
  * (a real merge conflict computing the integrated tree, or a failure materializing it).
578
578
  */
579
- function runLandGatesOnIntegratedTree(
579
+ async function runLandGatesOnIntegratedTree(
580
580
  repoRoot: string,
581
581
  worktreePath: string,
582
582
  behind: number,
583
583
  integrationBaseSha: string,
584
584
  headSha: string,
585
585
  mergeMessage: string,
586
- ): { gates: LandGatesResult } | { abort: { conflict?: boolean; error: string } } {
587
- if (behind === 0) return { gates: runLandGates(worktreePath) };
586
+ ): Promise<{ gates: LandGatesResult } | { abort: { conflict?: boolean; error: string } }> {
587
+ if (behind === 0) return { gates: await runLandGates(worktreePath) };
588
588
 
589
589
  const synth = synthesizeNoFfMerge(repoRoot, integrationBaseSha, headSha, mergeMessage);
590
590
  if (!synth.ok) return { abort: { conflict: synth.conflict, error: synth.error } };
@@ -600,7 +600,7 @@ function runLandGatesOnIntegratedTree(
600
600
  return { abort: { error: add.stderr || "failed to materialize integrated tree for land gates" } };
601
601
  }
602
602
  try {
603
- return { gates: runLandGates(tmpWorktree) };
603
+ return { gates: await runLandGates(tmpWorktree) };
604
604
  } finally {
605
605
  git(["worktree", "remove", "--force", tmpWorktree], repoRoot);
606
606
  rmSync(tmpParent, { recursive: true, force: true });
@@ -643,14 +643,14 @@ function syncLocalBaseToUpstream(
643
643
  return { ok: true, baseSync };
644
644
  }
645
645
 
646
- function mergeRebaseFf(
646
+ async function mergeRebaseFf(
647
647
  input: WorkspaceMergeInput,
648
648
  worktreePath: string,
649
649
  repoRoot: string,
650
650
  branch: string | undefined,
651
651
  preview: WorkspaceMergePreview,
652
652
  head: (field: Partial<WorkspaceMergeResult>) => WorkspaceMergeResult,
653
- ): WorkspaceMergeResult {
653
+ ): Promise<WorkspaceMergeResult> {
654
654
  const base = preview.baseRef;
655
655
  if (!base) return head({ status: "review_requested", error: "no base branch to merge into" });
656
656
  if (!branch) return head({ status: "review_requested", error: "cannot determine agent branch" });
@@ -724,7 +724,7 @@ function mergeRebaseFf(
724
724
  // status:"review_requested" carrying `gateFailure`, NOT "conflict": gate failures bounce back to
725
725
  // the worker, never the merge-conflict/steward path. Optional-gate failures ride along as
726
726
  // `gateWarnings` on the successful land below.
727
- const gateRun = runLandGatesOnIntegratedTree(repoRoot, worktreePath, behind, integrationBaseSha, headSha, landMergeMessage(branch, landedSubject));
727
+ const gateRun = await runLandGatesOnIntegratedTree(repoRoot, worktreePath, behind, integrationBaseSha, headSha, landMergeMessage(branch, landedSubject));
728
728
  if ("abort" in gateRun) {
729
729
  return gateRun.abort.conflict
730
730
  ? head({ conflict: true, status: "conflict", error: gateRun.abort.error })