@cat-factory/executor-harness 1.31.8 → 1.31.12

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import { spawn } from 'node:child_process';
2
2
  import { mkdtemp, rm, writeFile } from 'node:fs/promises';
3
3
  import { tmpdir } from 'node:os';
4
4
  import { join } from 'node:path';
5
- import { killChildProcess } from './process.js';
5
+ import { killChildProcess, spawnDetached } from './process.js';
6
6
  import { redact, secretsToRedact } from './redact.js';
7
7
  function isObject(value) {
8
8
  return typeof value === 'object' && value !== null;
@@ -28,6 +28,8 @@ function streamCli(command, args, prompt, opts, env, secrets, onEvent) {
28
28
  cwd: opts.cwd,
29
29
  env: { ...process.env, ...env },
30
30
  stdio: ['pipe', 'pipe', 'pipe'],
31
+ // Own process group (POSIX) so killChildProcess reaps the CLI's grandchildren too.
32
+ detached: spawnDetached,
31
33
  });
32
34
  child.stdin.on('error', () => { });
33
35
  child.stdin.end(prompt);
@@ -59,6 +59,9 @@ function guardProcess(child, label, logger) {
59
59
  export async function standUpFrontend(dir, infra, signal, onActivity, logger = log) {
60
60
  const startedAt = Date.now();
61
61
  const processes = [];
62
+ // The frontend app's directory: the checkout root, or a monorepo subdirectory when the config
63
+ // named one. install/build/serve run here and `outputDir`/`mockMappingsPath` are relative to it.
64
+ const workDir = infra.directory ? join(dir, infra.directory) : dir;
62
65
  // Keep the run's inactivity watchdog fed while the (activity-silent) install → build → serve
63
66
  // stand-up runs. A real frontend's `install` + `build` can exceed the harness inactivity
64
67
  // window (default 10 min, JOB_INACTIVITY_MS) — and unlike the Pi phase this stand-up emits
@@ -96,7 +99,7 @@ export async function standUpFrontend(dir, infra, signal, onActivity, logger = l
96
99
  const install = installCommand(infra);
97
100
  logger.info('agent(frontend): installing', { command: install.join(' ') });
98
101
  const installed = await exec(install[0], install.slice(1), {
99
- cwd: dir,
102
+ cwd: workDir,
100
103
  signal,
101
104
  timeout: 8 * 60_000,
102
105
  maxBuffer: 16 * 1024 * 1024,
@@ -107,7 +110,7 @@ export async function standUpFrontend(dir, infra, signal, onActivity, logger = l
107
110
  const buildScript = infra.buildScript ?? DEFAULTS.buildScript;
108
111
  logger.info('agent(frontend): building', { buildScript });
109
112
  const built = await exec(pm, ['run', buildScript], {
110
- cwd: dir,
113
+ cwd: workDir,
111
114
  signal,
112
115
  timeout: 12 * 60_000,
113
116
  maxBuffer: 16 * 1024 * 1024,
@@ -128,7 +131,7 @@ export async function standUpFrontend(dir, infra, signal, onActivity, logger = l
128
131
  'is only served in static mode; relying on the forwarded env instead', { outputDir });
129
132
  }
130
133
  const shim = `window.env = ${JSON.stringify(infra.env)};\n`;
131
- await writeFile(join(dir, outputDir, 'env.js'), shim, 'utf8').catch((err) => {
134
+ await writeFile(join(workDir, outputDir, 'env.js'), shim, 'utf8').catch((err) => {
132
135
  // Best-effort, but no longer silent: a missing/renamed output dir would drop the shim
133
136
  // and the app would read no URLs, so surface it in the log for diagnosis.
134
137
  logger.warn('agent(frontend): could not write runtime env shim', {
@@ -139,9 +142,9 @@ export async function standUpFrontend(dir, infra, signal, onActivity, logger = l
139
142
  }
140
143
  // 3) WireMock for the mocked upstreams. Seeded from the FE repo's mappings dir when present;
141
144
  // otherwise it still binds the port (unmatched requests 404, gentler than ECONNREFUSED).
142
- processes.push(await startWireMock(dir, infra, wiremockPort, logger));
145
+ processes.push(await startWireMock(workDir, infra, wiremockPort, logger));
143
146
  // 4) Serve the built app.
144
- processes.push(startServe(dir, infra, servePort, outputDir, logger));
147
+ processes.push(startServe(workDir, infra, servePort, outputDir, logger));
145
148
  // 5) Health-check the served app AND WireMock before handing off, concurrently (WireMock is
146
149
  // a JVM that cold-starts slower than the static server). A dead WireMock would otherwise let
147
150
  // the agent start and hit ECONNREFUSED on the app's first mocked-upstream call.
package/dist/job.js CHANGED
@@ -81,7 +81,7 @@ function parseHarnessAuth(o) {
81
81
  * `..` segment) — the agent's cwd is built from this, so a hostile value must never
82
82
  * point outside the cloned repo.
83
83
  */
84
- function sanitizeServiceDirectory(value) {
84
+ function sanitizeServiceDirectory(value, field = 'repo.serviceDirectory') {
85
85
  if (typeof value !== 'string')
86
86
  return undefined;
87
87
  const normalized = value
@@ -94,7 +94,7 @@ function sanitizeServiceDirectory(value) {
94
94
  if (segments.length === 0)
95
95
  return undefined;
96
96
  if (segments.some((s) => s === '..')) {
97
- throw new Error("Invalid job: 'repo.serviceDirectory' must be a path inside the repo");
97
+ throw new Error(`Invalid job: '${field}' must be a path inside the repo`);
98
98
  }
99
99
  return segments.join('/');
100
100
  }
@@ -301,8 +301,13 @@ function parseFrontendInfraSpec(o) {
301
301
  }
302
302
  const servePort = port(o.servePort);
303
303
  const wiremockPort = port(o.wiremockPort);
304
+ // The app's monorepo subdirectory becomes the install/build/serve cwd, so it goes through the
305
+ // same escape-guard as `repo.serviceDirectory` — strip slashes and reject any `..` segment so a
306
+ // hostile value can't point the stand-up outside the cloned repo.
307
+ const directory = sanitizeServiceDirectory(o.directory, 'frontend.directory');
304
308
  return {
305
309
  kind: 'frontend',
310
+ ...(directory ? { directory } : {}),
306
311
  ...(packageManager ? { packageManager } : {}),
307
312
  ...(typeof o.install === 'string' && o.install ? { install: o.install } : {}),
308
313
  ...(typeof o.buildScript === 'string' && o.buildScript ? { buildScript: o.buildScript } : {}),
package/dist/pi.js CHANGED
@@ -2,7 +2,7 @@ import { spawn } from 'node:child_process';
2
2
  import { appendFile, mkdir, writeFile } from 'node:fs/promises';
3
3
  import { homedir } from 'node:os';
4
4
  import { dirname, join } from 'node:path';
5
- import { killChildProcess } from './process.js';
5
+ import { killChildProcess, spawnDetached } from './process.js';
6
6
  import { pathExists } from './fs-utils.js';
7
7
  import { redactSecrets } from './redact.js';
8
8
  import { log } from './logger.js';
@@ -573,6 +573,8 @@ export function runPi(opts) {
573
573
  // stdin is piped (not 'ignore') so the prompt is delivered out-of-band
574
574
  // rather than on argv — see the function doc for the injection rationale.
575
575
  stdio: ['pipe', 'pipe', 'pipe'],
576
+ // Own process group (POSIX) so killChildProcess reaps Pi's grandchildren too.
577
+ detached: spawnDetached,
576
578
  });
577
579
  // Hand Pi the prompt over stdin, then close it so print mode sees EOF and
578
580
  // runs. Ignore stdin errors (e.g. EPIPE if Pi exits before reading): the
package/dist/process.js CHANGED
@@ -6,20 +6,48 @@ import { log } from './logger.js';
6
6
  // How long to wait after SIGTERM before escalating to SIGKILL.
7
7
  const KILL_GRACE_MS = 5_000;
8
8
  /**
9
- * Terminate a child process: SIGTERM first, then SIGKILL after a grace period if it
10
- * hasn't exited (ignored an ordinary terminate). The escalation timer is `unref()`d
11
- * so it never by itself keeps the event loop alive. Safe to call more than once.
9
+ * Signal a child and, when it was spawned detached (a process-group leader on POSIX — see
10
+ * `spawnDetached`), the whole group with it. The agent CLIs (`claude`/`codex`/Pi) spawn their
11
+ * own grandchildren (a shell tool, a build, their own git); a plain `child.kill()` reaps only
12
+ * the direct child and those grandchildren reparent to init and keep running unsupervised.
13
+ * `process.kill(-pid)` targets the group instead. Falls back to a direct kill on Windows (no
14
+ * POSIX process groups) or when the group send fails (already reaped, or the child wasn't
15
+ * spawned detached so no group of its own exists).
16
+ */
17
+ function signalTree(child, signal) {
18
+ if (child.pid !== undefined && process.platform !== 'win32') {
19
+ try {
20
+ process.kill(-child.pid, signal);
21
+ return;
22
+ }
23
+ catch {
24
+ // Fall through to the direct kill below.
25
+ }
26
+ }
27
+ child.kill(signal);
28
+ }
29
+ /**
30
+ * Terminate a child process (and its group — see {@link signalTree}): SIGTERM first, then
31
+ * SIGKILL after a grace period if it hasn't exited (ignored an ordinary terminate). The
32
+ * escalation timer is `unref()`d so it never by itself keeps the event loop alive. Safe to
33
+ * call more than once.
12
34
  *
13
35
  * An actual escalation to SIGKILL is logged at warn level: a process that ignores
14
36
  * SIGTERM and has to be force-killed is a signal worth seeing (a wedged Pi/CLI), and
15
37
  * was previously invisible. Pass a child logger to carry the run's `jobId`.
16
38
  */
17
39
  export function killChildProcess(child, graceMs = KILL_GRACE_MS, logger = log) {
18
- child.kill('SIGTERM');
40
+ signalTree(child, 'SIGTERM');
19
41
  setTimeout(() => {
20
42
  if (child.exitCode === null && child.signalCode === null) {
21
43
  logger.warn('killChildProcess: process ignored SIGTERM, escalating to SIGKILL', { graceMs });
22
- child.kill('SIGKILL');
44
+ signalTree(child, 'SIGKILL');
23
45
  }
24
46
  }, graceMs).unref();
25
47
  }
48
+ /**
49
+ * Whether a spawned agent CLI should be its own process-group leader so {@link killChildProcess}
50
+ * can reap the whole tree (its grandchildren) on abort. POSIX only; Windows has no process
51
+ * groups (and `detached` there spawns a new console we don't want), so it stays false.
52
+ */
53
+ export const spawnDetached = process.platform !== 'win32';
package/dist/runner.js CHANGED
@@ -20,7 +20,7 @@ export function loadRunnerLimits(env = process.env) {
20
20
  };
21
21
  }
22
22
  function toView(entry) {
23
- const { promise: _promise, spanBuffer: _spanBuffer, followUpBuffer: _followUpBuffer, ...view } = entry;
23
+ const { promise: _promise, spanBuffer: _spanBuffer, followUpBuffer: _followUpBuffer, abort: _abort, ...view } = entry;
24
24
  return { ...view };
25
25
  }
26
26
  /**
@@ -90,6 +90,35 @@ export class JobRegistry {
90
90
  }
91
91
  return view;
92
92
  }
93
+ /**
94
+ * Abort every RUNNING job (fires each run's abort signal, which SIGTERM→SIGKILLs its
95
+ * CLI/git children via `killChildProcess`). The graceful-shutdown hook: a harness dying
96
+ * to SIGTERM must not orphan a live agent subprocess — reparented, it would keep working
97
+ * unsupervised (and, in native local mode, on the developer's own login). Returns the
98
+ * number of jobs aborted.
99
+ */
100
+ abortAll(reason) {
101
+ let aborted = 0;
102
+ for (const entry of this.jobs.values()) {
103
+ if (entry.state === 'running' && entry.abort) {
104
+ entry.abort(reason);
105
+ aborted += 1;
106
+ }
107
+ }
108
+ return aborted;
109
+ }
110
+ /**
111
+ * How many jobs are still RUNNING. Graceful shutdown polls this so it can exit the moment the
112
+ * aborted jobs have actually settled (the common case: the CLI honours SIGTERM in ms) instead
113
+ * of waiting out a fixed kill-grace window.
114
+ */
115
+ runningCount() {
116
+ let running = 0;
117
+ for (const entry of this.jobs.values())
118
+ if (entry.state === 'running')
119
+ running += 1;
120
+ return running;
121
+ }
93
122
  async drive(entry, job) {
94
123
  const controller = new AbortController();
95
124
  let killReason;
@@ -130,6 +159,8 @@ export class JobRegistry {
130
159
  resetInactivity();
131
160
  };
132
161
  resetInactivity();
162
+ // Expose the abort for shutdown (see abortAll); cleared in `finally` once the job settles.
163
+ entry.abort = (reason) => controller.abort(new Error(reason));
133
164
  jobLog.info('job started', {});
134
165
  try {
135
166
  const result = await this.run(job, {
@@ -182,6 +213,7 @@ export class JobRegistry {
182
213
  finally {
183
214
  clearTimeout(inactivity);
184
215
  clearTimeout(cap);
216
+ entry.abort = undefined;
185
217
  entry.heartbeatAt = Date.now();
186
218
  }
187
219
  }
package/dist/server.js CHANGED
@@ -13,6 +13,11 @@ import { log } from './logger.js';
13
13
  // in the request body and live only for the duration of the job in an ephemeral
14
14
  // workspace.
15
15
  const PORT = Number(process.env.PORT ?? 8080);
16
+ // Optional bind address. Default (unset) binds all interfaces — a container needs that for
17
+ // its published port. The native local transport runs the harness UNSANDBOXED on the
18
+ // developer's host and only ever connects over loopback, so it sets 127.0.0.1 to keep the
19
+ // agent-spawning API off the LAN.
20
+ const BIND_HOST = process.env.HARNESS_BIND_HOST?.trim() || undefined;
16
21
  // Optional inbound auth. When HARNESS_SHARED_SECRET is set, every non-health
17
22
  // request must present a matching `x-harness-secret` header (constant-time
18
23
  // compared). When it is unset the harness behaves as before (open), so local/dev
@@ -128,8 +133,35 @@ const server = createServer((req, res) => {
128
133
  });
129
134
  // Only auto-listen when run as the entry point (tests import handleRun directly).
130
135
  if (process.env.NODE_ENV !== 'test') {
131
- server.listen(PORT, () => {
132
- console.log(`executor-harness listening on :${PORT}`);
136
+ server.listen(PORT, BIND_HOST, () => {
137
+ console.log(`executor-harness listening on ${BIND_HOST ?? ''}:${PORT}`);
133
138
  });
139
+ // Graceful shutdown: dying to a bare SIGTERM/SIGINT (the default handler) would ORPHAN any
140
+ // in-flight `claude`/`codex`/git child — reparented, it keeps working unsupervised (and in
141
+ // native local mode on the developer's own login). Abort every running job first (the
142
+ // SIGTERM→SIGKILL escalation in killChildProcess), then exit as SOON as the aborted jobs have
143
+ // settled — the CLI usually honours SIGTERM in milliseconds, so don't block every shutdown on
144
+ // a fixed window. The 6s cap covers a job that ignored SIGTERM and had to be force-killed (the
145
+ // 5s escalation) plus a margin. Nothing running ⇒ exit immediately. A second signal takes the
146
+ // default (immediate) exit, since `once` leaves it unhandled.
147
+ const shutdown = (signal) => {
148
+ const aborted = Object.values(KINDS).reduce((count, { registry }) => count + registry.abortAll(`harness shutting down (${signal})`), 0);
149
+ log.info('shutting down', { signal, abortedJobs: aborted });
150
+ server.close();
151
+ if (aborted === 0) {
152
+ process.exit(0);
153
+ return;
154
+ }
155
+ const deadline = Date.now() + 6_000;
156
+ const timer = setInterval(() => {
157
+ const stillRunning = Object.values(KINDS).some(({ registry }) => registry.runningCount() > 0);
158
+ if (!stillRunning || Date.now() >= deadline) {
159
+ clearInterval(timer);
160
+ process.exit(0);
161
+ }
162
+ }, 50);
163
+ };
164
+ process.once('SIGTERM', () => shutdown('SIGTERM'));
165
+ process.once('SIGINT', () => shutdown('SIGINT'));
134
166
  }
135
167
  export { server };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cat-factory/executor-harness",
3
- "version": "1.31.8",
3
+ "version": "1.31.12",
4
4
  "description": "Container payload: a thin TypeScript wrapper that runs the Pi coding agent against a cloned repo and opens a PR. Runs in the Cloudflare Container (and, in local native mode, as a host process); carries no secrets.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -26,8 +26,8 @@
26
26
  "hono": "^4.12.27",
27
27
  "typescript": "^6.0.3",
28
28
  "vitest": "^4.1.9",
29
- "@cat-factory/server": "0.66.4",
30
- "@cat-factory/spend": "0.10.70"
29
+ "@cat-factory/spend": "0.10.73",
30
+ "@cat-factory/server": "0.67.0"
31
31
  },
32
32
  "scripts": {
33
33
  "build": "tsc -p tsconfig.json",
@@ -3,7 +3,7 @@ import { mkdtemp, rm, writeFile } from 'node:fs/promises'
3
3
  import { tmpdir } from 'node:os'
4
4
  import { join } from 'node:path'
5
5
  import type { PiRunOutcome, PiRunStats, TodoProgress } from './pi.js'
6
- import { killChildProcess } from './process.js'
6
+ import { killChildProcess, spawnDetached } from './process.js'
7
7
  import { redact, secretsToRedact } from './redact.js'
8
8
 
9
9
  // The alternate (subscription) harness runners. The Pi harness reaches models
@@ -93,6 +93,8 @@ function streamCli(
93
93
  cwd: opts.cwd,
94
94
  env: { ...process.env, ...env },
95
95
  stdio: ['pipe', 'pipe', 'pipe'],
96
+ // Own process group (POSIX) so killChildProcess reaps the CLI's grandchildren too.
97
+ detached: spawnDetached,
96
98
  })
97
99
  child.stdin.on('error', () => {})
98
100
  child.stdin.end(prompt)
@@ -83,6 +83,9 @@ export async function standUpFrontend(
83
83
  ): Promise<FrontendStandUp> {
84
84
  const startedAt = Date.now()
85
85
  const processes: ChildProcess[] = []
86
+ // The frontend app's directory: the checkout root, or a monorepo subdirectory when the config
87
+ // named one. install/build/serve run here and `outputDir`/`mockMappingsPath` are relative to it.
88
+ const workDir = infra.directory ? join(dir, infra.directory) : dir
86
89
  // Keep the run's inactivity watchdog fed while the (activity-silent) install → build → serve
87
90
  // stand-up runs. A real frontend's `install` + `build` can exceed the harness inactivity
88
91
  // window (default 10 min, JOB_INACTIVITY_MS) — and unlike the Pi phase this stand-up emits
@@ -121,7 +124,7 @@ export async function standUpFrontend(
121
124
  const install = installCommand(infra)
122
125
  logger.info('agent(frontend): installing', { command: install.join(' ') })
123
126
  const installed = await exec(install[0]!, install.slice(1), {
124
- cwd: dir,
127
+ cwd: workDir,
125
128
  signal,
126
129
  timeout: 8 * 60_000,
127
130
  maxBuffer: 16 * 1024 * 1024,
@@ -133,7 +136,7 @@ export async function standUpFrontend(
133
136
  const buildScript = infra.buildScript ?? DEFAULTS.buildScript
134
137
  logger.info('agent(frontend): building', { buildScript })
135
138
  const built = await exec(pm, ['run', buildScript], {
136
- cwd: dir,
139
+ cwd: workDir,
137
140
  signal,
138
141
  timeout: 12 * 60_000,
139
142
  maxBuffer: 16 * 1024 * 1024,
@@ -158,7 +161,7 @@ export async function standUpFrontend(
158
161
  )
159
162
  }
160
163
  const shim = `window.env = ${JSON.stringify(infra.env)};\n`
161
- await writeFile(join(dir, outputDir, 'env.js'), shim, 'utf8').catch((err) => {
164
+ await writeFile(join(workDir, outputDir, 'env.js'), shim, 'utf8').catch((err) => {
162
165
  // Best-effort, but no longer silent: a missing/renamed output dir would drop the shim
163
166
  // and the app would read no URLs, so surface it in the log for diagnosis.
164
167
  logger.warn('agent(frontend): could not write runtime env shim', {
@@ -170,10 +173,10 @@ export async function standUpFrontend(
170
173
 
171
174
  // 3) WireMock for the mocked upstreams. Seeded from the FE repo's mappings dir when present;
172
175
  // otherwise it still binds the port (unmatched requests 404, gentler than ECONNREFUSED).
173
- processes.push(await startWireMock(dir, infra, wiremockPort, logger))
176
+ processes.push(await startWireMock(workDir, infra, wiremockPort, logger))
174
177
 
175
178
  // 4) Serve the built app.
176
- processes.push(startServe(dir, infra, servePort, outputDir, logger))
179
+ processes.push(startServe(workDir, infra, servePort, outputDir, logger))
177
180
 
178
181
  // 5) Health-check the served app AND WireMock before handing off, concurrently (WireMock is
179
182
  // a JVM that cold-starts slower than the static server). A dead WireMock would otherwise let
package/src/job.ts CHANGED
@@ -146,7 +146,10 @@ function parseHarnessAuth(o: Record<string, unknown>): HarnessAuthFields {
146
146
  * `..` segment) — the agent's cwd is built from this, so a hostile value must never
147
147
  * point outside the cloned repo.
148
148
  */
149
- function sanitizeServiceDirectory(value: unknown): string | undefined {
149
+ function sanitizeServiceDirectory(
150
+ value: unknown,
151
+ field = 'repo.serviceDirectory',
152
+ ): string | undefined {
150
153
  if (typeof value !== 'string') return undefined
151
154
  const normalized = value
152
155
  .trim()
@@ -156,7 +159,7 @@ function sanitizeServiceDirectory(value: unknown): string | undefined {
156
159
  const segments = normalized.split('/').filter((s) => s !== '' && s !== '.')
157
160
  if (segments.length === 0) return undefined
158
161
  if (segments.some((s) => s === '..')) {
159
- throw new Error("Invalid job: 'repo.serviceDirectory' must be a path inside the repo")
162
+ throw new Error(`Invalid job: '${field}' must be a path inside the repo`)
160
163
  }
161
164
  return segments.join('/')
162
165
  }
@@ -290,6 +293,12 @@ export interface ServiceInfraSpec {
290
293
  */
291
294
  export interface FrontendInfraSpec {
292
295
  kind: 'frontend'
296
+ /**
297
+ * The frontend app's subdirectory within the checkout (a monorepo frontend). Absent ⇒ the
298
+ * checkout root. When set, install/build/serve run there and `outputDir`/`wiremockMappingsPath`
299
+ * are resolved relative to it.
300
+ */
301
+ directory?: string
293
302
  /** Package manager for install/build. Default `pnpm`. */
294
303
  packageManager?: 'pnpm' | 'npm' | 'yarn'
295
304
  /** Explicit install command, overriding the one derived from `packageManager`. */
@@ -659,8 +668,13 @@ function parseFrontendInfraSpec(o: Record<string, unknown>): FrontendInfraSpec {
659
668
  }
660
669
  const servePort = port(o.servePort)
661
670
  const wiremockPort = port(o.wiremockPort)
671
+ // The app's monorepo subdirectory becomes the install/build/serve cwd, so it goes through the
672
+ // same escape-guard as `repo.serviceDirectory` — strip slashes and reject any `..` segment so a
673
+ // hostile value can't point the stand-up outside the cloned repo.
674
+ const directory = sanitizeServiceDirectory(o.directory, 'frontend.directory')
662
675
  return {
663
676
  kind: 'frontend',
677
+ ...(directory ? { directory } : {}),
664
678
  ...(packageManager ? { packageManager } : {}),
665
679
  ...(typeof o.install === 'string' && o.install ? { install: o.install } : {}),
666
680
  ...(typeof o.buildScript === 'string' && o.buildScript ? { buildScript: o.buildScript } : {}),
package/src/pi.ts CHANGED
@@ -2,7 +2,7 @@ import { spawn } from 'node:child_process'
2
2
  import { appendFile, mkdir, writeFile } from 'node:fs/promises'
3
3
  import { homedir } from 'node:os'
4
4
  import { dirname, join } from 'node:path'
5
- import { killChildProcess } from './process.js'
5
+ import { killChildProcess, spawnDetached } from './process.js'
6
6
  import { pathExists } from './fs-utils.js'
7
7
  import { redactSecrets } from './redact.js'
8
8
  import { log } from './logger.js'
@@ -831,6 +831,8 @@ export function runPi(opts: {
831
831
  // stdin is piped (not 'ignore') so the prompt is delivered out-of-band
832
832
  // rather than on argv — see the function doc for the injection rationale.
833
833
  stdio: ['pipe', 'pipe', 'pipe'],
834
+ // Own process group (POSIX) so killChildProcess reaps Pi's grandchildren too.
835
+ detached: spawnDetached,
834
836
  },
835
837
  )
836
838
  // Hand Pi the prompt over stdin, then close it so print mode sees EOF and
package/src/process.ts CHANGED
@@ -10,9 +10,31 @@ import { log, type Logger } from './logger.js'
10
10
  const KILL_GRACE_MS = 5_000
11
11
 
12
12
  /**
13
- * Terminate a child process: SIGTERM first, then SIGKILL after a grace period if it
14
- * hasn't exited (ignored an ordinary terminate). The escalation timer is `unref()`d
15
- * so it never by itself keeps the event loop alive. Safe to call more than once.
13
+ * Signal a child and, when it was spawned detached (a process-group leader on POSIX — see
14
+ * `spawnDetached`), the whole group with it. The agent CLIs (`claude`/`codex`/Pi) spawn their
15
+ * own grandchildren (a shell tool, a build, their own git); a plain `child.kill()` reaps only
16
+ * the direct child and those grandchildren reparent to init and keep running unsupervised.
17
+ * `process.kill(-pid)` targets the group instead. Falls back to a direct kill on Windows (no
18
+ * POSIX process groups) or when the group send fails (already reaped, or the child wasn't
19
+ * spawned detached so no group of its own exists).
20
+ */
21
+ function signalTree(child: ChildProcess, signal: NodeJS.Signals): void {
22
+ if (child.pid !== undefined && process.platform !== 'win32') {
23
+ try {
24
+ process.kill(-child.pid, signal)
25
+ return
26
+ } catch {
27
+ // Fall through to the direct kill below.
28
+ }
29
+ }
30
+ child.kill(signal)
31
+ }
32
+
33
+ /**
34
+ * Terminate a child process (and its group — see {@link signalTree}): SIGTERM first, then
35
+ * SIGKILL after a grace period if it hasn't exited (ignored an ordinary terminate). The
36
+ * escalation timer is `unref()`d so it never by itself keeps the event loop alive. Safe to
37
+ * call more than once.
16
38
  *
17
39
  * An actual escalation to SIGKILL is logged at warn level: a process that ignores
18
40
  * SIGTERM and has to be force-killed is a signal worth seeing (a wedged Pi/CLI), and
@@ -23,11 +45,18 @@ export function killChildProcess(
23
45
  graceMs: number = KILL_GRACE_MS,
24
46
  logger: Logger = log,
25
47
  ): void {
26
- child.kill('SIGTERM')
48
+ signalTree(child, 'SIGTERM')
27
49
  setTimeout(() => {
28
50
  if (child.exitCode === null && child.signalCode === null) {
29
51
  logger.warn('killChildProcess: process ignored SIGTERM, escalating to SIGKILL', { graceMs })
30
- child.kill('SIGKILL')
52
+ signalTree(child, 'SIGKILL')
31
53
  }
32
54
  }, graceMs).unref()
33
55
  }
56
+
57
+ /**
58
+ * Whether a spawned agent CLI should be its own process-group leader so {@link killChildProcess}
59
+ * can reap the whole tree (its grandchildren) on abort. POSIX only; Windows has no process
60
+ * groups (and `detached` there spawns a new console we don't want), so it stays false.
61
+ */
62
+ export const spawnDetached = process.platform !== 'win32'
package/src/runner.ts CHANGED
@@ -123,6 +123,8 @@ interface JobEntry<TResult extends JobResultBase> extends JobView<TResult> {
123
123
  spanBuffer: ToolSpan[]
124
124
  /** Follow-up items buffered since the last drain (see {@link JobView.followUps}). */
125
125
  followUpBuffer: FollowUpLine[]
126
+ /** Abort the in-flight run (see {@link JobRegistry.abortAll}); set while running only. */
127
+ abort?: (reason: string) => void
126
128
  }
127
129
 
128
130
  /** Watchdog windows that bound every job. Tunable via the container's env. */
@@ -158,6 +160,7 @@ function toView<TResult extends JobResultBase>(entry: JobEntry<TResult>): JobVie
158
160
  promise: _promise,
159
161
  spanBuffer: _spanBuffer,
160
162
  followUpBuffer: _followUpBuffer,
163
+ abort: _abort,
161
164
  ...view
162
165
  } = entry
163
166
  return { ...view }
@@ -228,6 +231,35 @@ export class JobRegistry<TJob = unknown, TResult extends JobResultBase = JobResu
228
231
  return view
229
232
  }
230
233
 
234
+ /**
235
+ * Abort every RUNNING job (fires each run's abort signal, which SIGTERM→SIGKILLs its
236
+ * CLI/git children via `killChildProcess`). The graceful-shutdown hook: a harness dying
237
+ * to SIGTERM must not orphan a live agent subprocess — reparented, it would keep working
238
+ * unsupervised (and, in native local mode, on the developer's own login). Returns the
239
+ * number of jobs aborted.
240
+ */
241
+ abortAll(reason: string): number {
242
+ let aborted = 0
243
+ for (const entry of this.jobs.values()) {
244
+ if (entry.state === 'running' && entry.abort) {
245
+ entry.abort(reason)
246
+ aborted += 1
247
+ }
248
+ }
249
+ return aborted
250
+ }
251
+
252
+ /**
253
+ * How many jobs are still RUNNING. Graceful shutdown polls this so it can exit the moment the
254
+ * aborted jobs have actually settled (the common case: the CLI honours SIGTERM in ms) instead
255
+ * of waiting out a fixed kill-grace window.
256
+ */
257
+ runningCount(): number {
258
+ let running = 0
259
+ for (const entry of this.jobs.values()) if (entry.state === 'running') running += 1
260
+ return running
261
+ }
262
+
231
263
  private async drive(entry: JobEntry<TResult>, job: TJob): Promise<void> {
232
264
  const controller = new AbortController()
233
265
  let killReason: 'inactivity' | 'max-duration' | undefined
@@ -271,6 +303,8 @@ export class JobRegistry<TJob = unknown, TResult extends JobResultBase = JobResu
271
303
  resetInactivity()
272
304
  }
273
305
  resetInactivity()
306
+ // Expose the abort for shutdown (see abortAll); cleared in `finally` once the job settles.
307
+ entry.abort = (reason) => controller.abort(new Error(reason))
274
308
 
275
309
  jobLog.info('job started', {})
276
310
  try {
@@ -327,6 +361,7 @@ export class JobRegistry<TJob = unknown, TResult extends JobResultBase = JobResu
327
361
  } finally {
328
362
  clearTimeout(inactivity)
329
363
  clearTimeout(cap)
364
+ entry.abort = undefined
330
365
  entry.heartbeatAt = Date.now()
331
366
  }
332
367
  }
package/src/server.ts CHANGED
@@ -16,6 +16,12 @@ import { log } from './logger.js'
16
16
 
17
17
  const PORT = Number(process.env.PORT ?? 8080)
18
18
 
19
+ // Optional bind address. Default (unset) binds all interfaces — a container needs that for
20
+ // its published port. The native local transport runs the harness UNSANDBOXED on the
21
+ // developer's host and only ever connects over loopback, so it sets 127.0.0.1 to keep the
22
+ // agent-spawning API off the LAN.
23
+ const BIND_HOST = process.env.HARNESS_BIND_HOST?.trim() || undefined
24
+
19
25
  // Optional inbound auth. When HARNESS_SHARED_SECRET is set, every non-health
20
26
  // request must present a matching `x-harness-secret` header (constant-time
21
27
  // compared). When it is unset the harness behaves as before (open), so local/dev
@@ -145,9 +151,40 @@ const server = createServer((req, res) => {
145
151
 
146
152
  // Only auto-listen when run as the entry point (tests import handleRun directly).
147
153
  if (process.env.NODE_ENV !== 'test') {
148
- server.listen(PORT, () => {
149
- console.log(`executor-harness listening on :${PORT}`)
154
+ server.listen(PORT, BIND_HOST, () => {
155
+ console.log(`executor-harness listening on ${BIND_HOST ?? ''}:${PORT}`)
150
156
  })
157
+
158
+ // Graceful shutdown: dying to a bare SIGTERM/SIGINT (the default handler) would ORPHAN any
159
+ // in-flight `claude`/`codex`/git child — reparented, it keeps working unsupervised (and in
160
+ // native local mode on the developer's own login). Abort every running job first (the
161
+ // SIGTERM→SIGKILL escalation in killChildProcess), then exit as SOON as the aborted jobs have
162
+ // settled — the CLI usually honours SIGTERM in milliseconds, so don't block every shutdown on
163
+ // a fixed window. The 6s cap covers a job that ignored SIGTERM and had to be force-killed (the
164
+ // 5s escalation) plus a margin. Nothing running ⇒ exit immediately. A second signal takes the
165
+ // default (immediate) exit, since `once` leaves it unhandled.
166
+ const shutdown = (signal: string): void => {
167
+ const aborted = Object.values(KINDS).reduce(
168
+ (count, { registry }) => count + registry.abortAll(`harness shutting down (${signal})`),
169
+ 0,
170
+ )
171
+ log.info('shutting down', { signal, abortedJobs: aborted })
172
+ server.close()
173
+ if (aborted === 0) {
174
+ process.exit(0)
175
+ return
176
+ }
177
+ const deadline = Date.now() + 6_000
178
+ const timer = setInterval(() => {
179
+ const stillRunning = Object.values(KINDS).some(({ registry }) => registry.runningCount() > 0)
180
+ if (!stillRunning || Date.now() >= deadline) {
181
+ clearInterval(timer)
182
+ process.exit(0)
183
+ }
184
+ }, 50)
185
+ }
186
+ process.once('SIGTERM', () => shutdown('SIGTERM'))
187
+ process.once('SIGINT', () => shutdown('SIGINT'))
151
188
  }
152
189
 
153
190
  export { server }