@cat-factory/executor-harness 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/LICENSE +21 -0
  2. package/README.md +143 -0
  3. package/dist/agent-runner.js +389 -0
  4. package/dist/agent.js +810 -0
  5. package/dist/blueprint.js +367 -0
  6. package/dist/bootstrap.js +99 -0
  7. package/dist/ci-fixer.js +46 -0
  8. package/dist/coding-agent.js +285 -0
  9. package/dist/conflict-resolver.js +138 -0
  10. package/dist/embed.js +8 -0
  11. package/dist/explore.js +74 -0
  12. package/dist/failure.js +47 -0
  13. package/dist/fixer.js +44 -0
  14. package/dist/follow-ups.js +103 -0
  15. package/dist/frontend-infra.js +283 -0
  16. package/dist/fs-utils.js +11 -0
  17. package/dist/git.js +778 -0
  18. package/dist/job.js +409 -0
  19. package/dist/logger.js +27 -0
  20. package/dist/merger.js +135 -0
  21. package/dist/on-call.js +126 -0
  22. package/dist/pi-workspace.js +237 -0
  23. package/dist/pi.js +971 -0
  24. package/dist/process.js +25 -0
  25. package/dist/redact.js +109 -0
  26. package/dist/runner.js +228 -0
  27. package/dist/server.js +135 -0
  28. package/dist/spec.js +754 -0
  29. package/dist/structured-output.js +431 -0
  30. package/dist/tester.js +191 -0
  31. package/package.json +35 -0
  32. package/src/agent-runner.ts +484 -0
  33. package/src/agent.ts +948 -0
  34. package/src/coding-agent.ts +393 -0
  35. package/src/embed.ts +32 -0
  36. package/src/failure.ts +73 -0
  37. package/src/follow-ups.ts +106 -0
  38. package/src/frontend-infra.ts +340 -0
  39. package/src/fs-utils.ts +11 -0
  40. package/src/git.ts +955 -0
  41. package/src/job.ts +766 -0
  42. package/src/logger.ts +45 -0
  43. package/src/pi-workspace.ts +348 -0
  44. package/src/pi.ts +1236 -0
  45. package/src/process.ts +33 -0
  46. package/src/redact.ts +109 -0
  47. package/src/runner.ts +384 -0
  48. package/src/server.ts +153 -0
  49. package/src/structured-output.ts +524 -0
@@ -0,0 +1,103 @@
1
+ import { readFile } from 'node:fs/promises';
2
+ import { log } from './logger.js';
3
+ // The Coder's forward-looking side channel. As the implementer works it appends one
4
+ // JSON line per item to a sentinel file in its working directory; the harness tails
5
+ // that file and streams the new items OUT on the job view (drain-on-read), so the
6
+ // backend lifts them onto the run's step and the "Follow-up companion" lights up
7
+ // while the container is still running. This is the OUT-bound half only — there is no
8
+ // in-bound path back into a running container (an answer reaches the Coder via a
9
+ // backend-driven re-run, not by resuming the live process).
10
+ /** The sentinel file the Coder appends items to, relative to its working directory. */
11
+ export const FOLLOW_UPS_FILENAME = '.cat-follow-ups.jsonl';
12
/**
 * Normalise one parsed JSON line into a follow-up item.
 *
 * @param {unknown} value - The result of `JSON.parse` on one sentinel-file line.
 * @returns {{kind: 'question' | 'follow_up', title: string, detail: string, suggestedAction?: string} | null}
 *   The coerced item, or `null` when the value is not an object or has no non-blank string title.
 */
function coerceLine(value) {
    if (value === null || typeof value !== 'object') {
        return null;
    }
    const { title: rawTitle, kind: rawKind, detail: rawDetail, suggestedAction: rawAction } = value;
    // A title is mandatory: anything that is not a non-blank string makes the line unusable.
    const title = typeof rawTitle === 'string' ? rawTitle.trim() : '';
    if (title === '') {
        return null;
    }
    const item = {
        // Only 'question' is recognised explicitly; every other kind is a plain follow-up.
        kind: rawKind === 'question' ? 'question' : 'follow_up',
        // Titles are capped at 300 characters.
        title: title.slice(0, 300),
        detail: typeof rawDetail === 'string' ? rawDetail : '',
    };
    // The suggested action is optional and omitted entirely (not set to undefined) when blank.
    const action = typeof rawAction === 'string' ? rawAction.trim() : '';
    if (action) {
        item.suggestedAction = action;
    }
    return item;
}
32
/**
 * Tails an append-only JSONL sentinel file, yielding only the NEW complete lines on each
 * {@link FollowUpTailer#poll}. Tracks how many characters have been consumed so a
 * partially-written trailing line (no newline yet) is held back until it completes.
 * Tolerant: a malformed line is skipped, a missing file yields nothing, a file that shrank
 * (truncated or recreated) is re-read from the start, and a failing `onItems` handler is
 * logged rather than propagated — surfacing follow-ups must never disturb the coding run.
 */
export class FollowUpTailer {
    filePath;
    onItems;
    logger;
    /** Number of characters of the file already processed (always ends just after a newline). */
    consumed = 0;
    /** Running count of complete-but-unparsable lines, so silent drops become visible. */
    skipped = 0;
    /**
     * @param {string} filePath - Path of the sentinel file to tail.
     * @param {(items: object[]) => unknown} onItems - Receives each non-empty batch of coerced items.
     * @param {{warn: Function, info: Function}} [logger] - Logger; defaults to the module logger.
     */
    constructor(filePath, onItems, logger = log) {
        this.filePath = filePath;
        this.onItems = onItems;
        this.logger = logger;
    }
    /**
     * Read any new complete lines and emit the coerced items. Best-effort; never throws.
     *
     * @returns {Promise<void>}
     */
    async poll() {
        let content;
        try {
            content = await readFile(this.filePath, 'utf8');
        }
        catch {
            // Not created yet (or vanished): nothing to surface.
            return;
        }
        if (content.length < this.consumed) {
            // The file is shorter than what we already consumed, so it was truncated or
            // recreated. The old offset is meaningless against the new content (it would stall
            // the tailer, then slice mid-line once the file outgrew it) — start over.
            this.logger.warn('follow-ups: sentinel file shrank, re-reading from start', {
                consumed: this.consumed,
                length: content.length,
            });
            this.consumed = 0;
        }
        if (content.length === this.consumed)
            return;
        const fresh = content.slice(this.consumed);
        // Only consume up to the last newline; hold any trailing partial line for next poll.
        const lastNewline = fresh.lastIndexOf('\n');
        if (lastNewline === -1)
            return;
        this.consumed += lastNewline + 1;
        const items = [];
        let skippedThisPoll = 0;
        for (const raw of fresh.slice(0, lastNewline).split('\n')) {
            const line = raw.trim();
            if (!line)
                continue;
            try {
                const coerced = coerceLine(JSON.parse(line));
                if (coerced)
                    items.push(coerced);
                else
                    skippedThisPoll++;
            }
            catch {
                // A newline-terminated line that is not valid JSON. `consumed` has already moved
                // past it, so it will NOT be re-read — count it so the drop is reported below.
                skippedThisPoll++;
            }
        }
        if (skippedThisPoll > 0) {
            // A complete line that didn't yield an item is dropped for good (consumed past it).
            // Surface it at warn with a running total rather than swallowing it silently — a
            // steadily-growing count points at a malformed-emitter bug, not a transient race.
            this.skipped += skippedThisPoll;
            this.logger.warn('follow-ups: skipped malformed lines', {
                skipped: skippedThisPoll,
                skippedTotal: this.skipped,
            });
        }
        if (items.length > 0) {
            this.logger.info('follow-ups: surfaced items', { count: items.length });
            try {
                // Awaited so a rejected promise from an async handler is caught here as well,
                // keeping the "never throws" contract of poll().
                await this.onItems(items);
            }
            catch (err) {
                this.logger.warn('follow-ups: items handler failed', {
                    count: items.length,
                    error: err instanceof Error ? err.message : String(err),
                });
            }
        }
    }
}
@@ -0,0 +1,283 @@
1
+ import { execFile, spawn } from 'node:child_process';
2
+ import { promisify } from 'node:util';
3
+ import { writeFile } from 'node:fs/promises';
4
+ import { join } from 'node:path';
5
+ import { killChildProcess } from './process.js';
6
+ import { pathExists } from './fs-utils.js';
7
+ import { captureRedactedOutput, redactSecrets } from './redact.js';
8
+ import { log } from './logger.js';
9
+ const exec = promisify(execFile);
10
+ // The self-contained frontend UI-test stand-up (the `tester-ui` flow). In ONE container we
11
+ // build the frontend, stand WireMock up for its mocked upstreams, serve the built app, and
12
+ // point the agent at it — all as localhost PROCESSES (no Docker-in-Docker), so it works on
13
+ // Cloudflare and Apple `container` too. The backend has already resolved every upstream to a
14
+ // concrete URL and handed them in `infra.env`; this file only builds/serves/mocks. WireMock
15
+ // standalone (the jar at `$WIREMOCK_JAR`) and a static file server (`serve`) plus the package
16
+ // managers are provided by the UI image (Dockerfile.ui).
17
+ /** Where the WireMock standalone jar lives in the image (overridable for tests). */
18
+ const WIREMOCK_JAR = process.env.WIREMOCK_JAR ?? '/opt/wiremock/wiremock-standalone.jar';
19
/**
 * Defaults the backend may omit; kept here so the harness owns the runtime shape.
 * Frozen so an accidental write to this shared table fails loudly instead of silently
 * changing the defaults for every later stand-up in the same process.
 */
const DEFAULTS = Object.freeze({
    packageManager: 'pnpm',
    buildScript: 'build',
    outputDir: 'dist',
    serveMode: 'static',
    servePort: 4173,
    envInjection: 'build',
    mockMappingsPath: 'mocks/',
    wiremockPort: 8089,
});
30
/**
 * The install command for a package manager, as an argv array (`[binary, ...args]`).
 *
 * An explicit `spec.install` string overrides the default and is split on whitespace. An
 * `install` that is blank (or not a string) yields no tokens, so it is treated as absent —
 * otherwise the caller would try to execute an `undefined` binary.
 *
 * @param {{install?: string, packageManager?: string}} spec - The frontend infra spec.
 * @returns {string[]} The command and its arguments; never empty.
 */
export function installCommand(spec) {
    const explicit = typeof spec.install === 'string'
        ? spec.install.split(/\s+/).filter(Boolean)
        : [];
    if (explicit.length > 0)
        return explicit;
    const pm = spec.packageManager ?? DEFAULTS.packageManager;
    return [pm, 'install'];
}
37
/**
 * Install an `'error'` handler on a spawned background process and hand the process back.
 *
 * A `ChildProcess` is an EventEmitter; an `'error'` event with no listener (e.g. ENOENT for a
 * missing binary) is re-thrown by Node as an uncaught exception, which would take the whole
 * harness job server down. The frontend stand-up is best-effort, so the error is written to
 * the log instead.
 *
 * @param {import('node:events').EventEmitter} child - The spawned process.
 * @param {string} label - Human-readable process name used in the log message.
 * @param {{warn: Function}} logger - Logger receiving the warning.
 * @returns The same `child`, for call chaining.
 */
function guardProcess(child, label, logger) {
    const reportError = (err) => {
        const error = err instanceof Error ? err.message : String(err);
        logger.warn(`agent(frontend): ${label} process error`, { error });
    };
    child.on('error', reportError);
    return child;
}
53
/**
 * Build the frontend, start WireMock, serve the built app and health-check both. Best-effort,
 * like the docker-compose stand-up: a failed build / server that never binds is surfaced to
 * the agent as a prompt note (and captured on the record) rather than failing the job — the
 * agent then reports the gap as a concern. Every path returns the processes to tear down.
 *
 * @param {string} dir - The frontend repository working directory.
 * @param {object} infra - The frontend infra spec; omitted fields fall back to `DEFAULTS`.
 * @param {AbortSignal} [signal] - The run's abort signal, forwarded to the child commands and probes.
 * @param {() => void} [onActivity] - Heartbeat callback invoked every 30s while the stand-up runs.
 * @param {{info: Function, warn: Function}} [logger] - Logger; defaults to the module logger.
 * @returns {Promise<{processes: object[], serveUrl?: string, note?: string, record: object}>}
 *   `serveUrl` is present only when the app became reachable; `note` describes any gap.
 */
export async function standUpFrontend(dir, infra, signal, onActivity, logger = log) {
    const startedAt = Date.now();
    const processes = [];
    // Keep the run's inactivity watchdog fed while the (activity-silent) install → build → serve
    // stand-up runs. A real frontend's `install` + `build` can exceed the harness inactivity
    // window (default 10 min, JOB_INACTIVITY_MS) — and unlike the Pi phase this stand-up emits
    // no activity events of its own — so without this heartbeat the watchdog would abort the job
    // MID-BUILD with a misleading "likely hung". `unref()` so it never itself holds the loop open.
    const heartbeat = setInterval(() => onActivity?.(), 30_000);
    if (typeof heartbeat.unref === 'function')
        heartbeat.unref();
    const servePort = infra.servePort ?? DEFAULTS.servePort;
    const wiremockPort = infra.wiremockPort ?? DEFAULTS.wiremockPort;
    const serveUrl = `http://localhost:${servePort}`;
    // Raw (un-redacted) stage output; redacted+bounded ONCE when a record is built.
    const rawOutput = [];
    const pushOutput = (stdout, stderr) => {
        const merged = [String(stdout ?? ''), String(stderr ?? '')]
            .map((s) => s.trim())
            .filter(Boolean)
            .join('\n');
        if (merged)
            rawOutput.push(merged);
    };
    // Build the stand-up record: `started: false` unless `extra` overrides it.
    const record = (extra) => {
        const logs = rawOutput.length ? captureRedactedOutput(rawOutput.join('\n'), '') : undefined;
        return {
            started: false,
            at: Date.now(),
            durationMs: Date.now() - startedAt,
            ...(logs ? { logs } : {}),
            ...extra,
        };
    };
    const buildEnv = (infra.envInjection ?? DEFAULTS.envInjection) === 'build' ? (infra.env ?? {}) : {};
    try {
        // 1) Install dependencies.
        const install = installCommand(infra);
        logger.info('agent(frontend): installing', { command: install.join(' ') });
        const installed = await exec(install[0], install.slice(1), {
            cwd: dir,
            signal,
            timeout: 8 * 60_000,
            maxBuffer: 16 * 1024 * 1024,
        });
        pushOutput(installed.stdout, installed.stderr);
        // 2) Build (build-time env injected here; runtime injection writes a shim after).
        const pm = infra.packageManager ?? DEFAULTS.packageManager;
        const buildScript = infra.buildScript ?? DEFAULTS.buildScript;
        logger.info('agent(frontend): building', { buildScript });
        const built = await exec(pm, ['run', buildScript], {
            cwd: dir,
            signal,
            timeout: 12 * 60_000,
            maxBuffer: 16 * 1024 * 1024,
            env: { ...process.env, ...buildEnv },
        });
        pushOutput(built.stdout, built.stderr);
        // Runtime injection: write a `window.env` shim into the build output the app can load
        // (`<outputDir>/env.js`). Best-effort — the app must reference it; a build-time app ignores it.
        const outputDir = infra.outputDir ?? DEFAULTS.outputDir;
        if ((infra.envInjection ?? DEFAULTS.envInjection) === 'runtime' && infra.env) {
            // The shim is written under `outputDir`, which the STATIC server serves as its root. A
            // `serveMode: 'command'` dev/preview server serves from its OWN root, so it won't expose
            // `<outputDir>/env.js` — the shim is then inert. Warn rather than silently no-op, and the
            // `env` is ALSO forwarded to the serve process (see startServe) so a runtime-reading
            // server can still pick the URLs up from the environment.
            if ((infra.serveMode ?? DEFAULTS.serveMode) === 'command') {
                logger.warn('agent(frontend): runtime env injection with serveMode:command — the window.env shim ' +
                    'is only served in static mode; relying on the forwarded env instead', { outputDir });
            }
            const shim = `window.env = ${JSON.stringify(infra.env)};\n`;
            await writeFile(join(dir, outputDir, 'env.js'), shim, 'utf8').catch((err) => {
                // Best-effort, but no longer silent: a missing/renamed output dir would drop the shim
                // and the app would read no URLs, so surface it in the log for diagnosis.
                logger.warn('agent(frontend): could not write runtime env shim', {
                    path: join(outputDir, 'env.js'),
                    error: err instanceof Error ? err.message : String(err),
                });
            });
        }
        // 3) WireMock for the mocked upstreams. Seeded from the FE repo's mappings dir when present;
        // otherwise it still binds the port (unmatched requests 404, gentler than ECONNREFUSED).
        processes.push(await startWireMock(dir, infra, wiremockPort, logger));
        // 4) Serve the built app.
        processes.push(startServe(dir, infra, servePort, outputDir, logger));
        // 5) Health-check the served app AND WireMock before handing off, concurrently (WireMock is
        // a JVM that cold-starts slower than the static server). A dead WireMock would otherwise let
        // the agent start and hit ECONNREFUSED on the app's first mocked-upstream call.
        const wiremockUrl = `http://localhost:${wiremockPort}/__admin/`;
        const [appHealthy, wiremockHealthy] = await Promise.all([
            waitForHttp(serveUrl, signal, logger),
            waitForHttp(wiremockUrl, signal, logger),
        ]);
        if (!appHealthy) {
            return {
                processes,
                note: `the frontend was built but its server never became reachable at ${serveUrl}`,
                record: record({ error: `frontend server did not become reachable at ${serveUrl}` }),
            };
        }
        if (!wiremockHealthy) {
            // The app is up but the mock upstream isn't — the agent can still smoke-test the app;
            // flag that mocked-backend calls may fail so it reports the gap rather than treating a
            // mock ECONNREFUSED as a real defect.
            return {
                processes,
                serveUrl,
                note: `the frontend is served at ${serveUrl}, but WireMock (the mock for its other backend ` +
                    `upstreams) never became reachable on port ${wiremockPort}, so calls to mocked ` +
                    `upstreams may fail — flag any such failures as an infra gap, not an app defect`,
                record: record({
                    started: true,
                    error: `WireMock did not become reachable on port ${wiremockPort}`,
                }),
            };
        }
        logger.info('agent(frontend): app is serving', { serveUrl, wiremockPort });
        return {
            processes,
            serveUrl,
            record: record({ started: true }),
        };
    }
    catch (err) {
        // The failure message of a child command can embed its output, so redact it ONCE and use
        // the redacted text everywhere it leaves this function: the log, the note handed to the
        // agent, and the record. (Previously only the record's copy was redacted.)
        const note = redactSecrets(err instanceof Error ? err.message : String(err));
        logger.warn('agent(frontend): stand-up failed', { error: note });
        // A failed `exec` carries the command's output on the error; `?.` keeps this safe even
        // if something threw a nullish value.
        pushOutput(err?.stdout, err?.stderr);
        return { processes, note, record: record({ error: note }) };
    }
    finally {
        // The stand-up is done (handed off, or failed) — the Pi phase feeds the watchdog from here.
        clearInterval(heartbeat);
    }
}
195
/**
 * Terminate the frontend stand-up's processes (WireMock + the served app). Best-effort: a
 * process that cannot be killed is logged at warn and the remaining ones are still attempted.
 *
 * @param {Iterable<object> | null | undefined} processes - The processes returned by the
 *   stand-up; a nullish value is treated as "nothing to tear down".
 * @param {{warn: Function}} [logger] - Logger; defaults to the module logger.
 * @returns {Promise<void>}
 */
export async function tearDownFrontend(processes, logger = log) {
    for (const child of processes ?? []) {
        try {
            killChildProcess(child, undefined, logger);
        }
        catch (err) {
            // The container is ephemeral and torn down with the run anyway, so this is never
            // fatal — but leave a trace so a lingering process can be diagnosed.
            logger.warn('agent(frontend): could not terminate process', {
                error: err instanceof Error ? err.message : String(err),
            });
        }
    }
}
206
/**
 * Launch WireMock standalone (`java -jar $WIREMOCK_JAR`) as a background process listening on
 * `wiremockPort`.
 *
 * The mappings directory is `infra.wiremockMappingsPath` (default `DEFAULTS.mockMappingsPath`)
 * resolved against `dir`. When it exists it is passed as `--root-dir`; when it does not, a
 * warning is logged and WireMock is started without a root dir. The process is spawned with
 * its stdio ignored and wrapped by {@link guardProcess}, so a spawn failure surfaces as a
 * logged process `'error'` instead of an exception from this function.
 *
 * @param {string} dir - The frontend repository working directory (also the process cwd).
 * @param {object} infra - The frontend infra spec.
 * @param {number} wiremockPort - Port WireMock should bind.
 * @param {{info: Function, warn: Function}} logger - Logger.
 * @returns {Promise<object>} The spawned (guarded) child process.
 */
async function startWireMock(dir, infra, wiremockPort, logger) {
    const mappingsPath = infra.wiremockMappingsPath ?? DEFAULTS.mockMappingsPath;
    const rootDir = join(dir, mappingsPath);
    const hasMappings = await pathExists(rootDir);
    const javaArgs = [
        '-jar',
        WIREMOCK_JAR,
        '--port',
        String(wiremockPort),
        '--disable-banner',
        // Only point WireMock at the mappings root when it is actually there.
        ...(hasMappings ? ['--root-dir', rootDir] : []),
    ];
    if (!hasMappings) {
        logger.warn('agent(frontend): no WireMock mappings dir, starting empty', { mappingsPath });
    }
    logger.info('agent(frontend): starting WireMock', { wiremockPort, hasMappings });
    const child = spawn('java', javaArgs, { cwd: dir, stdio: 'ignore' });
    return guardProcess(child, 'WireMock', logger);
}
227
/**
 * Serve the built app on `servePort`: a static file server of `outputDir` (`static` mode), or
 * the FE's own serve script (`command` mode, e.g. `preview`). In `command` mode the port is
 * passed as the `PORT` env var, so the script MUST honour it (else it binds its own default
 * port and the health-check — which polls `servePort` — never sees it), and `infra.env` is
 * ALSO forwarded so a runtime-reading dev/preview server sees it. `command` mode without a
 * `serveScript` cannot run a script, so it falls back to the static server — with a warning,
 * because that fallback is almost certainly a misconfiguration rather than intent.
 *
 * @param {string} dir - The frontend repository working directory (also the process cwd).
 * @param {object} infra - The frontend infra spec.
 * @param {number} servePort - Port the app must be reachable on.
 * @param {string} outputDir - Build output directory served in static mode.
 * @param {{info: Function, warn: Function}} logger - Logger.
 * @returns The spawned (guarded) background process.
 */
function startServe(dir, infra, servePort, outputDir, logger) {
    const mode = infra.serveMode ?? DEFAULTS.serveMode;
    if (mode === 'command') {
        if (infra.serveScript) {
            const pm = infra.packageManager ?? DEFAULTS.packageManager;
            logger.info('agent(frontend): serving via script', {
                serveScript: infra.serveScript,
                servePort,
            });
            return guardProcess(spawn(pm, ['run', infra.serveScript], {
                cwd: dir,
                stdio: 'ignore',
                // PORT is spread last so the health-check's port is authoritative even if
                // `infra.env` carries its own PORT. (Spreading an undefined `infra.env` is a
                // no-op, so no `?? {}` fallback is needed.)
                env: { ...process.env, ...infra.env, PORT: String(servePort) },
            }), 'serve', logger);
        }
        logger.warn('agent(frontend): serveMode:command without a serveScript — falling back to the static server', { outputDir, servePort });
    }
    logger.info('agent(frontend): serving static output', { outputDir, servePort });
    // `serve -s` single-page fallback so a client-routed SPA resolves deep links to index.html.
    return guardProcess(spawn('serve', ['-s', outputDir, '-l', String(servePort)], { cwd: dir, stdio: 'ignore' }), 'serve', logger);
}
258
/**
 * Poll a URL until it answers (any HTTP status) or the timeout elapses.
 *
 * Each attempt is bounded by 5s (or by the time left until the deadline, whichever is
 * shorter) and by the run's own `signal`. Between attempts it backs off for up to 1s; the
 * back-off ends early on abort and never extends past the deadline.
 *
 * @param {string} url - The URL to probe.
 * @param {AbortSignal | undefined} signal - The run's abort signal; an abort returns `false`.
 * @param {{warn: Function}} logger - Logger; receives a warning when the deadline passes.
 * @param {number} [timeoutMs=90000] - Overall budget in milliseconds.
 * @returns {Promise<boolean>} `true` as soon as any HTTP response arrives; `false` on abort
 *   or timeout.
 */
async function waitForHttp(url, signal, logger, timeoutMs = 90_000) {
    const deadline = Date.now() + timeoutMs;
    // Abort-aware pause: resolves after `ms`, or immediately when the run is aborted, so an
    // aborted run does not sit out the rest of a back-off.
    const pause = (ms) => new Promise((resolve) => {
        const finish = () => {
            clearTimeout(timer);
            signal?.removeEventListener('abort', finish);
            resolve();
        };
        const timer = setTimeout(finish, ms);
        signal?.addEventListener('abort', finish, { once: true });
    });
    while (Date.now() < deadline) {
        if (signal?.aborted)
            return false;
        try {
            // Abort a hung probe on the per-attempt timeout AND on the run's own signal, so an
            // aborted run doesn't wait out an in-flight fetch. The attempt is also capped to the
            // time left so the overall `timeoutMs` is honoured.
            const attemptMs = Math.max(1, Math.min(5_000, Math.ceil(deadline - Date.now())));
            const attemptTimeout = AbortSignal.timeout(attemptMs);
            const attemptSignal = signal
                ? AbortSignal.any([signal, attemptTimeout])
                : attemptTimeout;
            const res = await fetch(url, { signal: attemptSignal });
            // Any response (even a 404) means the server is up and accepting connections. The
            // body is of no interest — cancel it so the connection is released promptly instead
            // of lingering until the unread body is garbage-collected.
            await res.body?.cancel().catch(() => { });
            return true;
        }
        catch {
            // Not up yet — back off and retry.
        }
        if (signal?.aborted)
            return false;
        const remaining = deadline - Date.now();
        if (remaining <= 0)
            break;
        await pause(Math.min(1_000, remaining));
    }
    logger.warn('agent(frontend): health-check timed out', { url, timeoutMs });
    return false;
}
@@ -0,0 +1,11 @@
1
+ import { stat } from 'node:fs/promises';
2
/**
 * Report whether `path` exists (file or directory).
 *
 * Any `stat` failure — ENOENT or otherwise — is treated as "does not exist"; this never rejects.
 *
 * @param {string} path - The path to check.
 * @returns {Promise<boolean>} `true` when `stat` succeeds, `false` when it fails.
 */
export async function pathExists(path) {
    const found = await stat(path).then(() => true, () => false);
    return found;
}
11
+ }