@cat-factory/executor-harness 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +143 -0
  3. package/dist/agent-runner.js +389 -0
  4. package/dist/agent.js +810 -0
  5. package/dist/blueprint.js +367 -0
  6. package/dist/bootstrap.js +99 -0
  7. package/dist/ci-fixer.js +46 -0
  8. package/dist/coding-agent.js +285 -0
  9. package/dist/conflict-resolver.js +138 -0
  10. package/dist/embed.js +8 -0
  11. package/dist/explore.js +74 -0
  12. package/dist/failure.js +47 -0
  13. package/dist/fixer.js +44 -0
  14. package/dist/follow-ups.js +103 -0
  15. package/dist/frontend-infra.js +283 -0
  16. package/dist/fs-utils.js +11 -0
  17. package/dist/git.js +778 -0
  18. package/dist/job.js +409 -0
  19. package/dist/logger.js +27 -0
  20. package/dist/merger.js +135 -0
  21. package/dist/on-call.js +126 -0
  22. package/dist/pi-workspace.js +237 -0
  23. package/dist/pi.js +971 -0
  24. package/dist/process.js +25 -0
  25. package/dist/redact.js +109 -0
  26. package/dist/runner.js +228 -0
  27. package/dist/server.js +135 -0
  28. package/dist/spec.js +754 -0
  29. package/dist/structured-output.js +431 -0
  30. package/dist/tester.js +191 -0
  31. package/package.json +35 -0
  32. package/src/agent-runner.ts +484 -0
  33. package/src/agent.ts +948 -0
  34. package/src/coding-agent.ts +393 -0
  35. package/src/embed.ts +32 -0
  36. package/src/failure.ts +73 -0
  37. package/src/follow-ups.ts +106 -0
  38. package/src/frontend-infra.ts +340 -0
  39. package/src/fs-utils.ts +11 -0
  40. package/src/git.ts +955 -0
  41. package/src/job.ts +766 -0
  42. package/src/logger.ts +45 -0
  43. package/src/pi-workspace.ts +348 -0
  44. package/src/pi.ts +1236 -0
  45. package/src/process.ts +33 -0
  46. package/src/redact.ts +109 -0
  47. package/src/runner.ts +384 -0
  48. package/src/server.ts +153 -0
  49. package/src/structured-output.ts +524 -0
@@ -0,0 +1,25 @@
1
+ import { log } from './logger.js';
2
+ // Shared child-process lifecycle helpers. Every CLI the harness spawns (Pi and the
3
+ // subscription harnesses) must die the same way when the watchdog or a no-progress
4
+ // guard aborts, so the SIGTERM→SIGKILL escalation lives here rather than being
5
+ // re-implemented per runner.
6
// Default grace period between the SIGTERM and the SIGKILL escalation.
const KILL_GRACE_MS = 5_000;

/**
 * Terminate a child process in two steps: send SIGTERM right away, then send SIGKILL
 * once the grace period has elapsed if the child still has neither an exit code nor a
 * terminating signal recorded.
 *
 * The escalation timer is `unref()`d, so on its own it never keeps the event loop
 * alive. Calling this more than once for the same child is safe.
 *
 * An escalation is logged at warn level, because a process that ignores SIGTERM and
 * has to be force-killed is worth seeing. Pass a child logger to carry the run's `jobId`.
 *
 * @param child   The spawned process (reads `exitCode` / `signalCode`, calls `kill`).
 * @param graceMs Milliseconds to wait after SIGTERM before escalating.
 * @param logger  Logger exposing `warn(message, fields)`; defaults to the module logger.
 */
export function killChildProcess(child, graceMs = KILL_GRACE_MS, logger = log) {
  const stillRunning = () => child.exitCode === null && child.signalCode === null;

  const escalate = () => {
    // The child went away within the grace period: nothing left to do.
    if (!stillRunning()) {
      return;
    }
    logger.warn('killChildProcess: process ignored SIGTERM, escalating to SIGKILL', { graceMs });
    child.kill('SIGKILL');
  };

  child.kill('SIGTERM');
  const escalationTimer = setTimeout(escalate, graceMs);
  escalationTimer.unref();
}
package/dist/redact.js ADDED
@@ -0,0 +1,109 @@
1
+ // Single source of truth for credential redaction. Two complementary rules run on
2
+ // EVERY redaction so no error path can scrub one class of secret and leak the other:
3
+ //
4
+ // - PATTERN-based: scrubs credential SHAPES (URL userinfo, `x-access-token:<tok>`,
5
+ // bare GitHub token prefixes, and `KEY=value` / `KEY: value` assignments whose key
6
+ // names a credential) even when the exact value isn't known ahead of time — this is
7
+ // what catches a freshly-minted installation token in a git error, or a plaintext
8
+ // `POSTGRES_PASSWORD=…` echoed by a docker-compose dependency stand-up.
9
+ // - VALUE-based: scrubs a list of KNOWN secret strings (the leased subscription
10
+ // token + any token-like JSON leaf harvested from a credential blob).
11
+ //
12
+ // Historically these lived in two modules (git.ts pattern-only, agent-runner.ts
13
+ // value-only) and ran on disjoint paths, so a secret only one rule covered could leak
14
+ // on the other. They are unified here.
15
// Below this length a "known secret" is too short to scrub without mangling
// legitimate output (it would replace common substrings).
const MIN_REDACT_LEN = 6;

// Only harvest token-like JSON leaves: real OAuth access/refresh tokens and ids are
// long, while short values (`auth_mode: "chatgpt"`, `type: "oauth"`, …) are non-secret
// words that would over-redact legitimate error text if scrubbed. 12 chars is a safe
// floor below which a value is not a credential.
const MIN_HARVEST_LEN = 12;

// `KEY=value` / `KEY: value` assignments whose key NAMES a credential. Catches plaintext
// secrets the shape rules miss — e.g. a docker-compose dependency echoing
// `POSTGRES_PASSWORD=hunter2` or `DATABASE_PASSWORD: hunter2` on a failed stand-up.
// The key token is matched within a surrounding identifier so `DB_ACCESS_KEY`/`api_key`
// are covered; `auth` is deliberately excluded so it can't clobber a git `Author:` line.
// The value is the first whitespace-delimited run.
const CREDENTIAL_ASSIGNMENT = /\b([A-Za-z0-9_]*(?:password|passwd|pwd|secret|token|key|credential)[A-Za-z0-9_]*\s*[:=]\s*)\S+/gi;

/**
 * Strip credentials out of any string before it is logged or stored.
 *
 * Pattern rules run first: URL userinfo (`https://user:pass@host`),
 * `x-access-token:<token>`, bare `ghs_`/`ghp_`/`gho_`/`github_pat_` shapes, and
 * credential-named `KEY=value` / `KEY: value` assignments. Then every supplied
 * known-secret value is replaced with `***`. Idempotent — safe to call on
 * already-redacted text.
 *
 * @param input        Text to scrub. A non-string is stringified (nullish becomes '')
 *                     so that redacting on an error path cannot itself throw.
 * @param knownSecrets Exact secret values to scrub. Entries that are not strings or
 *                     are shorter than MIN_REDACT_LEN are ignored.
 * @returns The scrubbed text.
 */
export function redact(input, knownSecrets = []) {
  const text = typeof input === 'string' ? input : String(input ?? '');
  let out = text
    .replace(/(https?:\/\/)[^@\s/]*@/gi, '$1***@')
    .replace(/x-access-token:[^@\s]+/gi, 'x-access-token:***')
    .replace(/\b(gh[pso]_|github_pat_)[A-Za-z0-9_]+/g, '$1***')
    .replace(CREDENTIAL_ASSIGNMENT, '$1***');

  // Scrub the longest values first. If a short secret is a substring of a longer one
  // and is replaced first, the longer one no longer matches and its remainder leaks.
  const scrubList = [...new Set(knownSecrets ?? [])]
    .filter((secret) => typeof secret === 'string' && secret.length >= MIN_REDACT_LEN)
    .sort((a, b) => b.length - a.length);

  for (const secret of scrubList) {
    out = out.split(secret).join('***');
  }
  return out;
}
51
/**
 * Pattern-only redaction: runs `redact` with an empty known-value list.
 * Kept for callers that have no secret list to supply.
 */
export function redactSecrets(input) {
  const noKnownValues = [];
  return redact(input, noKnownValues);
}
55
/** Upper bound on the captured command output kept on an infra record (the tail is kept). */
export const MAX_CAPTURED_OUTPUT_CHARS = 16_000;

/**
 * Merge captured stdout and stderr into one redacted, tail-bounded string.
 *
 * Each stream is stringified and trimmed; empty streams are dropped and the rest are
 * joined with a newline. The merged text is redacted, and when it is longer than
 * {@link MAX_CAPTURED_OUTPUT_CHARS} only the LAST that-many characters are kept (where a
 * failure's error lives), prefixed with a marker saying how many were trimmed.
 *
 * @returns The stored string, or `undefined` when both streams are empty so the record
 *          stays sparse.
 */
export function captureRedactedOutput(stdout, stderr) {
  const parts = [];
  for (const stream of [stdout, stderr]) {
    const text = String(stream ?? '').trim();
    if (text) {
      parts.push(text);
    }
  }
  if (parts.length === 0) {
    return undefined;
  }

  const scrubbed = redactSecrets(parts.join('\n'));
  const overflow = scrubbed.length - MAX_CAPTURED_OUTPUT_CHARS;
  if (overflow <= 0) {
    return scrubbed;
  }
  return `…(${overflow} earlier chars trimmed)\n${scrubbed.slice(-MAX_CAPTURED_OUTPUT_CHARS)}`;
}
75
/**
 * Walk a parsed JSON value and add every string leaf of at least MIN_HARVEST_LEN
 * characters to `out`. Arrays are walked element by element and objects by their
 * values (keys are not harvested); every other kind of value is ignored.
 *
 * @param value Any parsed JSON value.
 * @param out   Set receiving the harvested strings.
 */
function collectStrings(value, out) {
  if (typeof value === 'string') {
    if (value.length >= MIN_HARVEST_LEN) {
      out.add(value);
    }
    return;
  }
  if (value === null || typeof value !== 'object') {
    return;
  }
  const children = Array.isArray(value) ? value : Object.values(value);
  for (const child of children) {
    collectStrings(child, out);
  }
}
90
/**
 * Build the list of secret strings to scrub from a run's stderr/output.
 *
 * For Claude (and the Anthropic-compatible vendors GLM/Kimi/DeepSeek) the credential IS
 * the token string, so the whole-string entry covers it. For Codex the credential is a
 * whole `auth.json` blob, so the token-like string values parsed out of it are added
 * too: a token echoed on its OWN would otherwise slip past a whole-blob-only match.
 *
 * @param subscriptionToken The leased credential (a bare token or a JSON blob).
 * @returns The distinct secret strings, whole credential first.
 */
export function secretsToRedact(subscriptionToken) {
  const harvested = new Set();
  if (subscriptionToken) {
    harvested.add(subscriptionToken);
  }
  try {
    const parsed = JSON.parse(subscriptionToken);
    collectStrings(parsed, harvested);
  } catch {
    // Not JSON (a Claude OAuth token / API key) — the whole-string entry covers it.
  }
  return Array.from(harvested);
}
package/dist/runner.js ADDED
@@ -0,0 +1,228 @@
1
+ import { redactSecrets } from './redact.js';
2
+ import { log } from './logger.js';
3
+ import { failureCauseOf, inactivityAbortMessage, maxDurationAbortMessage, } from './failure.js';
4
// Largest delay a Node.js timer accepts. A larger delay is coerced to 1 ms, which
// would make a watchdog armed with it fire almost immediately.
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Parse a positive, finite number from an environment value.
 *
 * @param value    Raw environment string (may be undefined or empty).
 * @param fallback Returned when the value is missing, not numeric, or not > 0.
 */
function intEnv(value, fallback) {
  const n = value ? Number(value) : NaN;
  return Number.isFinite(n) && n > 0 ? n : fallback;
}

/**
 * Read the watchdog limits from the environment.
 *
 * Both limits are used as `setTimeout` delays by the job registry, so each is capped
 * at MAX_TIMER_DELAY_MS: an oversized setting then means "as long as a timer allows"
 * instead of silently collapsing to a 1 ms timeout.
 *
 * @param env Environment map; defaults to `process.env`.
 * @returns `{ maxDurationMs, inactivityMs }`.
 */
export function loadRunnerLimits(env = process.env) {
  const timerMs = (raw, fallback) => Math.min(intEnv(raw, fallback), MAX_TIMER_DELAY_MS);
  return {
    // 60 minutes: generous headroom for serious multi-file coding tasks while
    // still bounding a runaway container.
    maxDurationMs: timerMs(env.JOB_MAX_DURATION_MS, 60 * 60_000),
    // 10 minutes of zero output is treated as hung (a single long LLM/tool call
    // is far shorter; Pi streams events as it works). The per-git command ceiling
    // (`GIT_TIMEOUT_MS` in git.ts) is DERIVED from this value — a fixed margin below
    // it — so a slow clone/push (which emits no activity events) always times out
    // with git's own clear reason rather than this watchdog's "likely hung" message,
    // for any configured window. See the invariant note in git.ts.
    inactivityMs: timerMs(env.JOB_INACTIVITY_MS, 10 * 60_000),
  };
}
22
/**
 * Public view of a registry entry: a shallow copy of the entry without the
 * internal-only `promise`, `spanBuffer` and `followUpBuffer` fields.
 */
function toView(entry) {
  const view = { ...entry };
  delete view.promise;
  delete view.spanBuffer;
  delete view.followUpBuffer;
  return view;
}
26
/**
 * Tracks background jobs by id. Keyed by the backend-supplied job id (the per-step
 * job id) so a re-dispatched start re-attaches to the running job rather than starting
 * a duplicate — which keeps the durable driver's retries idempotent and avoids redoing
 * already-running work. Generic over the job/result shape so the same lifecycle +
 * inactivity/max-duration watchdogs drive every agent run.
 */
export class JobRegistry {
  limits;
  run;
  describe;
  jobs = new Map();

  /**
   * @param limits   `{ maxDurationMs, inactivityMs }` watchdog limits.
   * @param run      The unit of work (the `agent` handler). Injectable so tests can
   *                 drive the registry's lifecycle/watchdog logic with a different runner.
   * @param describe Extractor of non-secret correlation fields to bind on the per-job
   *                 logger (repo, branch, agentKind). The job id is always bound.
   *                 Defaults to no extra fields.
   */
  constructor(limits, run, describe = () => ({})) {
    this.limits = limits;
    this.run = run;
    this.describe = describe;
  }

  /** Start the job for `id`, or return the existing one (idempotent re-attach). */
  start(id, job) {
    const existing = this.jobs.get(id);
    if (existing) {
      return toView(existing);
    }
    const now = Date.now();
    const entry = {
      id,
      state: 'running',
      startedAt: now,
      // Seed the live phase so a poll BEFORE the handler enters its first phase still
      // shows "starting" rather than an absent/blank phase.
      phase: 'starting',
      heartbeatAt: now,
      promise: Promise.resolve(),
      spanBuffer: [],
      followUpBuffer: [],
    };
    this.jobs.set(id, entry);
    entry.promise = this.drive(entry, job);
    return toView(entry);
  }

  /**
   * Poll the job — and DRAIN its span and follow-up buffers (drain-on-read): each call
   * returns what accumulated since the previous call and clears it.
   *
   * @returns The job view, or `undefined` when no job has that id.
   */
  get(id) {
    const entry = this.jobs.get(id);
    if (!entry) {
      return undefined;
    }
    const view = toView(entry);
    if (entry.spanBuffer.length > 0) {
      view.spans = entry.spanBuffer;
      entry.spanBuffer = [];
    }
    if (entry.followUpBuffer.length > 0) {
      view.followUps = entry.followUpBuffer;
      entry.followUpBuffer = [];
    }
    return view;
  }

  /**
   * Run the job to completion under the inactivity and max-duration watchdogs and
   * record the outcome on `entry`. Failures of the run are captured on the entry
   * rather than rethrown.
   */
  async drive(entry, job) {
    const controller = new AbortController();
    let killReason;
    const jobLog = log.child({ jobId: entry.id, ...this.describe(job) });

    // Stuck-run breadcrumb: the coarse phase the handler is in, per-phase wall-clock, and
    // the last completed tool — so an inactivity kill can say WHERE it hung instead of a
    // bare "likely hung", and the finish/fail log carries the phase-timing breakdown.
    let phase = 'starting';
    let phaseEnteredAt = Date.now();
    const phaseTimingsMs = {};
    const markPhase = (next) => {
      const now = Date.now();
      phaseTimingsMs[phase] = (phaseTimingsMs[phase] ?? 0) + (now - phaseEnteredAt);
      phase = next;
      phaseEnteredAt = now;
      // Surface the live phase on the view so a poll shows WHAT the container is doing;
      // the same marker drives the failure breadcrumb.
      entry.phase = next;
    };

    let lastTool;
    let inactivity;
    // Set once the run has settled. A heartbeat arriving after that must not re-arm
    // the inactivity watchdog, or a stray timer outlives the cleanup below.
    let settled = false;

    const resetInactivity = () => {
      clearTimeout(inactivity);
      inactivity = setTimeout(() => {
        // First watchdog to fire wins the reason (a later timer firing in the
        // teardown window must not relabel why the job was killed).
        killReason ??= 'inactivity';
        controller.abort(new Error('inactivity timeout'));
      }, this.limits.inactivityMs);
    };
    const cap = setTimeout(() => {
      killReason ??= 'max-duration';
      controller.abort(new Error('max duration exceeded'));
    }, this.limits.maxDurationMs);
    const heartbeat = () => {
      if (settled) {
        return;
      }
      entry.heartbeatAt = Date.now();
      resetInactivity();
    };

    resetInactivity();
    jobLog.info('job started', {});
    try {
      const result = await this.run(job, {
        signal: controller.signal,
        onActivity: heartbeat,
        onProgress: (progress) => {
          entry.progress = progress;
        },
        onSpan: (span) => {
          entry.spanBuffer.push(span);
          lastTool = { name: span.tool, at: span.endedAt };
        },
        onFollowUp: (items) => {
          entry.followUpBuffer.push(...items);
        },
        onPhase: (next) => markPhase(next),
        log: jobLog,
      });
      markPhase('done');
      entry.state = 'done';
      entry.result = result;
      // A clean-exit result can still be a failure (e.g. no usable output): carry its
      // structured cause onto the view so the backend classifies it without regex.
      if (result.error && result.failureCause) {
        entry.failureCause = result.failureCause;
      }
      jobLog.info('job finished', {
        durationMs: Date.now() - entry.startedAt,
        jobError: result.error ?? null,
        phaseTimingsMs,
      });
    } catch (error) {
      // Capture the phase the job was IN before recording the 'failed' transition, so the
      // breadcrumb names where it hung (markPhase below would otherwise overwrite it).
      const failedInPhase = phase;
      markPhase('failed');
      const { message, cause, detail } = this.describeFailure(
        killReason,
        error,
        failedInPhase,
        lastTool,
        phaseTimingsMs,
      );
      entry.state = 'failed';
      entry.error = message;
      entry.failureCause = cause;
      entry.detail = detail;
      jobLog.error('job failed', {
        durationMs: Date.now() - entry.startedAt,
        reason: killReason ?? 'error',
        failureCause: cause,
        error: message,
        phaseTimingsMs,
      });
    } finally {
      settled = true;
      clearTimeout(inactivity);
      clearTimeout(cap);
      entry.heartbeatAt = Date.now();
    }
  }

  /**
   * Build the redacted one-line `error`, the structured failure cause, and the extended
   * `detail` for a failed job. Watchdog kills use the abort-message helpers and gain a
   * breadcrumb of where they hung; a thrown error keeps its own message and its tagged
   * cause when `failureCauseOf` finds one, else `agent`. All strings pass through
   * `redactSecrets`.
   *
   * @param killReason     'inactivity' | 'max-duration' | undefined (an ordinary throw).
   * @param error          The value the run rejected with.
   * @param phase          The phase the job was in when it failed.
   * @param lastTool       `{ name, at }` of the last completed tool, if any.
   * @param phaseTimingsMs Accumulated wall-clock per phase, in milliseconds.
   * @returns `{ message, cause, detail }`.
   */
  describeFailure(killReason, error, phase, lastTool, phaseTimingsMs) {
    // `lastTool` is the last tool that COMPLETED (a span is emitted on tool end), so when the
    // hang is inside a still-running tool the breadcrumb points at the prior one — worded
    // "last completed tool" so the reader knows the stuck call may be the next, unfinished one.
    const breadcrumb = lastTool
      ? `last completed tool ${lastTool.name} ${Math.round((Date.now() - lastTool.at) / 1000)}s ago`
      : 'no tool had completed yet';
    const phaseBreakdown = Object.entries(phaseTimingsMs)
      .map(([p, ms]) => `${p}=${Math.round(ms / 1000)}s`)
      .join(', ');

    if (killReason === 'inactivity') {
      return {
        message: redactSecrets(`${inactivityAbortMessage(this.limits.inactivityMs)} (likely hung in ${phase} phase; ${breadcrumb})`),
        cause: 'inactivity-timeout',
        detail: redactSecrets(`Phase timings: ${phaseBreakdown || '(none)'}. ${breadcrumb}.`),
      };
    }
    if (killReason === 'max-duration') {
      return {
        message: redactSecrets(maxDurationAbortMessage(this.limits.maxDurationMs)),
        cause: 'max-duration',
        detail: redactSecrets(`Phase timings: ${phaseBreakdown || '(none)'}. ${breadcrumb}.`),
      };
    }
    const raw = error instanceof Error ? error.message : String(error);
    // A thrown error tagged with a structured cause (a git op / an upstream API call) keeps
    // it; an untagged throw is a generic agent failure.
    return {
      message: redactSecrets(raw),
      cause: failureCauseOf(error) ?? 'agent',
      detail: redactSecrets(`${phaseBreakdown ? `Phase timings: ${phaseBreakdown}. ` : ''}Failed in ${phase} phase; ${breadcrumb}.`),
    };
  }
}
package/dist/server.js ADDED
@@ -0,0 +1,135 @@
1
+ import { timingSafeEqual } from 'node:crypto';
2
+ import { createServer } from 'node:http';
3
+ import { parseAgentJob } from './job.js';
4
+ import { handleAgent } from './agent.js';
5
+ import { redactSecrets } from './git.js';
6
+ import { JobRegistry, loadRunnerLimits } from './runner.js';
7
+ import { log } from './logger.js';
8
+ // The container's HTTP entry point. The Worker addresses one instance per run and
9
+ // POSTs a job to /jobs (the body's `kind` selects which agent runs); the harness
10
+ // starts that job in the background (bounded by an inactivity + max-duration
11
+ // watchdog) and returns a job id, which the Worker then polls via GET /jobs/{id}.
12
+ // Nothing here holds long-lived secrets: the per-job GitHub + proxy tokens arrive
13
+ // in the request body and live only for the duration of the job in an ephemeral
14
+ // workspace.
15
+ const PORT = Number(process.env.PORT ?? 8080);
16
+ // Optional inbound auth. When HARNESS_SHARED_SECRET is set, every non-health
17
+ // request must present a matching `x-harness-secret` header (constant-time
18
+ // compared). When it is unset the harness behaves as before (open), so local/dev
19
+ // and the existing acceptance flow keep working without configuration.
20
+ // The direct callers send the matching header when the secret is configured: the
21
+ // local Docker transport (LocalContainerRunnerTransport) and the Cloudflare
22
+ // transport (CloudflareContainerTransport, which also injects the secret into the
23
+ // container env). A self-hosted runner pool reaches the harness through its own
24
+ // control plane, so its operator configures the secret pool-side.
25
+ const SHARED_SECRET = process.env.HARNESS_SHARED_SECRET;
26
+ const HEADER = 'x-harness-secret';
27
/**
 * Check the shared-secret header on a request.
 *
 * @param req Incoming request (only `headers` is read).
 * @returns `true` when no shared secret is configured (auth disabled) or when the
 *          presented header equals the secret; `false` otherwise.
 */
function authorized(req) {
  if (!SHARED_SECRET) {
    return true;
  }
  const header = req.headers[HEADER];
  // A repeated header arrives as an array: only its first value is considered.
  const presented = Array.isArray(header) ? (header[0] ?? '') : (header ?? '');
  const got = Buffer.from(presented);
  const want = Buffer.from(SHARED_SECRET);
  // timingSafeEqual requires equal-length buffers, so compare lengths first.
  if (got.length !== want.length) {
    return false;
  }
  return timingSafeEqual(got, want);
}
37
+ // One registry per kind per container process. A run addresses its own container
38
+ // instance (one Durable Object id per execution / bootstrap job) and dispatches its
39
+ // sequence of step jobs to it; every kind shares the same watchdog/lifecycle but
40
+ // produces a different result, so each gets its own registry keyed by the job id.
41
+ const limits = loadRunnerLimits();
42
/**
 * Pair a body validator with a registry that runs `handler` under the shared limits.
 *
 * @param parse    Validates a raw request body into a job.
 * @param handler  The unit of work the registry runs for each job.
 * @param describe Non-secret correlation fields bound on the per-job logger.
 * @returns `{ parse, registry }`.
 */
function defineKind(parse, handler, describe) {
  const registry = new JobRegistry(limits, handler, describe);
  return { parse, registry };
}
51
/** Correlation fields logged for an `agent` job: its mode, `owner/name` repo and branch. */
function describeAgentJob({ mode, repo, branch }) {
  return {
    mode,
    repo: `${repo.owner}/${repo.name}`,
    branch,
  };
}

// The dispatch table. The harness serves a SINGLE, manifest-driven kind: the generic
// `agent` (the job body's `mode` — explore | coding — and its data select the flow; WHAT
// the agent does is decided entirely by the backend). The per-kind bespoke handlers
// (run/blueprint/spec/explore/merge/test/…) were folded onto this one kind and removed.
// A `POST /jobs` reads the body's `kind` to pick the entry; `GET /jobs/{id}` checks every
// registry (job ids never collide across kinds). `kind` mirrors kernel's
// `RunnerDispatchKind` (now also just `'agent'`); the harness keeps its own copy so the
// image carries no runtime deps.
const KINDS = {
  agent: defineKind(parseAgentJob, handleAgent, describeAgentJob),
};
66
/**
 * Read a request body to the end and decode it as UTF-8.
 *
 * @param req Async-iterable request stream yielding Buffer chunks.
 * @returns The whole body as a string.
 */
async function readBody(req) {
  const received = [];
  for await (const piece of req) {
    received.push(piece);
  }
  // Concatenate before decoding so a multi-byte character split across chunks survives.
  const whole = Buffer.concat(received);
  return whole.toString('utf8');
}
72
/**
 * Write a JSON response: serialize `body`, send `status` with a JSON content type,
 * then end the response with the serialized payload.
 */
function send(res, status, body) {
  // Serialize first so a value that cannot be stringified fails before any header is written.
  const json = JSON.stringify(body);
  const headers = { 'content-type': 'application/json' };
  res.writeHead(status, headers);
  res.end(json);
}
77
/**
 * Route one request.
 *
 * - `GET /health` — always 200, not gated by the shared secret.
 * - `GET /jobs/{id}` — poll a job; each registry is checked in turn and the first hit
 *   wins. 400 when the id is not valid percent-encoding, 404 when no registry has it.
 * - `POST /jobs` — start (or re-attach to) a job. The body's `kind` selects the
 *   validator + registry; the rest is that kind's job spec. Returns 202 with the job id
 *   straight away; the caller polls `GET /jobs/{id}`. 404 for an unknown kind, 400 when
 *   the body fails to parse or validate.
 * - anything else — 404.
 *
 * Every endpoint except `/health` answers 401 when `authorized` rejects the request.
 */
async function handleRequest(req, res) {
  if (req.method === 'GET' && req.url === '/health') {
    return send(res, 200, { status: 'ok' });
  }
  // All non-health endpoints are gated by the optional shared secret.
  if (!authorized(req)) {
    return send(res, 401, { error: 'unauthorized' });
  }

  if (req.method === 'GET' && req.url?.startsWith('/jobs/')) {
    let id;
    try {
      id = decodeURIComponent(req.url.slice('/jobs/'.length));
    } catch {
      // decodeURIComponent throws URIError on malformed percent-encoding: a client error.
      return send(res, 400, { error: 'malformed job id' });
    }
    for (const { registry } of Object.values(KINDS)) {
      const view = registry.get(id);
      if (view) {
        return send(res, 200, view);
      }
    }
    return send(res, 404, { error: 'job not found' });
  }

  if (req.method === 'POST' && req.url === '/jobs') {
    let kind;
    try {
      const raw = JSON.parse(await readBody(req));
      kind = raw.kind;
      // Own-property lookup only: an inherited key such as `constructor` is not a kind.
      const entry = typeof kind === 'string' && Object.hasOwn(KINDS, kind) ? KINDS[kind] : undefined;
      if (!entry) {
        return send(res, 404, { error: `unknown job kind '${String(kind)}'` });
      }
      const job = entry.parse(raw);
      const view = entry.registry.start(job.jobId, job);
      return send(res, 202, { jobId: view.id, state: view.state });
    } catch (error) {
      // Parse failures (incl. host-allowlist rejection) are client errors → 400.
      const message = redactSecrets(error instanceof Error ? error.message : String(error));
      log.error('failed to start job', {
        kind: typeof kind === 'string' ? kind : undefined,
        error: message,
      });
      return send(res, 400, { error: message });
    }
  }

  return send(res, 404, { error: 'not found' });
}

// The HTTP server. The handler's promise is never left to reject unobserved: an
// unexpected failure is logged (redacted) and answered with a 500, so one bad request
// cannot surface as an unhandled rejection.
const server = createServer((req, res) => {
  handleRequest(req, res).catch((error) => {
    const message = redactSecrets(error instanceof Error ? error.message : String(error));
    log.error('unhandled request error', { error: message });
    if (!res.headersSent) {
      send(res, 500, { error: 'internal error' });
    } else if (!res.writableEnded) {
      res.end();
    }
  });
});
129
// Start listening on load everywhere except under test (NODE_ENV === 'test'), so a
// test can import the exported `server` without binding a port.
const underTest = process.env.NODE_ENV === 'test';
if (!underTest) {
  const announce = () => {
    console.log(`executor-harness listening on :${PORT}`);
  };
  server.listen(PORT, announce);
}
135
+ export { server };