@cat-factory/executor-harness 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +143 -0
  3. package/dist/agent-runner.js +389 -0
  4. package/dist/agent.js +810 -0
  5. package/dist/blueprint.js +367 -0
  6. package/dist/bootstrap.js +99 -0
  7. package/dist/ci-fixer.js +46 -0
  8. package/dist/coding-agent.js +285 -0
  9. package/dist/conflict-resolver.js +138 -0
  10. package/dist/embed.js +8 -0
  11. package/dist/explore.js +74 -0
  12. package/dist/failure.js +47 -0
  13. package/dist/fixer.js +44 -0
  14. package/dist/follow-ups.js +103 -0
  15. package/dist/frontend-infra.js +283 -0
  16. package/dist/fs-utils.js +11 -0
  17. package/dist/git.js +778 -0
  18. package/dist/job.js +409 -0
  19. package/dist/logger.js +27 -0
  20. package/dist/merger.js +135 -0
  21. package/dist/on-call.js +126 -0
  22. package/dist/pi-workspace.js +237 -0
  23. package/dist/pi.js +971 -0
  24. package/dist/process.js +25 -0
  25. package/dist/redact.js +109 -0
  26. package/dist/runner.js +228 -0
  27. package/dist/server.js +135 -0
  28. package/dist/spec.js +754 -0
  29. package/dist/structured-output.js +431 -0
  30. package/dist/tester.js +191 -0
  31. package/package.json +35 -0
  32. package/src/agent-runner.ts +484 -0
  33. package/src/agent.ts +948 -0
  34. package/src/coding-agent.ts +393 -0
  35. package/src/embed.ts +32 -0
  36. package/src/failure.ts +73 -0
  37. package/src/follow-ups.ts +106 -0
  38. package/src/frontend-infra.ts +340 -0
  39. package/src/fs-utils.ts +11 -0
  40. package/src/git.ts +955 -0
  41. package/src/job.ts +766 -0
  42. package/src/logger.ts +45 -0
  43. package/src/pi-workspace.ts +348 -0
  44. package/src/pi.ts +1236 -0
  45. package/src/process.ts +33 -0
  46. package/src/redact.ts +109 -0
  47. package/src/runner.ts +384 -0
  48. package/src/server.ts +153 -0
  49. package/src/structured-output.ts +524 -0
package/src/logger.ts ADDED
@@ -0,0 +1,45 @@
1
+ // Minimal zero-dependency structured logger. The container image installs no npm
2
+ // packages at runtime (see Dockerfile — it compiles the TS against standalone
3
+ // typescript/@types/node and ships only Node built-ins + the global Pi CLI), so
4
+ // pino can't live here. This emits pino-shaped JSON lines (level/time/msg +
5
+ // fields) which the platform captures from stdout/stderr. The Worker uses pino.
6
+
7
/** Severity levels understood by the logger. */
type Level = 'debug' | 'info' | 'warn' | 'error'
/** Arbitrary structured fields attached to a log line. */
type Fields = Record<string, unknown>

/**
 * Serialise `record` to JSON without ever throwing. `JSON.stringify` raises a TypeError
 * on a circular reference or a BigInt; when that happens each top-level field is probed
 * on its own and only the offending ones are replaced by a placeholder string, so the
 * rest of the line (including the envelope) still reaches the log stream.
 */
function safeStringify(record: Fields): string {
  try {
    return JSON.stringify(record)
  } catch {
    const safe: Fields = {}
    for (const [key, value] of Object.entries(record)) {
      try {
        JSON.stringify(value)
        safe[key] = value
      } catch {
        safe[key] = '[unserializable]'
      }
    }
    return JSON.stringify(safe)
  }
}

/**
 * Write one JSON log line.
 *
 * @param level  Severity; also selects the output stream.
 * @param msg    Human-readable message.
 * @param bound  Fields bound to the logger (per-job context).
 * @param fields Optional call-site fields; these override `bound` on a key clash.
 *
 * A logging call must never take down its caller, so a field that cannot be serialised
 * (circular structure, BigInt) is degraded to a placeholder instead of throwing.
 */
function emit(level: Level, msg: string, bound: Fields, fields?: Fields): void {
  // Bound fields first so a call-site field can override a bound one; the envelope keys
  // (level/time/msg) go LAST so neither source can corrupt them — a stray field named
  // `level` must never disagree with the stream the line routes to.
  const line = safeStringify({ ...bound, ...fields, level, time: new Date().toISOString(), msg })
  // Errors/warnings to stderr, everything else to stdout.
  const stream = level === 'error' || level === 'warn' ? process.stderr : process.stdout
  stream.write(`${line}\n`)
}
19
+
20
/** Shape shared by the four level methods: a message plus optional call-site fields. */
type LogMethod = (msg: string, fields?: Fields) => void

/** Public logging surface: one method per level, plus `child` for binding context once. */
export interface Logger {
  debug: LogMethod
  info: LogMethod
  warn: LogMethod
  error: LogMethod
  /**
   * Derive a logger that folds `bound` (e.g. `{ jobId, repo, branch }`) into every line it
   * emits, so call sites need not re-spread correlation context. Children nest: calling
   * `child` on the result adds to the fields already bound.
   */
  child: (bound: Fields) => Logger
}
33
+
34
/**
 * Construct a logger that merges `bound` into each emitted line. Passing `{}` yields a
 * logger with no bound context.
 */
function makeLogger(bound: Fields): Logger {
  // One factory for all four levels: each method forwards to `emit` with its level fixed.
  const at =
    (level: Level) =>
    (msg: string, fields?: Fields): void =>
      emit(level, msg, bound, fields)

  return {
    debug: at('debug'),
    info: at('info'),
    warn: at('warn'),
    error: at('error'),
    // A child starts from the parent's bound fields; `extra` wins on a key clash.
    child: (extra) => makeLogger({ ...bound, ...extra }),
  }
}

/** The root logger; binds no context fields. */
export const log: Logger = makeLogger({})
package/src/pi-workspace.ts ADDED
@@ -0,0 +1,348 @@
1
+ import { mkdir, mkdtemp, rm } from 'node:fs/promises'
2
+ import { tmpdir } from 'node:os'
3
+ import { join } from 'node:path'
4
+ import type { RepoSpec } from './job.js'
5
+ import {
6
+ type ContextFileInfo,
7
+ type PiRunOutcome,
8
+ type PiRunStats,
9
+ type ProgressGuardLimits,
10
+ type RunDiagnostics,
11
+ CONTEXT_DIR,
12
+ materializeContextFiles,
13
+ mergeGuardLimits,
14
+ progressGuardLimitsFromEnv,
15
+ runPi,
16
+ webSearchConfigFromEnv,
17
+ webSearchProxyEnv,
18
+ writeAgentsContext,
19
+ writePiModelsConfig,
20
+ writeWebToolsConfig,
21
+ } from './pi.js'
22
+ import type { RunOptions } from './runner.js'
23
+ import { type SubscriptionHarness, runSubscriptionHarness } from './agent-runner.js'
24
+
25
/**
 * Identifies the container harness that executes an agent: `'pi'` (the default) or one
 * of the subscription CLI harnesses.
 */
export type HarnessKind = 'pi' | SubscriptionHarness
27
+
28
+ // The thin base every container agent shares: an ephemeral working directory, and
29
+ // one Pi run inside it driven by the harness-written context. The agents differ in
30
+ // how the directory is prepared (clone a branch, scaffold from scratch, read files
31
+ // to build the prompt) and what they do with the result (push a branch, open a PR,
32
+ // render files, return JSON) — but the middle (write AGENTS.md + provider config,
33
+ // run Pi, tear the workspace down) is identical, so it lives here once. Carries no
34
+ // secrets beyond the call: the per-job tokens arrive in the spec and are gone when
35
+ // the workspace is removed.
36
+
37
/**
 * Create a fresh temporary directory, hand it to `fn`, and delete it afterwards whether
 * `fn` resolves or throws.
 *
 * @param prefix Label used as the directory-name prefix (e.g. 'impl', 'merge').
 * @param fn     Work to perform inside the directory.
 * @returns Whatever `fn` resolves to.
 */
export async function withWorkspace<T>(
  prefix: string,
  fn: (dir: string) => Promise<T>,
): Promise<T> {
  const template = join(tmpdir(), `${prefix}-`)
  const scratch = await mkdtemp(template)
  try {
    const result = await fn(scratch)
    return result
  } finally {
    // `force` keeps cleanup quiet if `fn` already removed the directory itself.
    await rm(scratch, { recursive: true, force: true })
  }
}
52
+
53
/**
 * Root directory for PERSISTENT checkouts in a reused (pooled) container: a stable
 * per-repo location that outlives a single job, so a later run can `git fetch` and
 * switch branch rather than clone again. Only the local warm-pool transport turns this
 * on (via `persistentCheckout` on the job); other runtimes stay on the ephemeral
 * {@link withWorkspace} path.
 *
 * @returns The trimmed `HARNESS_WORKSPACE_ROOT` env value, or `/workspace` when it is
 *          unset or blank.
 */
function persistentWorkspaceRoot(): string {
  const configured = process.env.HARNESS_WORKSPACE_ROOT
  const trimmed = configured === undefined ? '' : configured.trim()
  return trimmed.length > 0 ? trimmed : '/workspace'
}
63
+
64
/**
 * Sanitise an owner/name path segment so a repo identity can never escape the root.
 *
 * Every character outside `[A-Za-z0-9._-]` becomes `-`, and an empty result becomes `_`.
 * The character whitelist alone is not enough: `.` and `..` consist solely of allowed
 * characters yet are resolved by `path.join` as "this directory" / "parent directory",
 * so they are rewritten with underscores (`_`, `__`) to keep the joined path strictly
 * below the root.
 *
 * @param value Raw owner or repository name.
 * @returns A single, non-empty, traversal-free path segment.
 */
function safeSegment(value: string): string {
  const cleaned = value.replace(/[^A-Za-z0-9._-]/g, '-')
  if (cleaned === '') return '_'
  // Dot-only special segments would otherwise climb out of (or collapse onto) the root.
  if (cleaned === '.' || cleaned === '..') return cleaned.replace(/\./g, '_')
  return cleaned
}
68
+
69
// Per-directory async mutex. Two jobs landing in the same container share ONE persistent
// checkout, so they must not mutate its working tree at the same time. The engine runs a
// run's steps sequentially, which makes contention rare — this is correctness insurance
// that stops a stray concurrent dispatch from corrupting the tree.
const dirLocks = new Map<string, Promise<void>>()

/**
 * Run `fn` while holding the lock for `dir`; callers for the same `dir` run one at a
 * time in arrival order, callers for different dirs do not wait on each other.
 *
 * @returns Whatever `fn` resolves to (a rejection from `fn` propagates after release).
 */
async function withDirLock<T>(dir: string, fn: () => Promise<T>): Promise<T> {
  const predecessor = dirLocks.get(dir) ?? Promise.resolve()
  let unlock!: () => void
  const held = new Promise<void>((resolve) => {
    unlock = resolve
  })
  // Keep the chained promise in a local and store that exact object: the cleanup below
  // compares by identity, so the map entry and the value compared against must be the
  // same promise or the entry would never be removed.
  const queued = predecessor.then(() => held)
  dirLocks.set(dir, queued)
  await predecessor.catch(() => {})
  try {
    return await fn()
  } finally {
    unlock()
    // Still the newest entry ⇒ nobody queued behind us, so drop it; otherwise the map
    // would accumulate one entry per distinct repo dir forever.
    if (dirLocks.get(dir) === queued) dirLocks.delete(dir)
  }
}
95
+
96
/**
 * Persistent-checkout counterpart of {@link withWorkspace}: run `fn` in a STABLE per-repo
 * directory (`<root>/<owner>/<repo>`) that is left in place afterwards. The directory is
 * created if missing; bringing it to the right state (clean-sweep + fetch, via
 * `prepareExistingCheckout`) is the caller's job. Calls for the same directory are
 * serialised so concurrent jobs cannot corrupt the shared tree.
 *
 * @param repo Repository whose `owner` / `name` select the directory.
 * @param fn   Work to perform inside the directory.
 */
export async function withPersistentWorkspace<T>(
  repo: RepoSpec,
  fn: (dir: string) => Promise<T>,
): Promise<T> {
  const root = persistentWorkspaceRoot()
  const checkoutDir = join(root, safeSegment(repo.owner), safeSegment(repo.name))
  const ensureThenRun = async (): Promise<T> => {
    await mkdir(checkoutDir, { recursive: true })
    return fn(checkoutDir)
  }
  return withDirLock(checkoutDir, ensureThenRun)
}
112
+
113
/**
 * Pick the working directory for a run and execute `fn` in it. With `opts.persistent`
 * (the warm-pool path) that is the STABLE, reused per-repo checkout; otherwise it is a
 * fresh ephemeral temp dir. Only the directory's lifecycle differs between the two — the
 * caller still populates it (clone vs `prepareExistingCheckout`) and so keeps its own
 * resume / full-clone / branch logic.
 *
 * @param opts.persistent Use the persistent per-repo checkout.
 * @param opts.prefix     Temp-dir label for the ephemeral path.
 * @param opts.repo       Repository identity for the persistent path.
 */
export async function acquireRepoCheckout<T>(
  opts: { persistent: boolean; prefix: string; repo: RepoSpec },
  fn: (dir: string) => Promise<T>,
): Promise<T> {
  const { persistent, prefix, repo } = opts
  return persistent ? withPersistentWorkspace(repo, fn) : withWorkspace(prefix, fn)
}
127
+
128
/** Inputs an agent supplies to run a harness against a directory it has already prepared. */
export interface AgentRunSpec {
  /** Working directory, already cloned or scaffolded by the caller. */
  dir: string
  /** Role + best-practice fragments, composed; lands in Pi's global AGENTS.md context. */
  systemPrompt: string
  /** The specific task prompt given to the agent. */
  userPrompt: string
  model: string
  /**
   * Harness selection. Omitted ⇒ the default Pi harness, which uses the proxy and
   * `sessionToken`. With `claude-code` / `codex` the leased credential travels in
   * `subscriptionToken` and the proxy fields go unused because the CLI talks direct.
   */
  harness?: HarnessKind
  /** Leased subscription credential (`claude-code` / `codex`). */
  subscriptionToken?: string
  /** Anthropic-compatible base URL for a non-Anthropic Claude-Code vendor (GLM/Kimi). */
  subscriptionBaseUrl?: string
  /**
   * Native local execution: use the developer's installed `claude` / `codex` and its OWN
   * ambient login rather than a leased credential. Only the local native transport sets
   * it; the Pi harness ignores it.
   */
  ambientAuth?: boolean
  /** Base URL of the Pi proxy (Pi harness only). */
  proxyBaseUrl?: string
  /** Session token for the Pi proxy (Pi harness only). */
  sessionToken?: string
  /**
   * Monorepo service subdirectory, relative to the repo root, that this run works in —
   * `spec.dir` already points at it. Passed into AGENTS.md so the agent knows it is in a
   * monorepo and where its service lives. Omitted ⇒ whole-repo run, no monorepo note.
   */
  serviceDirectory?: string
  /**
   * Whether the run is expected to edit files; defaults to true. Assess-only runs (the
   * merger) set false so the no-progress guard's no-edit bound, which would otherwise
   * trip on a run that rightly makes zero edits, is skipped.
   */
  expectsEdits?: boolean
  /**
   * Progress-guard overrides chosen by the backend per agent kind (loosen-only, never
   * tighter). A knob present here replaces the env/default value; a missing knob keeps
   * {@link progressGuardLimitsFromEnv}. Omitted ⇒ env/default for every knob.
   */
  guardLimits?: Partial<ProgressGuardLimits>
  /**
   * Web-search guidance the backend composed for this agent kind. Shown in AGENTS.md only
   * when web search is configured; omitted ⇒ the generic blurb. See `writeAgentsContext`.
   */
  webToolsGuidance?: string
  /**
   * Linked-context files prepared by the backend (requirements / RFCs / PRDs / tracker
   * issues). Written into CONTEXT_DIR inside the checkout before the run and referenced
   * from AGENTS.md so the agent can read them on demand. Omitted ⇒ none.
   */
  contextFiles?: ContextFileInfo[]
  /**
   * Turn on proxy-backed web search: the rpiv-web-tools SearXNG provider is pointed at the
   * backend's search proxy (`${proxyBaseUrl}/web-search`) with the session token as the
   * bearer, so the search runs server-side under the deployment's key and no provider
   * secret enters the sandbox. Off ⇒ web search is enabled only when a provider key is
   * present directly in the container env (the self-hosted runner-pool path).
   */
  webSearchProxy?: boolean
}
197
+
198
/**
 * The shared middle of every container agent: materialise linked context, then run the
 * selected harness once in `spec.dir` and return its outcome.
 *
 * For the Pi harness this writes Pi's global agent context (`~/.pi/agent/AGENTS.md`) and
 * provider config first; that context lives outside the checkout so it never lands in a
 * commit. For `claude-code` / `codex` no proxy config or AGENTS.md is written and the
 * system prompt is passed to the CLI directly.
 *
 * @param spec Prepared directory, prompts, model and harness-specific credentials.
 * @param opts Abort signal and activity/progress/span callbacks forwarded to the harness.
 * @throws Error when a subscription harness has neither `ambientAuth` nor a
 *         `subscriptionToken`, or when the Pi harness lacks `proxyBaseUrl` / `sessionToken`.
 */
export async function runAgentInWorkspace(
  spec: AgentRunSpec,
  opts: RunOptions = {},
): Promise<PiRunOutcome> {
  // Linked context goes onto disk before either harness starts: the agent cannot reach
  // Jira/GitHub itself, so it reads these files on demand. A local git exclude entry
  // keeps them out of the agent's commits.
  const linkedContext = spec.contextFiles ?? []
  await materializeContextFiles(spec.dir, linkedContext)

  const { harness } = spec
  if (harness === 'claude-code' || harness === 'codex') {
    // Ambient (native) mode rides on the developer's own CLI login; every other
    // subscription run must carry the leased token.
    if (!spec.ambientAuth && !spec.subscriptionToken) {
      throw new Error(`The ${harness} harness requires a subscription token`)
    }
    const leasedToken = spec.subscriptionToken
      ? { subscriptionToken: spec.subscriptionToken }
      : {}
    const ambient = spec.ambientAuth ? { ambientAuth: true } : {}
    return runSubscriptionHarness(harness, {
      cwd: spec.dir,
      model: spec.model,
      systemPrompt: subscriptionSystemPrompt(spec.systemPrompt, linkedContext),
      userPrompt: spec.userPrompt,
      ...leasedToken,
      subscriptionBaseUrl: spec.subscriptionBaseUrl,
      ...ambient,
      signal: opts.signal,
      onActivity: opts.onActivity,
      onProgress: opts.onProgress,
    })
  }

  const { proxyBaseUrl, sessionToken } = spec
  if (!proxyBaseUrl || !sessionToken) {
    throw new Error('The Pi harness requires proxyBaseUrl and sessionToken')
  }

  // Web search/fetch (rpiv-web-tools) is opt-in and off by default. It turns on either
  //   - proxy-backed (Cloudflare/managed): the backend set `webSearchProxy`, so the
  //     SearXNG provider targets `${proxyBaseUrl}/web-search` with the session token and
  //     the search runs server-side with no provider key in the sandbox; or
  //   - direct (self-hosted runner pool): a provider key already sits in the container
  //     env and `webSearchConfigFromEnv` autodetects it.
  // The proxy vars reach Pi's child through `extraEnv` rather than this process's own
  // env, so detection is run over the same merged view the extension will see.
  let extraEnv: Record<string, string> = {}
  if (spec.webSearchProxy) extraEnv = webSearchProxyEnv(proxyBaseUrl, sessionToken)

  const webSearch = webSearchConfigFromEnv({ ...process.env, ...extraEnv })
  if (webSearch) await writeWebToolsConfig(webSearch)

  await writeAgentsContext(spec.systemPrompt, {
    webSearch: Boolean(webSearch),
    guidance: spec.webToolsGuidance,
    serviceDirectory: spec.serviceDirectory,
    contextFiles: linkedContext,
  })
  await writePiModelsConfig({ model: spec.model, proxyBaseUrl })

  return runPi({
    cwd: spec.dir,
    model: spec.model,
    userPrompt: spec.userPrompt,
    sessionToken,
    signal: opts.signal,
    onActivity: opts.onActivity,
    onProgress: opts.onProgress,
    onSpan: opts.onSpan,
    expectsEdits: spec.expectsEdits ?? true,
    // Env/built-in defaults form the base; only the knobs the backend set for this kind
    // (loosen-only) are overridden, so an unspecified knob keeps its default.
    guardLimits: mergeGuardLimits(progressGuardLimitsFromEnv(), spec.guardLimits),
    extraEnv,
  })
}
280
+
281
/**
 * Extend a subscription harness's system prompt with a pointer to the materialised linked
 * context. Pi gets this note through AGENTS.md, but Claude Code / Codex receive the
 * system prompt directly, so it has to be appended here. With no files the prompt is
 * returned unchanged.
 *
 * @param systemPrompt The composed system prompt.
 * @param files        Linked-context files; each contributes one `- <dir>/<path> — <title>` line.
 */
function subscriptionSystemPrompt(systemPrompt: string, files: ContextFileInfo[]): string {
  if (files.length === 0) return systemPrompt
  const bullets = files.map(({ path, title }) => `- ${CONTEXT_DIR}/${path} — ${title}`)
  return [
    systemPrompt,
    '',
    'Linked context (requirements / RFCs / PRDs / tracker issues) for this task is in the',
    `${CONTEXT_DIR}/ directory of your checkout — read a file when relevant. Do NOT try to reach`,
    'external systems; everything available is already on disk:',
    ...bullets,
  ].join('\n')
}
297
+
298
/**
 * Reports whether a run finished with zero tool calls AND zero characters of assistant
 * output — the fingerprint of a run that never reached the model. Agents use it when
 * explaining a no-op so the reason points at the likely cause (an unreachable proxy or a
 * rejected model) instead of a genuine "nothing to do".
 */
export function agentNeverActed(stats: PiRunStats): boolean {
  const { toolCalls, assistantChars } = stats
  const madeNoToolCalls = toolCalls === 0
  const producedNoText = assistantChars === 0
  return madeNoToolCalls && producedNoText
}
307
+
308
/**
 * Sentence appended to the structured no-op reasons when the agent never acted. It starts
 * with a space so it can be concatenated directly onto a preceding sentence.
 */
export const NEVER_ACTED_CAUSE = ` The agent never acted (no tool calls, no model output) — it most likely could not reach the model.`
311
+
312
/**
 * Explain why an agent's FINAL answer cannot be used, or return `undefined` when it looks
 * fine. Two conditions count: the last turn was cut off at the output ceiling, or it
 * carried no text at all (an empty completion). Truncation is reported in preference to
 * emptiness when both flags are set.
 *
 * OPT-IN per agent — never a blanket harness rule. Only agents whose work product is a
 * final text/document that the pipeline passes ONWARD for review or parsing (the
 * spec-writer, the blueprinter) should treat a non-undefined result as a hard failure:
 * for them an empty or cut-off final turn leaves nothing trustworthy to review, which is
 * what drove the spec-writer ⇄ companion rework loop. Agents whose product is a side
 * effect (a pushed PR/commit from the coder or ci-fixer, a self-contained validation)
 * legitimately finish without final text and MUST NOT call this.
 *
 * @param diagnostics Run diagnostics, if any were collected.
 */
export function unusableFinalAnswerCause(
  diagnostics: RunDiagnostics | undefined,
): string | undefined {
  if (diagnostics?.finalTruncated) {
    return 'its final answer hit the output-token ceiling and was cut off (raise the limit or narrow the task)'
  }
  return diagnostics?.finalAnswerEmpty
    ? 'its final turn produced no text (an empty completion), so there is no document to read'
    : undefined
}
337
+
338
/**
 * Build the diagnostic suffix that shows where a no-op's real cause surfaces: the last
 * 700 characters of the agent's stderr or, when stderr is empty, the first 700 characters
 * of its summary. Returns '' when neither is present. Every agent's no-op reason shares
 * this so the cause can be diagnosed without shelling into the (ephemeral) container.
 *
 * @param stderrTail Credential-scrubbed stderr captured from the run, if any.
 * @param summary    The run's summary text, used only when stderr is empty.
 */
export function agentOutputTail(stderrTail: string | undefined, summary?: string): string {
  const limit = 700
  if (stderrTail) {
    return ' Agent stderr: ' + stderrTail.slice(-limit)
  }
  return summary ? ' Agent output: ' + summary.slice(0, limit) : ''
}