@cat-factory/executor-harness 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +143 -0
  3. package/dist/agent-runner.js +389 -0
  4. package/dist/agent.js +810 -0
  5. package/dist/blueprint.js +367 -0
  6. package/dist/bootstrap.js +99 -0
  7. package/dist/ci-fixer.js +46 -0
  8. package/dist/coding-agent.js +285 -0
  9. package/dist/conflict-resolver.js +138 -0
  10. package/dist/embed.js +8 -0
  11. package/dist/explore.js +74 -0
  12. package/dist/failure.js +47 -0
  13. package/dist/fixer.js +44 -0
  14. package/dist/follow-ups.js +103 -0
  15. package/dist/frontend-infra.js +283 -0
  16. package/dist/fs-utils.js +11 -0
  17. package/dist/git.js +778 -0
  18. package/dist/job.js +409 -0
  19. package/dist/logger.js +27 -0
  20. package/dist/merger.js +135 -0
  21. package/dist/on-call.js +126 -0
  22. package/dist/pi-workspace.js +237 -0
  23. package/dist/pi.js +971 -0
  24. package/dist/process.js +25 -0
  25. package/dist/redact.js +109 -0
  26. package/dist/runner.js +228 -0
  27. package/dist/server.js +135 -0
  28. package/dist/spec.js +754 -0
  29. package/dist/structured-output.js +431 -0
  30. package/dist/tester.js +191 -0
  31. package/package.json +35 -0
  32. package/src/agent-runner.ts +484 -0
  33. package/src/agent.ts +948 -0
  34. package/src/coding-agent.ts +393 -0
  35. package/src/embed.ts +32 -0
  36. package/src/failure.ts +73 -0
  37. package/src/follow-ups.ts +106 -0
  38. package/src/frontend-infra.ts +340 -0
  39. package/src/fs-utils.ts +11 -0
  40. package/src/git.ts +955 -0
  41. package/src/job.ts +766 -0
  42. package/src/logger.ts +45 -0
  43. package/src/pi-workspace.ts +348 -0
  44. package/src/pi.ts +1236 -0
  45. package/src/process.ts +33 -0
  46. package/src/redact.ts +109 -0
  47. package/src/runner.ts +384 -0
  48. package/src/server.ts +153 -0
  49. package/src/structured-output.ts +524 -0
package/src/coding-agent.ts ADDED
@@ -0,0 +1,393 @@
1
+ import { mkdir } from 'node:fs/promises'
2
+ import { join } from 'node:path'
3
+ import type { HarnessAuthFields, RepoSpec } from './job.js'
4
+ import {
5
+ branchHasCommitsSince,
6
+ cloneExistingBranch,
7
+ cloneRepo,
8
+ commitTrackedEdits,
9
+ createBranch,
10
+ excludeFromGit,
11
+ headCommit,
12
+ listUntrackedFiles,
13
+ prepareExistingCheckout,
14
+ pushBranch,
15
+ refreshFromBaseIfClean,
16
+ remoteBranchExists,
17
+ } from './git.js'
18
+ import { FOLLOW_UPS_FILENAME, FollowUpTailer } from './follow-ups.js'
19
+ import type { PiRunStats } from './pi.js'
20
+ import {
21
+ acquireRepoCheckout,
22
+ agentNeverActed,
23
+ agentOutputTail,
24
+ runAgentInWorkspace,
25
+ } from './pi-workspace.js'
26
+ import type { ProgressGuardLimits } from './pi.js'
27
+ import type { RunOptions } from './runner.js'
28
+ import { log } from './logger.js'
29
+
30
+ // The shared skeleton for the container coding agents that clone a repo, run Pi
31
+ // against it and push the result on a branch. The implementation (`/run`) and
32
+ // CI-fixer (`/ci-fix`) agents are conceptually the same job — only what they clone
33
+ // onto and what they do with the outcome differ — so they share this whole flow
34
+ // rather than each re-deriving (and separately bug-fixing) it. Built on the thinner
35
+ // {@link withWorkspace}/{@link runAgentInWorkspace} base shared with the non-pushing
36
+ // agents (bootstrap/blueprint/merger). Mirrors their secret handling: the per-job
37
+ // GitHub + proxy tokens arrive in the spec and live only for the job's duration.
38
+
39
/**
 * Input to {@link runCodingAgent}: which repo/branch to start from, what to ask the
 * agent, and which branch receives the result. Extends {@link HarnessAuthFields}, so the
 * per-job auth fields travel in the same object.
 */
export interface CodingAgentSpec extends HarnessAuthFields {
  /** Short label (e.g. 'impl', 'ci-fix') used as the checkout prefix and bound into log lines. */
  kind: string
  /** Job id, carried for end-to-end tracing. */
  jobId: string
  repo: RepoSpec
  /** The branch that is cloned and checked out as the starting point. */
  cloneBranch: string
  /**
   * Fresh branch to create on top of the clone before the agent runs. When omitted the
   * agent works directly on `cloneBranch`.
   */
  newBranch?: string
  /** The branch the resulting work is pushed to. */
  pushBranch: string
  ghToken: string
  /** Composed role + best-practice fragments; written to Pi's global AGENTS.md context. */
  systemPrompt: string
  /** The concrete task prompt given to the agent. */
  userPrompt: string
  model: string
  /** Commit message used for any tracked edits the agent left uncommitted. */
  commitMessage: string
  /** Backend-composed, per-kind web-search guidance; only surfaced when web search is on. */
  webToolsGuidance?: string
  /** Turn on proxy-backed web search for this run (see {@link AgentRunSpec.webSearchProxy}). */
  webSearchProxy?: boolean
  /** Loosen-only overrides of individual progress-guard knobs, chosen per agent kind by the backend. */
  guardLimits?: Partial<ProgressGuardLimits>
  /**
   * Work in a stable per-repo checkout (clean-sweep + fetch + switch branch) rather than
   * cloning into a throwaway temp dir. Only the local warm-pool transport sets this (its
   * containers are reused across runs); it is absent everywhere else.
   */
  persistentCheckout?: boolean
  /**
   * Tail the Coder's follow-up sentinel file ({@link FOLLOW_UPS_FILENAME}) and stream the
   * forward-looking items it contains out on the job view (the Follow-up companion).
   * Set only for the implementer (`coder`) dispatch; when absent nothing is tailed
   * (e.g. the CI-fixer).
   */
  streamFollowUps?: boolean
}
79
+
80
/**
 * Result of {@link runCodingAgent}. Each caller maps this onto its own result shape.
 */
export interface CodingAgentOutcome {
  /**
   * True when the branch carries work and was therefore pushed — either this run added
   * commits, or it resumed a branch that already held prior work.
   */
  pushed: boolean
  /** True when the run continued on a work branch that already existed on the remote. */
  resumed: boolean
  summary: string
  stats: PiRunStats
  stderrTail?: string
  /** Token usage read from a subscription harness's CLI stream; absent for Pi. */
  usage?: { inputTokens: number; outputTokens: number }
}
92
+
93
/**
 * How often the harness checkpoints the agent's work mid-run by pushing the branch.
 * A per-run container can be evicted at any moment; pushing the agent's commits
 * periodically means an evicted run's work survives on the branch, so a retry
 * RESUMES on top of it instead of starting over.
 *
 * Overridable via `JOB_CHECKPOINT_INTERVAL_MS` (for tests). The override is floored to a
 * whole number of milliseconds and is only honoured when the floored value is a finite
 * integer >= 1; it is clamped to the largest delay a Node timer accepts. Anything else
 * (unset, empty, non-numeric, zero, negative, or a fraction below 1) falls back to the
 * default.
 *
 * @returns the checkpoint interval in milliseconds (default 60 000).
 */
function checkpointIntervalMs(): number {
  const defaultMs = 60_000
  // Node stores timer delays as a signed 32-bit integer; a larger delay is reset to 1 ms.
  const maxTimerMs = 2_147_483_647
  // Floor BEFORE validating: checking `n > 0` first would let e.g. 0.5 through and then
  // floor it to 0, which `setInterval` runs as a ~1 ms hot loop of checkpoint pushes.
  const requested = Math.floor(Number(process.env.JOB_CHECKPOINT_INTERVAL_MS))
  if (!Number.isFinite(requested) || requested < 1) return defaultMs
  return Math.min(requested, maxTimerMs)
}
103
+
104
/**
 * How often the harness tails the Coder's follow-up sentinel file to surface new items.
 * Short (a few seconds) so the Follow-up companion lights up promptly while the Coder is
 * still running.
 *
 * Overridable via `JOB_FOLLOWUP_POLL_INTERVAL_MS` (for tests). The override is floored to
 * a whole number of milliseconds and is only honoured when the floored value is a finite
 * integer >= 1; it is clamped to the largest delay a Node timer accepts. Anything else
 * falls back to the default.
 *
 * @returns the poll interval in milliseconds (default 3 000).
 */
function followUpPollIntervalMs(): number {
  const defaultMs = 3_000
  // Node stores timer delays as a signed 32-bit integer; a larger delay is reset to 1 ms.
  const maxTimerMs = 2_147_483_647
  // Floor BEFORE validating so a fractional override below 1 cannot become a 0 ms
  // interval (which would re-read the sentinel file in a tight loop).
  const requested = Math.floor(Number(process.env.JOB_FOLLOWUP_POLL_INTERVAL_MS))
  if (!Number.isFinite(requested) || requested < 1) return defaultMs
  return Math.min(requested, maxTimerMs)
}
113
+
114
/**
 * Clone (or RESUME an existing branch) → write context → run the agent → push the branch
 * iff it carries work. The agent commits its OWN work (it alone knows which files
 * belong vs scratch/artifacts it created), so the harness never blanket-stages:
 * {@link commitTrackedEdits} is only a safety net for forgotten edits to ALREADY
 * tracked files, and the run is judged a no-op only when the branch never advanced
 * past its pre-run tip ({@link branchHasCommitsSince}). The harness owns push + PR;
 * it checkpoints (pushes) periodically so an evicted run's commits survive and a
 * retry resumes on them.
 *
 * Both background timers (checkpoint push, follow-up tail) are cleared on every exit
 * path, including a failure while preparing the working directory.
 *
 * @param spec what to clone, what to run and where to push.
 * @param opts run options; this function reads `signal`, `log`, `onPhase` and
 *   `onFollowUp`, and forwards the whole object to {@link runAgentInWorkspace}.
 * @returns the run's summary/stats, whether it pushed, and whether it resumed; callers
 *   decide what to do after a push (open a PR, or nothing).
 * @throws whatever the clone / agent / commit / final push step throws. Checkpoint
 *   pushes and the resume base refresh are best-effort and never fail the run.
 */
export async function runCodingAgent(
  spec: CodingAgentSpec,
  opts: RunOptions = {},
): Promise<CodingAgentOutcome> {
  const { signal } = opts
  // The registry already binds jobId/repo/branch; add the coding kind + the push branch
  // (which differs from the cloned branch the registry bound).
  const logger = (opts.log ?? log).child({ kind: spec.kind, branch: spec.pushBranch })
  return acquireRepoCheckout(
    { persistent: spec.persistentCheckout === true, prefix: spec.kind, repo: spec.repo },
    async (dir) => {
      // Resume an evicted earlier run when its work branch already exists on the
      // remote: clone THAT branch and continue on its commits, rather than branching
      // off base and redoing everything. Only the impl path (which creates a fresh
      // `newBranch`) can resume; the ci-fix/conflict paths already clone the PR branch.
      //
      // Resume safety relies on two invariants the dispatcher (worker) upholds, since
      // the harness can't see run/PR state from inside the container:
      //  - At most ONE active run per block at a time. The work branch is deterministic
      //    per block (`cat-factory/<blockId>`), so two concurrent runs would target the
      //    same branch; their pushes race. A plain (non-forced) push fails safely on a
      //    non-fast-forward rather than clobbering the other run's commits, so the worst
      //    case is one run failing — never lost work — but the dispatcher should not
      //    knowingly run two at once.
      //  - Re-dispatch only NON-terminal runs (failed / evicted / stale-running), whose
      //    branch is by definition unmerged. Resuming a branch whose PR already merged
      //    could re-introduce merged work; that is avoided two ways: the platform deletes
      //    the work branch when its PR merges (GitHubPullRequestMerger), so a re-run finds
      //    no branch and starts fresh, and a `done` block is never re-dispatched anyway.
      const resumed =
        spec.newBranch != null &&
        (await remoteBranchExists(spec.repo.cloneUrl, spec.newBranch, spec.ghToken, signal))
      opts.onPhase?.('clone')
      if (spec.persistentCheckout) {
        // Reused checkout: clean-sweep + fetch + switch branch in place. A resumed branch
        // (or a run without `newBranch`, working directly on `cloneBranch`) already exists
        // on the remote, so check it out directly; otherwise (re)create `newBranch` off the
        // base tip — the same resume-vs-fresh decision the clone paths below make.
        const targetBranch = spec.newBranch ?? spec.cloneBranch
        logger.info('coding-agent: preparing reused checkout', { branch: targetBranch, resumed })
        await prepareExistingCheckout({
          dir,
          repo: spec.repo,
          ghToken: spec.ghToken,
          branch: targetBranch,
          baseBranch: spec.cloneBranch,
          existing: resumed || spec.newBranch == null,
          signal,
        })
      } else if (resumed) {
        logger.info('coding-agent: resuming existing branch', { branch: spec.newBranch })
        await cloneExistingBranch({
          cloneUrl: spec.repo.cloneUrl,
          // `resumed` is only true when `newBranch` is set (see its definition above).
          branch: spec.newBranch!,
          ghToken: spec.ghToken,
          dir,
          signal,
        })
      } else {
        logger.info('coding-agent: cloning', { cloneBranch: spec.cloneBranch })
        await cloneRepo({
          repo: { ...spec.repo, baseBranch: spec.cloneBranch },
          ghToken: spec.ghToken,
          dir,
          signal,
        })
        if (spec.newBranch) await createBranch(dir, spec.newBranch, signal)
      }

      // The branch tip before the agent runs this time. A FRESH run produced work iff
      // the branch advances past it; a RESUMED run already carries prior work, so it is
      // never a no-op regardless of what this pass adds. Captured BEFORE the resume base
      // refresh below so that refresh's merge commit counts as advancement and is pushed.
      const baseSha = await headCommit(dir, signal)

      // A resumed branch was cut from an OLDER base; merge the latest base in when the
      // two merge cleanly, so the agent works against current base and the PR stays
      // current. On a conflict this is a no-op (the run continues on the stale base — the
      // merge gate handles a conflicting PR downstream, as before), so it never blocks a
      // resume. Best-effort: any error is treated as "continue without refreshing".
      if (resumed) {
        const refreshed = await refreshFromBaseIfClean(
          dir,
          spec.cloneBranch,
          spec.ghToken,
          signal,
        ).catch(() => false)
        if (!refreshed) {
          logger.info('coding-agent: resume base refresh skipped (conflict or error)', {
            base: spec.cloneBranch,
          })
        }
      }

      // Serialize all pushes to the work branch through a single in-flight promise.
      // A checkpoint tick and the final push (or two slow checkpoint ticks) must never
      // run `git push` to the same branch concurrently: overlapping pushes race on the
      // remote ref and can make a push fail with a ref-lock / non-fast-forward error —
      // which, on the FINAL push, would fail the whole run even though the work is
      // committed. `pushWorkOnce` coalesces concurrent callers onto one push.
      //
      // Only push once the branch has advanced past its pre-run tip: pushing while it
      // still sits at `baseSha` would create the work branch at the base commit (a
      // zero-diff branch), which a later retry would see via `remoteBranchExists` and
      // treat as resumable work — then fail to open a PR ("no commits between base and
      // head"). So a run that never commits leaves NO branch behind, preserving the
      // clean no-op outcome.
      let pushInFlight: Promise<void> | null = null
      const pushWorkOnce = (): Promise<void> => {
        if (pushInFlight) return pushInFlight
        pushInFlight = (async () => {
          if (!(await branchHasCommitsSince(dir, baseSha, signal))) return
          await pushBranch(dir, spec.pushBranch, spec.ghToken, signal)
        })().finally(() => {
          pushInFlight = null
        })
        return pushInFlight
      }
      // Read the in-flight push, if any. A function (with an explicit return type) so the
      // value isn't subject to the caller's straight-line narrowing — `pushInFlight` is
      // only ever assigned inside closures, which flow analysis can't observe.
      const inFlightPush = (): Promise<void> | null => pushInFlight

      // Checkpoint the agent's committed work to the branch periodically so an eviction
      // mid-run doesn't lose it (a retry then resumes from the pushed commits). This only
      // PUSHES already-committed commits, so it never races the agent's staging.
      // Failures are surfaced at warn with a running count: one lost race is harmless,
      // but a steadily-climbing count means mid-run work is NOT being durably
      // checkpointed. Still best-effort: a failed checkpoint never fails the run.
      let checkpointFailures = 0
      const checkpoint = setInterval(() => {
        pushWorkOnce().catch((err) => {
          checkpointFailures++
          logger.warn('coding-agent: checkpoint push failed', {
            reason: err instanceof Error ? err.message : String(err),
            checkpointFailures,
          })
        })
      }, checkpointIntervalMs())
      checkpoint.unref?.()

      let followUpTick: ReturnType<typeof setInterval> | undefined
      let outcome: CodingAgentOutcome
      // Everything after the checkpoint timer starts runs inside this try, so the
      // `finally` clears the timers even when preparing the working directory
      // (`mkdir` / `excludeFromGit`) fails. Otherwise the interval would outlive the run
      // and keep attempting pushes against a checkout that has been released.
      try {
        // In a monorepo the service lives in a subdirectory: run the agent with its cwd
        // set to that subtree (git stays rooted at `dir` so commits/pushes still cover
        // the whole checkout). Created if missing so a coder scaffolding a brand-new
        // service into an existing monorepo has a cwd to start in.
        const serviceDirectory = spec.repo.serviceDirectory
        const workDir = serviceDirectory ? join(dir, serviceDirectory) : dir
        if (serviceDirectory) await mkdir(workDir, { recursive: true })

        // Follow-up companion: tail the Coder's sentinel file and stream new items out on
        // the job view. Locally exclude it from git first so the agent's own `git add`
        // can never stage it and it never surfaces as an untracked leftover or in the PR.
        // The sentinel lives in the agent's working directory (its cwd).
        const followUpTailer =
          spec.streamFollowUps && opts.onFollowUp
            ? new FollowUpTailer(join(workDir, FOLLOW_UPS_FILENAME), opts.onFollowUp, logger)
            : undefined
        if (followUpTailer) {
          await excludeFromGit(dir, FOLLOW_UPS_FILENAME, signal)
          followUpTick = setInterval(() => {
            void followUpTailer.poll()
          }, followUpPollIntervalMs())
          followUpTick.unref?.()
        }

        opts.onPhase?.('agent')
        logger.info('coding-agent: running agent', { serviceDirectory })
        const { summary, stats, stderrTail, usage } = await runAgentInWorkspace(
          {
            dir: workDir,
            systemPrompt: spec.systemPrompt,
            userPrompt: spec.userPrompt,
            model: spec.model,
            harness: spec.harness,
            subscriptionToken: spec.subscriptionToken,
            subscriptionBaseUrl: spec.subscriptionBaseUrl,
            ambientAuth: spec.ambientAuth,
            proxyBaseUrl: spec.proxyBaseUrl,
            sessionToken: spec.sessionToken,
            serviceDirectory,
            webToolsGuidance: spec.webToolsGuidance,
            webSearchProxy: spec.webSearchProxy,
            guardLimits: spec.guardLimits,
          },
          opts,
        )

        // Stop tailing the follow-up sentinel and flush any items written after the last
        // tick, so a fast final burst still reaches the job view before the run is recorded.
        if (followUpTick) clearInterval(followUpTick)
        if (followUpTailer) await followUpTailer.poll().catch(() => {})

        // Safety net for forgotten edits: commit changes to TRACKED files only (never
        // untracked scratch files/artifacts — the agent owns committing new files).
        await commitTrackedEdits(dir, spec.commitMessage, signal)

        // Stop periodic checkpoints and let any in-flight one settle BEFORE the final
        // push, so the two never run a concurrent `git push` to the same branch (the
        // final push below is then a fresh attempt whose failure is the real signal).
        clearInterval(checkpoint)
        const inflight = inFlightPush()
        if (inflight) await inflight.catch(() => {})

        // Surface (don't fail on) untracked, non-ignored files the agent left behind:
        // `commitTrackedEdits` only captures edits to ALREADY tracked files, so a NEW
        // file the agent created but forgot to commit is silently dropped. Logging it
        // makes that loss observable when a PR turns out to be missing a file.
        const leftover = await listUntrackedFiles(dir, signal)
        if (leftover.length > 0) {
          logger.warn('coding-agent: uncommitted new files left behind (not pushed)', {
            count: leftover.length,
            files: leftover.slice(0, 20),
          })
        }

        const hasWork = resumed || (await branchHasCommitsSince(dir, baseSha, signal))
        if (hasWork) {
          opts.onPhase?.('push')
          logger.info('coding-agent: pushing', { resumed, ...stats })
          await pushWorkOnce()
        } else {
          logger.info('coding-agent: no changes produced', { ...stats })
        }
        outcome = {
          pushed: hasWork,
          resumed,
          summary,
          stats,
          ...(stderrTail ? { stderrTail } : {}),
          ...(usage ? { usage } : {}),
        }
      } finally {
        // Clears both timers on every exit path (the happy path already cleared them
        // above; clearing twice is harmless).
        clearInterval(checkpoint)
        if (followUpTick) clearInterval(followUpTick)
      }
      return outcome
    },
  )
}
379
+
380
/**
 * Builds the "no changes" reason both coding agents report.
 *
 * The result is `lead`, then — when {@link agentNeverActed} says the agent never acted —
 * a parenthetical naming the likely cause, then a full stop, then whatever
 * {@link agentOutputTail} renders for the stderr tail.
 *
 * @param lead caller-supplied opening phrase.
 * @param stats the run's stats, used only to decide whether the agent ever acted.
 * @param stderrTail tail of the agent's stderr, if any; passed to {@link agentOutputTail}.
 */
export function noChangesReason(
  lead: string,
  stats: PiRunStats,
  stderrTail: string | undefined,
): string {
  let headline = lead
  if (agentNeverActed(stats)) {
    headline += ' (the agent never acted — it most likely could not reach the model)'
  }
  return `${headline}.${agentOutputTail(stderrTail)}`
}
package/src/embed.ts ADDED
@@ -0,0 +1,32 @@
1
+ // Embeddable surface of the executor harness: the Pi-driving and git helpers
2
+ // the container payload uses, re-exported so other packages (e.g. the benchmark
3
+ // harness) can run the *same* coding-agent flow outside the container — clone a
4
+ // repo, write the agent context, point Pi at an OpenAI-compatible endpoint, run
5
+ // it, and inspect what changed. The HTTP server / job lifecycle stays internal;
6
+ // only the reusable primitives are exposed here.
7
+
8
+ export {
9
+ PI_MAX_OUTPUT_TOKENS,
10
+ DEFAULT_PROGRESS_GUARD_LIMITS,
11
+ writePiModelsConfig,
12
+ writeAgentsContext,
13
+ runPi,
14
+ summarizePiRun,
15
+ parsePiOutput,
16
+ parseTodoProgress,
17
+ progressGuardLimitsFromEnv,
18
+ terminalRunError,
19
+ type PiRunOutcome,
20
+ type PiRunStats,
21
+ type ProgressGuardLimits,
22
+ type TodoItem,
23
+ type TodoProgress,
24
+ } from './pi.js'
25
+ export {
26
+ cloneRepo,
27
+ createBranch,
28
+ changedPathsFromPorcelain,
29
+ hasAgentChanges,
30
+ redactSecrets,
31
+ } from './git.js'
32
+ export type { RepoSpec } from './job.js'
package/src/failure.ts ADDED
@@ -0,0 +1,73 @@
1
+ // Single source of truth for how a job FAILS: the canonical failure-cause vocabulary plus
2
+ // the watchdog abort-message builders.
3
+ //
4
+ // WHY THIS MODULE EXISTS — the backend classifies a failed job by REGEX-matching the
5
+ // harness's free-text `error` string (it has no other signal today):
6
+ // - server `ContainerRepoBootstrapper.classifyBootstrapFailure`:
7
+ // /inactivity|no agent activity|max duration/i → 'timeout', else → 'agent'
8
+ // - orchestration `job.logic.isContainerEvictionError`: /evicted or crashed/i (FACADE-owned,
9
+ // NOT emitted here — the harness must keep NOT emitting that phrase for a non-eviction)
10
+ // Because those phrases are matched downstream, their wording MUST stay stable. Centralizing
11
+ // the builders here keeps the emitted text from drifting away from the regex that reads it.
12
+ // Alongside the strings we now also emit a STRUCTURED {@link FailureCause} on the job view so
13
+ // the backend can prefer it and treat the regex as a backward-compatible fallback.
14
+
15
/**
 * Structured reason a harness job failed; surfaced on the job view as `failureCause`.
 *
 * Only HARNESS-owned failures appear here. Container eviction is detected by the runtime
 * facade (a vanished container → `(container evicted or crashed)`) and is never set by
 * the harness.
 */
export type FailureCause =
  /** The inactivity watchdog fired (no agent output for the window). */
  | 'inactivity-timeout'
  /** The overall wall-clock cap fired. */
  | 'max-duration'
  /** The agent ran but produced an unusable/failed result, or threw. */
  | 'agent'
  /** A git operation failed (clone/push/merge/PR). */
  | 'git'
  /** An upstream API call failed (e.g. the GitHub/GitLab PR/MR REST call). */
  | 'api'
  /** The agent finished but returned no usable report / structured output. */
  | 'no-usable-output'
  /** A coding agent finished without producing any change to push. */
  | 'no-changes'
36
+
37
/**
 * An `Error` that also carries a structured {@link FailureCause}.
 *
 * Lets a `git` / `api` operation that fails deep inside a helper report its real cause,
 * instead of being flattened to the generic `agent` in the registry's catch. Watchdog
 * kills derive their cause from `killReason` and never throw this; any other error
 * thrown without a cause stays `agent`.
 */
export class HarnessFailure extends Error {
  /**
   * @param failureCause the structured cause, exposed as a readonly property.
   * @param message the human-readable error message.
   */
  constructor(
    readonly failureCause: FailureCause,
    message: string,
  ) {
    super(message)
    this.name = 'HarnessFailure'
  }
}
51
+
52
/**
 * Reads the structured cause off a thrown value.
 *
 * @returns the {@link FailureCause} when `err` is a {@link HarnessFailure}; `undefined`
 *   for anything else (a plain error, a non-error value, `null`, ...).
 */
export function failureCauseOf(err: unknown): FailureCause | undefined {
  if (err instanceof HarnessFailure) return err.failureCause
  return undefined
}
56
+
57
/**
 * Builds the PREFIX of the inactivity-watchdog abort message.
 *
 * The backend's `classifyBootstrapFailure` regex-matches the phrase `no agent activity`
 * (→ `timeout`), so the wording must not change. The caller appends a
 * `(likely hung ...)` diagnostic clause (phase + last tool) after this prefix, which is
 * why it stops before any parenthetical (see the drive catch in `runner.ts`).
 *
 * @param inactivityMs the inactivity window in milliseconds; reported rounded to whole seconds.
 */
export function inactivityAbortMessage(inactivityMs: number): string {
  const seconds = Math.round(inactivityMs / 1000)
  return `Aborted: no agent activity for ${seconds}s`
}
66
+
67
/**
 * Builds the max-duration-watchdog abort message.
 *
 * The backend's `classifyBootstrapFailure` regex-matches the phrase `max duration`
 * (→ `timeout`), so the wording must not change.
 *
 * @param maxDurationMs the wall-clock cap in milliseconds; reported rounded to whole seconds.
 */
export function maxDurationAbortMessage(maxDurationMs: number): string {
  const seconds = Math.round(maxDurationMs / 1000)
  return `Aborted: exceeded max duration of ${seconds}s`
}
package/src/follow-ups.ts ADDED
@@ -0,0 +1,106 @@
1
+ import { readFile } from 'node:fs/promises'
2
+ import { log, type Logger } from './logger.js'
3
+
4
+ // The Coder's forward-looking side channel. As the implementer works it appends one
5
+ // JSON line per item to a sentinel file in its working directory; the harness tails
6
+ // that file and streams the new items OUT on the job view (drain-on-read), so the
7
+ // backend lifts them onto the run's step and the "Follow-up companion" lights up
8
+ // while the container is still running. This is the OUT-bound half only — there is no
9
+ // in-bound path back into a running container (an answer reaches the Coder via a
10
+ // backend-driven re-run, not by resuming the live process).
11
+
12
/**
 * Name of the sentinel file the Coder appends follow-up items to. It is resolved
 * relative to the Coder's working directory.
 */
export const FOLLOW_UPS_FILENAME = '.cat-follow-ups.jsonl'
14
+
15
/**
 * A single item the Coder surfaced through the follow-up sentinel file.
 * Mirrors the backend's `streamedFollowUpSchema`.
 */
export interface FollowUpLine {
  /** Either a forward-looking follow-up or a question. */
  kind: 'follow_up' | 'question'
  title: string
  detail: string
  suggestedAction?: string
}
22
+
23
/**
 * Turns one parsed JSON value into a {@link FollowUpLine}.
 *
 * Rules:
 *  - anything that is not a non-null object, or has no non-blank string `title`, is rejected;
 *  - `title` is trimmed and capped at 300 characters;
 *  - `kind` is `'question'` only when given exactly that, otherwise `'follow_up'`;
 *  - a non-string `detail` becomes `''` (a string `detail` is kept as-is, untrimmed);
 *  - `suggestedAction` is included (trimmed) only when it is a non-blank string.
 *
 * @returns the coerced item, or `null` when the value is unusable.
 */
function coerceLine(value: unknown): FollowUpLine | null {
  if (value === null || typeof value !== 'object') return null
  const record = value as Record<string, unknown>

  const rawTitle = record.title
  const title = typeof rawTitle === 'string' ? rawTitle.trim() : ''
  if (title === '') return null

  const item: FollowUpLine = {
    kind: record.kind === 'question' ? 'question' : 'follow_up',
    title: title.slice(0, 300),
    detail: typeof record.detail === 'string' ? record.detail : '',
  }

  const rawAction = record.suggestedAction
  if (typeof rawAction === 'string') {
    const action = rawAction.trim()
    if (action) item.suggestedAction = action
  }
  return item
}
42
+
43
/**
 * Tails an append-only JSONL sentinel file, yielding only the NEW complete lines on each
 * {@link poll}. Tracks how many characters have been consumed so a partially-written
 * trailing line (no newline yet) is held back until it completes. Tolerant: a malformed
 * line is skipped, a missing file yields nothing, and a failing `onItems` callback is
 * logged rather than propagated — surfacing follow-ups must never disturb the coding run.
 */
export class FollowUpTailer {
  /** Characters of the file already processed (always ends just past a newline). */
  private consumed = 0
  /** Running count of complete-but-unusable lines, so silent drops become visible. */
  private skipped = 0

  /**
   * @param filePath the sentinel file to tail.
   * @param onItems receives each non-empty batch of newly surfaced items.
   * @param logger where skips, surfaced counts and callback failures are reported.
   */
  constructor(
    private readonly filePath: string,
    private readonly onItems: (items: FollowUpLine[]) => void,
    private readonly logger: Logger = log,
  ) {}

  /**
   * Read any new complete lines and emit the coerced items. Best-effort: a read failure
   * or a throwing `onItems` callback never rejects the returned promise, so this is safe
   * to call fire-and-forget from a timer.
   */
  async poll(): Promise<void> {
    let content: string
    try {
      content = await readFile(this.filePath, 'utf8')
    } catch {
      // Not created yet (or vanished): nothing to surface.
      return
    }
    // Everything below is synchronous and `consumed` is only read AFTER the await above,
    // so two overlapping polls cannot emit the same lines twice.
    if (content.length <= this.consumed) return
    const fresh = content.slice(this.consumed)
    // Only consume up to the last newline; hold any trailing partial line for next poll.
    const lastNewline = fresh.lastIndexOf('\n')
    if (lastNewline === -1) return
    this.consumed += lastNewline + 1

    const items: FollowUpLine[] = []
    let skippedThisPoll = 0
    for (const raw of fresh.slice(0, lastNewline).split('\n')) {
      const line = raw.trim()
      if (!line) continue
      try {
        const coerced = coerceLine(JSON.parse(line))
        if (coerced) items.push(coerced)
        else skippedThisPoll++
      } catch {
        // A complete (newline-terminated) line that is not valid JSON. `consumed` has
        // already advanced past it, so it is NOT retried on a later poll.
        skippedThisPoll++
      }
    }

    if (skippedThisPoll > 0) {
      // A complete line that didn't yield an item is dropped for good (consumed past it).
      // Surface it at warn with a running total rather than swallowing it silently — a
      // steadily-growing count points at a malformed-emitter bug, not a transient race.
      this.skipped += skippedThisPoll
      this.logger.warn('follow-ups: skipped malformed lines', {
        skipped: skippedThisPoll,
        skippedTotal: this.skipped,
      })
    }

    if (items.length > 0) {
      this.logger.info('follow-ups: surfaced items', { count: items.length })
      try {
        this.onItems(items)
      } catch (err) {
        // Honour the "never throws" contract: a rejected fire-and-forget poll would
        // otherwise become an unhandled promise rejection in the harness process.
        this.logger.warn('follow-ups: item callback failed', {
          count: items.length,
          reason: err instanceof Error ? err.message : String(err),
        })
      }
    }
  }
}