@cat-factory/executor-harness 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +143 -0
  3. package/dist/agent-runner.js +389 -0
  4. package/dist/agent.js +810 -0
  5. package/dist/blueprint.js +367 -0
  6. package/dist/bootstrap.js +99 -0
  7. package/dist/ci-fixer.js +46 -0
  8. package/dist/coding-agent.js +285 -0
  9. package/dist/conflict-resolver.js +138 -0
  10. package/dist/embed.js +8 -0
  11. package/dist/explore.js +74 -0
  12. package/dist/failure.js +47 -0
  13. package/dist/fixer.js +44 -0
  14. package/dist/follow-ups.js +103 -0
  15. package/dist/frontend-infra.js +283 -0
  16. package/dist/fs-utils.js +11 -0
  17. package/dist/git.js +778 -0
  18. package/dist/job.js +409 -0
  19. package/dist/logger.js +27 -0
  20. package/dist/merger.js +135 -0
  21. package/dist/on-call.js +126 -0
  22. package/dist/pi-workspace.js +237 -0
  23. package/dist/pi.js +971 -0
  24. package/dist/process.js +25 -0
  25. package/dist/redact.js +109 -0
  26. package/dist/runner.js +228 -0
  27. package/dist/server.js +135 -0
  28. package/dist/spec.js +754 -0
  29. package/dist/structured-output.js +431 -0
  30. package/dist/tester.js +191 -0
  31. package/package.json +35 -0
  32. package/src/agent-runner.ts +484 -0
  33. package/src/agent.ts +948 -0
  34. package/src/coding-agent.ts +393 -0
  35. package/src/embed.ts +32 -0
  36. package/src/failure.ts +73 -0
  37. package/src/follow-ups.ts +106 -0
  38. package/src/frontend-infra.ts +340 -0
  39. package/src/fs-utils.ts +11 -0
  40. package/src/git.ts +955 -0
  41. package/src/job.ts +766 -0
  42. package/src/logger.ts +45 -0
  43. package/src/pi-workspace.ts +348 -0
  44. package/src/pi.ts +1236 -0
  45. package/src/process.ts +33 -0
  46. package/src/redact.ts +109 -0
  47. package/src/runner.ts +384 -0
  48. package/src/server.ts +153 -0
  49. package/src/structured-output.ts +524 -0
package/src/git.ts ADDED
@@ -0,0 +1,955 @@
1
import { execFile } from 'node:child_process'
import { appendFile, chmod, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { promisify } from 'node:util'
import type { BootstrapTargetSpec, PrSpec, RepoSpec } from './job.js'
import { pathExists } from './fs-utils.js'
import { redactSecrets } from './redact.js'
import { loadRunnerLimits } from './runner.js'
import { HarnessFailure } from './failure.js'
11
+
12
+ // Re-exported so existing importers that pull `redactSecrets` from this module keep
13
+ // working; the single source of truth now lives in ./redact.js.
14
+ export { redactSecrets } from './redact.js'
15
+
16
+ const exec = promisify(execFile)
17
+
18
+ // Git + GitHub helpers. The installation token is NEVER placed in a clone/remote
19
+ // URL or in any git argv. Instead git authenticates over HTTPS via a GIT_ASKPASS
20
+ // helper: the plain `https://x-access-token@host/...` remote (username only, no
21
+ // secret) is used everywhere, and the token is handed to git out-of-band through
22
+ // an environment variable the helper reads. That keeps the token out of process
23
+ // listings and out of any command string Node echoes into an error/cmd field.
24
+
25
// Commit identity written into every checkout the harness prepares.
const GIT_AUTHOR = 'cat-factory[bot]'
const GIT_EMAIL = 'cat-factory[bot]@users.noreply.github.com'

// Wall-clock ceiling for ONE git command (passed as `timeout` to every spawn in
// `git()` below). The job-level watchdog (see runner.ts) is the outer bound; this
// ceiling exists so a single wedged clone/push is stopped first.
//
// INVARIANT: keep this STRICTLY BELOW the inactivity watchdog
// (`RunnerLimits.inactivityMs`). Git emits no Pi activity events while it runs, so a
// slow git op races both timers; if git does not lose that race the job fails with the
// misleading "no agent activity … likely hung" instead of an accurate "git timed out".
//
// The ceiling is DERIVED from the configured window rather than hardcoded against the
// default one: a fixed margin below it, with a floor so the result is never
// non-positive. A 10-min window yields 7 min; a 5-min window yields 2 min.
//
// NOTE(review): for a window at or below the floor (<= 60 s) the floor wins and the
// ceiling is no longer below the window — confirm such small windows are never configured.
const GIT_TIMEOUT_MARGIN_MS = 3 * 60_000
const GIT_TIMEOUT_FLOOR_MS = 60_000
const GIT_TIMEOUT_MS = Math.max(
  loadRunnerLimits().inactivityMs - GIT_TIMEOUT_MARGIN_MS,
  GIT_TIMEOUT_FLOOR_MS,
)
50
+
51
/**
 * Produce a fresh `Error` whose message (and stack, when the input has one) has been
 * passed through `redactSecrets`. Non-Error values are stringified first.
 */
function redactError(err: unknown): Error {
  if (!(err instanceof Error)) {
    return new Error(redactSecrets(String(err)))
  }
  const scrubbed = new Error(redactSecrets(err.message))
  if (err.stack) {
    scrubbed.stack = redactSecrets(err.stack)
  }
  return scrubbed
}
60
+
61
/**
 * Build the remote URL git uses. Only the username (`x-access-token`) is embedded
 * — never the token — so the token never appears in argv. The token is supplied
 * separately via {@link authEnv} and read by the GIT_ASKPASS helper.
 *
 * The `x-access-token` username is host-neutral: GitHub keys auth off the token (password)
 * and ignores the username, and GitLab likewise accepts ANY non-blank username with a PAT as
 * the password — so the same embedded username authenticates github.com and gitlab.com alike.
 *
 * Any userinfo already present in an `https://` URL is REPLACED rather than prefixed, so
 * the function is idempotent (`f(f(url)) === f(url)`) and an input such as
 * `https://user:secret@host/...` can neither produce a malformed double-`@` authority nor
 * carry an embedded credential into git's argv.
 *
 * @param cloneUrl The repository clone URL.
 * @returns The URL with `x-access-token@` as its only userinfo; non-`https://` URLs
 *   (e.g. `file://`) are returned unchanged.
 */
export function authenticatedCloneUrl(cloneUrl: string): string {
  // https://github.com/owner/name.git → https://x-access-token@github.com/...
  // The optional group consumes an existing `user[:pass]@`; it cannot run past the
  // authority because `/`, `?` and `#` are excluded from it.
  return cloneUrl.replace(/^https:\/\/(?:[^@/?#]*@)?/, 'https://x-access-token@')
}
75
+
76
/**
 * Strip a leading `user[:pass]@` from a URL's authority so two clone URLs can be
 * compared by repository alone. URLs without userinfo are returned unchanged.
 */
function withoutUserinfo(url: string): string {
  const leadingUserinfo = /^([a-z]+:\/\/)[^@/]*@/i
  // Keep only the captured `scheme://` prefix; the userinfo and its `@` are dropped.
  return url.replace(leadingUserinfo, (_whole: string, scheme: string) => scheme)
}
80
+
81
// A tiny askpass helper that prints the token git asks for. Created once per
// process and reused; the token itself is passed per-command via the env (below),
// never baked into the script.
let askpassPathPromise: Promise<string> | undefined

/**
 * Path of the shared GIT_ASKPASS script, creating it on first use.
 *
 * A successful creation is cached for the lifetime of the process. A FAILED creation is
 * not: the cache is cleared when the attempt rejects, so a transient error (e.g. the temp
 * directory being briefly unavailable) fails only the current call instead of poisoning
 * every later git command with the same cached rejection.
 *
 * @returns The absolute path of the executable askpass script.
 * @throws Whatever the underlying filesystem call rejected with.
 */
function ensureAskpass(): Promise<string> {
  if (askpassPathPromise) return askpassPathPromise
  const pending = writeAskpassScript()
  askpassPathPromise = pending
  // Forget a failed attempt so the next caller retries. The caller still observes the
  // rejection through `pending`; this derived promise only performs the cache reset.
  pending.catch(() => {
    if (askpassPathPromise === pending) askpassPathPromise = undefined
  })
  return pending
}

/** Write the askpass script into a fresh private temp directory and make it executable. */
async function writeAskpassScript(): Promise<string> {
  const dir = await mkdtemp(join(tmpdir(), 'git-askpass-'))
  const path = join(dir, 'askpass.sh')
  // git invokes this with the prompt as argv[1]; we only ever return the token
  // (the username is already in the remote URL, so git only asks for the
  // password). The token comes from the env, never from argv.
  await writeFile(path, '#!/bin/sh\nexec printf %s "$GIT_ASKPASS_TOKEN"\n', 'utf8')
  await chmod(path, 0o700)
  return path
}
98
+
99
/**
 * Environment for a git child process that must authenticate with `ghToken`: a copy of
 * the current process env plus the askpass wiring, so the token travels through the
 * environment and never through argv.
 */
async function authEnv(ghToken: string): Promise<NodeJS.ProcessEnv> {
  const env: NodeJS.ProcessEnv = { ...process.env }
  env.GIT_ASKPASS = await ensureAskpass()
  env.GIT_ASKPASS_TOKEN = ghToken
  // Disable interactive prompting: a prompt with nobody to answer would hang the job.
  env.GIT_TERMINAL_PROMPT = '0'
  return env
}
109
+
110
/**
 * Spawn a single `git` command and resolve with its stdout.
 *
 * The process is bounded twice: by the per-command `GIT_TIMEOUT_MS` ceiling and, when
 * supplied, by the caller's `signal`. `cwd` and `env` are forwarded only when provided.
 *
 * @throws HarnessFailure tagged `'git'`, carrying the message and stack of the
 *   underlying error after both have been scrubbed by `redactError`.
 */
async function git(
  args: string[],
  opts: { cwd?: string; signal?: AbortSignal; env?: NodeJS.ProcessEnv } = {},
): Promise<string> {
  const { cwd, env, signal } = opts
  try {
    const { stdout: output } = await exec('git', args, {
      maxBuffer: 16 * 1024 * 1024,
      timeout: GIT_TIMEOUT_MS,
      ...(cwd ? { cwd } : {}),
      ...(env ? { env } : {}),
      ...(signal ? { signal } : {}),
    })
    return output
  } catch (cause) {
    // Tag the failure as `git` so the registry's catch records the real cause instead of
    // the generic `agent`. A watchdog abort still wins: `describeFailure` keys off
    // `killReason` first, so an abort during a git op keeps the timeout message/cause.
    const scrubbed = redactError(cause)
    const failure = new HarnessFailure('git', scrubbed.message)
    if (scrubbed.stack) {
      failure.stack = scrubbed.stack
    }
    throw failure
  }
}
139
+
140
/**
 * Clone the base branch of `repo` into `dir`, then set the bot's commit identity in the
 * new checkout. The clone is shallow (`--depth 1`) unless `full` is set.
 */
export async function cloneRepo(opts: {
  repo: RepoSpec
  ghToken: string
  dir: string
  signal?: AbortSignal
  /**
   * Full history + all remote-tracking branches. A shallow single-branch clone is
   * enough to implement on one branch, but merging ANOTHER branch in (the
   * conflict-resolver) needs the merge base in history and `origin/<other>` present
   * — so `full` omits `--depth 1` (which would also imply `--single-branch`).
   */
  full?: boolean
}): Promise<void> {
  const { repo, ghToken, dir, signal, full } = opts
  const remote = authenticatedCloneUrl(repo.cloneUrl)
  const depthArgs = full ? [] : ['--depth', '1']
  await git(['clone', ...depthArgs, '--branch', repo.baseBranch, remote, dir], {
    signal,
    env: await authEnv(ghToken),
  })
  // Identity is local to this checkout; neither command needs credentials.
  const identity = [
    ['user.name', GIT_AUTHOR],
    ['user.email', GIT_EMAIL],
  ] as const
  for (const [key, value] of identity) {
    await git(['config', key, value], { cwd: dir, signal })
  }
}
162
+
163
/** Create `branch` in the checkout at `dir` and switch to it (`git checkout -b`). */
export async function createBranch(
  dir: string,
  branch: string,
  signal?: AbortSignal,
): Promise<void> {
  const checkoutArgs = ['checkout', '-b', branch]
  await git(checkoutArgs, { cwd: dir, signal })
}
171
+
172
/**
 * Whether `branch` already exists on the remote — i.e. an earlier (possibly
 * evicted) run of this task already pushed work to it, so a re-dispatch should
 * RESUME on it (clone it, continue on its commits) rather than branch off base and
 * start over. Uses `git ls-remote` (no checkout); the token is supplied out of band.
 *
 * The lookup is EXACT. `git ls-remote` treats each pattern as a glob matched against
 * the tail of a ref, so a bare `foo` also matches `refs/heads/team/foo`, and glob
 * characters in the name match other refs. We therefore query the fully-qualified ref
 * and additionally require a returned line to name precisely that ref, so a differently
 * named branch can never be mistaken for the work branch.
 *
 * @param cloneUrl Repository clone URL (credentials are added out of band).
 * @param branch Branch name, with or without a leading `refs/heads/`.
 * @param ghToken Token handed to git through the askpass environment.
 * @param signal Optional abort signal for the git process.
 * @returns `true` only when the remote advertises exactly `refs/heads/<branch>`.
 * @throws HarnessFailure when the `ls-remote` call itself fails.
 */
export async function remoteBranchExists(
  cloneUrl: string,
  branch: string,
  ghToken: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const url = authenticatedCloneUrl(cloneUrl)
  const ref = branch.startsWith('refs/heads/') ? branch : `refs/heads/${branch}`
  const out = await git(['ls-remote', '--heads', url, ref], {
    signal,
    env: await authEnv(ghToken),
  })
  // Each output line is `<sha>\t<refname>`; accept only an exact refname match.
  return out.split('\n').some((line) => {
    const [, name] = line.replace(/\r$/, '').split('\t')
    return name === ref
  })
}
191
+
192
/**
 * Clone an already-pushed work branch into `dir` (single branch, full depth) and set
 * the bot's commit identity — the resume path, so the agent continues on top of the
 * commits an earlier run pushed instead of redoing them.
 */
export async function cloneExistingBranch(opts: {
  cloneUrl: string
  branch: string
  ghToken: string
  dir: string
  signal?: AbortSignal
}): Promise<void> {
  const { cloneUrl, branch, ghToken, dir, signal } = opts
  const remote = authenticatedCloneUrl(cloneUrl)
  const env = await authEnv(ghToken)
  await git(['clone', '--branch', branch, '--single-branch', remote, dir], { signal, env })
  const local = { cwd: dir, signal }
  await git(['config', 'user.name', GIT_AUTHOR], local)
  await git(['config', 'user.email', GIT_EMAIL], local)
}
212
+
213
/**
 * Names the clean sweep must NOT delete — dependency caches that are costly to rebuild.
 * Read from the comma-separated `HARNESS_CLEAN_KEEP` variable when it is set; otherwise
 * a built-in default list is used. Entries are trimmed and blank entries are dropped.
 *
 * @param env Environment to read from (defaults to `process.env`).
 * @returns The keep patterns, in the order they were listed.
 */
export function cleanKeepPatterns(env: NodeJS.ProcessEnv = process.env): string[] {
  const configured = env.HARNESS_CLEAN_KEEP ?? 'node_modules,.venv,target,.gradle,.pnpm-store'
  const patterns: string[] = []
  for (const entry of configured.split(',')) {
    const name = entry.trim()
    if (name !== '') patterns.push(name)
  }
  return patterns
}
226
+
227
/**
 * Return a REUSED checkout to a pristine tree before the next job runs in it.
 *
 * Steps, in order: hard-reset tracked files; when `.gitmodules` exists, hard-reset every
 * submodule (best-effort); `git clean` all untracked and ignored files except the kept
 * dependency caches (see {@link cleanKeepPatterns}); when `.gitmodules` exists,
 * re-initialise/update the submodules (best-effort, authenticated).
 *
 * With submodules a single `-f` is used so `git clean` leaves nested repositories (the
 * submodule worktrees) alone; without them `-ff` also removes stray nested repos.
 *
 * @param dir The checkout to clean.
 * @param ghToken Token used for the authenticated submodule update.
 * @param signal Optional abort signal for the git processes.
 * @param env Environment the keep patterns are read from.
 */
export async function cleanSweep(
  dir: string,
  ghToken: string,
  signal?: AbortSignal,
  env: NodeJS.ProcessEnv = process.env,
): Promise<void> {
  const local = { cwd: dir, signal }
  const ignoreFailure = (): void => {}

  await git(['reset', '--hard'], local)

  const hasSubmodules = await pathExists(join(dir, '.gitmodules'))
  if (hasSubmodules) {
    await git(['submodule', 'foreach', '--recursive', 'git reset --hard'], local).catch(
      ignoreFailure,
    )
  }

  // One `-e <pattern>` pair per kept cache; tracked files were already reset above.
  const excludes: string[] = []
  for (const pattern of cleanKeepPatterns(env)) {
    excludes.push('-e', pattern)
  }
  const sweepFlags = hasSubmodules ? '-fdx' : '-ffdx'
  await git(['clean', sweepFlags, ...excludes], local)

  if (hasSubmodules) {
    // The auth env is resolved outside the swallowed promise, so a failure to build it
    // still propagates; only the submodule update itself is best-effort.
    const auth = await authEnv(ghToken)
    await git(['submodule', 'update', '--init', '--recursive'], { ...local, env: auth }).catch(
      ignoreFailure,
    )
  }
}
264
+
265
/**
 * The URL stored for the `origin` remote of the checkout at `dir`, trimmed, or
 * `undefined` when git cannot report one (not a repository, no `origin`) or reports an
 * empty value. Callers use it to notice a persistent directory that holds a different
 * repository than the one about to be prepared.
 */
export async function checkoutRemoteUrl(
  dir: string,
  signal?: AbortSignal,
): Promise<string | undefined> {
  let reported: string
  try {
    reported = await git(['remote', 'get-url', 'origin'], { cwd: dir, signal })
  } catch {
    return undefined
  }
  const url = reported.trim()
  return url === '' ? undefined : url
}
282
+
283
/**
 * Ready a persistent (reused) checkout at `dir` so the agent starts from a clean tree on
 * `branch` — the persistent-checkout counterpart of {@link cloneRepo} and
 * {@link cloneExistingBranch}.
 *
 * When `dir` has no `.git`, or its `origin` is missing or points at a different
 * repository, the directory is removed and cloned afresh with full history. In every
 * case the checkout is then: clean-swept → `origin` re-pointed → fetched → switched to
 * `branch` → given the bot identity. With `existing` true, `branch` itself is fetched and
 * checked out; otherwise `branch` is (re)created from the tip of `baseBranch`.
 */
export async function prepareExistingCheckout(opts: {
  dir: string
  repo: RepoSpec
  ghToken: string
  /** The branch to end up checked out on. */
  branch: string
  /** Base branch to (re)create `branch` off when `existing` is false; also fetched for history. */
  baseBranch: string
  /** Whether `branch` already exists on the remote (resume / base) — checkout it directly. */
  existing: boolean
  signal?: AbortSignal
}): Promise<void> {
  const { dir, repo, ghToken, branch, baseBranch, existing, signal } = opts
  const originUrl = authenticatedCloneUrl(repo.cloneUrl)
  const local = { cwd: dir, signal }

  // Reuse the directory only when it is a checkout of THIS repository (compared without
  // userinfo); anything else is wiped and cloned fresh with full history.
  let recordedOrigin: string | undefined
  if (await pathExists(join(dir, '.git'))) {
    recordedOrigin = await checkoutRemoteUrl(dir, signal)
  }
  const holdsThisRepo = recordedOrigin
    ? withoutUserinfo(recordedOrigin) === withoutUserinfo(originUrl)
    : false
  if (!holdsThisRepo) {
    await rm(dir, { recursive: true, force: true })
    await cloneRepo({ repo: { ...repo, baseBranch }, ghToken, dir, full: true, signal })
  }

  const env = await authEnv(ghToken)
  await cleanSweep(dir, ghToken, signal)
  // Idempotent re-point in case the stored URL drifted; the URL carries no secret.
  await git(['remote', 'set-url', 'origin', originUrl], local)

  // Fetch the branch we will check out AND the base in ONE command, each into its own
  // tracking ref. The checkout reads `refs/remotes/origin/<sourceBranch>` rather than
  // FETCH_HEAD, which only holds the last fetched ref and would otherwise let a base
  // fetch reset a resumed branch to the base tip. The fetch is not best-effort: a
  // failure propagates instead of leaving a stale base ref behind.
  const sourceBranch = existing ? branch : baseBranch
  const trackingRef = (name: string): string => `refs/remotes/origin/${name}`
  const wanted = baseBranch !== sourceBranch ? [sourceBranch, baseBranch] : [sourceBranch]
  const refspecs = wanted.map((name) => `+${name}:${trackingRef(name)}`)
  await git(['fetch', 'origin', ...refspecs], { ...local, env })

  // `-f`: the sweep keeps dependency caches as untracked files; if one collides with a
  // path the target branch tracks, an unforced checkout would abort.
  await git(['checkout', '-f', '-B', branch, trackingRef(sourceBranch)], local)
  await git(['config', 'user.name', GIT_AUTHOR], local)
  await git(['config', 'user.email', GIT_EMAIL], local)
}
345
+
346
/**
 * Safety-net commit for edits the agent left uncommitted, limited to files git already
 * tracks (`git add -u`). New untracked files are never staged here — choosing which new
 * files belong in the change is left to the agent.
 *
 * @returns `true` when a commit was created, `false` when nothing tracked was staged.
 */
export async function commitTrackedEdits(
  dir: string,
  message: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const local = { cwd: dir, signal }
  await git(['add', '-u'], local)
  // The staged name list is empty exactly when there is nothing tracked to commit.
  const stagedNames = (await git(['diff', '--cached', '--name-only'], local)).trim()
  if (stagedNames === '') {
    return false
  }
  await git(['commit', '-m', message], local)
  return true
}
366
+
367
/**
 * The untracked, non-ignored files left in the working tree (`git ls-files --others
 * --exclude-standard`). The harness deliberately never blanket-stages new files (the
 * agent owns commit selection), so this is exactly what {@link commitTrackedEdits}
 * does NOT capture — a NEW file the agent created but forgot to commit. The caller
 * surfaces it as a warning so that silent loss is at least observable in the logs.
 *
 * Paths are read NUL-separated (`-z`), which makes git emit them verbatim. The
 * newline-separated form C-quotes names containing special or non-ASCII characters
 * (octal escapes inside double quotes), and trimming each line would alter names with
 * leading or trailing whitespace; `-z` output has neither problem.
 *
 * @param dir The checkout to inspect.
 * @param signal Optional abort signal for the git process.
 * @returns The untracked paths, relative to `dir`, exactly as git stores them.
 * @throws HarnessFailure when the git command fails.
 */
export async function listUntrackedFiles(dir: string, signal?: AbortSignal): Promise<string[]> {
  const out = await git(['ls-files', '--others', '--exclude-standard', '-z'], {
    cwd: dir,
    signal,
  })
  // Every entry is NUL-terminated, so the final split element is always empty.
  return out.split('\0').filter((path) => path !== '')
}
381
+
382
/**
 * Locally exclude `pattern` from this checkout via `.git/info/exclude` — a per-clone
 * ignore that never lands in the repo (unlike a `.gitignore`). Used for the harness's
 * follow-up sentinel file so the agent's own `git add` can never stage it and it never
 * surfaces as an untracked-leftover warning or in the PR. Best-effort: a failure here
 * just means the sentinel might show as untracked (logged, not pushed), never fatal.
 *
 * Idempotent: when the exclude file already contains `pattern` on a line of its own,
 * nothing is written. A persistent checkout is reused across jobs and its `.git`
 * directory survives the clean sweep, so appending unconditionally would grow the file
 * by one duplicate entry per run.
 *
 * @param dir The checkout whose local exclude list is updated.
 * @param pattern The ignore pattern to add.
 * @param signal Accepted for signature parity with the other helpers; unused because
 *   no git process is spawned here.
 */
export async function excludeFromGit(
  dir: string,
  pattern: string,
  signal?: AbortSignal,
): Promise<void> {
  void signal
  try {
    const excludePath = join(dir, '.git', 'info', 'exclude')
    let existing = ''
    try {
      existing = await readFile(excludePath, 'utf8')
    } catch {
      // No readable exclude file yet: treat it as empty; appendFile creates it below.
    }
    const alreadyListed = existing
      .split('\n')
      .some((line) => line.replace(/\r$/, '') === pattern)
    if (alreadyListed) return
    await appendFile(excludePath, `\n${pattern}\n`, 'utf8')
  } catch {
    // A missing .git/info directory (worktree layout) or write error is non-fatal.
  }
}
402
+
403
/**
 * Whether HEAD in `dir` is no longer `baseSha` — i.e. at least one commit (the agent's
 * own or the safety-net commit) moved the branch. The comparison is plain string
 * inequality against the SHA {@link headCommit} reports.
 */
export async function branchHasCommitsSince(
  dir: string,
  baseSha: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const tip = await headCommit(dir, signal)
  return tip !== baseSha
}
411
+
412
/**
 * Whether the checked-out branch has an inspectable diff against `origin/<baseBranch>`:
 * `git diff --stat origin/<baseBranch>...HEAD` must succeed AND print something. The
 * merger relies on this to refuse to score a PR it could not actually examine (missing
 * base ref, or nothing changed). ANY git error yields `false`. Needs a checkout where
 * `origin/<baseBranch>` and the merge base exist (e.g. {@link cloneRepo} with `full: true`).
 */
export async function hasDiffAgainstBase(
  dir: string,
  baseBranch: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const range = `origin/${baseBranch}...HEAD`
  let summary: string
  try {
    summary = await git(['diff', '--stat', range], { cwd: dir, signal })
  } catch {
    return false
  }
  return summary.trim() !== ''
}
433
+
434
/**
 * Parse the paths out of `git status --porcelain` (v1) output. Each line is
 * `XY <path>`, or `XY <old> -> <new>` for a rename/copy (we keep the new path).
 * Blank lines are skipped. Pure so the no-op detection can be tested without
 * spawning git.
 *
 * Git C-quotes a path containing special characters: it is wrapped in double quotes
 * and special or non-ASCII bytes are written as backslash escapes. Such a path is fully
 * decoded here (quotes removed AND escapes resolved, octal byte escapes being read as
 * UTF-8), so the returned value is the real path rather than its escaped spelling. The
 * rename separator is looked for only OUTSIDE a quoted path, so a quoted name that
 * itself contains ` -> ` is not split.
 *
 * @param status Raw porcelain v1 output (LF or CRLF line endings).
 * @returns One path per non-blank status line, in input order.
 */
export function changedPathsFromPorcelain(status: string): string[] {
  const paths: string[] = []
  for (const raw of status.split('\n')) {
    const line = raw.replace(/\r$/, '')
    if (line.trim() === '') continue
    // Columns 0-1 are the XY status codes and column 2 a space; the rest is the path field.
    const path = porcelainUnquote(porcelainRenameTarget(line.slice(3).trim()))
    if (path) paths.push(path)
  }
  return paths
}

/** The `<new>` part of an `<old> -> <new>` field, or the field itself when it is not a rename. */
function porcelainRenameTarget(field: string): string {
  const arrow = ' -> '
  let arrowAt: number
  if (field.startsWith('"')) {
    // Quoted old path: the arrow only counts when it directly follows the closing quote.
    const close = porcelainClosingQuote(field)
    if (close === -1 || !field.startsWith(arrow, close + 1)) return field
    arrowAt = close + 1
  } else {
    arrowAt = field.indexOf(arrow)
    if (arrowAt === -1) return field
  }
  return field.slice(arrowAt + arrow.length).trim()
}

/** Index of the quote closing a field that starts with `"` (escaped characters skipped), or -1. */
function porcelainClosingQuote(field: string): number {
  for (let i = 1; i < field.length; i++) {
    const ch = field.charAt(i)
    if (ch === '\\') i++
    else if (ch === '"') return i
  }
  return -1
}

/** Decode a C-quoted path (`"..."` with backslash escapes); an unquoted field is returned as is. */
function porcelainUnquote(field: string): string {
  if (field.length < 2 || !field.startsWith('"') || !field.endsWith('"')) return field
  const inner = field.slice(1, -1)
  if (!inner.includes('\\')) return inner

  const named: Record<string, number> = {
    a: 0x07,
    b: 0x08,
    t: 0x09,
    n: 0x0a,
    v: 0x0b,
    f: 0x0c,
    r: 0x0d,
    '"': 0x22,
    '\\': 0x5c,
  }
  const threeOctalDigits = /^[0-7]{3}$/
  // Escapes denote raw BYTES, so the path is assembled as bytes and decoded once at the end.
  const chunks: Buffer[] = []
  let i = 0
  while (i < inner.length) {
    if (inner.charAt(i) !== '\\') {
      const next = inner.indexOf('\\', i)
      const end = next === -1 ? inner.length : next
      chunks.push(Buffer.from(inner.slice(i, end), 'utf8'))
      i = end
      continue
    }
    const octal = inner.slice(i + 1, i + 4)
    if (threeOctalDigits.test(octal)) {
      chunks.push(Buffer.from([parseInt(octal, 8) & 0xff]))
      i += 4
      continue
    }
    const code = named[inner.charAt(i + 1)]
    if (code !== undefined) {
      chunks.push(Buffer.from([code]))
      i += 2
      continue
    }
    // Unknown or dangling escape: keep the backslash literally.
    chunks.push(Buffer.from('\\', 'utf8'))
    i += 1
  }
  return Buffer.concat(chunks).toString('utf8')
}
453
+
454
/**
 * Whether the checkout at `dir` differs from its HEAD commit. Everything is staged
 * (`git add -A`) and the porcelain status is inspected; no reported path means the
 * agent changed nothing — a no-op that must not be passed off as a successful push.
 * (The harness writes its prompt context to Pi's global `~/.pi/agent/AGENTS.md`, never
 * into the checkout, so every change reported here is a genuine agent edit.)
 */
export async function hasAgentChanges(dir: string, signal?: AbortSignal): Promise<boolean> {
  const local = { cwd: dir, signal }
  await git(['add', '-A'], local)
  const porcelain = await git(['status', '--porcelain'], local)
  const changed = changedPathsFromPorcelain(porcelain)
  return changed.length > 0
}
466
+
467
/** SHA of the commit HEAD points at in `dir` (`git rev-parse HEAD`, trimmed). */
export async function headCommit(dir: string, signal?: AbortSignal): Promise<string> {
  const sha = await git(['rev-parse', 'HEAD'], { cwd: dir, signal })
  return sha.trim()
}
471
+
472
/**
 * Stage the whole working tree (`git add -A`) and commit it with `message`.
 *
 * @returns `false` when the porcelain status is empty after staging (nothing to
 *   commit), `true` once the commit has been created.
 */
export async function commitAll(
  dir: string,
  message: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const local = { cwd: dir, signal }
  await git(['add', '-A'], local)
  const porcelain = await git(['status', '--porcelain'], local)
  const nothingToCommit = porcelain.trim() === ''
  if (nothingToCommit) {
    return false
  }
  await git(['commit', '-m', message], local)
  return true
}
484
+
485
/**
 * Paths git still reports as unmerged (conflict stage entries) in the working tree.
 *
 * Read NUL-separated (`-z`) so every path comes back verbatim. In the newline form git
 * C-quotes names with special or non-ASCII characters; stripping only the surrounding
 * quotes leaves the escapes in place, producing a path that does not exist. Because
 * these paths are passed back to git as pathspecs (see {@link conflictDiff}), a mangled
 * name would silently match nothing and hide that file's conflict hunks.
 *
 * @param dir The checkout to inspect.
 * @param signal Optional abort signal for the git process.
 * @returns The unmerged paths, relative to `dir`; empty when there are none.
 * @throws HarnessFailure when the git command fails.
 */
export async function unmergedPaths(dir: string, signal?: AbortSignal): Promise<string[]> {
  const out = await git(['diff', '--name-only', '--diff-filter=U', '-z'], { cwd: dir, signal })
  // Entries are NUL-terminated, so the trailing split element is empty and is dropped.
  return out.split('\0').filter((path) => path !== '')
}
498
+
499
/**
 * The `git diff` output restricted to the given unmerged `paths` — for an unmerged
 * entry that is the combined diff carrying the `<<<<<<<` / `=======` / `>>>>>>>`
 * markers. It is handed to the conflict-resolver agent so it sees the conflicts
 * directly. Output longer than `maxChars` is cut at that length and a truncation note
 * is appended, so a huge conflict cannot blow up the prompt.
 *
 * @returns `''` when `paths` is empty (git is not invoked), otherwise the diff text.
 */
export async function conflictDiff(
  dir: string,
  paths: string[],
  signal?: AbortSignal,
  maxChars = 24_000,
): Promise<string> {
  if (paths.length === 0) {
    return ''
  }
  const diff = await git(['diff', '--', ...paths], { cwd: dir, signal })
  if (diff.length > maxChars) {
    const head = diff.slice(0, maxChars)
    return `${head}\n\n[diff truncated at ${maxChars} characters — open the files directly to see the remaining conflicts]`
  }
  return diff
}
518
+
519
/**
 * Merge `origin/<baseBranch>` into the current branch with `--no-edit` (so no editor is
 * ever opened).
 *
 * @returns `true` when git reports success (a clean merge or an already-up-to-date
 *   no-op); `false` when the merge failed AND left unmerged paths behind — the expected
 *   conflict case the conflict-resolver agent then handles, not an error.
 * @throws The original git failure when the merge failed without leaving unmerged
 *   paths (e.g. an unknown ref). Requires a checkout where the merge base and
 *   `origin/<baseBranch>` exist (e.g. {@link cloneRepo} with `full: true`).
 */
export async function mergeBranch(
  dir: string,
  baseBranch: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const upstream = `origin/${baseBranch}`
  try {
    await git(['merge', '--no-edit', upstream], { cwd: dir, signal })
  } catch (mergeError) {
    // Conflicts leave unmerged paths; a genuine failure leaves none and is re-thrown.
    const conflicted = await unmergedPaths(dir, signal)
    if (conflicted.length === 0) {
      throw mergeError
    }
    return false
  }
  return true
}
542
+
543
/**
 * Bring a RESUMED work branch up to date with `baseBranch`, but only when the two merge
 * cleanly. The base is fetched first (authenticated), then `FETCH_HEAD` is merged with
 * `--no-edit`. If the merge leaves unmerged paths it is aborted (best-effort) so the
 * branch stays exactly where it was and the run continues on its older base.
 *
 * @returns `true` when the base was merged in; `false` when a conflict was detected and
 *   the merge was aborted.
 * @throws When the fetch fails, or when the merge fails without leaving unmerged paths.
 *   Per the original contract callers treat a throw like `false`: continue unrefreshed.
 */
export async function refreshFromBaseIfClean(
  dir: string,
  baseBranch: string,
  ghToken: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const local = { cwd: dir, signal }
  const env = await authEnv(ghToken)
  await git(['fetch', 'origin', baseBranch], { ...local, env })
  try {
    await git(['merge', '--no-edit', 'FETCH_HEAD'], local)
  } catch (mergeError) {
    const conflicted = await unmergedPaths(dir, signal)
    if (conflicted.length === 0) {
      throw mergeError
    }
    // Conflict — undo the half-done merge and keep the branch on its old base.
    await git(['merge', '--abort'], local).catch(() => {})
    return false
  }
  return true
}
572
+
573
/**
 * Push `branch` to `origin` and set it as the upstream (`-u`). The stored remote URL
 * holds only a username, so the token is provided through the askpass environment and
 * never appears in argv.
 */
export async function pushBranch(
  dir: string,
  branch: string,
  ghToken: string,
  signal?: AbortSignal,
): Promise<void> {
  const env = await authEnv(ghToken)
  const pushArgs = ['push', '-u', 'origin', branch]
  await git(pushArgs, { cwd: dir, signal, env })
}
589
+
590
/**
 * Replace the working tree's git history with one bootstrap commit and force-push it to
 * the target repository's default branch. `.git` is deleted before `git init`, so the
 * new repository inherits the bootstrapped *contents* of the reference architecture but
 * none of its commit history.
 *
 * The push is forced because the fresh single-commit history shares no ancestor with
 * whatever GitHub pre-populated when the repository was created (README, .gitignore,
 * license), making a fast-forward impossible. The Worker pre-flights that the target is
 * empty or holds only that boilerplate, so overwriting it is safe and intended.
 */
export async function reinitAndPush(opts: {
  dir: string
  target: BootstrapTargetSpec
  ghToken: string
  message: string
}): Promise<void> {
  const { dir, target, ghToken, message } = opts
  await rm(join(dir, '.git'), { recursive: true, force: true })

  // Local, unauthenticated steps — executed strictly in this order.
  const localSteps: string[][] = [
    ['init'],
    // Start the history on the target's default branch (init may default to master).
    ['checkout', '-b', target.defaultBranch],
    ['config', 'user.name', GIT_AUTHOR],
    ['config', 'user.email', GIT_EMAIL],
    ['add', '-A'],
    ['commit', '-m', message],
    ['remote', 'add', 'origin', authenticatedCloneUrl(target.cloneUrl)],
  ]
  for (const step of localSteps) {
    await git(step, { cwd: dir })
  }

  await git(['push', '--force', '-u', 'origin', target.defaultBranch], {
    cwd: dir,
    env: await authEnv(ghToken),
  })
}
623
+
624
/** Everything {@link openPullRequest} needs to open a PR (GitHub) or merge request (GitLab). */
export interface OpenPullRequestOptions {
  /** Repository owner; used in the GitHub REST path and in the `owner:head` lookup filter. */
  owner: string
  /** Repository name; used in the GitHub REST path. */
  name: string
  /** Access token: sent as a GitHub bearer token, or as GitLab's `PRIVATE-TOKEN`. */
  ghToken: string
  /** Source branch (GitLab `source_branch`). */
  head: string
  /** Target branch (GitLab `target_branch`). */
  base: string
  /** Title and body of the PR/MR. */
  pr: PrSpec
  /** GitHub REST base override; defaults to `https://api.github.com`. */
  apiBase?: string
  /**
   * Clone URL of the repository. When {@link provider} is not given it is what the provider
   * is detected from, and for GitLab its host also yields the REST base and project path, so
   * the harness opens a GitLab **merge request** instead of POSTing to GitHub's pulls API.
   * Absent ⇒ GitHub.
   */
  cloneUrl?: string
  /**
   * VCS provider, when the dispatcher knows it (the server derives it from the configured
   * source-control backend and sets `repo.provider`). AUTHORITATIVE: it wins over host
   * inference, so a self-managed GitLab on an arbitrarily named host (e.g. `git.acme.com`,
   * which {@link inferVcsProvider} cannot recognise) still gets a merge request rather than
   * being misrouted to GitHub's API. Absent ⇒ inferred from {@link cloneUrl}'s host.
   */
  provider?: 'github' | 'gitlab'
  /** Abort signal forwarded to the HTTP calls and to the waits between retries. */
  signal?: AbortSignal
}
648
+
649
/**
 * Guess which VCS host a clone URL belongs to.
 *
 * Git auth in the harness is host-neutral (a GIT_ASKPASS credential), but opening the PR/MR
 * is a provider-specific REST call, so the endpoint family has to be chosen from the host.
 * `gitlab.com`, any host starting with `gitlab.`, and any host containing `.gitlab.` (which
 * covers self-managed instances named that way) count as GitLab. Everything else, including
 * a string that does not parse as a URL, is treated as GitHub.
 *
 * @param cloneUrl - The repository clone URL.
 * @returns `'gitlab'` for a GitLab-looking host, otherwise `'github'`.
 */
export function inferVcsProvider(cloneUrl: string): 'github' | 'gitlab' {
  let parsed: URL
  try {
    parsed = new URL(cloneUrl)
  } catch {
    // Not parseable as a URL: fall back to the default provider.
    return 'github'
  }
  const host = parsed.host.toLowerCase()
  const looksLikeGitLab =
    host === 'gitlab.com' || host.startsWith('gitlab.') || host.includes('.gitlab.')
  return looksLikeGitLab ? 'gitlab' : 'github'
}
668
+
669
/**
 * Build the GitLab REST v4 base URL for the host a clone URL points at,
 * e.g. `https://gitlab.com/group/proj.git` → `https://gitlab.com/api/v4`.
 *
 * @param cloneUrl - The repository clone URL; must parse as a URL (otherwise `URL` throws).
 * @returns The scheme and host (with any port) of `cloneUrl` followed by `/api/v4`.
 */
export function gitlabApiBaseFromCloneUrl(cloneUrl: string): string {
  const { protocol, host } = new URL(cloneUrl)
  return [protocol, '//', host, '/api/v4'].join('')
}
674
+
675
/**
 * The URL-encoded GitLab project path from a clone URL: the full namespace path (so
 * subgroups survive) with surrounding slashes and the trailing `.git` removed, e.g.
 * `https://gitlab.com/group/sub/proj.git` → `group%2Fsub%2Fproj`.
 *
 * Trailing slashes are stripped before the `.git` suffix, so `…/proj.git/` and `…/proj/`
 * yield the same id as `…/proj.git`. Without that, the suffix would survive and a trailing
 * `%2F` would be encoded into the id.
 *
 * @param cloneUrl - The repository clone URL; must parse as a URL (otherwise `URL` throws).
 * @returns The project path encoded for use as the `:id` segment of a GitLab REST URL.
 */
export function gitlabProjectPath(cloneUrl: string): string {
  const namespacePath = new URL(cloneUrl).pathname
    .replace(/^\/+|\/+$/g, '')
    .replace(/\.git$/, '')
  return encodeURIComponent(namespacePath)
}
684
+
685
/**
 * Turn an abort signal into the Error to throw: the signal's own `reason` when that is an
 * Error (the watchdog aborts with one), otherwise a generic `Error('aborted')`.
 */
function abortError(signal: AbortSignal): Error {
  const { reason } = signal
  if (reason instanceof Error) return reason
  return new Error('aborted')
}
689
+
690
/**
 * True when `err` is an Error whose `name` is `'AbortError'`, i.e. a caller-initiated
 * cancellation of a fetch, which must never be retried.
 */
function isAbortError(err: unknown): boolean {
  if (!(err instanceof Error)) return false
  return err.name === 'AbortError'
}
694
+
695
/**
 * Read a response's `Retry-After` header as a wait in milliseconds, capped at
 * `MAX_RETRY_AFTER_MS` so a large value cannot stall the job.
 *
 * Both forms the spec allows are understood: a number of delay-seconds (`120`) and an
 * HTTP-date (`Wed, 21 Oct 2026 07:28:00 GMT`), the latter converted to a delay from now.
 *
 * @param res - The HTTP response whose headers are inspected (its body is not touched).
 * @returns The capped wait in ms, or `undefined` when the header is missing, unparseable,
 *   zero/negative, or a date in the past, so the caller falls back to exponential backoff.
 */
function retryAfterMs(res: Response): number | undefined {
  const header = res.headers.get('retry-after')
  if (!header) return undefined

  // A positive delay is capped; anything else means "no usable hint".
  const bounded = (delayMs: number): number | undefined =>
    delayMs > 0 ? Math.min(delayMs, MAX_RETRY_AFTER_MS) : undefined

  const seconds = Number(header)
  if (Number.isFinite(seconds)) return bounded(seconds * 1000)

  const when = Date.parse(header)
  return Number.isNaN(when) ? undefined : bounded(when - Date.now())
}
713
+
714
/**
 * Wait for `ms` milliseconds unless `signal` aborts first.
 *
 * @param ms - How long to sleep.
 * @param signal - Optional abort signal. If it is already aborted, or aborts during the
 *   wait, the promise rejects right away with the abort reason (see `abortError`).
 * @returns A promise that resolves once the delay has elapsed.
 */
function abortableDelay(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    if (signal?.aborted) {
      reject(abortError(signal))
      return
    }

    // Abort path: stop the pending timer, then surface the abort reason.
    function handleAbort(): void {
      clearTimeout(handle)
      reject(abortError(signal as AbortSignal))
    }

    // Normal path: detach the abort listener so it does not linger on the signal.
    const handle = setTimeout(function elapsed(): void {
      signal?.removeEventListener('abort', handleAbort)
      resolve()
    }, ms)

    signal?.addEventListener('abort', handleAbort, { once: true })
  })
}
729
+
730
/** Cap (ms) applied to a server-supplied `Retry-After` wait. */
const MAX_RETRY_AFTER_MS = 8_000
/** First-retry delay (ms) that the exponential backoff doubles from. */
const RETRY_BASE_MS = 500
/** Ceiling (ms) of the exponential backoff delay, before jitter is added. */
const RETRY_MAX_DELAY_MS = 4_000
733
+
734
/**
 * Perform one HTTP request, retrying a bounded number of times when the failure is
 * TRANSIENT, so that a brief upstream hiccup (a 5xx, a 429 rate-limit, a dropped connection)
 * does not sink an otherwise finished run on its final step (opening the PR/MR).
 *
 * By default there are at most 3 attempts (2 retries). Between attempts it waits for the
 * response's `Retry-After` hint when one is usable, otherwise for exponential backoff with
 * jitter. Every wait is abort-aware, so the inactivity/max-duration watchdog still cancels
 * promptly.
 *
 * What retries: a `>=500` or `429` response, or a network-level rejection of `fn`.
 * What does not: any other status (including the 422/409 "already exists" answers the
 * callers treat as success), which is handed back untouched; and an abort, which is rethrown
 * at once. The body of the RETURNED response is never read here, so callers' status handling
 * is unaffected.
 *
 * @param fn - Issues the request; called once per attempt.
 * @param opts.signal - Abort signal checked before each attempt and during each wait.
 * @param opts.attempts - Maximum number of attempts (default 3).
 * @returns The first non-transient response, or the last response once attempts run out.
 * @throws The abort reason/error when aborted; `HarnessFailure('api', …)` when the final
 *   attempt failed at the network level (no HTTP response at all).
 */
async function withApiRetry(
  fn: () => Promise<Response>,
  opts: { signal?: AbortSignal; attempts?: number } = {},
): Promise<Response> {
  const { signal } = opts
  const limit = opts.attempts ?? 3
  let failure: unknown

  for (let attempt = 1; attempt <= limit; attempt += 1) {
    if (signal?.aborted) throw abortError(signal)
    const isFinal = attempt >= limit

    let response: Response | undefined
    try {
      response = await fn()
    } catch (err) {
      // A caller/watchdog abort is terminal; a network error is transient → retry.
      if (isAbortError(err) || signal?.aborted) throw err
      failure = err
    }

    if (!response) {
      // Network-level rejection: give up after the final attempt, else back off and retry.
      if (isFinal) break
      await abortableDelay(backoffMs(attempt), signal)
      continue
    }

    const retryable = response.status === 429 || response.status >= 500
    if (isFinal || !retryable) return response

    const serverHint = retryAfterMs(response)
    // Discard the unread body before retrying so the connection can be reused.
    await response.body?.cancel().catch(() => {})
    await abortableDelay(serverHint ?? backoffMs(attempt), signal)
  }

  // Exhausted on a network-level rejection (no HTTP response): an upstream API failure.
  const message = failure instanceof Error ? failure.message : 'API request failed after retries'
  throw new HarnessFailure('api', redactSecrets(message))
}
779
+
780
/**
 * Delay before the retry that follows attempt number `attempt` (1-based): `RETRY_BASE_MS`
 * doubled per attempt, capped at `RETRY_MAX_DELAY_MS`, plus up to 25% positive jitter.
 *
 * @param attempt - The attempt that just failed (1 for the first).
 * @returns The delay in ms, always at least the capped exponential value.
 */
function backoffMs(attempt: number): number {
  const exponential = RETRY_BASE_MS * Math.pow(2, attempt - 1)
  const capped = Math.min(RETRY_MAX_DELAY_MS, exponential)
  const jitter = Math.floor(capped * 0.25 * Math.random())
  return capped + jitter
}
785
+
786
/**
 * Open a pull request (GitHub) or a merge request (GitLab) for the already-pushed branch.
 *
 * Provider selection: an EXPLICIT `opts.provider` from the dispatcher wins; only without one
 * is the provider inferred from the clone URL's host (GitHub when there is no clone URL). A
 * self-managed GitLab whose host is not named `gitlab.*` therefore still gets a merge
 * request instead of being misrouted to GitHub's API.
 *
 * GitHub path: POSTs to `{apiBase}/repos/{owner}/{name}/pulls` via `withApiRetry`. A 422
 * "A pull request already exists" (a resumed run pushing to a branch that already has an
 * open PR) counts as success when the existing PR can be found.
 *
 * @param opts - See {@link OpenPullRequestOptions}.
 * @returns The web URL of the created (or already existing) PR/MR.
 * @throws `Error` when GitLab is selected but no clone URL was given; `HarnessFailure('api')`
 *   on a non-OK response or a response without a PR url.
 */
export async function openPullRequest(opts: OpenPullRequestOptions): Promise<string> {
  const { cloneUrl, signal } = opts
  const inferred = (): 'github' | 'gitlab' => (cloneUrl ? inferVcsProvider(cloneUrl) : 'github')
  const provider = opts.provider ?? inferred()

  if (provider === 'gitlab') {
    if (!cloneUrl) {
      throw new Error('Cannot open a GitLab merge request without the repo clone URL')
    }
    return openGitLabMergeRequest({ ...opts, cloneUrl })
  }

  const apiRoot = opts.apiBase ?? 'https://api.github.com'
  const repoPath = `${encodeURIComponent(opts.owner)}/${encodeURIComponent(opts.name)}`
  const endpoint = `${apiRoot}/repos/${repoPath}/pulls`
  const headers = {
    authorization: `Bearer ${opts.ghToken}`,
    accept: 'application/vnd.github+json',
    'user-agent': 'cat-factory-executor',
    'x-github-api-version': '2022-11-28',
    'content-type': 'application/json',
  }

  const createRequest = (): Promise<Response> =>
    fetch(endpoint, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        title: opts.pr.title,
        head: opts.head,
        base: opts.base,
        body: opts.pr.body,
      }),
      // Bound on the watchdog so a hung GitHub call can't stall the job.
      ...(signal ? { signal } : {}),
    })

  const res = await withApiRetry(createRequest, { signal })

  if (res.ok) {
    const created = (await res.json()) as { html_url?: string }
    if (!created.html_url) throw new HarnessFailure('api', 'GitHub did not return a PR url')
    return created.html_url
  }

  const detail = await res.text().catch(() => '')
  // A resumed run pushes to a branch that already has an open PR; GitHub answers
  // 422 "A pull request already exists". That's success for us: hand back the
  // existing PR's url rather than failing the resumed run.
  const alreadyOpen = res.status === 422 && /pull request already exists/i.test(detail)
  if (alreadyOpen) {
    const existing = await findOpenPullRequestUrl(opts)
    if (existing) return existing
  }
  throw new HarnessFailure(
    'api',
    redactSecrets(`Failed to open PR (HTTP ${res.status}): ${detail.slice(0, 300)}`),
  )
}
843
+
844
/**
 * Request headers for a GitLab REST call authenticated with a personal access token
 * (GitLab's `PRIVATE-TOKEN` header), sending and accepting JSON.
 *
 * @param token - The access token to send as `private-token`.
 */
function gitlabHeaders(token: string): Record<string, string> {
  const headers: Record<string, string> = { 'private-token': token }
  headers.accept = 'application/json'
  headers['user-agent'] = 'cat-factory-executor'
  headers['content-type'] = 'application/json'
  return headers
}
853
+
854
/**
 * GitLab counterpart of {@link openPullRequest}: open a merge request for the pushed branch.
 *
 * Both the REST base and the project path come from the clone URL, so gitlab.com and a
 * self-managed instance are handled the same way. `head` is sent as `source_branch` and
 * `base` as `target_branch`. When GitLab reports a duplicate (a resumed run whose branch
 * already has an open MR; status 409, sometimes 400), the existing MR's web URL is returned
 * instead of failing the run, mirroring the GitHub 422 handling.
 *
 * @param opts - The PR options, with `cloneUrl` guaranteed present.
 * @returns The web URL of the created (or already existing) merge request.
 * @throws `HarnessFailure('api')` on a non-OK response or a response without a `web_url`.
 */
async function openGitLabMergeRequest(
  opts: OpenPullRequestOptions & { cloneUrl: string },
): Promise<string> {
  const { signal } = opts
  const apiBase = gitlabApiBaseFromCloneUrl(opts.cloneUrl)
  const project = gitlabProjectPath(opts.cloneUrl)
  const endpoint = `${apiBase}/projects/${project}/merge_requests`

  const createRequest = (): Promise<Response> =>
    fetch(endpoint, {
      method: 'POST',
      headers: gitlabHeaders(opts.ghToken),
      body: JSON.stringify({
        source_branch: opts.head,
        target_branch: opts.base,
        title: opts.pr.title,
        description: opts.pr.body,
      }),
      ...(signal ? { signal } : {}),
    })

  const res = await withApiRetry(createRequest, { signal })

  if (res.ok) {
    const created = (await res.json()) as { web_url?: string }
    if (!created.web_url) {
      throw new HarnessFailure('api', 'GitLab did not return a merge request url')
    }
    return created.web_url
  }

  const detail = await res.text().catch(() => '')
  // GitLab returns 409 (sometimes 400) when an open MR already exists for this source
  // branch; that is success for a resumed run, so return the existing MR's url.
  const duplicateStatus = res.status === 409 || res.status === 400
  if (duplicateStatus && /already exists|open merge request/i.test(detail)) {
    const existing = await findOpenMergeRequestUrl(apiBase, project, opts)
    if (existing) return existing
  }
  throw new HarnessFailure(
    'api',
    redactSecrets(`Failed to open merge request (HTTP ${res.status}): ${detail.slice(0, 300)}`),
  )
}
901
+
902
/**
 * Find the open GitLab MR for `opts.head`→`opts.base` and return its `web_url`.
 *
 * The lookup goes through `withApiRetry`, like the create call it follows, so a transient
 * 5xx/429 or dropped connection here does not fail a resumed run whose MR already exists.
 *
 * @param apiBase - GitLab REST v4 base, e.g. `https://gitlab.com/api/v4`.
 * @param project - URL-encoded project path used as the `:id` path segment.
 * @param opts - Branches to match, the access token, and an optional abort signal.
 * @returns The first matching MR's `web_url`, or `undefined` when the response is not OK,
 *   is not a JSON array, or lists no MR with a url.
 */
async function findOpenMergeRequestUrl(
  apiBase: string,
  project: string,
  opts: { head: string; base: string; ghToken: string; signal?: AbortSignal },
): Promise<string | undefined> {
  // Filter by BOTH branches: a source branch can have open MRs to several targets, so the
  // source alone could match an MR against a different base than the one we just tried to open.
  const query = new URLSearchParams({
    source_branch: opts.head,
    target_branch: opts.base,
    state: 'opened',
  })
  const res = await withApiRetry(
    () =>
      fetch(`${apiBase}/projects/${project}/merge_requests?${query}`, {
        headers: gitlabHeaders(opts.ghToken),
        ...(opts.signal ? { signal: opts.signal } : {}),
      }),
    { signal: opts.signal },
  )
  if (!res.ok) return undefined
  // A malformed body is treated as "nothing found" rather than an error.
  const list = (await res.json().catch(() => [])) as Array<{ web_url?: string }>
  if (!Array.isArray(list)) return undefined
  const url = list[0]?.web_url
  return url ? url : undefined
}
923
+
924
/**
 * Find the open GitHub PR for `opts.head` on `opts.base` and return its `html_url`.
 *
 * The lookup goes through `withApiRetry`, like the create call it follows, so a transient
 * 5xx/429 or dropped connection here does not fail a resumed run whose PR already exists.
 *
 * @param opts - Repository coordinates, branches to match, the token, an optional REST base
 *   override (defaults to `https://api.github.com`) and an optional abort signal.
 * @returns The first matching PR's `html_url`, or `undefined` when the response is not OK,
 *   is not a JSON array, or lists no PR with a url.
 */
async function findOpenPullRequestUrl(opts: {
  owner: string
  name: string
  ghToken: string
  head: string
  base: string
  apiBase?: string
  signal?: AbortSignal
}): Promise<string | undefined> {
  const apiBase = opts.apiBase ?? 'https://api.github.com'
  // Encode the ref-derived query params: a branch/owner containing `&` or `#` would
  // otherwise split the query string or inject an unintended parameter.
  const query = new URLSearchParams({
    head: `${opts.owner}:${opts.head}`,
    base: opts.base,
    state: 'open',
  })
  const path = `${encodeURIComponent(opts.owner)}/${encodeURIComponent(opts.name)}`
  const res = await withApiRetry(
    () =>
      fetch(`${apiBase}/repos/${path}/pulls?${query}`, {
        headers: {
          authorization: `Bearer ${opts.ghToken}`,
          accept: 'application/vnd.github+json',
          'user-agent': 'cat-factory-executor',
          'x-github-api-version': '2022-11-28',
        },
        ...(opts.signal ? { signal: opts.signal } : {}),
      }),
    { signal: opts.signal },
  )
  if (!res.ok) return undefined
  // A malformed body is treated as "nothing found" rather than an error.
  const list = (await res.json().catch(() => [])) as Array<{ html_url?: string }>
  if (!Array.isArray(list)) return undefined
  const url = list[0]?.html_url
  return url ? url : undefined
}