@cat-factory/executor-harness 1.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +143 -0
- package/dist/agent-runner.js +389 -0
- package/dist/agent.js +810 -0
- package/dist/blueprint.js +367 -0
- package/dist/bootstrap.js +99 -0
- package/dist/ci-fixer.js +46 -0
- package/dist/coding-agent.js +285 -0
- package/dist/conflict-resolver.js +138 -0
- package/dist/embed.js +8 -0
- package/dist/explore.js +74 -0
- package/dist/failure.js +47 -0
- package/dist/fixer.js +44 -0
- package/dist/follow-ups.js +103 -0
- package/dist/frontend-infra.js +283 -0
- package/dist/fs-utils.js +11 -0
- package/dist/git.js +778 -0
- package/dist/job.js +409 -0
- package/dist/logger.js +27 -0
- package/dist/merger.js +135 -0
- package/dist/on-call.js +126 -0
- package/dist/pi-workspace.js +237 -0
- package/dist/pi.js +971 -0
- package/dist/process.js +25 -0
- package/dist/redact.js +109 -0
- package/dist/runner.js +228 -0
- package/dist/server.js +135 -0
- package/dist/spec.js +754 -0
- package/dist/structured-output.js +431 -0
- package/dist/tester.js +191 -0
- package/package.json +35 -0
- package/src/agent-runner.ts +484 -0
- package/src/agent.ts +948 -0
- package/src/coding-agent.ts +393 -0
- package/src/embed.ts +32 -0
- package/src/failure.ts +73 -0
- package/src/follow-ups.ts +106 -0
- package/src/frontend-infra.ts +340 -0
- package/src/fs-utils.ts +11 -0
- package/src/git.ts +955 -0
- package/src/job.ts +766 -0
- package/src/logger.ts +45 -0
- package/src/pi-workspace.ts +348 -0
- package/src/pi.ts +1236 -0
- package/src/process.ts +33 -0
- package/src/redact.ts +109 -0
- package/src/runner.ts +384 -0
- package/src/server.ts +153 -0
- package/src/structured-output.ts +524 -0
package/src/git.ts
ADDED
|
@@ -0,0 +1,955 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process'
|
|
2
|
+
import { appendFile, chmod, mkdtemp, rm, writeFile } from 'node:fs/promises'
|
|
3
|
+
import { tmpdir } from 'node:os'
|
|
4
|
+
import { join } from 'node:path'
|
|
5
|
+
import { promisify } from 'node:util'
|
|
6
|
+
import type { BootstrapTargetSpec, PrSpec, RepoSpec } from './job.js'
|
|
7
|
+
import { pathExists } from './fs-utils.js'
|
|
8
|
+
import { redactSecrets } from './redact.js'
|
|
9
|
+
import { loadRunnerLimits } from './runner.js'
|
|
10
|
+
import { HarnessFailure } from './failure.js'
|
|
11
|
+
|
|
12
|
+
// Re-exported so existing importers that pull `redactSecrets` from this module keep
|
|
13
|
+
// working; the single source of truth now lives in ./redact.js.
|
|
14
|
+
export { redactSecrets } from './redact.js'
|
|
15
|
+
|
|
16
|
+
const exec = promisify(execFile)
|
|
17
|
+
|
|
18
|
+
// Git + GitHub helpers. The installation token is NEVER placed in a clone/remote
|
|
19
|
+
// URL or in any git argv. Instead git authenticates over HTTPS via a GIT_ASKPASS
|
|
20
|
+
// helper: the plain `https://x-access-token@host/...` remote (username only, no
|
|
21
|
+
// secret) is used everywhere, and the token is handed to git out-of-band through
|
|
22
|
+
// an environment variable the helper reads. That keeps the token out of process
|
|
23
|
+
// listings and out of any command string Node echoes into an error/cmd field.
|
|
24
|
+
|
|
25
|
+
const GIT_AUTHOR = 'cat-factory[bot]'
|
|
26
|
+
const GIT_EMAIL = 'cat-factory[bot]@users.noreply.github.com'
|
|
27
|
+
|
|
28
|
+
// Per-git-command wall-clock ceiling. A single git op (clone/push over a flaky
|
|
29
|
+
// network) must not hang the job indefinitely; the job's overall watchdog
|
|
30
|
+
// (see runner.ts) is the outer bound, this stops one wedged command first.
|
|
31
|
+
//
|
|
32
|
+
// INVARIANT: this MUST stay STRICTLY BELOW the inactivity watchdog
|
|
33
|
+
// (`RunnerLimits.inactivityMs`). Git emits no Pi activity events while it runs, so a
|
|
34
|
+
// slow clone/push races both timers; if they were equal the job could fail with the
|
|
35
|
+
// misleading "no agent activity … likely hung" instead of a clear "git timed out".
|
|
36
|
+
// Staying under that window means git always loses the race and surfaces its own
|
|
37
|
+
// accurate reason.
|
|
38
|
+
//
|
|
39
|
+
// Rather than hardcode a constant against the *default* watchdog (which silently
|
|
40
|
+
// breaks the invariant when an operator lowers `JOB_INACTIVITY_MS`), we DERIVE the
|
|
41
|
+
// ceiling from the actually-configured window: a fixed margin below it, floored so a
|
|
42
|
+
// tiny window can't yield a non-positive timeout. At the 10-min default this resolves
|
|
43
|
+
// to the same 7 min as before; at a lowered 5-min window it tracks down to 2 min.
|
|
44
|
+
// Gap kept between the configured inactivity window and the per-command git timeout.
const GIT_TIMEOUT_MARGIN_MS = 180_000
// Smallest timeout ever used, so a tiny inactivity window cannot yield a non-positive value.
const GIT_TIMEOUT_FLOOR_MS = 60 * 1_000
// Effective per-command ceiling: configured inactivity window minus the margin, floored.
const GIT_TIMEOUT_MS = Math.max(
  loadRunnerLimits().inactivityMs - GIT_TIMEOUT_MARGIN_MS,
  GIT_TIMEOUT_FLOOR_MS,
)
|
|
50
|
+
|
|
51
|
+
/**
 * Produce a credential-free copy of a thrown value.
 *
 * An `Error` input yields a new `Error` whose message — and stack, when the original
 * has one — have been passed through `redactSecrets`. Any other thrown value is
 * stringified and redacted the same way.
 */
function redactError(err: unknown): Error {
  if (!(err instanceof Error)) {
    return new Error(redactSecrets(String(err)))
  }
  const scrubbed = new Error(redactSecrets(err.message))
  if (err.stack) {
    scrubbed.stack = redactSecrets(err.stack)
  }
  return scrubbed
}
|
|
60
|
+
|
|
61
|
+
/**
 * Build the remote URL git uses. Only the username (`x-access-token`) is embedded
 * — never the token — so the token never appears in argv. The token is supplied
 * separately through the child-process env and read by the GIT_ASKPASS helper.
 *
 * The `x-access-token` username is host-neutral: GitHub keys auth off the token (password)
 * and ignores the username, and GitLab likewise accepts ANY non-blank username with a PAT as
 * the password — so the same embedded username authenticates github.com and gitlab.com alike.
 *
 * The rewrite is idempotent: any `user[:pass]@` userinfo already present on an HTTPS URL is
 * REPLACED rather than prefixed, so an already-authenticated URL is returned unchanged and a
 * URL that arrived with embedded credentials has them dropped instead of being carried into
 * argv. Non-HTTPS URLs (`file://`, local paths, …) are returned untouched.
 *
 * @param cloneUrl The repository clone URL.
 * @returns The URL with `x-access-token@` as its only userinfo (HTTPS), else `cloneUrl` as-is.
 */
export function authenticatedCloneUrl(cloneUrl: string): string {
  // `[^@/]*@` can only match inside the authority (it cannot cross a `/`), so an `@`
  // later in the path (e.g. `name@v1.git`) is never mistaken for userinfo.
  return cloneUrl.replace(/^https:\/\/(?:[^@/]*@)?/i, 'https://x-access-token@')
}
|
|
75
|
+
|
|
76
|
+
/**
 * Strip a leading `user[:pass]@` userinfo segment from a `scheme://` URL, so two clone
 * URLs can be compared by repository rather than by the credentials they carry.
 */
function withoutUserinfo(url: string): string {
  const userinfoAfterScheme = /^([a-z]+:\/\/)[^@/]*@/i
  // Keep only the captured `scheme://` prefix; everything up to and including `@` is dropped.
  return url.replace(userinfoAfterScheme, (_whole: string, scheme: string) => scheme)
}
|
|
80
|
+
|
|
81
|
+
// A tiny askpass helper that prints the token git asks for. Created once per
// process and reused; the token itself is passed per-command via the env,
// never baked into the script.
let askpassPathPromise: Promise<string> | undefined

/** Write the askpass helper script into a fresh temp directory and return its path. */
async function createAskpassScript(): Promise<string> {
  const dir = await mkdtemp(join(tmpdir(), 'git-askpass-'))
  const path = join(dir, 'askpass.sh')
  // git invokes this with the prompt as argv[1]; we only ever return the token
  // (the username is already in the remote URL, so git only asks for the
  // password). The token comes from the env, never from argv.
  await writeFile(path, '#!/bin/sh\nexec printf %s "$GIT_ASKPASS_TOKEN"\n', 'utf8')
  await chmod(path, 0o700)
  return path
}

/**
 * Path of the shared askpass helper, creating it on first use.
 *
 * Concurrent callers share one in-flight creation. If creation FAILS the cached promise
 * is cleared, so a transient temp-dir error is retried by the next call instead of
 * poisoning every later authenticated git command for the life of the process.
 *
 * @returns A promise for the helper script's absolute path; rejects if it cannot be written.
 */
function ensureAskpass(): Promise<string> {
  let cached = askpassPathPromise
  if (cached === undefined) {
    const pending = createAskpassScript()
    cached = pending
    askpassPathPromise = pending
    // Side-channel handler only: callers still observe the rejection on `pending` itself.
    pending.catch(() => {
      if (askpassPathPromise === pending) askpassPathPromise = undefined
    })
  }
  return cached
}
|
|
98
|
+
|
|
99
|
+
/**
 * Build the environment for a git child process that must authenticate with `ghToken`.
 *
 * Starts from a copy of the current process env and adds the askpass helper path, the
 * token the helper prints, and a flag that disables interactive terminal prompts — so
 * the token reaches git through the environment rather than argv.
 */
async function authEnv(ghToken: string): Promise<NodeJS.ProcessEnv> {
  const childEnv: NodeJS.ProcessEnv = { ...process.env }
  childEnv.GIT_ASKPASS = await ensureAskpass()
  childEnv.GIT_ASKPASS_TOKEN = ghToken
  // Never fall back to an interactive prompt (which would hang the job).
  childEnv.GIT_TERMINAL_PROMPT = '0'
  return childEnv
}
|
|
109
|
+
|
|
110
|
+
/**
 * Whether `err` looks like `execFile` having killed the child for exceeding `timeout`.
 *
 * Node marks such errors with `killed: true`. Two other kill paths are excluded: an abort
 * through the caller's `signal` (checked via `signal.aborted`) and a `maxBuffer` overflow
 * (identified by its error code).
 * NOTE(review): relies on Node's documented `error.killed` behaviour — confirm against the
 * Node version this harness targets.
 */
function gitTimedOut(err: unknown, signal?: AbortSignal): boolean {
  if (signal?.aborted) return false
  if (typeof err !== 'object' || err === null) return false
  const details = err as { killed?: unknown; code?: unknown }
  return details.killed === true && details.code !== 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER'
}

/**
 * Run one git command and return its stdout.
 *
 * `signal` (the job watchdog's) and a per-command timeout both abort a wedged process,
 * so neither a hung clone nor a stalled push can keep the container running forever.
 * Any failure is re-thrown as a `HarnessFailure` tagged `git`, with its message and stack
 * scrubbed of credentials. When the per-command timeout is what killed the process, the
 * message says so explicitly — Node's own message for that case is only
 * "Command failed: …", which hides the real cause.
 *
 * @param args git argv (without the leading `git`).
 * @param opts Optional working directory, abort signal, and child-process environment.
 * @returns The command's stdout.
 * @throws HarnessFailure (`git`) on a non-zero exit, timeout, abort, or spawn error.
 */
async function git(
  args: string[],
  opts: { cwd?: string; signal?: AbortSignal; env?: NodeJS.ProcessEnv } = {},
): Promise<string> {
  try {
    const { stdout } = await exec('git', args, {
      ...(opts.cwd ? { cwd: opts.cwd } : {}),
      maxBuffer: 16 * 1024 * 1024,
      timeout: GIT_TIMEOUT_MS,
      ...(opts.env ? { env: opts.env } : {}),
      ...(opts.signal ? { signal: opts.signal } : {}),
    })
    return stdout
  } catch (err) {
    // Tag the failure as `git` so the registry's catch records the real cause instead of
    // the generic `agent`. A watchdog abort still wins: `describeFailure` keys off
    // `killReason` first, so an abort during a git op keeps the timeout message/cause.
    const redacted = redactError(err)
    const message = gitTimedOut(err, opts.signal)
      ? `git timed out after ${GIT_TIMEOUT_MS}ms: ${redacted.message}`
      : redacted.message
    const failure = new HarnessFailure('git', message)
    if (redacted.stack) failure.stack = redacted.stack
    throw failure
  }
}
|
|
139
|
+
|
|
140
|
+
/**
 * Clone `repo`'s base branch into `dir`, then set the bot's commit identity
 * (`user.name` / `user.email`) in the new checkout. The clone is shallow unless
 * `full` is set. The token is supplied through the child-process env, not the URL.
 */
export async function cloneRepo(opts: {
  repo: RepoSpec
  ghToken: string
  dir: string
  signal?: AbortSignal
  /**
   * Full history + all remote-tracking branches. A shallow single-branch clone is
   * enough to implement on one branch, but merging ANOTHER branch in (the
   * conflict-resolver) needs the merge base in history and `origin/<other>` present
   * — so `full` drops `--depth 1` (which also implies `--single-branch`).
   */
  full?: boolean
}): Promise<void> {
  const { repo, ghToken, dir, signal, full } = opts
  const remote = authenticatedCloneUrl(repo.cloneUrl)
  const depthArgs = full ? [] : ['--depth', '1']
  const cloneArgs = ['clone', ...depthArgs, '--branch', repo.baseBranch, remote, dir]
  await git(cloneArgs, { signal, env: await authEnv(ghToken) })

  const identity: ReadonlyArray<readonly [string, string]> = [
    ['user.name', GIT_AUTHOR],
    ['user.email', GIT_EMAIL],
  ]
  for (const [key, value] of identity) {
    await git(['config', key, value], { cwd: dir, signal })
  }
}
|
|
162
|
+
|
|
163
|
+
/** Create `branch` from the current HEAD in `dir` and switch to it (`git checkout -b`). */
export async function createBranch(
  dir: string,
  branch: string,
  signal?: AbortSignal,
): Promise<void> {
  const checkoutArgs = ['checkout', '-b', branch]
  await git(checkoutArgs, { cwd: dir, signal })
}
|
|
171
|
+
|
|
172
|
+
/**
 * Whether `branch` already exists on the remote — i.e. an earlier (possibly
 * evicted) run of this task already pushed work to it, so a re-dispatch should
 * RESUME on it (clone it, continue on its commits) rather than branch off base and
 * start over. Uses `git ls-remote` (no checkout); the token is supplied out of band.
 *
 * `ls-remote` patterns match the TAIL of a ref, so a bare `foo` would also match
 * `refs/heads/feature/foo`. To avoid that false positive the fully-qualified ref is
 * queried and each returned ref name is compared for exact equality.
 *
 * @param cloneUrl Repository clone URL (credentials are not embedded).
 * @param branch Branch name, without the `refs/heads/` prefix.
 * @param ghToken Token handed to git through the askpass env.
 * @param signal Optional abort signal.
 * @returns `true` only when `refs/heads/<branch>` itself is advertised by the remote.
 * @throws HarnessFailure (`git`) when `ls-remote` itself fails.
 */
export async function remoteBranchExists(
  cloneUrl: string,
  branch: string,
  ghToken: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const url = authenticatedCloneUrl(cloneUrl)
  const wantedRef = `refs/heads/${branch}`
  const out = await git(['ls-remote', '--heads', url, wantedRef], {
    signal,
    env: await authEnv(ghToken),
  })
  // Each output line is `<sha>\t<ref>`; accept only an exact ref-name match.
  return out
    .split('\n')
    .some((line) => line.replace(/\r$/, '').split('\t')[1] === wantedRef)
}
|
|
191
|
+
|
|
192
|
+
/**
 * Clone an EXISTING work branch into `dir` (single-branch, no depth limit) and set the
 * bot's commit identity — used to resume a task whose earlier run already pushed commits
 * to this branch, so the agent continues on top of that work instead of redoing it.
 */
export async function cloneExistingBranch(opts: {
  cloneUrl: string
  branch: string
  ghToken: string
  dir: string
  signal?: AbortSignal
}): Promise<void> {
  const { cloneUrl, branch, ghToken, dir, signal } = opts
  const remote = authenticatedCloneUrl(cloneUrl)
  const cloneArgs = ['clone', '--branch', branch, '--single-branch', remote, dir]
  await git(cloneArgs, { signal, env: await authEnv(ghToken) })

  const inDir = { cwd: dir, signal }
  await git(['config', 'user.name', GIT_AUTHOR], inDir)
  await git(['config', 'user.email', GIT_EMAIL], inDir)
}
|
|
212
|
+
|
|
213
|
+
/**
 * The directory-name patterns the clean sweep PRESERVES — dependency caches that are
 * expensive to rebuild. Read from `HARNESS_CLEAN_KEEP` (comma-separated) when that
 * variable is set, otherwise a built-in default list. Entries are trimmed and blank
 * entries dropped, so an empty variable yields an empty list.
 *
 * @param env Environment to read from; defaults to the process environment.
 * @returns The patterns in the order they were listed.
 */
export function cleanKeepPatterns(env: NodeJS.ProcessEnv = process.env): string[] {
  const configured = env.HARNESS_CLEAN_KEEP ?? 'node_modules,.venv,target,.gradle,.pnpm-store'
  const patterns: string[] = []
  for (const entry of configured.split(',')) {
    const name = entry.trim()
    if (name !== '') patterns.push(name)
  }
  return patterns
}
|
|
226
|
+
|
|
227
|
+
/**
 * Reset a REUSED checkout to a pristine state before the next job runs in it: hard-reset
 * tracked files, then remove every untracked/ignored file EXCEPT the preserved dependency
 * caches (see {@link cleanKeepPatterns}), so a prior run's leftovers never contaminate the
 * next run in the same persistent checkout.
 *
 * Submodules: when `.gitmodules` is present a single `-f` is used (so `git clean` skips
 * nested git repositories, i.e. the submodule worktrees) and the submodules are reset and
 * refreshed explicitly, both best-effort; otherwise `-ff` also removes any stray nested repo.
 */
export async function cleanSweep(
  dir: string,
  ghToken: string,
  signal?: AbortSignal,
  env: NodeJS.ProcessEnv = process.env,
): Promise<void> {
  const inDir = { cwd: dir, signal }
  await git(['reset', '--hard'], inDir)

  const hasSubmodules = await pathExists(join(dir, '.gitmodules'))
  if (hasSubmodules) {
    try {
      await git(['submodule', 'foreach', '--recursive', 'git reset --hard'], inDir)
    } catch {
      // Best-effort: a submodule that cannot be reset must not fail the sweep.
    }
  }

  // Each kept pattern becomes its own `-e <pattern>` exclude for `git clean`.
  const excludes: string[] = []
  for (const pattern of cleanKeepPatterns(env)) {
    excludes.push('-e', pattern)
  }
  // `-ffdx` (or `-fdx` with submodules) removes untracked + ignored files and dirs.
  const forceFlags = hasSubmodules ? '-fdx' : '-ffdx'
  await git(['clean', forceFlags, ...excludes], inDir)

  if (!hasSubmodules) return
  // Built outside the try so a failure to prepare the auth env still propagates.
  const submoduleEnv = await authEnv(ghToken)
  try {
    await git(['submodule', 'update', '--init', '--recursive'], { ...inDir, env: submoduleEnv })
  } catch {
    // Best-effort: a failed submodule refresh is tolerated.
  }
}
|
|
264
|
+
|
|
265
|
+
/**
 * The `origin` remote URL recorded in the checkout at `dir`, as printed by
 * `git remote get-url origin` and trimmed. Returns undefined when the command fails
 * (not a git repo, no origin) or prints nothing. Callers use it to spot a persistent
 * checkout dir that holds a DIFFERENT repo than the one about to be prepared.
 */
export async function checkoutRemoteUrl(
  dir: string,
  signal?: AbortSignal,
): Promise<string | undefined> {
  try {
    const stdout = await git(['remote', 'get-url', 'origin'], { cwd: dir, signal })
    const url = stdout.trim()
    return url === '' ? undefined : url
  } catch {
    return undefined
  }
}
|
|
282
|
+
|
|
283
|
+
/**
 * Prepare a REUSED (persistent) checkout at `dir` so the agent runs against a clean tree
 * on the right branch — the persistent-checkout analogue of {@link cloneRepo} +
 * {@link cloneExistingBranch}.
 *
 * When `dir` has no `.git`, or its `origin` points at a different repo, the dir is removed
 * and cloned fresh with full history. In every case the sequence then is: clean sweep →
 * re-point origin → fetch → force-checkout `branch` → set commit identity. With `existing`
 * true, `branch` itself is fetched and checked out (resume / base branch); otherwise
 * `branch` is (re)created off `baseBranch`'s fetched tip.
 */
export async function prepareExistingCheckout(opts: {
  dir: string
  repo: RepoSpec
  ghToken: string
  /** The branch to end up checked out on. */
  branch: string
  /** Base branch to (re)create `branch` off when `existing` is false; also fetched for history. */
  baseBranch: string
  /** Whether `branch` already exists on the remote (resume / base) — checkout it directly. */
  existing: boolean
  signal?: AbortSignal
}): Promise<void> {
  const { dir, repo, ghToken, branch, baseBranch, existing, signal } = opts
  const remoteUrl = authenticatedCloneUrl(repo.cloneUrl)
  const inDir = { cwd: dir, signal }
  const trackingRef = (name: string): string => `refs/remotes/origin/${name}`

  // A missing `.git` means first use of this dir; otherwise read what origin points at.
  let recordedRemote: string | undefined
  if (await pathExists(join(dir, '.git'))) {
    recordedRemote = await checkoutRemoteUrl(dir, signal)
  }
  // Compare by repo, ignoring any userinfo, so only a genuinely different repo re-clones.
  const mustClone =
    !recordedRemote || withoutUserinfo(recordedRemote) !== withoutUserinfo(remoteUrl)
  if (mustClone) {
    await rm(dir, { recursive: true, force: true })
    await cloneRepo({ repo: { ...repo, baseBranch }, ghToken, dir, full: true, signal })
  }

  const env = await authEnv(ghToken)
  await cleanSweep(dir, ghToken, signal)
  // Re-point origin in case the stored URL drifted (idempotent; carries no secret).
  await git(['remote', 'set-url', 'origin', remoteUrl], inDir)

  // One fetch with explicit destination refspecs updates the target's tracking ref and,
  // when different, the base's too. The checkout reads the tracking ref rather than
  // FETCH_HEAD, which only holds the last fetched ref and could otherwise reset a
  // resumed branch to the base tip. A fetch failure propagates — not best-effort.
  const target = existing ? branch : baseBranch
  const refspecs = [`+${target}:${trackingRef(target)}`]
  if (baseBranch !== target) {
    refspecs.push(`+${baseBranch}:${trackingRef(baseBranch)}`)
  }
  await git(['fetch', 'origin', ...refspecs], { ...inDir, env })

  // `-f`: preserved (untracked) dependency caches may collide with paths the target branch
  // tracks, which would abort a plain checkout; force overwrites only the colliding files.
  await git(['checkout', '-f', '-B', branch, trackingRef(target)], inDir)

  await git(['config', 'user.name', GIT_AUTHOR], inDir)
  await git(['config', 'user.email', GIT_EMAIL], inDir)
}
|
|
345
|
+
|
|
346
|
+
/**
 * Safety-net commit for edits the agent left UNCOMMITTED, restricted to files git
 * already tracks (`git add -u`). New untracked files are never staged here, so scratch
 * files are not swept into the commit the way a blanket `git add -A` would.
 *
 * @returns `true` when a commit was created, `false` when nothing tracked was staged.
 */
export async function commitTrackedEdits(
  dir: string,
  message: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const inDir = { cwd: dir, signal }
  await git(['add', '-u'], inDir)
  // Only staged (tracked) changes count — untracked files are deliberately ignored.
  const stagedNames = await git(['diff', '--cached', '--name-only'], inDir)
  const hasStaged = stagedNames.trim() !== ''
  if (hasStaged) {
    await git(['commit', '-m', message], inDir)
  }
  return hasStaged
}
|
|
366
|
+
|
|
367
|
+
/**
 * The untracked, non-ignored files left in the working tree (`git ls-files --others
 * --exclude-standard`). This is exactly what {@link commitTrackedEdits} does NOT capture
 * — a NEW file that was created but never committed — so the caller can surface it.
 *
 * Uses `-z` so git emits NUL-terminated, UNQUOTED paths: without it, names containing
 * special or non-ASCII characters come back C-quoted (`"caf\303\251.txt"`), and trimming
 * would corrupt names with leading/trailing whitespace.
 *
 * @param dir Checkout directory.
 * @param signal Optional abort signal.
 * @returns The untracked paths, verbatim, relative to `dir`.
 * @throws HarnessFailure (`git`) when the listing command fails.
 */
export async function listUntrackedFiles(dir: string, signal?: AbortSignal): Promise<string[]> {
  const out = await git(['ls-files', '--others', '--exclude-standard', '-z'], {
    cwd: dir,
    signal,
  })
  return out.split('\0').filter((path) => path !== '')
}
|
|
381
|
+
|
|
382
|
+
/**
 * Locally exclude `pattern` from this checkout by appending it to `.git/info/exclude`
 * — a per-clone ignore that never lands in the repo (unlike a `.gitignore`).
 * Best-effort: any failure to append is swallowed and the call still resolves.
 */
export async function excludeFromGit(
  dir: string,
  pattern: string,
  signal?: AbortSignal,
): Promise<void> {
  const appendPattern = async (): Promise<void> => {
    const excludeFile = join(dir, '.git', 'info', 'exclude')
    await appendFile(excludeFile, `\n${pattern}\n`, 'utf8')
  }
  await appendPattern().catch(() => {
    // A missing .git/info/exclude (worktree layout) or write error is non-fatal.
    void signal
  })
}
|
|
402
|
+
|
|
403
|
+
/**
 * Whether HEAD in `dir` now points at a commit other than `baseSha` — i.e. the branch
 * moved since the base tip was recorded (the agent's own commits and/or a safety-net commit).
 */
export async function branchHasCommitsSince(
  dir: string,
  baseSha: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const tip = await headCommit(dir, signal)
  return tip !== baseSha
}
|
|
411
|
+
|
|
412
|
+
/**
 * Whether the checked-out branch has a real, examinable diff against
 * `origin/<baseBranch>`: `git diff --stat origin/<baseBranch>...HEAD` must succeed AND
 * print something. Returns false on ANY git error (e.g. the base ref is unknown), so a
 * caller can refuse to judge a change it could not actually inspect. The base's
 * remote-tracking ref and the merge base must be present in the checkout (a full clone).
 */
export async function hasDiffAgainstBase(
  dir: string,
  baseBranch: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const range = `origin/${baseBranch}...HEAD`
  let summary: string
  try {
    summary = await git(['diff', '--stat', range], { cwd: dir, signal })
  } catch {
    return false
  }
  return summary.trim() !== ''
}
|
|
433
|
+
|
|
434
|
+
/** Decode raw bytes as UTF-8, falling back to one char per byte when they are not valid UTF-8. */
function decodeGitPathBytes(bytes: number[]): string {
  const percentEncoded = bytes.map((b) => `%${b.toString(16).padStart(2, '0')}`).join('')
  try {
    return decodeURIComponent(percentEncoded)
  } catch {
    return String.fromCharCode(...bytes)
  }
}

/** Decode the INSIDE of a C-style quoted git path (`\t`, `\"`, `\\`, octal `\303\251`, …). */
function unquoteGitPath(inner: string): string {
  const simpleEscapes = new Map<string, string>([
    ['a', '\x07'],
    ['b', '\b'],
    ['f', '\f'],
    ['n', '\n'],
    ['r', '\r'],
    ['t', '\t'],
    ['v', '\v'],
    ['"', '"'],
    ['\\', '\\'],
  ])
  const octalByte = /^[0-3][0-7]{2}/
  let decoded = ''
  // Consecutive octal escapes form one multi-byte UTF-8 sequence, so buffer them together.
  let pendingBytes: number[] = []
  const flush = (): void => {
    if (pendingBytes.length > 0) {
      decoded += decodeGitPathBytes(pendingBytes)
      pendingBytes = []
    }
  }
  for (let i = 0; i < inner.length; i++) {
    const ch = inner.charAt(i)
    if (ch !== '\\') {
      flush()
      decoded += ch
      continue
    }
    const octal = octalByte.exec(inner.slice(i + 1, i + 4))
    if (octal) {
      pendingBytes.push(Number.parseInt(octal[0], 8))
      i += 3
      continue
    }
    flush()
    const replacement = simpleEscapes.get(inner.charAt(i + 1))
    if (replacement === undefined) {
      // Unknown escape: keep the backslash literally rather than guessing.
      decoded += ch
    } else {
      decoded += replacement
      i += 1
    }
  }
  flush()
  return decoded
}

/**
 * Parse the paths out of `git status --porcelain` (v1) output. Each line is
 * `XY <path>`, or `XY <old> -> <new>` for a rename/copy (the new path is kept).
 * Blank lines are skipped. Pure, so it can be tested without spawning git.
 *
 * git C-quotes paths containing whitespace or special characters. Such paths are fully
 * decoded here (escape sequences and octal-encoded UTF-8 bytes, not just the surrounding
 * quotes), and a ` -> ` that appears INSIDE a quoted path is treated as part of the name
 * rather than as the rename separator.
 *
 * @param status Raw stdout of `git status --porcelain`.
 * @returns One decoded path per non-blank status line, in output order.
 */
export function changedPathsFromPorcelain(status: string): string[] {
  const paths: string[] = []
  for (const raw of status.split('\n')) {
    const line = raw.replace(/\r$/, '')
    if (line.trim() === '') continue
    const rest = line.slice(3)

    // Locate the first ` -> ` that lies outside any quoted field.
    let separator = -1
    let inQuotes = false
    for (let i = 0; i < rest.length; i++) {
      const ch = rest.charAt(i)
      if (inQuotes) {
        if (ch === '\\') i += 1
        else if (ch === '"') inQuotes = false
      } else if (ch === '"') {
        inQuotes = true
      } else if (rest.startsWith(' -> ', i)) {
        separator = i
        break
      }
    }

    let path = (separator === -1 ? rest : rest.slice(separator + 4)).trim()
    if (path.length >= 2 && path.startsWith('"') && path.endsWith('"')) {
      path = unquoteGitPath(path.slice(1, -1))
    }
    if (path) paths.push(path)
  }
  return paths
}
|
|
453
|
+
|
|
454
|
+
/**
 * Whether anything changed in the checkout at `dir`. Stages the whole working tree
 * (`git add -A`) and inspects the porcelain status: no reported paths means no change,
 * a no-op that must not be passed off as a successful push.
 */
export async function hasAgentChanges(dir: string, signal?: AbortSignal): Promise<boolean> {
  const inDir = { cwd: dir, signal }
  await git(['add', '-A'], inDir)
  const porcelain = await git(['status', '--porcelain'], inDir)
  const changed = changedPathsFromPorcelain(porcelain)
  return changed.length > 0
}
|
|
466
|
+
|
|
467
|
+
/** The commit SHA that HEAD resolves to in `dir` (`git rev-parse HEAD`, trimmed). */
export async function headCommit(dir: string, signal?: AbortSignal): Promise<string> {
  const stdout = await git(['rev-parse', 'HEAD'], { cwd: dir, signal })
  return stdout.trim()
}
|
|
471
|
+
|
|
472
|
+
/**
 * Stage every change in `dir` (`git add -A`) and commit it with `message`.
 *
 * @returns `true` when a commit was created, `false` when the status was empty.
 */
export async function commitAll(
  dir: string,
  message: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const inDir = { cwd: dir, signal }
  await git(['add', '-A'], inDir)
  const porcelain = await git(['status', '--porcelain'], inDir)
  const dirty = porcelain.trim() !== ''
  if (dirty) {
    await git(['commit', '-m', message], inDir)
  }
  return dirty
}
|
|
484
|
+
|
|
485
|
+
/**
 * Paths git still reports as unmerged (conflict stage entries) in the working tree.
 *
 * Uses `-z` so git emits NUL-terminated, UNQUOTED paths. Without it, names with special
 * or non-ASCII characters come back C-quoted with escape sequences; merely stripping the
 * surrounding quotes leaves those escapes in place, and such a string does not match the
 * real file when later used as a pathspec.
 *
 * @param dir Checkout directory.
 * @param signal Optional abort signal.
 * @returns The unmerged paths, verbatim, relative to `dir` (empty when there are none).
 * @throws HarnessFailure (`git`) when the diff command fails.
 */
export async function unmergedPaths(dir: string, signal?: AbortSignal): Promise<string[]> {
  const out = await git(['diff', '--name-only', '--diff-filter=U', '-z'], { cwd: dir, signal })
  return out.split('\0').filter((path) => path !== '')
}
|
|
498
|
+
|
|
499
|
+
/**
 * The `git diff` output restricted to the given unmerged `paths`, so a conflict resolver
 * can be handed the actual conflicts instead of rediscovering them. Output longer than
 * `maxChars` is cut to that length and a truncation note is appended, so a huge conflict
 * cannot blow up the prompt. Returns '' when `paths` is empty (git is not invoked).
 */
export async function conflictDiff(
  dir: string,
  paths: string[],
  signal?: AbortSignal,
  maxChars = 24_000,
): Promise<string> {
  if (paths.length === 0) return ''
  const diff = await git(['diff', '--', ...paths], { cwd: dir, signal })
  const fits = diff.length <= maxChars
  if (fits) return diff
  const head = diff.slice(0, maxChars)
  return `${head}\n\n[diff truncated at ${maxChars} characters — open the files directly to see the remaining conflicts]`
}
|
|
518
|
+
|
|
519
|
+
/**
 * Merge `origin/<baseBranch>` into the current branch with `git merge --no-edit`.
 *
 * @returns `true` when the merge command succeeded, `false` when it failed and left
 *   unmerged paths in the working tree — the expected conflict case, NOT an error.
 * @throws The original git failure when the merge failed without leaving unmerged paths
 *   (e.g. an unknown ref). `origin/<baseBranch>` and the merge base must be present.
 */
export async function mergeBranch(
  dir: string,
  baseBranch: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const mergeArgs = ['merge', '--no-edit', `origin/${baseBranch}`]
  try {
    await git(mergeArgs, { cwd: dir, signal })
  } catch (err) {
    // A conflict exits non-zero AND leaves unmerged paths; a genuine failure leaves none.
    const conflicted = await unmergedPaths(dir, signal)
    if (conflicted.length === 0) throw err
    return false
  }
  return true
}
|
|
542
|
+
|
|
543
|
+
/**
 * Bring a RESUMED work branch up to the latest `baseBranch` when (and only when) the two
 * merge cleanly. Fetches the base, attempts `git merge --no-edit FETCH_HEAD`, and on a
 * conflict aborts the merge (best-effort) so the branch stays on its old base.
 *
 * @returns `true` when base was merged in, `false` when the merge conflicted and was aborted.
 * @throws When the fetch fails, or the merge fails without leaving unmerged paths.
 */
export async function refreshFromBaseIfClean(
  dir: string,
  baseBranch: string,
  ghToken: string,
  signal?: AbortSignal,
): Promise<boolean> {
  const inDir = { cwd: dir, signal }
  const fetchEnv = await authEnv(ghToken)
  await git(['fetch', 'origin', baseBranch], { ...inDir, env: fetchEnv })
  try {
    await git(['merge', '--no-edit', 'FETCH_HEAD'], inDir)
  } catch (err) {
    const conflicted = await unmergedPaths(dir, signal)
    if (conflicted.length === 0) throw err
    // Conflict — undo the half-done merge and keep the branch on its old base.
    await git(['merge', '--abort'], inDir).catch(() => {})
    return false
  }
  return true
}
|
|
572
|
+
|
|
573
|
+
/**
 * Publish `branch` to `origin` and set it as the upstream (`push -u`).
 *
 * The token is handed to git through the environment built by `authEnv`, so it is
 * never placed on the command line.
 */
export async function pushBranch(
  dir: string,
  branch: string,
  ghToken: string,
  signal?: AbortSignal,
): Promise<void> {
  const env = await authEnv(ghToken)
  await git(['push', '-u', 'origin', branch], { cwd: dir, signal, env })
}
|
|
589
|
+
|
|
590
|
+
/**
 * Replace the working tree's git history with one fresh bootstrap commit and
 * force-push it to the target repository's default branch.
 *
 * The existing `.git` directory is deleted first, so the new repository receives
 * only the bootstrapped *contents*, never the source's commit history.
 *
 * The push uses `--force` because the new single-commit history has no ancestor in
 * common with whatever the target already holds (e.g. a README, .gitignore or
 * license created with the repo), so a fast-forward cannot succeed. Callers are
 * expected to have confirmed beforehand that overwriting the target is safe.
 */
export async function reinitAndPush(opts: {
  dir: string
  target: BootstrapTargetSpec
  ghToken: string
  message: string
}): Promise<void> {
  const { dir, target, ghToken, message } = opts
  // Every local git step runs inside the working tree.
  const run = (...args: string[]) => git(args, { cwd: dir })

  await rm(join(dir, '.git'), { recursive: true, force: true })
  await run('init')
  // `git init` may start on a different branch name (e.g. master); switch to the
  // target's default branch before committing.
  await run('checkout', '-b', target.defaultBranch)
  await run('config', 'user.name', GIT_AUTHOR)
  await run('config', 'user.email', GIT_EMAIL)
  await run('add', '-A')
  await run('commit', '-m', message)

  const remoteUrl = authenticatedCloneUrl(target.cloneUrl)
  await run('remote', 'add', 'origin', remoteUrl)
  await git(['push', '--force', '-u', 'origin', target.defaultBranch], {
    cwd: dir,
    env: await authEnv(ghToken),
  })
}
|
|
623
|
+
|
|
624
|
+
export interface OpenPullRequestOptions {
  /** Repository owner; used in the GitHub `/repos/{owner}/{name}` path and the `owner:head` lookup filter. */
  owner: string
  /** Repository name; the second half of the GitHub `/repos/{owner}/{name}` path. */
  name: string
  /** Access token: sent as a Bearer token to GitHub, or as the `private-token` header to GitLab. */
  ghToken: string
  /** Branch carrying the changes (GitHub `head`, GitLab `source_branch`). */
  head: string
  /** Branch the changes should land on (GitHub `base`, GitLab `target_branch`). */
  base: string
  /** Title and body text for the pull/merge request. */
  pr: PrSpec
  /**
   * GitHub REST API base; defaults to `https://api.github.com`. Not used on the GitLab
   * path, which derives its API base from {@link cloneUrl} instead.
   */
  apiBase?: string
  /**
   * The repo's clone URL. Used (when {@link provider} is absent) to detect the provider and,
   * for GitLab, to derive the REST base + project path from its host — so the harness opens a
   * GitLab **merge request** rather than POSTing to GitHub's pulls API. Absent ⇒ GitHub.
   */
  cloneUrl?: string
  /**
   * The VCS provider, when the dispatcher knows it (the server derives it from the configured
   * source-control backend and sets `repo.provider`). AUTHORITATIVE — it overrides host
   * inference — so a self-managed GitLab on an arbitrarily-named host (e.g. `git.acme.com`,
   * which {@link inferVcsProvider} can't recognise) still opens a merge request instead of
   * being misrouted to GitHub's API. Absent ⇒ inferred from {@link cloneUrl}'s host.
   */
  provider?: 'github' | 'gitlab'
  /** Abort signal forwarded to the HTTP requests and to the waits between retries. */
  signal?: AbortSignal
}
|
|
648
|
+
|
|
649
|
+
/**
 * Guess which VCS provider a clone URL belongs to, based solely on its host.
 *
 * Git authentication in the harness is host-neutral, but opening a PR/MR is not:
 * GitHub and GitLab expose different REST endpoints, so the caller needs to know
 * which one to hit. A host that starts with `gitlab.` (this includes `gitlab.com`)
 * or contains `.gitlab.` is classified as GitLab. Everything else, including a
 * string that does not parse as a URL, falls back to GitHub.
 */
export function inferVcsProvider(cloneUrl: string): 'github' | 'gitlab' {
  let parsed: URL
  try {
    parsed = new URL(cloneUrl)
  } catch {
    // Unparseable input: fall back to the default provider.
    return 'github'
  }
  // Matches a `gitlab.` label at the start of the host or after any dot.
  const gitlabLabel = /(?:^|\.)gitlab\./
  return gitlabLabel.test(parsed.host.toLowerCase()) ? 'gitlab' : 'github'
}
|
|
668
|
+
|
|
669
|
+
/**
 * Build the GitLab REST v4 base URL that lives on the same scheme and host as the
 * given clone URL, e.g. `https://gitlab.com/group/proj.git` → `https://gitlab.com/api/v4`.
 * Throws if `cloneUrl` is not a parseable URL.
 */
export function gitlabApiBaseFromCloneUrl(cloneUrl: string): string {
  const { protocol, host } = new URL(cloneUrl)
  return [protocol, '//', host, '/api/v4'].join('')
}
|
|
674
|
+
|
|
675
|
+
/**
 * The URL-encoded GitLab project path from a clone URL — the full namespace path (so
 * subgroups survive) with surrounding slashes and the trailing `.git` removed, e.g.
 * `https://gitlab.com/group/sub/proj.git` → `group%2Fsub%2Fproj`.
 *
 * Two normalisations keep the result a valid `:id` for GitLab's project endpoints:
 * - trailing slashes are dropped before the `.git` suffix is stripped, so
 *   `…/proj.git/` and `…/proj/` resolve to the same project as `…/proj.git`;
 * - percent-escapes already present in the URL path are decoded before encoding, so
 *   they are not encoded twice (`%20` would otherwise become `%2520`).
 *
 * @param cloneUrl Absolute clone URL of the project.
 * @returns The project path, encoded for use as a single URL path segment.
 * @throws TypeError when `cloneUrl` is not a parseable URL.
 */
export function gitlabProjectPath(cloneUrl: string): string {
  const trimmed = new URL(cloneUrl).pathname
    .replace(/^\/+/, '')
    .replace(/\/+$/, '')
    .replace(/\.git$/, '')
  let projectPath = trimmed
  try {
    projectPath = decodeURIComponent(trimmed)
  } catch {
    // Malformed escape sequence in the path: keep the raw text rather than failing.
  }
  return encodeURIComponent(projectPath)
}
|
|
684
|
+
|
|
685
|
+
/**
 * Turn an aborted signal into something throwable: the signal's own `reason` when
 * that is already an Error (the watchdog aborts with one), otherwise a generic
 * `Error('aborted')`.
 */
function abortError(signal: AbortSignal): Error {
  const { reason } = signal
  if (reason instanceof Error) return reason
  return new Error('aborted')
}
|
|
689
|
+
|
|
690
|
+
/**
 * True when `err` is an Error whose `name` is `'AbortError'` — i.e. a fetch that was
 * cancelled by the caller, which must never be retried.
 */
function isAbortError(err: unknown): boolean {
  if (!(err instanceof Error)) return false
  return err.name === 'AbortError'
}
|
|
694
|
+
|
|
695
|
+
/**
 * Read a response's `Retry-After` header as a delay in milliseconds, capped at
 * `MAX_RETRY_AFTER_MS` so a large value cannot stall the job.
 *
 * Both header forms are understood: a number of seconds (`120`) and an HTTP-date
 * (`Wed, 21 Oct 2026 07:28:00 GMT`), the latter converted to "time remaining from
 * now". A missing header, or a value that is unparseable, zero, negative or already
 * in the past, yields `undefined` so the caller can use its own backoff instead.
 */
function retryAfterMs(res: Response): number | undefined {
  const header = res.headers.get('retry-after')
  if (!header) return undefined

  const seconds = Number(header)
  // Numeric form → seconds; otherwise treat it as a date. An unparseable date makes
  // `Date.parse` return NaN, which fails the `> 0` test below.
  const delayMs = Number.isFinite(seconds) ? seconds * 1000 : Date.parse(header) - Date.now()
  return delayMs > 0 ? Math.min(delayMs, MAX_RETRY_AFTER_MS) : undefined
}
|
|
713
|
+
|
|
714
|
+
/**
 * Wait for `ms` milliseconds. If `signal` is already aborted, or aborts while
 * waiting, the promise rejects right away with the abort reason and the pending
 * timer is cleared. On normal completion the abort listener is removed again.
 */
function abortableDelay(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    if (!signal) {
      // Nothing can cancel the wait: a plain timer is enough.
      setTimeout(() => resolve(), ms)
      return
    }
    const watched = signal
    if (watched.aborted) {
      reject(abortError(watched))
      return
    }

    const handle = setTimeout(finish, ms)
    watched.addEventListener('abort', cancel, { once: true })

    function finish(): void {
      watched.removeEventListener('abort', cancel)
      resolve()
    }

    function cancel(): void {
      clearTimeout(handle)
      reject(abortError(watched))
    }
  })
}
|
|
729
|
+
|
|
730
|
+
/** Upper bound (ms) applied to a server-supplied `Retry-After` delay. */
const MAX_RETRY_AFTER_MS = 8_000
/** First-retry delay (ms) for exponential backoff; doubles with each further attempt. */
const RETRY_BASE_MS = 500
/** Ceiling (ms) for the exponential backoff delay, before jitter is added. */
const RETRY_MAX_DELAY_MS = 4_000
|
|
733
|
+
|
|
734
|
+
/**
 * Execute one HTTP request, retrying a bounded number of times when the failure
 * looks TRANSIENT, so a brief upstream hiccup does not sink an otherwise finished
 * run at its last step (opening the PR/MR).
 *
 * What counts as transient: a response with status `>= 500` or `429`, or `fn`
 * rejecting with a network-level error. Between attempts the function waits for the
 * response's `Retry-After` value when one is usable, else for an exponential backoff
 * with jitter; every wait is abort-aware.
 *
 * What is never retried: any other response (including the 422/409 "already exists"
 * replies callers treat as success) is handed back as-is, and an abort — detected via
 * the error's name or the signal's state — is re-thrown immediately. The body of a
 * returned response is left unread for the caller.
 *
 * @param fn Performs the request; called once per attempt.
 * @param opts `signal` cancels attempts and waits; `attempts` is the total number of
 *   tries (default 3, i.e. 2 retries).
 * @returns The first non-transient response, or the last response once attempts run out.
 * @throws HarnessFailure (`'api'`) when every attempt failed without any HTTP response.
 */
async function withApiRetry(
  fn: () => Promise<Response>,
  opts: { signal?: AbortSignal; attempts?: number } = {},
): Promise<Response> {
  const { signal } = opts
  const limit = opts.attempts ?? 3
  let networkFailure: unknown

  for (let attempt = 1; attempt <= limit; attempt += 1) {
    if (signal?.aborted) throw abortError(signal)
    const isLastAttempt = attempt >= limit

    let response: Response | undefined
    try {
      response = await fn()
    } catch (failure) {
      // An abort ends the whole operation; anything else is a retryable network error.
      const wasAborted = isAbortError(failure) || signal?.aborted
      if (wasAborted) throw failure
      networkFailure = failure
    }

    if (!response) {
      // Network-level rejection: give up after the final attempt, else back off.
      if (isLastAttempt) break
      await abortableDelay(backoffMs(attempt), signal)
      continue
    }

    const retryable = response.status === 429 || response.status >= 500
    if (isLastAttempt || !retryable) return response

    const serverHint = retryAfterMs(response)
    // Drop the unread body before the next attempt so the connection can be reused.
    await response.body?.cancel().catch(() => {})
    await abortableDelay(serverHint ?? backoffMs(attempt), signal)
  }

  // Reaching this point means no attempt ever produced an HTTP response.
  const reason =
    networkFailure instanceof Error ? networkFailure.message : 'API request failed after retries'
  throw new HarnessFailure('api', redactSecrets(reason))
}
|
|
779
|
+
|
|
780
|
+
/**
 * Delay before the retry that follows attempt number `attempt` (1-based): the base
 * delay doubled per attempt, capped at the maximum delay, plus a random extra of up
 * to 25% of that capped value.
 */
function backoffMs(attempt: number): number {
  const exponential = RETRY_BASE_MS * 2 ** (attempt - 1)
  const capped = Math.min(RETRY_MAX_DELAY_MS, exponential)
  const jitter = Math.floor(capped * 0.25 * Math.random())
  return capped + jitter
}
|
|
785
|
+
|
|
786
|
+
/**
 * Create the pull request (GitHub) or merge request (GitLab) for an already-pushed
 * branch and return its web URL.
 *
 * Provider selection: an explicit `opts.provider` always wins; without one the
 * provider is inferred from the clone URL's host, and without a clone URL it is
 * GitHub. The GitLab path requires `opts.cloneUrl` and is delegated to
 * `openGitLabMergeRequest`.
 *
 * On GitHub, a 422 whose body says a pull request already exists (a resumed run
 * re-pushing to a branch with an open PR) counts as success: the existing PR's URL is
 * looked up and returned.
 *
 * @throws Error when GitLab is selected but no clone URL was supplied.
 * @throws HarnessFailure (`'api'`) when the request fails or the reply carries no URL.
 */
export async function openPullRequest(opts: OpenPullRequestOptions): Promise<string> {
  const inferProvider = (): 'github' | 'gitlab' =>
    opts.cloneUrl ? inferVcsProvider(opts.cloneUrl) : 'github'
  const provider = opts.provider ?? inferProvider()

  if (provider === 'gitlab') {
    const { cloneUrl } = opts
    if (!cloneUrl) {
      throw new Error('Cannot open a GitLab merge request without the repo clone URL')
    }
    return openGitLabMergeRequest({ ...opts, cloneUrl })
  }

  const { signal } = opts
  const apiBase = opts.apiBase ?? 'https://api.github.com'
  const repoPath = `${encodeURIComponent(opts.owner)}/${encodeURIComponent(opts.name)}`
  const endpoint = `${apiBase}/repos/${repoPath}/pulls`

  // One attempt at creating the PR; the retry wrapper may call this several times.
  const createPr = (): Promise<Response> =>
    fetch(endpoint, {
      method: 'POST',
      headers: {
        authorization: `Bearer ${opts.ghToken}`,
        accept: 'application/vnd.github+json',
        'user-agent': 'cat-factory-executor',
        'x-github-api-version': '2022-11-28',
        'content-type': 'application/json',
      },
      body: JSON.stringify({
        title: opts.pr.title,
        head: opts.head,
        base: opts.base,
        body: opts.pr.body,
      }),
      // Tie the request to the caller's signal so a hung call can be cancelled.
      ...(signal ? { signal } : {}),
    })

  const res = await withApiRetry(createPr, { signal })

  if (res.ok) {
    const created = (await res.json()) as { html_url?: string }
    if (!created.html_url) throw new HarnessFailure('api', 'GitHub did not return a PR url')
    return created.html_url
  }

  const detail = await res.text().catch(() => '')
  // "Already exists" is success for a resumed run: hand back the PR that is there.
  const isDuplicate = res.status === 422 && /pull request already exists/i.test(detail)
  if (isDuplicate) {
    const existing = await findOpenPullRequestUrl(opts)
    if (existing) return existing
  }
  throw new HarnessFailure(
    'api',
    redactSecrets(`Failed to open PR (HTTP ${res.status}): ${detail.slice(0, 300)}`),
  )
}
|
|
843
|
+
|
|
844
|
+
/**
 * Request headers for a GitLab REST call: the token goes in `private-token`, and the
 * remaining entries declare JSON in both directions plus the harness user agent.
 */
function gitlabHeaders(token: string): Record<string, string> {
  const entries: Array<[string, string]> = [
    ['private-token', token],
    ['accept', 'application/json'],
    ['user-agent', 'cat-factory-executor'],
    ['content-type', 'application/json'],
  ]
  return Object.fromEntries(entries)
}
|
|
853
|
+
|
|
854
|
+
/**
 * GitLab counterpart of the GitHub PR creation: open a merge request and return its
 * web URL. Both the REST base and the project path come from the clone URL, so the
 * same code serves gitlab.com and self-managed hosts. `head` is sent as
 * `source_branch`, `base` as `target_branch`.
 *
 * A 409 or 400 whose body mentions an existing/open merge request (a resumed run
 * whose branch already has an MR) is treated as success: the existing MR's URL is
 * looked up and returned.
 *
 * @throws HarnessFailure (`'api'`) when the request fails or the reply carries no URL.
 */
async function openGitLabMergeRequest(
  opts: OpenPullRequestOptions & { cloneUrl: string },
): Promise<string> {
  const { signal } = opts
  const apiBase = gitlabApiBaseFromCloneUrl(opts.cloneUrl)
  const project = gitlabProjectPath(opts.cloneUrl)
  const endpoint = `${apiBase}/projects/${project}/merge_requests`

  // One attempt at creating the MR; the retry wrapper may call this several times.
  const createMr = (): Promise<Response> =>
    fetch(endpoint, {
      method: 'POST',
      headers: gitlabHeaders(opts.ghToken),
      body: JSON.stringify({
        source_branch: opts.head,
        target_branch: opts.base,
        title: opts.pr.title,
        description: opts.pr.body,
      }),
      ...(signal ? { signal } : {}),
    })

  const res = await withApiRetry(createMr, { signal })

  if (res.ok) {
    const created = (await res.json()) as { web_url?: string }
    if (!created.web_url) {
      throw new HarnessFailure('api', 'GitLab did not return a merge request url')
    }
    return created.web_url
  }

  const detail = await res.text().catch(() => '')
  // An MR that is already open for this source branch is success for a resumed run.
  const duplicateStatus = res.status === 409 || res.status === 400
  if (duplicateStatus && /already exists|open merge request/i.test(detail)) {
    const existing = await findOpenMergeRequestUrl(apiBase, project, opts)
    if (existing) return existing
  }
  throw new HarnessFailure(
    'api',
    redactSecrets(`Failed to open merge request (HTTP ${res.status}): ${detail.slice(0, 300)}`),
  )
}
|
|
901
|
+
|
|
902
|
+
/**
 * Look up the open GitLab merge request going from `opts.head` to `opts.base`.
 *
 * @returns The first match's `web_url`, or `undefined` when the lookup responds with
 *   a non-OK status, the body is not a JSON array, or no entry carries a URL.
 */
async function findOpenMergeRequestUrl(
  apiBase: string,
  project: string,
  opts: { head: string; base: string; ghToken: string; signal?: AbortSignal },
): Promise<string | undefined> {
  // Both branches are part of the filter: one source branch may have open MRs against
  // several targets, and only the MR for the requested target is wanted.
  const search = new URLSearchParams()
  search.set('source_branch', opts.head)
  search.set('target_branch', opts.base)
  search.set('state', 'opened')

  const init: RequestInit = { headers: gitlabHeaders(opts.ghToken) }
  if (opts.signal) init.signal = opts.signal

  const res = await fetch(`${apiBase}/projects/${project}/merge_requests?${search}`, init)
  if (!res.ok) return undefined

  const payload: unknown = await res.json().catch(() => [])
  if (!Array.isArray(payload)) return undefined
  const first = payload[0] as { web_url?: string } | null | undefined
  return first?.web_url || undefined
}
|
|
923
|
+
|
|
924
|
+
/**
 * Look up the open GitHub pull request from `opts.head` into `opts.base`.
 *
 * @returns The first match's `html_url`, or `undefined` when the lookup responds with
 *   a non-OK status, the body is not a JSON array, or no entry carries a URL.
 */
async function findOpenPullRequestUrl(opts: {
  owner: string
  name: string
  ghToken: string
  head: string
  base: string
  apiBase?: string
  signal?: AbortSignal
}): Promise<string | undefined> {
  const apiBase = opts.apiBase ?? 'https://api.github.com'
  const repoPath = `${encodeURIComponent(opts.owner)}/${encodeURIComponent(opts.name)}`

  // URLSearchParams escapes the values, so a branch or owner containing `&` or `#`
  // can neither split the query string nor smuggle in an extra parameter.
  const search = new URLSearchParams()
  search.set('head', `${opts.owner}:${opts.head}`)
  search.set('base', opts.base)
  search.set('state', 'open')

  const init: RequestInit = {
    headers: {
      authorization: `Bearer ${opts.ghToken}`,
      accept: 'application/vnd.github+json',
      'user-agent': 'cat-factory-executor',
      'x-github-api-version': '2022-11-28',
    },
  }
  if (opts.signal) init.signal = opts.signal

  const res = await fetch(`${apiBase}/repos/${repoPath}/pulls?${search}`, init)
  if (!res.ok) return undefined

  const payload: unknown = await res.json().catch(() => [])
  if (!Array.isArray(payload)) return undefined
  const first = payload[0] as { html_url?: string } | null | undefined
  return first?.html_url || undefined
}
|