@cat-factory/executor-harness 1.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +143 -0
- package/dist/agent-runner.js +389 -0
- package/dist/agent.js +810 -0
- package/dist/blueprint.js +367 -0
- package/dist/bootstrap.js +99 -0
- package/dist/ci-fixer.js +46 -0
- package/dist/coding-agent.js +285 -0
- package/dist/conflict-resolver.js +138 -0
- package/dist/embed.js +8 -0
- package/dist/explore.js +74 -0
- package/dist/failure.js +47 -0
- package/dist/fixer.js +44 -0
- package/dist/follow-ups.js +103 -0
- package/dist/frontend-infra.js +283 -0
- package/dist/fs-utils.js +11 -0
- package/dist/git.js +778 -0
- package/dist/job.js +409 -0
- package/dist/logger.js +27 -0
- package/dist/merger.js +135 -0
- package/dist/on-call.js +126 -0
- package/dist/pi-workspace.js +237 -0
- package/dist/pi.js +971 -0
- package/dist/process.js +25 -0
- package/dist/redact.js +109 -0
- package/dist/runner.js +228 -0
- package/dist/server.js +135 -0
- package/dist/spec.js +754 -0
- package/dist/structured-output.js +431 -0
- package/dist/tester.js +191 -0
- package/package.json +35 -0
- package/src/agent-runner.ts +484 -0
- package/src/agent.ts +948 -0
- package/src/coding-agent.ts +393 -0
- package/src/embed.ts +32 -0
- package/src/failure.ts +73 -0
- package/src/follow-ups.ts +106 -0
- package/src/frontend-infra.ts +340 -0
- package/src/fs-utils.ts +11 -0
- package/src/git.ts +955 -0
- package/src/job.ts +766 -0
- package/src/logger.ts +45 -0
- package/src/pi-workspace.ts +348 -0
- package/src/pi.ts +1236 -0
- package/src/process.ts +33 -0
- package/src/redact.ts +109 -0
- package/src/runner.ts +384 -0
- package/src/server.ts +153 -0
- package/src/structured-output.ts +524 -0
package/dist/job.js
ADDED
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
/**
 * Require a non-empty string field from the job body.
 *
 * @param {unknown} value - Candidate value read from the body.
 * @param {string} path - Field path quoted in the error message.
 * @returns {string} `value` unchanged, once known to be a non-empty string.
 * @throws {Error} When `value` is not a string or is the empty string.
 */
function str(value, path) {
  const isNonEmptyString = typeof value === 'string' && value.length > 0;
  if (isNonEmptyString) {
    return value;
  }
  throw new Error(`Invalid job: '${path}' must be a non-empty string`);
}
|
|
7
|
+
/**
 * Coerce a value to a positive integer.
 *
 * Finite numbers are floored; anything that is not a finite number, or whose
 * floored value is not at least 1, is silently ignored.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number|undefined} The floored positive integer, or undefined.
 */
function posInt(value) {
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    return undefined;
  }
  // Floor BEFORE the positivity check: a fraction in (0, 1) is "> 0" but floors
  // to 0, which is not a positive integer and must not be returned.
  const floored = Math.floor(value);
  return floored > 0 ? floored : undefined;
}
|
|
13
|
+
/**
 * A valid TCP port (1..65535), or undefined for anything else.
 *
 * Per the original note, the backend already validates frontend ports against
 * this range; the harness re-checks at its untrusted-body boundary so that an
 * out-of-range value is dropped (the caller then falls back to its default)
 * instead of being handed to a server that cannot listen on it.
 *
 * @param {unknown} value - Candidate port from the job body.
 * @returns {number|undefined} The integer port, or undefined when unusable.
 */
function port(value) {
  const n = posInt(value);
  // The lower bound is checked here as well as the upper one: port 0 is not a
  // bindable fixed port (it asks the OS for an arbitrary one), so it must never
  // be returned even if the integer coercion yields 0 for a fractional input.
  return n !== undefined && n >= 1 && n <= 65535 ? n : undefined;
}
|
|
23
|
+
/**
 * Parse the optional per-job progress-guard overrides.
 *
 * Reads `maxToolCallsWithoutEdit`, `maxConsecutiveErrors` and
 * `maxConsecutiveWebCalls`; each is kept only when `posInt` accepts it, and a
 * malformed knob is simply dropped. Only the SHAPE is validated here — per the
 * original note, the loosen-only rule is applied later, where the override is
 * merged with the base limits (not visible in this function).
 *
 * @param {unknown} value - The body's `guardLimits` value.
 * @returns {object|undefined} The usable knobs, or undefined when none was
 *   supplied so the job body stays sparse.
 */
function parseGuardLimits(value) {
  if (value === null || typeof value !== 'object') {
    return undefined;
  }
  const knobs = ['maxToolCallsWithoutEdit', 'maxConsecutiveErrors', 'maxConsecutiveWebCalls'];
  const limits = {};
  for (const knob of knobs) {
    const parsed = posInt(value[knob]);
    if (parsed !== undefined) {
      limits[knob] = parsed;
    }
  }
  return Object.keys(limits).length === 0 ? undefined : limits;
}
|
|
48
|
+
/**
 * Parse the shared per-job auth fields, validated per harness.
 *
 * - `claude-code` / `codex`: either `ambientAuth: true` (no token is read), or
 *   a mandatory `subscriptionToken`; `subscriptionBaseUrl` is carried through
 *   when it is a non-empty string.
 * - anything else (including `pi` and unrecognised values, which leave
 *   `harness` undefined): `proxyBaseUrl` and `sessionToken` are both required.
 *
 * @param {object} o - The job body.
 * @returns {object} The harness discriminator plus its auth fields.
 * @throws {Error} When a required auth field is missing or empty.
 */
function parseHarnessAuth(o) {
  const requested = o.harness;
  const harness = ['claude-code', 'codex', 'pi'].includes(requested) ? requested : undefined;
  const isSubscriptionHarness = harness === 'claude-code' || harness === 'codex';
  if (!isSubscriptionHarness) {
    return {
      harness,
      proxyBaseUrl: str(o.proxyBaseUrl, 'proxyBaseUrl'),
      sessionToken: str(o.sessionToken, 'sessionToken'),
    };
  }
  const auth = { harness };
  if (o.ambientAuth === true) {
    // Ambient auth: no subscription token is required or read.
    auth.ambientAuth = true;
  } else {
    auth.subscriptionToken = str(o.subscriptionToken, 'subscriptionToken');
  }
  if (typeof o.subscriptionBaseUrl === 'string' && o.subscriptionBaseUrl) {
    auth.subscriptionBaseUrl = o.subscriptionBaseUrl;
  }
  return auth;
}
|
|
77
|
+
/**
 * Coerce a body-supplied monorepo service directory into a relative path.
 *
 * The value is trimmed, backslashes become `/`, leading and trailing slashes
 * are stripped (so an absolute-looking path is made relative rather than
 * rejected), and empty or `.` segments are removed. Any `..` segment is
 * rejected outright so the resulting path cannot climb out of the directory
 * it is later joined to.
 *
 * @param {unknown} value - The body's `repo.serviceDirectory` value.
 * @returns {string|undefined} The `/`-joined relative path, or undefined when
 *   the value is not a string or nothing remains after normalisation.
 * @throws {Error} When the path contains a `..` segment.
 */
function sanitizeServiceDirectory(value) {
  if (typeof value !== 'string') {
    return undefined;
  }
  const forwardSlashed = value.trim().replace(/\\/g, '/');
  const stripped = forwardSlashed.replace(/^\/+|\/+$/g, '');
  if (stripped === '') {
    return undefined;
  }
  const parts = [];
  for (const piece of stripped.split('/')) {
    if (piece !== '' && piece !== '.') {
      parts.push(piece);
    }
  }
  if (parts.length === 0) {
    return undefined;
  }
  if (parts.includes('..')) {
    throw new Error("Invalid job: 'repo.serviceDirectory' must be a path inside the repo");
  }
  return parts.join('/');
}
|
|
101
|
+
/**
 * Parse the shared repo spec.
 *
 * `owner`, `name`, `baseBranch` and `cloneUrl` are required non-empty strings;
 * `provider` and `serviceDirectory` are attached only when their parsers yield
 * a value.
 *
 * @param {object} repo - The body's `repo` object.
 * @returns {object} The validated repo spec.
 * @throws {Error} When a required field, the provider, or the service
 *   directory is invalid.
 */
function parseRepoSpec(repo) {
  const owner = str(repo.owner, 'repo.owner');
  const name = str(repo.name, 'repo.name');
  const baseBranch = str(repo.baseBranch, 'repo.baseBranch');
  const cloneUrl = str(repo.cloneUrl, 'repo.cloneUrl');
  const provider = parseVcsProvider(repo.provider);
  const serviceDirectory = sanitizeServiceDirectory(repo.serviceDirectory);
  return {
    owner,
    name,
    baseBranch,
    cloneUrl,
    ...(provider ? { provider } : {}),
    ...(serviceDirectory ? { serviceDirectory } : {}),
  };
}
|
|
117
|
+
/**
 * Parse the optional `repo.provider` discriminator.
 *
 * @param {unknown} value - The body's `repo.provider` value.
 * @returns {'github'|'gitlab'|undefined} The provider, or undefined when the
 *   field is absent (null/undefined).
 * @throws {Error} When a value is present but is neither `github` nor `gitlab`.
 */
function parseVcsProvider(value) {
  switch (value) {
    case undefined:
    case null:
      return undefined;
    case 'github':
    case 'gitlab':
      return value;
    default:
      throw new Error("Invalid job: 'repo.provider' must be 'github' or 'gitlab'");
  }
}
|
|
125
|
+
// ---- Host allowlist -------------------------------------------------------
|
|
126
|
+
// The short-lived GitHub installation token is sent (a) to the clone/push remote
|
|
127
|
+
// over HTTPS and (b) to the REST API base. A body-supplied URL pointing at an
|
|
128
|
+
// attacker-named host would exfiltrate that token, so every such URL's host is
|
|
129
|
+
// checked against an allowlist before use. Defaults to github.com /
|
|
130
|
+
// api.github.com; a GitHub Enterprise deployment can add its host via env.
|
|
131
|
+
/**
 * Hosts the harness is willing to send the installation token to.
 *
 * Always contains `github.com` and `api.github.com`. `GITHUB_ENTERPRISE_HOST`
 * adds one host and `GITHUB_ALLOWED_HOSTS` adds a comma-separated list; every
 * entry is trimmed and lower-cased, and blank entries are ignored.
 *
 * @param {object} [env=process.env] - Environment to read the overrides from.
 * @returns {Set<string>} The lower-cased allowlist.
 */
export function allowedGithubHosts(env = process.env) {
  const extras = (env.GITHUB_ALLOWED_HOSTS ?? '')
    .split(',')
    .map((entry) => entry.trim().toLowerCase());
  const candidates = [
    'github.com',
    'api.github.com',
    env.GITHUB_ENTERPRISE_HOST?.trim().toLowerCase(),
    ...extras,
  ];
  // Drop the unset enterprise host and any blank list entries.
  return new Set(candidates.filter((host) => Boolean(host)));
}
|
|
145
|
+
/**
 * Reject a URL whose host is not an allowed GitHub host.
 *
 * `file:` URLs are always permitted (per the original note they are local
 * clone sources, so no token leaves the box). Any other URL must use `http:`
 * or `https:` and its lower-cased hostname must be in `allowedGithubHosts(env)`.
 *
 * NOTE(review): plain `http:` is accepted, which would carry the token in
 * cleartext to an allowlisted host — confirm this is intended (e.g. for tests)
 * before tightening it.
 *
 * @param {string} rawUrl - The URL to validate.
 * @param {string} path - Field path quoted in error messages.
 * @param {object} [env=process.env] - Environment used to build the allowlist.
 * @returns {void}
 * @throws {Error} When the URL is unparsable, uses another scheme, or names a
 *   host outside the allowlist.
 */
function assertAllowedHost(rawUrl, path, env = process.env) {
  let url;
  try {
    url = new URL(rawUrl);
  }
  catch {
    throw new Error(`Invalid job: '${path}' must be a valid URL`);
  }
  if (url.protocol === 'file:') {
    return;
  }
  if (url.protocol !== 'https:' && url.protocol !== 'http:') {
    // The message names every scheme this check actually accepts.
    throw new Error(`Invalid job: '${path}' must be an http(s) or file URL`);
  }
  const host = url.hostname.toLowerCase();
  if (!allowedGithubHosts(env).has(host)) {
    throw new Error(`Invalid job: '${path}' host '${host}' is not an allowed GitHub host`);
  }
}
|
|
168
|
+
/**
 * Parse the coding-mode bootstrap spec.
 *
 * @param {unknown} value - The body's `bootstrap` value.
 * @returns {object|undefined} `{ target, fromScratch? }`, or undefined when
 *   `value` is not an object.
 * @throws {Error} When any of the target's `owner`, `name`, `cloneUrl` or
 *   `defaultBranch` is not a non-empty string (a missing `target` fails the
 *   same way, on `owner`).
 */
function parseAgentBootstrapSpec(value) {
  if (value === null || typeof value !== 'object') {
    return undefined;
  }
  const rawTarget = value.target;
  const source = rawTarget !== null && typeof rawTarget === 'object' ? rawTarget : {};
  const required = (key) => str(source[key], `bootstrap.target.${key}`);
  const spec = {
    target: {
      owner: required('owner'),
      name: required('name'),
      cloneUrl: required('cloneUrl'),
      defaultBranch: required('defaultBranch'),
    },
  };
  if (value.fromScratch === true) {
    spec.fromScratch = true;
  }
  return spec;
}
|
|
185
|
+
/**
 * Sanitise a body-supplied context filename down to a safe basename.
 *
 * Everything up to the last `/` or `\` is discarded, characters outside
 * `[A-Za-z0-9._-]` are removed, and the result is rejected when it is empty
 * or starts with a dot (which also covers `.` and `..`).
 *
 * @param {unknown} value - The entry's `path` value.
 * @returns {string|undefined} The cleaned basename, or undefined when unusable.
 */
function sanitizeContextFileName(value) {
  if (typeof value !== 'string') {
    return undefined;
  }
  const forwardSlashed = value.replace(/\\/g, '/');
  const basename = forwardSlashed.slice(forwardSlashed.lastIndexOf('/') + 1);
  const safe = basename.replace(/[^A-Za-z0-9._-]/g, '');
  // A leading dot covers dotfiles as well as the `.` and `..` names.
  if (safe === '' || safe.startsWith('.')) {
    return undefined;
  }
  return safe;
}
|
|
199
|
+
/**
 * Parse the linked-context files, dropping any malformed or unsafe entry.
 *
 * An entry is kept only when it is an object, its `path` sanitises to a name
 * not already taken by an earlier kept entry, and its `content` is a string.
 * `title` defaults to the sanitised path and `url` to the empty string.
 *
 * @param {unknown} value - The body's `contextFiles` value.
 * @returns {Array<{path: string, title: string, url: string, content: string}>}
 *   The kept entries in input order; empty when `value` is not an array.
 */
function parseContextFiles(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  // Keyed by sanitised path; a Map keeps insertion order and gives first-wins.
  const byPath = new Map();
  for (const entry of value) {
    if (entry === null || typeof entry !== 'object') {
      continue;
    }
    const path = sanitizeContextFileName(entry.path);
    if (!path || byPath.has(path) || typeof entry.content !== 'string') {
      continue;
    }
    byPath.set(path, {
      path,
      title: typeof entry.title === 'string' ? entry.title : path,
      url: typeof entry.url === 'string' ? entry.url : '',
      content: entry.content,
    });
  }
  return [...byPath.values()];
}
|
|
224
|
+
/**
 * Parse the explore-mode infra stand-up spec.
 *
 * `kind: 'frontend'` is delegated to `parseFrontendInfraSpec`. Otherwise
 * `environment` must be `local` or `ephemeral`; `noInfraDependencies` is kept
 * only when exactly `true`, and `composePath` / `environmentUrl` only when
 * they are non-empty strings.
 *
 * @param {unknown} value - The body's `infra` value.
 * @returns {object|undefined} The parsed spec, or undefined when absent or
 *   unrecognised.
 */
function parseAgentInfraSpec(value) {
  if (value === null || typeof value !== 'object') {
    return undefined;
  }
  if (value.kind === 'frontend') {
    return parseFrontendInfraSpec(value);
  }
  const { environment } = value;
  if (environment !== 'local' && environment !== 'ephemeral') {
    return undefined;
  }
  const spec = { environment };
  if (value.noInfraDependencies === true) {
    spec.noInfraDependencies = true;
  }
  if (typeof value.composePath === 'string' && value.composePath) {
    spec.composePath = value.composePath;
  }
  if (typeof value.environmentUrl === 'string' && value.environmentUrl) {
    spec.environmentUrl = value.environmentUrl;
  }
  return spec;
}
|
|
243
|
+
/**
 * Env-var names never injected from a frontend binding. Per the original note
 * the bindings are spread over `process.env` at build time, so any of these
 * would break the toolchain (or enable code execution / certificate overrides)
 * rather than name an upstream URL. Matched exactly and case-sensitively.
 *
 * `NODE_TLS_REJECT_UNAUTHORIZED` is reserved alongside `NODE_EXTRA_CA_CERTS`:
 * setting it to `0` disables TLS certificate verification in Node entirely,
 * which is a stronger certificate override than adding a CA.
 */
const RESERVED_ENV_NAMES = new Set([
  'PATH',
  'HOME',
  'NODE_OPTIONS',
  'NODE_PATH',
  'NODE_EXTRA_CA_CERTS',
  'NODE_TLS_REJECT_UNAUTHORIZED',
  'LD_PRELOAD',
  'LD_LIBRARY_PATH',
  'BASH_ENV',
  'ENV',
  'SHELL',
  'IFS',
]);
/**
 * Env-var name PREFIXES never injected from a frontend binding, stored
 * lower-cased and compared against the lower-cased key. Per the original note,
 * `npm_config_*` reconfigures the package manager and `GIT_*` reconfigures git,
 * and npm honours `NPM_CONFIG_*` in any case — so a case-sensitive prefix match
 * would let the upper-cased form through.
 */
const RESERVED_ENV_PREFIXES = ['npm_config_', 'git_'];
/**
 * Whether an env-var name is reserved.
 *
 * @param {string} key - The binding's env-var name.
 * @returns {boolean} True when `key` is an exact reserved name (case-sensitive)
 *   or starts with a reserved family prefix (case-insensitive).
 */
function isReservedEnvName(key) {
  if (RESERVED_ENV_NAMES.has(key)) {
    return true;
  }
  const lower = key.toLowerCase();
  return RESERVED_ENV_PREFIXES.some((prefix) => lower.startsWith(prefix));
}
|
|
283
|
+
/**
 * Parse the frontend UI-test infra spec (`kind: 'frontend'`), tolerating
 * missing knobs.
 *
 * Enumerated knobs are kept only when they hold one of their allowed values,
 * string knobs only when non-empty, and ports only when `port` accepts them.
 * `env` keeps string-to-string entries whose name is non-empty and not
 * reserved; per the original note the bindings are later spread over
 * `process.env` at build time, so a reserved name such as `PATH` must never
 * be overridden from the body.
 *
 * @param {object} o - The body's `infra` object.
 * @returns {object} The sparse frontend spec; `kind` is always `'frontend'`.
 */
function parseFrontendInfraSpec(o) {
  const oneOf = (candidate, allowed) => (allowed.includes(candidate) ? candidate : undefined);
  const packageManager = oneOf(o.packageManager, ['pnpm', 'npm', 'yarn']);
  const serveMode = oneOf(o.serveMode, ['static', 'command']);
  const envInjection = oneOf(o.envInjection, ['build', 'runtime']);

  const env = {};
  if (o.env !== null && typeof o.env === 'object') {
    for (const [key, val] of Object.entries(o.env)) {
      if (!key || typeof val !== 'string' || isReservedEnvName(key)) {
        continue;
      }
      env[key] = val;
    }
  }

  const servePort = port(o.servePort);
  const wiremockPort = port(o.wiremockPort);

  // Keys are added in a fixed order so the emitted spec has a stable shape.
  const spec = { kind: 'frontend' };
  const copyNonEmptyString = (key) => {
    const raw = o[key];
    if (typeof raw === 'string' && raw) {
      spec[key] = raw;
    }
  };
  if (packageManager) {
    spec.packageManager = packageManager;
  }
  copyNonEmptyString('install');
  copyNonEmptyString('buildScript');
  copyNonEmptyString('outputDir');
  if (serveMode) {
    spec.serveMode = serveMode;
  }
  copyNonEmptyString('serveScript');
  if (servePort !== undefined) {
    spec.servePort = servePort;
  }
  if (envInjection) {
    spec.envInjection = envInjection;
  }
  if (Object.keys(env).length > 0) {
    spec.env = env;
  }
  copyNonEmptyString('wiremockMappingsPath');
  if (wiremockPort !== undefined) {
    spec.wiremockPort = wiremockPort;
  }
  return spec;
}
|
|
321
|
+
/**
 * Validate and narrow an untrusted request body into an agent job.
 *
 * Required fields throw when missing or empty; optional fields are carried
 * through only when well-formed, so the resulting job stays sparse. After the
 * job is assembled, every URL the token may be sent to (`repo.cloneUrl`,
 * `githubApiBase`, `bootstrap.target.cloneUrl`) is checked against the host
 * allowlist.
 *
 * @param {unknown} input - The request body.
 * @returns {object} The validated job.
 * @throws {Error} On any invalid required field, unsafe path, or disallowed host.
 */
export function parseAgentJob(input) {
  if (typeof input !== 'object' || input === null) {
    throw new Error('Invalid job: body must be an object');
  }
  const o = input;
  const mode = o.mode === 'coding' || o.mode === 'explore' || o.mode === 'preview'
    ? o.mode
    : undefined;
  if (!mode) {
    throw new Error("Invalid job: 'mode' must be 'explore', 'coding' or 'preview'");
  }
  // Per the original note, preview runs no agent, so the agent-only fields
  // (system/user prompt, model) may be absent there and default to ''. Every
  // other mode still requires them (throws when missing/empty).
  const agentField = (value, path) =>
    mode === 'preview' ? (typeof value === 'string' ? value : '') : str(value, path);
  const repo = o.repo ?? {};

  let output;
  if (typeof o.output === 'object' && o.output !== null) {
    const so = o.output;
    output = { kind: so.kind === 'structured' ? 'structured' : 'prose' };
    if (typeof so.shapeHint === 'string') {
      output.shapeHint = so.shapeHint;
    }
    // An explicit `repair: false` must survive: per the original note the
    // handler keys off `output.repair === false` and defaults to repair-on.
    if (typeof so.repair === 'boolean') {
      output.repair = so.repair;
    }
    // The opt-in truncation gate is carried through only when exactly `true`.
    if (so.failOnUnusableFinal === true) {
      output.failOnUnusableFinal = true;
    }
  }

  let pr;
  if (typeof o.pr === 'object' && o.pr !== null) {
    const p = o.pr;
    pr = { title: str(p.title, 'pr.title'), body: typeof p.body === 'string' ? p.body : '' };
  }

  const infra = parseAgentInfraSpec(o.infra);
  const bootstrap = parseAgentBootstrapSpec(o.bootstrap);
  const contextFiles = parseContextFiles(o.contextFiles);
  const guardLimits = parseGuardLimits(o.guardLimits);

  const job = {
    jobId: str(o.jobId, 'jobId'),
    mode,
    systemPrompt: agentField(o.systemPrompt, 'systemPrompt'),
    userPrompt: agentField(o.userPrompt, 'userPrompt'),
    model: agentField(o.model, 'model'),
    ...parseHarnessAuth(o),
    ghToken: str(o.ghToken, 'ghToken'),
    repo: parseRepoSpec(repo),
    branch: str(o.branch, 'branch'),
    // An empty string is treated as absent, like the other optional strings:
    // it would otherwise be carried into the job without passing the host
    // allowlist check below.
    ...(typeof o.githubApiBase === 'string' && o.githubApiBase
      ? { githubApiBase: o.githubApiBase }
      : {}),
    ...(typeof o.webToolsGuidance === 'string' ? { webToolsGuidance: o.webToolsGuidance } : {}),
    ...(o.webSearch === true ? { webSearch: true } : {}),
    ...(o.full === true ? { full: true } : {}),
    ...(typeof o.mergeBase === 'string' && o.mergeBase ? { mergeBase: o.mergeBase } : {}),
    ...(bootstrap ? { bootstrap } : {}),
    ...(output ? { output } : {}),
    ...(contextFiles.length ? { contextFiles } : {}),
    ...(infra ? { infra } : {}),
    ...(typeof o.newBranch === 'string' && o.newBranch ? { newBranch: o.newBranch } : {}),
    ...(typeof o.pushBranch === 'string' && o.pushBranch ? { pushBranch: o.pushBranch } : {}),
    ...(typeof o.commitMessage === 'string' && o.commitMessage
      ? { commitMessage: o.commitMessage }
      : {}),
    ...(pr ? { pr } : {}),
    ...(o.noChangesIsError === false ? { noChangesIsError: false } : {}),
    ...(o.persistentCheckout === true ? { persistentCheckout: true } : {}),
    ...(o.streamFollowUps === true ? { streamFollowUps: true } : {}),
    ...(guardLimits ? { guardLimits } : {}),
  };

  assertAllowedHost(job.repo.cloneUrl, 'repo.cloneUrl');
  if (job.githubApiBase) {
    assertAllowedHost(job.githubApiBase, 'githubApiBase');
  }
  // Per the original note, bootstrap pushes to a SEPARATE target repo, so its
  // clone URL must be on the allowlist too.
  if (job.bootstrap) {
    assertAllowedHost(job.bootstrap.target.cloneUrl, 'bootstrap.target.cloneUrl');
  }
  return job;
}
|
package/dist/logger.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// Minimal zero-dependency structured logger. The container image installs no npm
|
|
2
|
+
// packages at runtime (see Dockerfile — it compiles the TS against standalone
|
|
3
|
+
// typescript/@types/node and ships only Node built-ins + the global Pi CLI), so
|
|
4
|
+
// pino can't live here. This emits pino-shaped JSON lines (level/time/msg +
|
|
5
|
+
// fields) which the platform captures from stdout/stderr. The Worker uses pino.
|
|
6
|
+
/**
 * Write one JSON log line.
 *
 * Bound (per-logger) fields are spread first so a call-site field can override
 * them; the envelope keys (`level`, `time`, `msg`) are applied last so neither
 * can overwrite them. `error` and `warn` lines go to stderr, all others to
 * stdout.
 *
 * If the fields cannot be serialised (e.g. a circular reference or a BigInt),
 * the line is still written with the envelope and a `logError` note instead of
 * throwing — a log call must not crash its caller.
 *
 * @param {string} level - Log level; also selects the output stream.
 * @param {string} msg - Log message.
 * @param {object} [bound] - Fields bound to the logger.
 * @param {object} [fields] - Call-site fields.
 * @returns {void}
 */
function emit(level, msg, bound, fields) {
  const time = new Date().toISOString();
  let line;
  try {
    line = JSON.stringify({ ...bound, ...fields, level, time, msg });
  }
  catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    line = JSON.stringify({
      level,
      time,
      msg: String(msg),
      logError: `log fields could not be serialised: ${reason}`,
    });
  }
  // Errors/warnings to stderr, everything else to stdout.
  const stream = level === 'error' || level === 'warn' ? process.stderr : process.stdout;
  stream.write(`${line}\n`);
}
|
|
17
|
+
/**
 * Build a logger whose every emit folds in `bound`.
 *
 * @param {object} bound - Fields attached to every line from this logger.
 * @returns {object} `debug`/`info`/`warn`/`error` methods taking
 *   `(msg, fields)`, plus `child(extra)`, which returns a logger bound to
 *   `{ ...bound, ...extra }`.
 */
function makeLogger(bound) {
  const at = (level) => (msg, fields) => emit(level, msg, bound, fields);
  return {
    debug: at('debug'),
    info: at('info'),
    warn: at('warn'),
    error: at('error'),
    child: (extra) => makeLogger({ ...bound, ...extra }),
  };
}
/** Root logger: binds no fields. */
export const log = makeLogger({});
|
package/dist/merger.js
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { cloneRepo, hasDiffAgainstBase } from './git.js';
|
|
2
|
+
import { extractJsonObject } from './blueprint.js';
|
|
3
|
+
import { agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
|
|
4
|
+
import { diagnosticsSuffix, resolveStructuredOutput, } from './structured-output.js';
|
|
5
|
+
import { log } from './logger.js';
|
|
6
|
+
/** Compact description of the merge-assessment shape, passed as the `shapeHint` for structured-output resolution. */
const ASSESSMENT_SHAPE_HINT = [
  'Expected a merge assessment: ',
  '{"complexity": number 0..1, "risk": number 0..1, "impact": number 0..1, "rationale": string}.',
].join('');
|
|
9
|
+
/**
 * Clamp a score to the range 0..1.
 *
 * Only numbers and non-blank numeric strings are treated as scores. Every
 * other input (null, undefined, booleans, blank strings, arrays, objects) and
 * any non-finite number yields `fallback`. Blindly applying `Number()` would
 * turn `null`, `''`, `false` and `[]` into 0 — the LOWEST score — instead of
 * the fallback.
 *
 * @param {unknown} value - Candidate score.
 * @param {number} fallback - Value returned when `value` is not a usable score.
 * @returns {number} The score clamped to [0, 1], or `fallback`.
 */
function clamp01(value, fallback) {
  let n;
  if (typeof value === 'number') {
    n = value;
  }
  else if (typeof value === 'string' && value.trim() !== '') {
    n = Number(value);
  }
  else {
    return fallback;
  }
  if (!Number.isFinite(n)) {
    return fallback;
  }
  return Math.min(1, Math.max(0, n));
}
|
|
16
|
+
/**
 * Coerce the agent's parsed JSON into a well-formed assessment.
 *
 * Returns null when `raw` is not an object at all, i.e. no JSON object was
 * extracted from the reply. The caller in this file checks for a missing
 * assessment and reports an error in that case; returning a fabricated
 * assessment here would make that path unreachable.
 *
 * When `raw` IS an object, a missing or garbage score defaults to a
 * CONSERVATIVE 1, and a missing or empty rationale falls back to the first
 * 2000 characters of `summary`.
 *
 * @param {unknown} raw - The extracted JSON value, if any.
 * @param {string} summary - The raw reply text, used as the fallback rationale.
 * @returns {{complexity: number, risk: number, impact: number, rationale: string}|null}
 *   The coerced assessment, or null when there was no object to coerce.
 */
function coerceAssessment(raw, summary) {
  if (typeof raw !== 'object' || raw === null) {
    return null;
  }
  const hasRationale = typeof raw.rationale === 'string' && raw.rationale;
  return {
    complexity: clamp01(raw.complexity, 1),
    risk: clamp01(raw.risk, 1),
    impact: clamp01(raw.impact, 1),
    rationale: hasRationale ? raw.rationale : summary.slice(0, 2000),
  };
}
|
|
30
|
+
/**
 * Build the merger task prompt: the job's instructions, a paragraph telling
 * the agent which branch to diff against which base, and the reply format.
 *
 * @param {object} job - Reads `instructions`, `branch`, `repo.baseBranch` and
 *   the optional `prNumber`.
 * @returns {string} The prompt, with sections separated by blank lines.
 */
function buildUserPrompt(job) {
  const base = job.repo.baseBranch;
  const prRef = job.prNumber === undefined ? '' : ` (PR #${job.prNumber})`;
  const whereToLook = `The pull request${prRef} is on branch \`${job.branch}\`; the base branch is \`${base}\`.`;
  const howToInspect = `Inspect the change (e.g. \`git fetch origin ${base}\` then \`git diff origin/${base}...HEAD\`) and score complexity, risk and impact.`;
  const replyFormat = 'Respond with ONLY a JSON object {"complexity":0.0,"risk":0.0,"impact":0.0,"rationale":"…"}.';
  const sections = [job.instructions, '', `${whereToLook} ${howToInspect}`, '', replyFormat];
  return sections.join('\n');
}
|
|
43
|
+
/**
 * Run one merger job end to end: clone the PR branch, let the agent assess it,
 * and return the scores. Nothing is committed by this function.
 *
 * @param {object} job - The merger job (repo, branch, prompts, auth, model).
 * @param {object} [opts={}] - Run options; `opts.signal` is forwarded to the
 *   clone, the diff check and the structured-output resolution, and `opts`
 *   itself to the agent run.
 * @returns {Promise<object>} `{ assessment, summary, stats, usage? }` on
 *   success, or `{ summary, stats, error, usage? }` when no assessment could
 *   be obtained.
 */
export async function handleMerger(job, opts = {}) {
  const trace = { jobId: job.jobId, repo: `${job.repo.owner}/${job.repo.name}`, branch: job.branch };
  return withWorkspace('merge', async (dir) => {
    log.info('merge: cloning PR branch', trace);
    // Per the original note, a FULL clone is requested because scoring means
    // diffing against the base, which needs origin/<base> and the merge base;
    // a shallow single-branch clone has neither.
    await cloneRepo({
      repo: { ...job.repo, baseBranch: job.branch },
      ghToken: job.ghToken,
      dir,
      full: true,
      signal: opts.signal,
    });

    // Guard the auto-merge path: when no real diff against the base can be
    // examined, the agent's scores are not trusted and are replaced below.
    const diffExaminable = await hasDiffAgainstBase(dir, job.repo.baseBranch, opts.signal);
    if (!diffExaminable) {
      log.warn('merge: no examinable diff against base; will assess conservatively', trace);
    }

    log.info('merge: running agent', trace);
    const run = await runAgentInWorkspace({
      dir,
      systemPrompt: job.systemPrompt,
      userPrompt: buildUserPrompt(job),
      model: job.model,
      harness: job.harness,
      subscriptionToken: job.subscriptionToken,
      subscriptionBaseUrl: job.subscriptionBaseUrl,
      proxyBaseUrl: job.proxyBaseUrl,
      sessionToken: job.sessionToken,
      // The merger only assesses, so an edit-free run is expected.
      expectsEdits: false,
    }, opts);
    const { summary, stats, stderrTail, usage } = run;
    const usagePart = usage ? { usage } : {};

    // Turn the agent's reply into an assessment via the shared structured-output
    // resolver (which, per the original note, may make one repair call).
    const resolved = await resolveStructuredOutput({
      label: 'merger',
      shapeHint: ASSESSMENT_SHAPE_HINT,
      parse: (text) => coerceAssessment(extractJsonObject(text), text),
    }, summary, {
      harness: job.harness,
      subscriptionToken: job.subscriptionToken,
      subscriptionBaseUrl: job.subscriptionBaseUrl,
      proxyBaseUrl: job.proxyBaseUrl,
      sessionToken: job.sessionToken,
      model: job.model,
      jobId: job.jobId,
      signal: opts.signal,
    });
    const assessment = resolved.value;
    const diagnostics = resolved.diagnostics;

    if (!assessment) {
      return {
        summary,
        stats,
        error: noAssessmentReason(stats, stderrTail, diagnostics),
        ...usagePart,
      };
    }

    if (diffExaminable) {
      log.info('merge: assessed', { ...trace, ...assessment });
      return { assessment, summary, stats, ...usagePart };
    }

    // No examinable diff: return the maximum on every axis so the PR is routed
    // to a human merge review instead of auto-merging on untrusted scores.
    const conservative = {
      complexity: 1,
      risk: 1,
      impact: 1,
      rationale: `Could not examine a real diff of \`${job.branch}\` against \`${job.repo.baseBranch}\` ` +
        `(the base ref was missing or the diff was empty), so this PR was NOT auto-assessed ` +
        `and needs a human merge review.`,
    };
    log.info('merge: assessed conservatively (no examinable diff)', { ...trace, ...conservative });
    return { assessment: conservative, summary, stats, ...usagePart };
  });
}
|
|
129
|
+
/**
 * Human-readable reason a merger run produced no usable assessment.
 *
 * @param {object} stats - Agent run stats, checked with `agentNeverActed`.
 * @param {string} stderrTail - Tail of the agent's stderr, formatted by
 *   `agentOutputTail`.
 * @param {object} [diagnostics] - Structured-output diagnostics, appended via
 *   `diagnosticsSuffix` when present.
 * @returns {string} The assembled error message.
 */
function noAssessmentReason(stats, stderrTail, diagnostics) {
  const cause = agentNeverActed(stats)
    ? NEVER_ACTED_CAUSE
    : ' The agent did not return a parseable JSON assessment.';
  const detail = diagnostics ? diagnosticsSuffix(diagnostics) : '';
  const tail = agentOutputTail(stderrTail);
  return `Merger produced no assessment.${cause}${detail}${tail}`;
}
|