@cat-factory/executor-harness 1.34.4 → 1.34.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +240 -110
- package/dist/coding-agent.js +242 -2
- package/dist/job.js +44 -0
- package/dist/pi-workspace.js +1 -0
- package/dist/pi.js +27 -3
- package/package.json +3 -3
- package/src/agent.ts +296 -118
- package/src/coding-agent.ts +289 -1
- package/src/job.ts +77 -0
- package/src/pi-workspace.ts +7 -0
- package/src/pi.ts +30 -3
package/dist/agent.js
CHANGED
|
@@ -7,7 +7,7 @@ import { standUpFrontend, tearDownFrontend } from './frontend-infra.js';
|
|
|
7
7
|
import { configurePackageRegistries } from './package-registries.js';
|
|
8
8
|
import { captureRedactedOutput, redactSecrets } from './redact.js';
|
|
9
9
|
import { cloneRepo, commitAll, conflictDiff, hasAgentChanges, headCommit, mergeBranch, openPullRequest, prepareExistingCheckout, pushBranch, reinitAndPush, unmergedPaths, } from './git.js';
|
|
10
|
-
import { noChangesReason, runCodingAgent } from './coding-agent.js';
|
|
10
|
+
import { makeDirClaimer, noChangesReason, runCodingAgent, runMultiRepoCoding, } from './coding-agent.js';
|
|
11
11
|
import { acquireRepoCheckout, agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, unusableFinalAnswerCause, withWorkspace, } from './pi-workspace.js';
|
|
12
12
|
import { diagnosticsSuffix, resolveStructuredOutput, } from './structured-output.js';
|
|
13
13
|
import { log } from './logger.js';
|
|
@@ -152,6 +152,39 @@ async function tearDownInfra(dir, infra) {
|
|
|
152
152
|
// The container is ephemeral and torn down with the run anyway — ignore.
|
|
153
153
|
}
|
|
154
154
|
}
|
|
155
|
+
/**
 * Turn an agent's final reply text into the structured JSON value reported as `custom`.
 * Used by both the explore and the coding structured-output paths.
 *
 * Two modes, selected by `job.output.repair`:
 *  - `repair === false`: parse the reply directly with `extractJsonObject`. Any error thrown
 *    by the parse is swallowed here and reported as `{ value: null }` (no diagnostics).
 *  - otherwise (the default): delegate to `resolveStructuredOutput`, handing it the parser, a
 *    shape hint and the job's model/credential settings, and return its `value` and
 *    `diagnostics`. Errors raised by `resolveStructuredOutput` itself are NOT caught here.
 *
 * @param job     The job; reads `output`, `harness`, `subscriptionToken`,
 *                `subscriptionBaseUrl`, `proxyBaseUrl`, `sessionToken`, `model`, `jobId`.
 * @param summary The agent's final reply text.
 * @param signal  Abort signal forwarded to the repair call.
 * @returns `{ value, diagnostics? }` — the caller decides whether a null/undefined `value`
 *          is fatal (explore treats it as a failure; coding treats it as "no `custom`").
 */
async function resolveReplyCustom(job, summary, signal) {
    const outputSpec = job.output;
    // Direct parse: no repair channel, and a parse error simply means "no usable value".
    if (outputSpec?.repair === false) {
        let parsed;
        try {
            parsed = extractJsonObject(summary);
        }
        catch {
            parsed = null;
        }
        return { value: parsed };
    }
    // What to parse and how to describe the expected shape to the repair call.
    const parserSpec = {
        label: 'agent',
        shapeHint: outputSpec?.shapeHint ?? 'Expected a single JSON object.',
        parse: (text) => extractJsonObject(text),
    };
    // Model + credential settings the repair call runs with, taken verbatim from the job.
    const repairChannel = {
        harness: job.harness,
        subscriptionToken: job.subscriptionToken,
        subscriptionBaseUrl: job.subscriptionBaseUrl,
        proxyBaseUrl: job.proxyBaseUrl,
        sessionToken: job.sessionToken,
        model: job.model,
        jobId: job.jobId,
        signal,
    };
    const { value, diagnostics } = await resolveStructuredOutput(parserSpec, summary, repairChannel);
    return { value, diagnostics };
}
|
|
155
188
|
/** Extract the first JSON object from an agent's final message (tolerating fences/prose). */
|
|
156
189
|
function extractJsonObject(text) {
|
|
157
190
|
const trimmed = text.trim();
|
|
@@ -298,6 +331,15 @@ async function runPreviewMode(job, opts) {
|
|
|
298
331
|
*/
|
|
299
332
|
async function runExploreMode(job, opts) {
|
|
300
333
|
const logger = opts.log ?? log;
|
|
334
|
+
// Multi-repo read-only exploration (service-connections phase 3): when the job carries peer
|
|
335
|
+
// repos, clone them all as siblings and run at the workspace root. Keyed off job DATA
|
|
336
|
+
// (`peerRepos`), not the agent kind — the backend sets it for the bug-investigator when the
|
|
337
|
+
// task has involved services in distinct repos. `runMultiRepoExplore` uses its own ephemeral
|
|
338
|
+
// `withWorkspace`, so a `persistentCheckout` flag (which a warm-pool dispatch injects on EVERY
|
|
339
|
+
// job) is harmlessly ignored — it must NOT suppress the fan-out, or a pooled bug-investigator
|
|
340
|
+
// would silently drop its peer repos and only ever see the primary one.
|
|
341
|
+
if (job.peerRepos?.length)
|
|
342
|
+
return runMultiRepoExplore(job, opts);
|
|
301
343
|
return acquireRepoCheckout({ persistent: job.persistentCheckout === true, prefix: 'agent-explore', repo: job.repo }, async (dir) => {
|
|
302
344
|
opts.onPhase?.('clone');
|
|
303
345
|
// Monorepo: run with cwd set to the service subtree (created if missing), mirroring the
|
|
@@ -367,130 +409,193 @@ async function runExploreMode(job, opts) {
|
|
|
367
409
|
contextFiles: job.contextFiles,
|
|
368
410
|
guardLimits: job.guardLimits,
|
|
369
411
|
}, opts);
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
...(callMetrics ? { callMetrics } : {}),
|
|
407
|
-
...infraSetupFields,
|
|
408
|
-
};
|
|
409
|
-
}
|
|
410
|
-
// Structured: parse the agent's JSON. With repair enabled (default) a malformed
|
|
411
|
-
// reply gets ONE structured repair call before giving up; with `repair:false` we
|
|
412
|
-
// parse directly (no repair channel). The backend coerces/validates + renders from
|
|
413
|
-
// the returned object in a post-op.
|
|
414
|
-
let custom = null;
|
|
415
|
-
let diagnostics;
|
|
416
|
-
if (job.output.repair === false) {
|
|
417
|
-
try {
|
|
418
|
-
custom = extractJsonObject(summary);
|
|
419
|
-
}
|
|
420
|
-
catch {
|
|
421
|
-
custom = null;
|
|
422
|
-
}
|
|
423
|
-
}
|
|
424
|
-
else {
|
|
425
|
-
const resolved = await resolveStructuredOutput({
|
|
426
|
-
label: 'agent',
|
|
427
|
-
shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
|
|
428
|
-
parse: (text) => extractJsonObject(text),
|
|
429
|
-
}, summary, {
|
|
430
|
-
harness: job.harness,
|
|
431
|
-
subscriptionToken: job.subscriptionToken,
|
|
432
|
-
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
433
|
-
proxyBaseUrl: job.proxyBaseUrl,
|
|
434
|
-
sessionToken: job.sessionToken,
|
|
435
|
-
model: job.model,
|
|
436
|
-
jobId: job.jobId,
|
|
437
|
-
signal: opts.signal,
|
|
438
|
-
});
|
|
439
|
-
custom = resolved.value;
|
|
440
|
-
diagnostics = resolved.diagnostics;
|
|
441
|
-
}
|
|
442
|
-
if (custom === undefined || custom === null) {
|
|
443
|
-
return {
|
|
444
|
-
summary,
|
|
445
|
-
stats,
|
|
446
|
-
error: noStructuredReason(stats, stderrTail, diagnostics),
|
|
447
|
-
failureCause: 'no-usable-output',
|
|
448
|
-
...(usage ? { usage } : {}),
|
|
449
|
-
...(callMetrics ? { callMetrics } : {}),
|
|
450
|
-
...infraSetupFields,
|
|
451
|
-
};
|
|
452
|
-
}
|
|
453
|
-
// Stamp the run's actual environment authoritatively onto the structured result when
|
|
454
|
-
// infra was managed (the tester): which env the suite ran in is decided by the job's
|
|
455
|
-
// infra spec, NOT the model, so the backend can echo it back to the UI deterministically
|
|
456
|
-
// even when the model omits it from its JSON (or a structured repair drops it). A
|
|
457
|
-
// frontend run tests the app against its live ephemeral backend(s), so it reports
|
|
458
|
-
// `ephemeral` (the TestReport env vocabulary has no separate frontend value).
|
|
459
|
-
const reportedEnvironment = infra
|
|
460
|
-
? infra.kind === 'frontend'
|
|
461
|
-
? 'ephemeral'
|
|
462
|
-
: infra.environment
|
|
463
|
-
: undefined;
|
|
464
|
-
if (reportedEnvironment && typeof custom === 'object') {
|
|
465
|
-
;
|
|
466
|
-
custom.environment = reportedEnvironment;
|
|
467
|
-
}
|
|
468
|
-
logger.info('agent(explore): done (structured)', { ...stats });
|
|
412
|
+
return await finalizeExploreResult(job, { summary, stats, stderrTail, usage, callMetrics, runDiag }, { infra, infraSetupFields, logger, signal: opts.signal });
|
|
413
|
+
}
|
|
414
|
+
finally {
|
|
415
|
+
if (managed)
|
|
416
|
+
await managed.cleanup();
|
|
417
|
+
}
|
|
418
|
+
});
|
|
419
|
+
}
|
|
420
|
+
/**
 * Convert an explore agent's raw run into the result object returned to the caller.
 *
 * Decision order (first match wins):
 *  1. Blank summary                     -> failure (`no-usable-output`, `noOutputReason`).
 *  2. Structured job with `failOnUnusableFinal` and `unusableFinalAnswerCause(runDiag)`
 *     reporting a cause                 -> failure (`no-usable-output`). Checked BEFORE any
 *     parsing so a rejected final answer is never passed on to the structured repair step.
 *  3. Non-structured job                -> success; the prose summary is the deliverable.
 *  4. Structured job, unparseable reply -> failure (`no-usable-output`, `noStructuredReason`).
 *  5. Structured job, parsed reply      -> success with the parsed value as `custom`.
 *
 * Every result carries `summary` and `stats`, plus `usage` / `callMetrics` when present and
 * whatever keys `ctx.infraSetupFields` holds.
 *
 * @param job Job description; reads `job.output` (`kind`, `failOnUnusableFinal`, and the
 *            fields used by `resolveReplyCustom`).
 * @param run `{ summary, stats, stderrTail, usage, callMetrics, runDiag }` from the agent run.
 * @param ctx `{ infra, infraSetupFields, logger, signal }`; `infra` may be absent, in which
 *            case no environment is stamped onto the structured value.
 * @returns The result object described above.
 */
async function finalizeExploreResult(job, run, ctx) {
    const { summary, stats, stderrTail, usage, callMetrics, runDiag } = run;
    const { infra, infraSetupFields, logger, signal } = ctx;
    // Optional fields shared by every result shape, appended after the shape-specific keys.
    const trailingFields = () => ({
        ...(usage ? { usage } : {}),
        ...(callMetrics ? { callMetrics } : {}),
        ...infraSetupFields,
    });
    // Every failure from this function has the same cause and differs only in its message.
    const unusable = (error) => ({
        summary,
        stats,
        error,
        failureCause: 'no-usable-output',
        ...trailingFields(),
    });
    if (!summary.trim()) {
        return unusable(noOutputReason(stats, stderrTail));
    }
    const wantsStructured = job.output?.kind === 'structured';
    // Opt-in gate: reject a final answer flagged as unusable by the run diagnostics before
    // the parse/repair step below gets a chance to accept it.
    if (wantsStructured && job.output.failOnUnusableFinal) {
        const cause = unusableFinalAnswerCause(runDiag);
        if (cause) {
            return unusable(`the agent did not return a usable result: ${cause}.${agentOutputTail(stderrTail, summary)}`);
        }
    }
    // Prose: the summary IS the deliverable.
    if (!wantsStructured) {
        logger.info('agent(explore): done (prose)', { ...stats });
        return { summary, stats, ...trailingFields() };
    }
    // Structured: the parsed JSON is the whole deliverable, so a missing value is a failure.
    const { value: custom, diagnostics } = await resolveReplyCustom(job, summary, signal);
    if (custom === undefined || custom === null) {
        return unusable(noStructuredReason(stats, stderrTail, diagnostics));
    }
    // The environment is decided by the job's infra spec rather than by the model's reply, so
    // it is written over whatever the parsed value carried. A frontend infra reports
    // `ephemeral`; any other infra reports its own `environment`.
    let reportedEnvironment;
    if (infra) {
        reportedEnvironment = infra.kind === 'frontend' ? 'ephemeral' : infra.environment;
    }
    if (reportedEnvironment && typeof custom === 'object') {
        custom.environment = reportedEnvironment;
    }
    logger.info('agent(explore): done (structured)', { ...stats });
    return { summary, custom, stats, ...trailingFields() };
}
|
|
512
|
+
/**
 * Read-only multi-repo exploration: clone the primary repo and every peer repo as sibling
 * checkouts under one workspace root, run the agent once with its working directory at that
 * root, and return its prose/structured result via `finalizeExploreResult`.
 *
 * This function itself creates no branches, makes no commits, pushes nothing and opens no PR:
 * the only git operation it performs is `cloneRepo`. The primary is cloned at `job.branch`,
 * each peer at its own `repo.baseBranch`; a peer without a `ghToken` uses the job's.
 *
 * @param job  Job description; reads `repo`, `branch`, `ghToken`, `peerRepos`, the prompt /
 *             model / credential fields forwarded to the agent, and `output`.
 * @param opts Run options; reads `log`, `signal`, `onPhase`, and is forwarded to
 *             `runAgentInWorkspace`.
 * @returns The result object produced by `finalizeExploreResult`.
 */
async function runMultiRepoExplore(job, opts) {
    const logger = (opts.log ?? log).child({ kind: 'multi-repo-explore', jobId: job.jobId });
    // Sibling directory names come from the allocator shared with the coding fan-out, so both
    // paths place a given repo in the same `owner__name` directory.
    const claimDir = makeDirClaimer();
    const primaryLeg = { repo: job.repo, cloneBranch: job.branch, ghToken: job.ghToken };
    const peerLegs = (job.peerRepos ?? []).map(({ repo, ghToken }) => ({
        repo,
        cloneBranch: repo.baseBranch,
        ghToken: ghToken ?? job.ghToken,
    }));
    const legs = [primaryLeg, ...peerLegs].map((leg) => ({ ...leg, dirName: claimDir(leg.repo) }));
    return withWorkspace('explore-multi', async (root) => {
        // Clone phase. The legs do not depend on one another (no work branch, no resume), so
        // they are cloned concurrently.
        opts.onPhase?.('clone');
        const cloneLeg = async ({ repo, cloneBranch, ghToken, dirName }) => {
            const dir = join(root, dirName);
            await mkdir(dir, { recursive: true });
            logger.info('multi-repo-explore: cloning', { repo: dirName, cloneBranch });
            await cloneRepo({
                repo: { ...repo, baseBranch: cloneBranch },
                ghToken,
                dir,
                signal: opts.signal,
            });
        };
        await Promise.all(legs.map((leg) => cloneLeg(leg)));
        // Agent phase: a single run rooted at the workspace, so every checkout is visible.
        opts.onPhase?.('agent');
        const repos = legs.map((leg) => leg.dirName);
        logger.info('multi-repo-explore: running agent', { repos });
        const run = await runAgentInWorkspace({
            dir: root,
            systemPrompt: job.systemPrompt,
            userPrompt: job.userPrompt,
            model: job.model,
            harness: job.harness,
            subscriptionToken: job.subscriptionToken,
            subscriptionBaseUrl: job.subscriptionBaseUrl,
            ambientAuth: job.ambientAuth,
            proxyBaseUrl: job.proxyBaseUrl,
            sessionToken: job.sessionToken,
            // Read-only run: tell the workspace runner that edits are not expected.
            expectsEdits: false,
            webToolsGuidance: job.webToolsGuidance,
            webSearchProxy: job.webSearch,
            ...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
            guardLimits: job.guardLimits,
            multiRepo: true,
        }, opts);
        const { summary, stats, stderrTail, usage, callMetrics, diagnostics: runDiag } = run;
        // No `infra` is passed, so the finalizer's environment stamping does not apply here.
        return finalizeExploreResult(job, { summary, stats, stderrTail, usage, callMetrics, runDiag }, { infraSetupFields: {}, logger, signal: opts.signal });
    });
}
|
|
484
586
|
/**
|
|
485
|
-
* Edit-and-push coding
|
|
486
|
-
*
|
|
487
|
-
*
|
|
488
|
-
*
|
|
587
|
+
* Edit-and-push coding, dispatching on job DATA: repo-bootstrap (force-push a fresh history to a
|
|
588
|
+
* separate target repo), conflict-resolution (merge the base in, resolve, push back), multi-repo
|
|
589
|
+
* fan-out (sibling checkouts + one PR per changed repo), else the ordinary single-repo flow.
|
|
590
|
+
* After the flow, a STRUCTURED coding kind (e.g. `repro-test`, whose deliverable is BOTH a pushed
|
|
591
|
+
* commit AND a JSON outcome) parses its final reply into `custom` — best-effort, so an unparseable
|
|
592
|
+
* outcome degrades to no `custom` (the backend resolver then defaults) rather than failing the
|
|
593
|
+
* run, whose real deliverable is the pushed commits.
|
|
489
594
|
*/
|
|
490
595
|
async function runCodingMode(job, opts) {
|
|
491
596
|
// Repo bootstrap is a coding run that force-pushes a fresh history to a SEPARATE target
|
|
492
597
|
// repo (clone + adapt a reference, or scaffold from scratch). Keyed off job DATA
|
|
493
|
-
// (`bootstrap`), not the agent kind.
|
|
598
|
+
// (`bootstrap`), not the agent kind. Bootstrap/conflict never carry a structured `output`.
|
|
494
599
|
if (job.bootstrap)
|
|
495
600
|
return runBootstrap(job, opts);
|
|
496
601
|
// Conflict resolution is a coding run with a different pre/post around the agent:
|
|
@@ -498,6 +603,31 @@ async function runCodingMode(job, opts) {
|
|
|
498
603
|
// commit + push (no PR). Keyed off job DATA (`mergeBase`), not the agent kind.
|
|
499
604
|
if (job.mergeBase)
|
|
500
605
|
return runConflictResolution(job, opts);
|
|
606
|
+
// Multi-repo coding (service-connections phase 3): clone every connected peer repo as a
|
|
607
|
+
// sibling, run the agent once across all of them, and open one PR per changed repo. Keyed
|
|
608
|
+
// off job DATA (`peerRepos`), not the agent kind — the implementer sets it when the task
|
|
609
|
+
// has involved services in distinct repos.
|
|
610
|
+
const result = job.peerRepos?.length
|
|
611
|
+
? await runMultiRepoCoding(job, opts)
|
|
612
|
+
: await runSingleRepoCoding(job, opts);
|
|
613
|
+
// Structured coding kind (repro-test): fold the final reply's JSON onto `custom` so the
|
|
614
|
+
// backend post-completion resolver records the outcome. Skipped on a failed run (its `error`
|
|
615
|
+
// is the signal) and when there is no reply to parse. Best-effort: a null parse leaves
|
|
616
|
+
// `custom` unset (the run still succeeds on its commits).
|
|
617
|
+
if (job.output?.kind === 'structured' && !result.error && result.summary) {
|
|
618
|
+
const { value } = await resolveReplyCustom(job, result.summary, opts.signal);
|
|
619
|
+
if (value !== null && value !== undefined)
|
|
620
|
+
result.custom = value;
|
|
621
|
+
}
|
|
622
|
+
return result;
|
|
623
|
+
}
|
|
624
|
+
/**
|
|
625
|
+
* The ordinary single-repo coding flow: clone `branch` (or resume `newBranch`), run the agent,
|
|
626
|
+
* commit + push to `pushBranch`, and open `pr` when one is set and the run produced changes. A
|
|
627
|
+
* no-op is a failure for the implementer (`noChangesIsError` default) and a non-fatal no-op for
|
|
628
|
+
* the in-place fixers (and for a seed-only kind like `repro-test`).
|
|
629
|
+
*/
|
|
630
|
+
async function runSingleRepoCoding(job, opts) {
|
|
501
631
|
const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch;
|
|
502
632
|
const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent({
|
|
503
633
|
kind: 'agent',
|
package/dist/coding-agent.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { mkdir } from 'node:fs/promises';
|
|
2
2
|
import { join } from 'node:path';
|
|
3
|
-
import { branchAheadOfBase, branchHasCommitsSince, cloneExistingBranch, cloneRepo, commitTrackedEdits, createBranch, excludeFromGit, headCommit, listUntrackedFiles, prepareExistingCheckout, pushBranch, refreshFromBaseIfClean, remoteBranchExists, } from './git.js';
|
|
3
|
+
import { branchAheadOfBase, branchHasCommitsSince, cloneExistingBranch, cloneRepo, commitTrackedEdits, createBranch, excludeFromGit, headCommit, listUntrackedFiles, openPullRequest, prepareExistingCheckout, pushBranch, refreshFromBaseIfClean, remoteBranchExists, } from './git.js';
|
|
4
4
|
import { FOLLOW_UPS_FILENAME, FollowUpTailer } from './follow-ups.js';
|
|
5
|
-
import { acquireRepoCheckout, agentNeverActed, agentOutputTail, runAgentInWorkspace, } from './pi-workspace.js';
|
|
5
|
+
import { acquireRepoCheckout, agentNeverActed, agentOutputTail, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
|
|
6
6
|
import { log } from './logger.js';
|
|
7
7
|
/**
|
|
8
8
|
* How often the harness checkpoints the agent's work mid-run by pushing the branch.
|
|
@@ -292,6 +292,246 @@ export async function runCodingAgent(spec, opts = {}) {
|
|
|
292
292
|
return outcome;
|
|
293
293
|
});
|
|
294
294
|
}
|
|
295
|
+
/**
 * Sanitise an owner or repo name for use inside a sibling checkout directory name.
 *
 * Every character outside `A-Z a-z 0-9 . _ -` is replaced with `-`; an empty result becomes
 * `_`. Note that `.` is an allowed character, so the inputs `.` and `..` are returned
 * unchanged.
 *
 * @param value The raw owner or repository name (a string).
 * @returns The sanitised, never-empty segment.
 */
export function safeDirSegment(value) {
    const sanitised = value.replace(/[^A-Za-z0-9._-]/g, '-');
    return sanitised === '' ? '_' : sanitised;
}
|
|
299
|
+
/**
 * Build the sibling-directory allocator for a multi-repo run.
 *
 * The returned function maps a repo to its checkout directory name under the workspace root:
 * the sanitised owner and the sanitised name joined by a double underscore (`owner__name`).
 * It is stateless and deterministic — the same repo always yields the same name.
 *
 * Uniqueness is not enforced here; it relies on the caller passing a checkout set that is
 * already deduplicated by `owner/name` and on owners not containing `_` (stated for GitHub
 * owners by the original author — not verified in this file).
 *
 * The naming rule must stay byte-identical to the backend's `siblingCheckoutDir` /
 * `renderMultiRepoWorkspaceSection` in `@cat-factory/server` (jobBody.ts): the backend
 * computes the same directory name independently and names it in the agent's prompt, so any
 * divergence would point the agent at a directory that does not exist.
 *
 * @returns A function `(repo) => string` reading `repo.owner` and `repo.name`.
 */
export function makeDirClaimer() {
    return (repo) => {
        const ownerPart = safeDirSegment(repo.owner);
        const namePart = safeDirSegment(repo.name);
        return `${ownerPart}__${namePart}`;
    };
}
|
|
312
|
+
/**
 * Multi-repo coding: clone the primary repo and every peer repo as sibling checkouts under
 * one workspace root, run the agent once with its working directory at that root, then commit
 * and push each repo that has work and open a PR for each pushed repo that carries a `pr` spec.
 *
 * Per repo ("leg"):
 *  - If the work branch already exists on the remote it is cloned and resumed; otherwise the
 *    clone branch is cloned and the work branch is created from it.
 *  - The pre-agent HEAD is recorded as `baseSha` BEFORE a resumed branch is refreshed from its
 *    base, so a refresh merge commit counts as advancement and is pushed.
 *  - After the agent, tracked edits are committed. A fresh leg has work when its branch
 *    advanced past `baseSha`; a resumed leg has work unless `branchAheadOfBase` reports
 *    exactly `false`.
 *  - Untracked files left behind are logged as a warning and are not pushed.
 *
 * Unlike the single-repo flow this function does no mid-run checkpoint pushes, always uses an
 * ephemeral workspace, and does no follow-up streaming.
 *
 * @param job  Job description; reads `repo`, `branch`, `newBranch`, `pushBranch`, `ghToken`,
 *             `pr`, `peerRepos`, `commitMessage`, `githubApiBase`, `noChangesIsError`, and the
 *             prompt / model / credential fields forwarded to the agent.
 * @param opts Run options; reads `signal`, `log`, `onPhase`, and is forwarded to
 *             `runAgentInWorkspace`.
 * @returns `{ pushed, branch, summary, stats, prUrl?, peerPullRequests?, usage?, callMetrics? }`
 *          where `pushed`/`prUrl`/`branch` describe the PRIMARY repo. When nothing was pushed
 *          in any repo the result has `pushed: false` and, unless `job.noChangesIsError` is
 *          exactly `false`, an `error` with `failureCause: 'no-changes'`.
 */
export async function runMultiRepoCoding(job, opts = {}) {
    const { signal } = opts;
    const logger = (opts.log ?? log).child({ kind: 'multi-repo', jobId: job.jobId });
    const peers = job.peerRepos ?? [];
    const primaryWorkBranch = job.pushBranch ?? job.newBranch ?? job.branch;
    // Sibling directory per repo via the shared deterministic allocator (`owner__name`), the
    // same one the read-only explore fan-out uses.
    const claimDir = makeDirClaimer();
    const legs = [
        {
            repo: job.repo,
            dirName: claimDir(job.repo),
            dir: '',
            cloneBranch: job.branch,
            workBranch: primaryWorkBranch,
            ghToken: job.ghToken,
            ...(job.pr ? { pr: job.pr } : {}),
            primary: true,
            baseSha: '',
            resumed: false,
        },
        ...peers.map((peer) => ({
            repo: peer.repo,
            dirName: claimDir(peer.repo),
            dir: '',
            cloneBranch: peer.repo.baseBranch,
            // A peer without its own `newBranch` shares the primary's work branch name.
            workBranch: peer.newBranch ?? primaryWorkBranch,
            ghToken: peer.ghToken ?? job.ghToken,
            ...(peer.pr ? { pr: peer.pr } : {}),
            ...(peer.frameId ? { frameId: peer.frameId } : {}),
            primary: false,
            baseSha: '',
            resumed: false,
        })),
    ];
    return withWorkspace('multi', async (root) => {
        // Clone phase: every repo into its sibling dir under the workspace root. An existing
        // remote work branch is resumed rather than branching off base again.
        opts.onPhase?.('clone');
        for (const leg of legs) {
            const dir = join(root, leg.dirName);
            await mkdir(dir, { recursive: true });
            leg.resumed = await remoteBranchExists(leg.repo.cloneUrl, leg.workBranch, leg.ghToken, signal);
            if (leg.resumed) {
                logger.info('multi-repo: resuming existing branch', {
                    repo: leg.dirName,
                    branch: leg.workBranch,
                });
                await cloneExistingBranch({
                    cloneUrl: leg.repo.cloneUrl,
                    branch: leg.workBranch,
                    ghToken: leg.ghToken,
                    dir,
                    signal,
                });
            }
            else {
                logger.info('multi-repo: cloning', { repo: leg.dirName, cloneBranch: leg.cloneBranch });
                await cloneRepo({
                    repo: { ...leg.repo, baseBranch: leg.cloneBranch },
                    ghToken: leg.ghToken,
                    dir,
                    signal,
                });
                await createBranch(dir, leg.workBranch, signal);
            }
            leg.dir = dir;
            // Branch tip before the agent runs. Captured BEFORE the resume refresh below so
            // that a refresh merge commit counts as advancement and gets pushed.
            leg.baseSha = await headCommit(dir, signal);
            // A resumed branch may have been cut from an older base; try to bring the base in.
            // Deliberately best-effort: a rejected refresh is treated like "not refreshed" and
            // only logged, leaving the branch as cloned.
            if (leg.resumed) {
                const refreshed = await refreshFromBaseIfClean(dir, leg.cloneBranch, leg.ghToken, signal).catch(() => false);
                if (!refreshed) {
                    logger.info('multi-repo: resume base refresh skipped (conflict or error)', {
                        repo: leg.dirName,
                        base: leg.cloneBranch,
                    });
                }
            }
        }
        // Agent phase: ONE run rooted at the workspace, so it sees every sibling checkout.
        opts.onPhase?.('agent');
        logger.info('multi-repo: running agent', { repos: legs.map((l) => l.dirName) });
        const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace({
            dir: root,
            systemPrompt: job.systemPrompt,
            userPrompt: job.userPrompt,
            model: job.model,
            harness: job.harness,
            subscriptionToken: job.subscriptionToken,
            subscriptionBaseUrl: job.subscriptionBaseUrl,
            ambientAuth: job.ambientAuth,
            proxyBaseUrl: job.proxyBaseUrl,
            sessionToken: job.sessionToken,
            webToolsGuidance: job.webToolsGuidance,
            webSearchProxy: job.webSearch,
            guardLimits: job.guardLimits,
            ...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
            multiRepo: true,
        }, opts);
        // Push phase: commit forgotten tracked edits, then push (and open a PR when the leg
        // has a `pr` spec) for each repo with work. A repo without work is skipped entirely.
        opts.onPhase?.('push');
        let primaryPushed = false;
        let primaryPrUrl;
        // True once ANY leg has been pushed, including a peer that ends up without a PR URL.
        let anyPushed = false;
        const peerPullRequests = [];
        for (const leg of legs) {
            await commitTrackedEdits(leg.dir, job.commitMessage ?? leg.pr?.title ?? 'Agent changes', signal);
            const advanced = await branchHasCommitsSince(leg.dir, leg.baseSha, signal);
            let hasWork = advanced || leg.resumed;
            // A resumed branch that did not advance only has work if it is ahead of its base;
            // anything other than an explicit `false` keeps it counted as having work.
            if (leg.resumed && !advanced) {
                const ahead = await branchAheadOfBase(leg.dir, leg.repo.baseBranch, leg.ghToken, signal);
                if (ahead === false)
                    hasWork = false;
            }
            const leftover = await listUntrackedFiles(leg.dir, signal);
            if (leftover.length > 0) {
                logger.warn('multi-repo: uncommitted new files left behind (not pushed)', {
                    repo: leg.dirName,
                    count: leftover.length,
                    files: leftover.slice(0, 20),
                });
            }
            if (!hasWork) {
                logger.info('multi-repo: no changes for repo', { repo: leg.dirName });
                continue;
            }
            await pushBranch(leg.dir, leg.workBranch, leg.ghToken, signal);
            anyPushed = true;
            let prUrl = null;
            if (leg.pr) {
                prUrl = await openPullRequest({
                    owner: leg.repo.owner,
                    name: leg.repo.name,
                    ghToken: leg.ghToken,
                    head: leg.workBranch,
                    base: leg.repo.baseBranch,
                    pr: leg.pr,
                    apiBase: job.githubApiBase,
                    cloneUrl: leg.repo.cloneUrl,
                    ...(leg.repo.provider ? { provider: leg.repo.provider } : {}),
                    signal,
                });
            }
            if (leg.primary) {
                primaryPushed = true;
                if (prUrl)
                    primaryPrUrl = prUrl;
            }
            else if (prUrl) {
                peerPullRequests.push({
                    repo: `${leg.repo.owner}/${leg.repo.name}`,
                    ...(leg.frameId ? { frameId: leg.frameId } : {}),
                    prUrl,
                    branch: leg.workBranch,
                });
            }
        }
        // "No work" means no repo was pushed. Deriving this from the PR list instead would
        // misreport a run whose only pushed repo is a peer without a PR URL as having
        // produced no file changes.
        if (!anyPushed) {
            // Nothing changed in ANY repo: a clean non-event for a caller that opted out with
            // `noChangesIsError === false`, a `no-changes` failure otherwise.
            if (job.noChangesIsError === false) {
                return {
                    pushed: false,
                    branch: primaryWorkBranch,
                    summary,
                    stats,
                    ...(usage ? { usage } : {}),
                    ...(callMetrics ? { callMetrics } : {}),
                };
            }
            return {
                pushed: false,
                branch: primaryWorkBranch,
                summary,
                stats,
                error: noChangesReason('the agent produced no file changes in any repository', stats, stderrTail),
                failureCause: 'no-changes',
                ...(usage ? { usage } : {}),
                ...(callMetrics ? { callMetrics } : {}),
            };
        }
        logger.info('multi-repo: complete', {
            primaryPushed,
            primaryPrUrl: primaryPrUrl ?? null,
            peers: peerPullRequests.length,
        });
        return {
            pushed: primaryPushed,
            ...(primaryPrUrl ? { prUrl: primaryPrUrl } : {}),
            branch: primaryWorkBranch,
            ...(peerPullRequests.length ? { peerPullRequests } : {}),
            summary,
            stats,
            ...(usage ? { usage } : {}),
            ...(callMetrics ? { callMetrics } : {}),
        };
    });
}
|
|
295
535
|
/**
|
|
296
536
|
* The "no changes" reason both coding agents report: a caller-supplied lead phrase
|
|
297
537
|
* plus the shared "never acted" cause and a credential-scrubbed tail of Pi's stderr.
|