@cat-factory/executor-harness 1.34.4 → 1.34.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +205 -104
- package/dist/coding-agent.js +242 -2
- package/dist/job.js +44 -0
- package/dist/pi-workspace.js +1 -0
- package/dist/pi.js +27 -3
- package/package.json +3 -3
- package/src/agent.ts +255 -113
- package/src/coding-agent.ts +289 -1
- package/src/job.ts +77 -0
- package/src/pi-workspace.ts +7 -0
- package/src/pi.ts +30 -3
package/dist/agent.js
CHANGED
|
@@ -7,7 +7,7 @@ import { standUpFrontend, tearDownFrontend } from './frontend-infra.js';
|
|
|
7
7
|
import { configurePackageRegistries } from './package-registries.js';
|
|
8
8
|
import { captureRedactedOutput, redactSecrets } from './redact.js';
|
|
9
9
|
import { cloneRepo, commitAll, conflictDiff, hasAgentChanges, headCommit, mergeBranch, openPullRequest, prepareExistingCheckout, pushBranch, reinitAndPush, unmergedPaths, } from './git.js';
|
|
10
|
-
import { noChangesReason, runCodingAgent } from './coding-agent.js';
|
|
10
|
+
import { makeDirClaimer, noChangesReason, runCodingAgent, runMultiRepoCoding, } from './coding-agent.js';
|
|
11
11
|
import { acquireRepoCheckout, agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, unusableFinalAnswerCause, withWorkspace, } from './pi-workspace.js';
|
|
12
12
|
import { diagnosticsSuffix, resolveStructuredOutput, } from './structured-output.js';
|
|
13
13
|
import { log } from './logger.js';
|
|
@@ -298,6 +298,15 @@ async function runPreviewMode(job, opts) {
|
|
|
298
298
|
*/
|
|
299
299
|
async function runExploreMode(job, opts) {
|
|
300
300
|
const logger = opts.log ?? log;
|
|
301
|
+
// Multi-repo read-only exploration (service-connections phase 3): when the job carries peer
|
|
302
|
+
// repos, clone them all as siblings and run at the workspace root. Keyed off job DATA
|
|
303
|
+
// (`peerRepos`), not the agent kind — the backend sets it for the bug-investigator when the
|
|
304
|
+
// task has involved services in distinct repos. `runMultiRepoExplore` uses its own ephemeral
|
|
305
|
+
// `withWorkspace`, so a `persistentCheckout` flag (which a warm-pool dispatch injects on EVERY
|
|
306
|
+
// job) is harmlessly ignored — it must NOT suppress the fan-out, or a pooled bug-investigator
|
|
307
|
+
// would silently drop its peer repos and only ever see the primary one.
|
|
308
|
+
if (job.peerRepos?.length)
|
|
309
|
+
return runMultiRepoExplore(job, opts);
|
|
301
310
|
return acquireRepoCheckout({ persistent: job.persistentCheckout === true, prefix: 'agent-explore', repo: job.repo }, async (dir) => {
|
|
302
311
|
opts.onPhase?.('clone');
|
|
303
312
|
// Monorepo: run with cwd set to the service subtree (created if missing), mirroring the
|
|
@@ -367,118 +376,204 @@ async function runExploreMode(job, opts) {
|
|
|
367
376
|
contextFiles: job.contextFiles,
|
|
368
377
|
guardLimits: job.guardLimits,
|
|
369
378
|
}, opts);
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
...(callMetrics ? { callMetrics } : {}),
|
|
407
|
-
...infraSetupFields,
|
|
408
|
-
};
|
|
409
|
-
}
|
|
410
|
-
// Structured: parse the agent's JSON. With repair enabled (default) a malformed
|
|
411
|
-
// reply gets ONE structured repair call before giving up; with `repair:false` we
|
|
412
|
-
// parse directly (no repair channel). The backend coerces/validates + renders from
|
|
413
|
-
// the returned object in a post-op.
|
|
414
|
-
let custom = null;
|
|
415
|
-
let diagnostics;
|
|
416
|
-
if (job.output.repair === false) {
|
|
417
|
-
try {
|
|
418
|
-
custom = extractJsonObject(summary);
|
|
419
|
-
}
|
|
420
|
-
catch {
|
|
421
|
-
custom = null;
|
|
422
|
-
}
|
|
423
|
-
}
|
|
424
|
-
else {
|
|
425
|
-
const resolved = await resolveStructuredOutput({
|
|
426
|
-
label: 'agent',
|
|
427
|
-
shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
|
|
428
|
-
parse: (text) => extractJsonObject(text),
|
|
429
|
-
}, summary, {
|
|
430
|
-
harness: job.harness,
|
|
431
|
-
subscriptionToken: job.subscriptionToken,
|
|
432
|
-
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
433
|
-
proxyBaseUrl: job.proxyBaseUrl,
|
|
434
|
-
sessionToken: job.sessionToken,
|
|
435
|
-
model: job.model,
|
|
436
|
-
jobId: job.jobId,
|
|
437
|
-
signal: opts.signal,
|
|
438
|
-
});
|
|
439
|
-
custom = resolved.value;
|
|
440
|
-
diagnostics = resolved.diagnostics;
|
|
441
|
-
}
|
|
442
|
-
if (custom === undefined || custom === null) {
|
|
443
|
-
return {
|
|
444
|
-
summary,
|
|
445
|
-
stats,
|
|
446
|
-
error: noStructuredReason(stats, stderrTail, diagnostics),
|
|
447
|
-
failureCause: 'no-usable-output',
|
|
448
|
-
...(usage ? { usage } : {}),
|
|
449
|
-
...(callMetrics ? { callMetrics } : {}),
|
|
450
|
-
...infraSetupFields,
|
|
451
|
-
};
|
|
452
|
-
}
|
|
453
|
-
// Stamp the run's actual environment authoritatively onto the structured result when
|
|
454
|
-
// infra was managed (the tester): which env the suite ran in is decided by the job's
|
|
455
|
-
// infra spec, NOT the model, so the backend can echo it back to the UI deterministically
|
|
456
|
-
// even when the model omits it from its JSON (or a structured repair drops it). A
|
|
457
|
-
// frontend run tests the app against its live ephemeral backend(s), so it reports
|
|
458
|
-
// `ephemeral` (the TestReport env vocabulary has no separate frontend value).
|
|
459
|
-
const reportedEnvironment = infra
|
|
460
|
-
? infra.kind === 'frontend'
|
|
461
|
-
? 'ephemeral'
|
|
462
|
-
: infra.environment
|
|
463
|
-
: undefined;
|
|
464
|
-
if (reportedEnvironment && typeof custom === 'object') {
|
|
465
|
-
;
|
|
466
|
-
custom.environment = reportedEnvironment;
|
|
467
|
-
}
|
|
468
|
-
logger.info('agent(explore): done (structured)', { ...stats });
|
|
379
|
+
return await finalizeExploreResult(job, { summary, stats, stderrTail, usage, callMetrics, runDiag }, { infra, infraSetupFields, logger, signal: opts.signal });
|
|
380
|
+
}
|
|
381
|
+
finally {
|
|
382
|
+
if (managed)
|
|
383
|
+
await managed.cleanup();
|
|
384
|
+
}
|
|
385
|
+
});
|
|
386
|
+
}
|
|
387
|
+
/**
|
|
388
|
+
* Turn an explore agent's raw run into an {@link AgentResult}: guard an empty/truncated reply,
|
|
389
|
+
* then either return the prose summary or parse (+ optionally repair) the structured JSON as
|
|
390
|
+
* `custom` — the backend renders any artifact files from it in a post-op. Extracted so the
|
|
391
|
+
* single-repo {@link runExploreMode} and the read-only {@link runMultiRepoExplore} share ONE
|
|
392
|
+
* result contract (the multi-repo path passes no infra, so the tester-only env stamping no-ops).
|
|
393
|
+
*/
|
|
394
|
+
async function finalizeExploreResult(job, run, ctx) {
|
|
395
|
+
const { summary, stats, stderrTail, usage, callMetrics, runDiag } = run;
|
|
396
|
+
const { infra, infraSetupFields, logger, signal } = ctx;
|
|
397
|
+
if (!summary.trim()) {
|
|
398
|
+
return {
|
|
399
|
+
summary,
|
|
400
|
+
stats,
|
|
401
|
+
error: noOutputReason(stats, stderrTail),
|
|
402
|
+
failureCause: 'no-usable-output',
|
|
403
|
+
...(usage ? { usage } : {}),
|
|
404
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
405
|
+
...infraSetupFields,
|
|
406
|
+
};
|
|
407
|
+
}
|
|
408
|
+
// Opt-in (document producers): a final answer cut off at the output ceiling — or empty —
|
|
409
|
+
// must FAIL LOUDLY here, BEFORE the structured repair below could launder a truncated
|
|
410
|
+
// reply into a half-baked doc the backend then shards/commits + hands onward. Mirrors the
|
|
411
|
+
// bespoke `/spec` handler's `unusableFinalAnswerCause` gate (which drove the old loop).
|
|
412
|
+
if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
|
|
413
|
+
const unusable = unusableFinalAnswerCause(runDiag);
|
|
414
|
+
if (unusable) {
|
|
469
415
|
return {
|
|
470
416
|
summary,
|
|
471
|
-
custom,
|
|
472
417
|
stats,
|
|
418
|
+
error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
|
|
419
|
+
failureCause: 'no-usable-output',
|
|
473
420
|
...(usage ? { usage } : {}),
|
|
474
421
|
...(callMetrics ? { callMetrics } : {}),
|
|
475
422
|
...infraSetupFields,
|
|
476
423
|
};
|
|
477
424
|
}
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
425
|
+
}
|
|
426
|
+
// Prose: the summary IS the deliverable.
|
|
427
|
+
if (job.output?.kind !== 'structured') {
|
|
428
|
+
logger.info('agent(explore): done (prose)', { ...stats });
|
|
429
|
+
return {
|
|
430
|
+
summary,
|
|
431
|
+
stats,
|
|
432
|
+
...(usage ? { usage } : {}),
|
|
433
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
434
|
+
...infraSetupFields,
|
|
435
|
+
};
|
|
436
|
+
}
|
|
437
|
+
// Structured: parse the agent's JSON. With repair enabled (default) a malformed
|
|
438
|
+
// reply gets ONE structured repair call before giving up; with `repair:false` we
|
|
439
|
+
// parse directly (no repair channel). The backend coerces/validates + renders from
|
|
440
|
+
// the returned object in a post-op.
|
|
441
|
+
let custom = null;
|
|
442
|
+
let diagnostics;
|
|
443
|
+
if (job.output.repair === false) {
|
|
444
|
+
try {
|
|
445
|
+
custom = extractJsonObject(summary);
|
|
481
446
|
}
|
|
447
|
+
catch {
|
|
448
|
+
custom = null;
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
else {
|
|
452
|
+
const resolved = await resolveStructuredOutput({
|
|
453
|
+
label: 'agent',
|
|
454
|
+
shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
|
|
455
|
+
parse: (text) => extractJsonObject(text),
|
|
456
|
+
}, summary, {
|
|
457
|
+
harness: job.harness,
|
|
458
|
+
subscriptionToken: job.subscriptionToken,
|
|
459
|
+
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
460
|
+
proxyBaseUrl: job.proxyBaseUrl,
|
|
461
|
+
sessionToken: job.sessionToken,
|
|
462
|
+
model: job.model,
|
|
463
|
+
jobId: job.jobId,
|
|
464
|
+
signal,
|
|
465
|
+
});
|
|
466
|
+
custom = resolved.value;
|
|
467
|
+
diagnostics = resolved.diagnostics;
|
|
468
|
+
}
|
|
469
|
+
if (custom === undefined || custom === null) {
|
|
470
|
+
return {
|
|
471
|
+
summary,
|
|
472
|
+
stats,
|
|
473
|
+
error: noStructuredReason(stats, stderrTail, diagnostics),
|
|
474
|
+
failureCause: 'no-usable-output',
|
|
475
|
+
...(usage ? { usage } : {}),
|
|
476
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
477
|
+
...infraSetupFields,
|
|
478
|
+
};
|
|
479
|
+
}
|
|
480
|
+
// Stamp the run's actual environment authoritatively onto the structured result when
|
|
481
|
+
// infra was managed (the tester): which env the suite ran in is decided by the job's
|
|
482
|
+
// infra spec, NOT the model, so the backend can echo it back to the UI deterministically
|
|
483
|
+
// even when the model omits it from its JSON (or a structured repair drops it). A
|
|
484
|
+
// frontend run tests the app against its live ephemeral backend(s), so it reports
|
|
485
|
+
// `ephemeral` (the TestReport env vocabulary has no separate frontend value).
|
|
486
|
+
const reportedEnvironment = infra
|
|
487
|
+
? infra.kind === 'frontend'
|
|
488
|
+
? 'ephemeral'
|
|
489
|
+
: infra.environment
|
|
490
|
+
: undefined;
|
|
491
|
+
if (reportedEnvironment && typeof custom === 'object') {
|
|
492
|
+
;
|
|
493
|
+
custom.environment = reportedEnvironment;
|
|
494
|
+
}
|
|
495
|
+
logger.info('agent(explore): done (structured)', { ...stats });
|
|
496
|
+
return {
|
|
497
|
+
summary,
|
|
498
|
+
custom,
|
|
499
|
+
stats,
|
|
500
|
+
...(usage ? { usage } : {}),
|
|
501
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
502
|
+
...infraSetupFields,
|
|
503
|
+
};
|
|
504
|
+
}
|
|
505
|
+
/**
|
|
506
|
+
* Read-only MULTI-REPO exploration (service-connections phase 3): clone the primary
|
|
507
|
+
* repo PLUS every connected peer repo as SIBLING checkouts under one workspace root, run the
|
|
508
|
+
* agent ONCE with its cwd at the root (so it can read across every repo the bug touches), and
|
|
509
|
+
* return its prose/structured result — making NO edits, NO commits and opening NO PR. The
|
|
510
|
+
* counterpart of {@link runMultiRepoCoding} for the `bug-investigator`, but strictly read-only:
|
|
511
|
+
* peers carry no `newBranch`/`pr`, nothing is pushed, and the peers exist only to be read. The
|
|
512
|
+
* multi-repo layout is explained to the agent by the backend-composed system-prompt section
|
|
513
|
+
* (which repo/subdir each service lives in) + the harness's own AGENTS.md multi-repo note.
|
|
514
|
+
*/
|
|
515
|
+
async function runMultiRepoExplore(job, opts) {
|
|
516
|
+
const logger = (opts.log ?? log).child({ kind: 'multi-repo-explore', jobId: job.jobId });
|
|
517
|
+
const peers = job.peerRepos ?? [];
|
|
518
|
+
// Deterministic sibling directory per repo (always `owner__name`, not only on a collision), so
|
|
519
|
+
// same-named repos under different owners never clobber each other — shared scheme with the coding fan-out.
|
|
520
|
+
const claimDir = makeDirClaimer();
|
|
521
|
+
const legs = [
|
|
522
|
+
{ repo: job.repo, cloneBranch: job.branch, ghToken: job.ghToken },
|
|
523
|
+
...peers.map((peer) => ({
|
|
524
|
+
repo: peer.repo,
|
|
525
|
+
cloneBranch: peer.repo.baseBranch,
|
|
526
|
+
ghToken: peer.ghToken ?? job.ghToken,
|
|
527
|
+
})),
|
|
528
|
+
].map((leg) => ({ ...leg, dirName: claimDir(leg.repo) }));
|
|
529
|
+
return withWorkspace('explore-multi', async (root) => {
|
|
530
|
+
// Clone phase: every repo (read-only) into its sibling dir under the workspace root. No
|
|
531
|
+
// work branch, no resume — the investigator only reads — so the legs are independent and
|
|
532
|
+
// clone in parallel (wall-clock is the slowest single clone, not the sum).
|
|
533
|
+
opts.onPhase?.('clone');
|
|
534
|
+
await Promise.all(legs.map(async (leg) => {
|
|
535
|
+
const dir = join(root, leg.dirName);
|
|
536
|
+
await mkdir(dir, { recursive: true });
|
|
537
|
+
logger.info('multi-repo-explore: cloning', {
|
|
538
|
+
repo: leg.dirName,
|
|
539
|
+
cloneBranch: leg.cloneBranch,
|
|
540
|
+
});
|
|
541
|
+
await cloneRepo({
|
|
542
|
+
repo: { ...leg.repo, baseBranch: leg.cloneBranch },
|
|
543
|
+
ghToken: leg.ghToken,
|
|
544
|
+
dir,
|
|
545
|
+
signal: opts.signal,
|
|
546
|
+
});
|
|
547
|
+
}));
|
|
548
|
+
opts.onPhase?.('agent');
|
|
549
|
+
logger.info('multi-repo-explore: running agent', { repos: legs.map((l) => l.dirName) });
|
|
550
|
+
const run = await runAgentInWorkspace({
|
|
551
|
+
dir: root,
|
|
552
|
+
systemPrompt: job.systemPrompt,
|
|
553
|
+
userPrompt: job.userPrompt,
|
|
554
|
+
model: job.model,
|
|
555
|
+
harness: job.harness,
|
|
556
|
+
subscriptionToken: job.subscriptionToken,
|
|
557
|
+
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
558
|
+
ambientAuth: job.ambientAuth,
|
|
559
|
+
proxyBaseUrl: job.proxyBaseUrl,
|
|
560
|
+
sessionToken: job.sessionToken,
|
|
561
|
+
// Read-only: no edits expected, so the no-progress guard's no-edit bound must not fire.
|
|
562
|
+
expectsEdits: false,
|
|
563
|
+
webToolsGuidance: job.webToolsGuidance,
|
|
564
|
+
webSearchProxy: job.webSearch,
|
|
565
|
+
...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
|
|
566
|
+
guardLimits: job.guardLimits,
|
|
567
|
+
multiRepo: true,
|
|
568
|
+
}, opts);
|
|
569
|
+
return finalizeExploreResult(job, {
|
|
570
|
+
summary: run.summary,
|
|
571
|
+
stats: run.stats,
|
|
572
|
+
stderrTail: run.stderrTail,
|
|
573
|
+
usage: run.usage,
|
|
574
|
+
callMetrics: run.callMetrics,
|
|
575
|
+
runDiag: run.diagnostics,
|
|
576
|
+
}, { infraSetupFields: {}, logger, signal: opts.signal });
|
|
482
577
|
});
|
|
483
578
|
}
|
|
484
579
|
/**
|
|
@@ -498,6 +593,12 @@ async function runCodingMode(job, opts) {
|
|
|
498
593
|
// commit + push (no PR). Keyed off job DATA (`mergeBase`), not the agent kind.
|
|
499
594
|
if (job.mergeBase)
|
|
500
595
|
return runConflictResolution(job, opts);
|
|
596
|
+
// Multi-repo coding (service-connections phase 3): clone every connected peer repo as a
|
|
597
|
+
// sibling, run the agent once across all of them, and open one PR per changed repo. Keyed
|
|
598
|
+
// off job DATA (`peerRepos`), not the agent kind — the implementer sets it when the task
|
|
599
|
+
// has involved services in distinct repos.
|
|
600
|
+
if (job.peerRepos?.length)
|
|
601
|
+
return runMultiRepoCoding(job, opts);
|
|
501
602
|
const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch;
|
|
502
603
|
const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent({
|
|
503
604
|
kind: 'agent',
|
package/dist/coding-agent.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { mkdir } from 'node:fs/promises';
|
|
2
2
|
import { join } from 'node:path';
|
|
3
|
-
import { branchAheadOfBase, branchHasCommitsSince, cloneExistingBranch, cloneRepo, commitTrackedEdits, createBranch, excludeFromGit, headCommit, listUntrackedFiles, prepareExistingCheckout, pushBranch, refreshFromBaseIfClean, remoteBranchExists, } from './git.js';
|
|
3
|
+
import { branchAheadOfBase, branchHasCommitsSince, cloneExistingBranch, cloneRepo, commitTrackedEdits, createBranch, excludeFromGit, headCommit, listUntrackedFiles, openPullRequest, prepareExistingCheckout, pushBranch, refreshFromBaseIfClean, remoteBranchExists, } from './git.js';
|
|
4
4
|
import { FOLLOW_UPS_FILENAME, FollowUpTailer } from './follow-ups.js';
|
|
5
|
-
import { acquireRepoCheckout, agentNeverActed, agentOutputTail, runAgentInWorkspace, } from './pi-workspace.js';
|
|
5
|
+
import { acquireRepoCheckout, agentNeverActed, agentOutputTail, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
|
|
6
6
|
import { log } from './logger.js';
|
|
7
7
|
/**
|
|
8
8
|
* How often the harness checkpoints the agent's work mid-run by pushing the branch.
|
|
@@ -292,6 +292,246 @@ export async function runCodingAgent(spec, opts = {}) {
|
|
|
292
292
|
return outcome;
|
|
293
293
|
});
|
|
294
294
|
}
|
|
295
|
+
/** Sanitise an owner/name into a safe single path segment for a sibling checkout directory. */
|
|
296
|
+
export function safeDirSegment(value) {
|
|
297
|
+
return value.replace(/[^A-Za-z0-9._-]/g, '-') || '_';
|
|
298
|
+
}
|
|
299
|
+
/**
|
|
300
|
+
* A sibling-directory allocator for a multi-repo run: returns the checkout directory name for a
|
|
301
|
+
* repo under the workspace root. Deterministic (`owner__name`) and collision-free by construction
|
|
302
|
+
* — the checkout set is deduped by `owner/name` upstream and GitHub owners contain no `_`, so the
|
|
303
|
+
* `owner__name` join is unique per repo without a stateful collision dance. Kept as a factory so
|
|
304
|
+
* the coding + read-only explore fan-outs share ONE scheme, and it MUST stay byte-identical to the
|
|
305
|
+
* backend's `siblingCheckoutDir` / `renderMultiRepoWorkspaceSection` in `@cat-factory/server`
|
|
306
|
+
* (jobBody.ts), which names this exact directory in the agent's prompt — the two are computed
|
|
307
|
+
* independently, so a divergent rule would point the agent at a directory that does not exist.
|
|
308
|
+
*/
|
|
309
|
+
export function makeDirClaimer() {
|
|
310
|
+
return (repo) => `${safeDirSegment(repo.owner)}__${safeDirSegment(repo.name)}`;
|
|
311
|
+
}
|
|
312
|
+
/**
|
|
313
|
+
* Multi-repo coding (service-connections phase 3): clone the primary repo AND every connected
|
|
314
|
+
* peer repo as SIBLING checkouts under one workspace root, run the agent ONCE with its cwd at
|
|
315
|
+
* that root (so it makes the cross-service change coherently across all of them), then commit +
|
|
316
|
+
* push each repo that actually changed and open one PR per dirty repo. The task's own-service PR
|
|
317
|
+
* is reported as `prUrl`/`branch`; the peer PRs as `peerPullRequests`.
|
|
318
|
+
*
|
|
319
|
+
* Deliberately simpler than the single-repo {@link runCodingAgent} for the first cut: NO mid-run
|
|
320
|
+
* checkpoint pushes (an evicted multi-repo run re-clones on retry — the deterministic work branch
|
|
321
|
+
* still lets it resume any commits it managed to push at the end), NO warm-pool persistent
|
|
322
|
+
* checkout (always ephemeral), and NO follow-up sentinel streaming. It reuses the SAME dir-scoped
|
|
323
|
+
* git helpers, so the per-repo clone/commit/push/PR mechanics match the single-repo path exactly.
|
|
324
|
+
*/
|
|
325
|
+
export async function runMultiRepoCoding(job, opts = {}) {
|
|
326
|
+
const { signal } = opts;
|
|
327
|
+
const logger = (opts.log ?? log).child({ kind: 'multi-repo', jobId: job.jobId });
|
|
328
|
+
const peers = job.peerRepos ?? [];
|
|
329
|
+
const primaryWorkBranch = job.pushBranch ?? job.newBranch ?? job.branch;
|
|
330
|
+
// Assign the sibling directory per repo via the shared deterministic allocator (`owner__name`,
|
|
331
|
+
// matching the backend prompt's `siblingCheckoutDir`), shared with the read-only explore fan-out.
|
|
332
|
+
const claimDir = makeDirClaimer();
|
|
333
|
+
const legs = [
|
|
334
|
+
{
|
|
335
|
+
repo: job.repo,
|
|
336
|
+
dirName: claimDir(job.repo),
|
|
337
|
+
dir: '',
|
|
338
|
+
cloneBranch: job.branch,
|
|
339
|
+
workBranch: primaryWorkBranch,
|
|
340
|
+
ghToken: job.ghToken,
|
|
341
|
+
...(job.pr ? { pr: job.pr } : {}),
|
|
342
|
+
primary: true,
|
|
343
|
+
baseSha: '',
|
|
344
|
+
resumed: false,
|
|
345
|
+
},
|
|
346
|
+
...peers.map((peer) => ({
|
|
347
|
+
repo: peer.repo,
|
|
348
|
+
dirName: claimDir(peer.repo),
|
|
349
|
+
dir: '',
|
|
350
|
+
cloneBranch: peer.repo.baseBranch,
|
|
351
|
+
// Coding peers always carry `newBranch` (the backend sets the shared work branch);
|
|
352
|
+
// fall back to the primary's for the type (read-only peers never reach this path).
|
|
353
|
+
workBranch: peer.newBranch ?? primaryWorkBranch,
|
|
354
|
+
ghToken: peer.ghToken ?? job.ghToken,
|
|
355
|
+
...(peer.pr ? { pr: peer.pr } : {}),
|
|
356
|
+
...(peer.frameId ? { frameId: peer.frameId } : {}),
|
|
357
|
+
primary: false,
|
|
358
|
+
baseSha: '',
|
|
359
|
+
resumed: false,
|
|
360
|
+
})),
|
|
361
|
+
];
|
|
362
|
+
return withWorkspace('multi', async (root) => {
|
|
363
|
+
// Clone phase: every repo into its sibling dir under the workspace root. Resume an
|
|
364
|
+
// existing remote work branch (an evicted retry) rather than branching off base again.
|
|
365
|
+
opts.onPhase?.('clone');
|
|
366
|
+
for (const leg of legs) {
|
|
367
|
+
const dir = join(root, leg.dirName);
|
|
368
|
+
await mkdir(dir, { recursive: true });
|
|
369
|
+
leg.resumed = await remoteBranchExists(leg.repo.cloneUrl, leg.workBranch, leg.ghToken, signal);
|
|
370
|
+
if (leg.resumed) {
|
|
371
|
+
logger.info('multi-repo: resuming existing branch', {
|
|
372
|
+
repo: leg.dirName,
|
|
373
|
+
branch: leg.workBranch,
|
|
374
|
+
});
|
|
375
|
+
await cloneExistingBranch({
|
|
376
|
+
cloneUrl: leg.repo.cloneUrl,
|
|
377
|
+
branch: leg.workBranch,
|
|
378
|
+
ghToken: leg.ghToken,
|
|
379
|
+
dir,
|
|
380
|
+
signal,
|
|
381
|
+
});
|
|
382
|
+
}
|
|
383
|
+
else {
|
|
384
|
+
logger.info('multi-repo: cloning', { repo: leg.dirName, cloneBranch: leg.cloneBranch });
|
|
385
|
+
await cloneRepo({
|
|
386
|
+
repo: { ...leg.repo, baseBranch: leg.cloneBranch },
|
|
387
|
+
ghToken: leg.ghToken,
|
|
388
|
+
dir,
|
|
389
|
+
signal,
|
|
390
|
+
});
|
|
391
|
+
await createBranch(dir, leg.workBranch, signal);
|
|
392
|
+
}
|
|
393
|
+
leg.dir = dir;
|
|
394
|
+
// The branch tip before the agent runs. Captured BEFORE the resume base refresh below so
|
|
395
|
+
// that refresh's merge commit counts as advancement and is pushed (as in the single-repo
|
|
396
|
+
// path). A fresh leg produced work iff its branch advances past this; a resumed leg already
|
|
397
|
+
// carries prior work.
|
|
398
|
+
leg.baseSha = await headCommit(dir, signal);
|
|
399
|
+
// A resumed branch was cut from an OLDER base; merge the latest base in when the two merge
|
|
400
|
+
// cleanly so the agent works against current base and the peer/own PRs stay current. On a
|
|
401
|
+
// conflict this is a best-effort no-op (the merge gate handles a conflicting PR downstream),
|
|
402
|
+
// mirroring the single-repo {@link runCodingAgent} resume refresh.
|
|
403
|
+
if (leg.resumed) {
|
|
404
|
+
const refreshed = await refreshFromBaseIfClean(dir, leg.cloneBranch, leg.ghToken, signal).catch(() => false);
|
|
405
|
+
if (!refreshed) {
|
|
406
|
+
logger.info('multi-repo: resume base refresh skipped (conflict or error)', {
|
|
407
|
+
repo: leg.dirName,
|
|
408
|
+
base: leg.cloneBranch,
|
|
409
|
+
});
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
// Run the agent ONCE with its cwd at the workspace root, so it sees every sibling checkout
|
|
414
|
+
// and can change them coherently. No monorepo/service-directory scoping — the multi-repo
|
|
415
|
+
// note + the backend system-prompt section explain the layout.
|
|
416
|
+
opts.onPhase?.('agent');
|
|
417
|
+
logger.info('multi-repo: running agent', { repos: legs.map((l) => l.dirName) });
|
|
418
|
+
const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace({
|
|
419
|
+
dir: root,
|
|
420
|
+
systemPrompt: job.systemPrompt,
|
|
421
|
+
userPrompt: job.userPrompt,
|
|
422
|
+
model: job.model,
|
|
423
|
+
harness: job.harness,
|
|
424
|
+
subscriptionToken: job.subscriptionToken,
|
|
425
|
+
subscriptionBaseUrl: job.subscriptionBaseUrl,
|
|
426
|
+
ambientAuth: job.ambientAuth,
|
|
427
|
+
proxyBaseUrl: job.proxyBaseUrl,
|
|
428
|
+
sessionToken: job.sessionToken,
|
|
429
|
+
webToolsGuidance: job.webToolsGuidance,
|
|
430
|
+
webSearchProxy: job.webSearch,
|
|
431
|
+
guardLimits: job.guardLimits,
|
|
432
|
+
...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
|
|
433
|
+
multiRepo: true,
|
|
434
|
+
}, opts);
|
|
435
|
+
// Push phase: commit forgotten tracked edits, then push + open a PR for each repo the run
|
|
436
|
+
// actually changed. A repo the agent left untouched is skipped (no branch, no PR).
|
|
437
|
+
opts.onPhase?.('push');
|
|
438
|
+
let primaryPushed = false;
|
|
439
|
+
let primaryPrUrl;
|
|
440
|
+
const peerPullRequests = [];
|
|
441
|
+
for (const leg of legs) {
|
|
442
|
+
await commitTrackedEdits(leg.dir, job.commitMessage ?? leg.pr?.title ?? 'Agent changes', signal);
|
|
443
|
+
const advanced = await branchHasCommitsSince(leg.dir, leg.baseSha, signal);
|
|
444
|
+
let hasWork = advanced || leg.resumed;
|
|
445
|
+
if (leg.resumed && !advanced) {
|
|
446
|
+
const ahead = await branchAheadOfBase(leg.dir, leg.repo.baseBranch, leg.ghToken, signal);
|
|
447
|
+
if (ahead === false)
|
|
448
|
+
hasWork = false;
|
|
449
|
+
}
|
|
450
|
+
const leftover = await listUntrackedFiles(leg.dir, signal);
|
|
451
|
+
if (leftover.length > 0) {
|
|
452
|
+
logger.warn('multi-repo: uncommitted new files left behind (not pushed)', {
|
|
453
|
+
repo: leg.dirName,
|
|
454
|
+
count: leftover.length,
|
|
455
|
+
files: leftover.slice(0, 20),
|
|
456
|
+
});
|
|
457
|
+
}
|
|
458
|
+
if (!hasWork) {
|
|
459
|
+
logger.info('multi-repo: no changes for repo', { repo: leg.dirName });
|
|
460
|
+
continue;
|
|
461
|
+
}
|
|
462
|
+
await pushBranch(leg.dir, leg.workBranch, leg.ghToken, signal);
|
|
463
|
+
let prUrl = null;
|
|
464
|
+
if (leg.pr) {
|
|
465
|
+
prUrl = await openPullRequest({
|
|
466
|
+
owner: leg.repo.owner,
|
|
467
|
+
name: leg.repo.name,
|
|
468
|
+
ghToken: leg.ghToken,
|
|
469
|
+
head: leg.workBranch,
|
|
470
|
+
base: leg.repo.baseBranch,
|
|
471
|
+
pr: leg.pr,
|
|
472
|
+
apiBase: job.githubApiBase,
|
|
473
|
+
cloneUrl: leg.repo.cloneUrl,
|
|
474
|
+
...(leg.repo.provider ? { provider: leg.repo.provider } : {}),
|
|
475
|
+
signal,
|
|
476
|
+
});
|
|
477
|
+
}
|
|
478
|
+
if (leg.primary) {
|
|
479
|
+
primaryPushed = true;
|
|
480
|
+
if (prUrl)
|
|
481
|
+
primaryPrUrl = prUrl;
|
|
482
|
+
}
|
|
483
|
+
else if (prUrl) {
|
|
484
|
+
peerPullRequests.push({
|
|
485
|
+
repo: `${leg.repo.owner}/${leg.repo.name}`,
|
|
486
|
+
...(leg.frameId ? { frameId: leg.frameId } : {}),
|
|
487
|
+
prUrl,
|
|
488
|
+
branch: leg.workBranch,
|
|
489
|
+
});
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
const anyWork = primaryPushed || peerPullRequests.length > 0;
|
|
493
|
+
if (!anyWork) {
|
|
494
|
+
// Nothing changed in ANY repo. For the implementer this is a failure (as in the
|
|
495
|
+
// single-repo path); a caller that tolerates a no-op (never the implementer today)
|
|
496
|
+
// gets a clean non-event.
|
|
497
|
+
if (job.noChangesIsError === false) {
|
|
498
|
+
return {
|
|
499
|
+
pushed: false,
|
|
500
|
+
branch: primaryWorkBranch,
|
|
501
|
+
summary,
|
|
502
|
+
stats,
|
|
503
|
+
...(usage ? { usage } : {}),
|
|
504
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
505
|
+
};
|
|
506
|
+
}
|
|
507
|
+
return {
|
|
508
|
+
pushed: false,
|
|
509
|
+
branch: primaryWorkBranch,
|
|
510
|
+
summary,
|
|
511
|
+
stats,
|
|
512
|
+
error: noChangesReason('the agent produced no file changes in any repository', stats, stderrTail),
|
|
513
|
+
failureCause: 'no-changes',
|
|
514
|
+
...(usage ? { usage } : {}),
|
|
515
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
516
|
+
};
|
|
517
|
+
}
|
|
518
|
+
logger.info('multi-repo: complete', {
|
|
519
|
+
primaryPushed,
|
|
520
|
+
primaryPrUrl: primaryPrUrl ?? null,
|
|
521
|
+
peers: peerPullRequests.length,
|
|
522
|
+
});
|
|
523
|
+
return {
|
|
524
|
+
pushed: primaryPushed,
|
|
525
|
+
...(primaryPrUrl ? { prUrl: primaryPrUrl } : {}),
|
|
526
|
+
branch: primaryWorkBranch,
|
|
527
|
+
...(peerPullRequests.length ? { peerPullRequests } : {}),
|
|
528
|
+
summary,
|
|
529
|
+
stats,
|
|
530
|
+
...(usage ? { usage } : {}),
|
|
531
|
+
...(callMetrics ? { callMetrics } : {}),
|
|
532
|
+
};
|
|
533
|
+
});
|
|
534
|
+
}
|
|
295
535
|
/**
|
|
296
536
|
* The "no changes" reason both coding agents report: a caller-supplied lead phrase
|
|
297
537
|
* plus the shared "never acted" cause and a credential-scrubbed tail of Pi's stderr.
|