@cat-factory/executor-harness 1.34.4 → 1.34.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.js CHANGED
@@ -7,7 +7,7 @@ import { standUpFrontend, tearDownFrontend } from './frontend-infra.js';
7
7
  import { configurePackageRegistries } from './package-registries.js';
8
8
  import { captureRedactedOutput, redactSecrets } from './redact.js';
9
9
  import { cloneRepo, commitAll, conflictDiff, hasAgentChanges, headCommit, mergeBranch, openPullRequest, prepareExistingCheckout, pushBranch, reinitAndPush, unmergedPaths, } from './git.js';
10
- import { noChangesReason, runCodingAgent } from './coding-agent.js';
10
+ import { makeDirClaimer, noChangesReason, runCodingAgent, runMultiRepoCoding, } from './coding-agent.js';
11
11
  import { acquireRepoCheckout, agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, unusableFinalAnswerCause, withWorkspace, } from './pi-workspace.js';
12
12
  import { diagnosticsSuffix, resolveStructuredOutput, } from './structured-output.js';
13
13
  import { log } from './logger.js';
@@ -298,6 +298,15 @@ async function runPreviewMode(job, opts) {
298
298
  */
299
299
  async function runExploreMode(job, opts) {
300
300
  const logger = opts.log ?? log;
301
+ // Multi-repo read-only exploration (service-connections phase 3): when the job carries peer
302
+ // repos, clone them all as siblings and run at the workspace root. Keyed off job DATA
303
+ // (`peerRepos`), not the agent kind — the backend sets it for the bug-investigator when the
304
+ // task has involved services in distinct repos. `runMultiRepoExplore` uses its own ephemeral
305
+ // `withWorkspace`, so a `persistentCheckout` flag (which a warm-pool dispatch injects on EVERY
306
+ // job) is harmlessly ignored — it must NOT suppress the fan-out, or a pooled bug-investigator
307
+ // would silently drop its peer repos and only ever see the primary one.
308
+ if (job.peerRepos?.length)
309
+ return runMultiRepoExplore(job, opts);
301
310
  return acquireRepoCheckout({ persistent: job.persistentCheckout === true, prefix: 'agent-explore', repo: job.repo }, async (dir) => {
302
311
  opts.onPhase?.('clone');
303
312
  // Monorepo: run with cwd set to the service subtree (created if missing), mirroring the
@@ -367,118 +376,204 @@ async function runExploreMode(job, opts) {
367
376
  contextFiles: job.contextFiles,
368
377
  guardLimits: job.guardLimits,
369
378
  }, opts);
370
- if (!summary.trim()) {
371
- return {
372
- summary,
373
- stats,
374
- error: noOutputReason(stats, stderrTail),
375
- failureCause: 'no-usable-output',
376
- ...(usage ? { usage } : {}),
377
- ...(callMetrics ? { callMetrics } : {}),
378
- ...infraSetupFields,
379
- };
380
- }
381
- // Opt-in (document producers): a final answer cut off at the output ceiling or empty
382
- // must FAIL LOUDLY here, BEFORE the structured repair below could launder a truncated
383
- // reply into a half-baked doc the backend then shards/commits + hands onward. Mirrors the
384
- // bespoke `/spec` handler's `unusableFinalAnswerCause` gate (which drove the old loop).
385
- if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
386
- const unusable = unusableFinalAnswerCause(runDiag);
387
- if (unusable) {
388
- return {
389
- summary,
390
- stats,
391
- error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
392
- failureCause: 'no-usable-output',
393
- ...(usage ? { usage } : {}),
394
- ...(callMetrics ? { callMetrics } : {}),
395
- ...infraSetupFields,
396
- };
397
- }
398
- }
399
- // Prose: the summary IS the deliverable.
400
- if (job.output?.kind !== 'structured') {
401
- logger.info('agent(explore): done (prose)', { ...stats });
402
- return {
403
- summary,
404
- stats,
405
- ...(usage ? { usage } : {}),
406
- ...(callMetrics ? { callMetrics } : {}),
407
- ...infraSetupFields,
408
- };
409
- }
410
- // Structured: parse the agent's JSON. With repair enabled (default) a malformed
411
- // reply gets ONE structured repair call before giving up; with `repair:false` we
412
- // parse directly (no repair channel). The backend coerces/validates + renders from
413
- // the returned object in a post-op.
414
- let custom = null;
415
- let diagnostics;
416
- if (job.output.repair === false) {
417
- try {
418
- custom = extractJsonObject(summary);
419
- }
420
- catch {
421
- custom = null;
422
- }
423
- }
424
- else {
425
- const resolved = await resolveStructuredOutput({
426
- label: 'agent',
427
- shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
428
- parse: (text) => extractJsonObject(text),
429
- }, summary, {
430
- harness: job.harness,
431
- subscriptionToken: job.subscriptionToken,
432
- subscriptionBaseUrl: job.subscriptionBaseUrl,
433
- proxyBaseUrl: job.proxyBaseUrl,
434
- sessionToken: job.sessionToken,
435
- model: job.model,
436
- jobId: job.jobId,
437
- signal: opts.signal,
438
- });
439
- custom = resolved.value;
440
- diagnostics = resolved.diagnostics;
441
- }
442
- if (custom === undefined || custom === null) {
443
- return {
444
- summary,
445
- stats,
446
- error: noStructuredReason(stats, stderrTail, diagnostics),
447
- failureCause: 'no-usable-output',
448
- ...(usage ? { usage } : {}),
449
- ...(callMetrics ? { callMetrics } : {}),
450
- ...infraSetupFields,
451
- };
452
- }
453
- // Stamp the run's actual environment authoritatively onto the structured result when
454
- // infra was managed (the tester): which env the suite ran in is decided by the job's
455
- // infra spec, NOT the model, so the backend can echo it back to the UI deterministically
456
- // even when the model omits it from its JSON (or a structured repair drops it). A
457
- // frontend run tests the app against its live ephemeral backend(s), so it reports
458
- // `ephemeral` (the TestReport env vocabulary has no separate frontend value).
459
- const reportedEnvironment = infra
460
- ? infra.kind === 'frontend'
461
- ? 'ephemeral'
462
- : infra.environment
463
- : undefined;
464
- if (reportedEnvironment && typeof custom === 'object') {
465
- ;
466
- custom.environment = reportedEnvironment;
467
- }
468
- logger.info('agent(explore): done (structured)', { ...stats });
379
+ return await finalizeExploreResult(job, { summary, stats, stderrTail, usage, callMetrics, runDiag }, { infra, infraSetupFields, logger, signal: opts.signal });
380
+ }
381
+ finally {
382
+ if (managed)
383
+ await managed.cleanup();
384
+ }
385
+ });
386
+ }
387
+ /**
388
+ * Turn an explore agent's raw run into an {@link AgentResult}: guard an empty/truncated reply,
389
+ * then either return the prose summary or parse (+ optionally repair) the structured JSON as
390
+ * `custom` — the backend renders any artifact files from it in a post-op. Extracted so the
391
+ * single-repo {@link runExploreMode} and the read-only {@link runMultiRepoExplore} share ONE
392
+ * result contract (the multi-repo path passes no infra, so the tester-only env stamping no-ops).
393
+ */
394
+ async function finalizeExploreResult(job, run, ctx) {
395
+ const { summary, stats, stderrTail, usage, callMetrics, runDiag } = run;
396
+ const { infra, infraSetupFields, logger, signal } = ctx;
397
+ if (!summary.trim()) {
398
+ return {
399
+ summary,
400
+ stats,
401
+ error: noOutputReason(stats, stderrTail),
402
+ failureCause: 'no-usable-output',
403
+ ...(usage ? { usage } : {}),
404
+ ...(callMetrics ? { callMetrics } : {}),
405
+ ...infraSetupFields,
406
+ };
407
+ }
408
+ // Opt-in (document producers): a final answer cut off at the output ceiling — or empty —
409
+ // must FAIL LOUDLY here, BEFORE the structured repair below could launder a truncated
410
+ // reply into a half-baked doc the backend then shards/commits + hands onward. Mirrors the
411
+ // bespoke `/spec` handler's `unusableFinalAnswerCause` gate (which drove the old loop).
412
+ if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
413
+ const unusable = unusableFinalAnswerCause(runDiag);
414
+ if (unusable) {
469
415
  return {
470
416
  summary,
471
- custom,
472
417
  stats,
418
+ error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
419
+ failureCause: 'no-usable-output',
473
420
  ...(usage ? { usage } : {}),
474
421
  ...(callMetrics ? { callMetrics } : {}),
475
422
  ...infraSetupFields,
476
423
  };
477
424
  }
478
- finally {
479
- if (managed)
480
- await managed.cleanup();
425
+ }
426
+ // Prose: the summary IS the deliverable.
427
+ if (job.output?.kind !== 'structured') {
428
+ logger.info('agent(explore): done (prose)', { ...stats });
429
+ return {
430
+ summary,
431
+ stats,
432
+ ...(usage ? { usage } : {}),
433
+ ...(callMetrics ? { callMetrics } : {}),
434
+ ...infraSetupFields,
435
+ };
436
+ }
437
+ // Structured: parse the agent's JSON. With repair enabled (default) a malformed
438
+ // reply gets ONE structured repair call before giving up; with `repair:false` we
439
+ // parse directly (no repair channel). The backend coerces/validates + renders from
440
+ // the returned object in a post-op.
441
+ let custom = null;
442
+ let diagnostics;
443
+ if (job.output.repair === false) {
444
+ try {
445
+ custom = extractJsonObject(summary);
481
446
  }
447
+ catch {
448
+ custom = null;
449
+ }
450
+ }
451
+ else {
452
+ const resolved = await resolveStructuredOutput({
453
+ label: 'agent',
454
+ shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
455
+ parse: (text) => extractJsonObject(text),
456
+ }, summary, {
457
+ harness: job.harness,
458
+ subscriptionToken: job.subscriptionToken,
459
+ subscriptionBaseUrl: job.subscriptionBaseUrl,
460
+ proxyBaseUrl: job.proxyBaseUrl,
461
+ sessionToken: job.sessionToken,
462
+ model: job.model,
463
+ jobId: job.jobId,
464
+ signal,
465
+ });
466
+ custom = resolved.value;
467
+ diagnostics = resolved.diagnostics;
468
+ }
469
+ if (custom === undefined || custom === null) {
470
+ return {
471
+ summary,
472
+ stats,
473
+ error: noStructuredReason(stats, stderrTail, diagnostics),
474
+ failureCause: 'no-usable-output',
475
+ ...(usage ? { usage } : {}),
476
+ ...(callMetrics ? { callMetrics } : {}),
477
+ ...infraSetupFields,
478
+ };
479
+ }
480
+ // Stamp the run's actual environment authoritatively onto the structured result when
481
+ // infra was managed (the tester): which env the suite ran in is decided by the job's
482
+ // infra spec, NOT the model, so the backend can echo it back to the UI deterministically
483
+ // even when the model omits it from its JSON (or a structured repair drops it). A
484
+ // frontend run tests the app against its live ephemeral backend(s), so it reports
485
+ // `ephemeral` (the TestReport env vocabulary has no separate frontend value).
486
+ const reportedEnvironment = infra
487
+ ? infra.kind === 'frontend'
488
+ ? 'ephemeral'
489
+ : infra.environment
490
+ : undefined;
491
+ if (reportedEnvironment && typeof custom === 'object') {
492
+ ;
493
+ custom.environment = reportedEnvironment;
494
+ }
495
+ logger.info('agent(explore): done (structured)', { ...stats });
496
+ return {
497
+ summary,
498
+ custom,
499
+ stats,
500
+ ...(usage ? { usage } : {}),
501
+ ...(callMetrics ? { callMetrics } : {}),
502
+ ...infraSetupFields,
503
+ };
504
+ }
505
+ /**
506
+ * Read-only MULTI-REPO exploration (service-connections phase 3, read-only): clone the primary
507
+ * repo PLUS every connected peer repo as SIBLING checkouts under one workspace root, run the
508
+ * agent ONCE with its cwd at the root (so it can read across every repo the bug touches), and
509
+ * return its prose/structured result — making NO edits, NO commits and opening NO PR. The
510
+ * counterpart of {@link runMultiRepoCoding} for the `bug-investigator`, but strictly read-only:
511
+ * peers carry no `newBranch`/`pr`, nothing is pushed, and the peers exist only to be read. The
512
+ * multi-repo layout is explained to the agent by the backend-composed system-prompt section
513
+ * (which repo/subdir each service lives in) + the harness's own AGENTS.md multi-repo note.
514
+ */
515
+ async function runMultiRepoExplore(job, opts) {
516
+ const logger = (opts.log ?? log).child({ kind: 'multi-repo-explore', jobId: job.jobId });
517
+ const peers = job.peerRepos ?? [];
518
+ // Deterministic sibling directory per repo (always `owner__name`, never the bare repo name),
519
+ // so two repos named the same never clobber each other — shared claim scheme with the coding fan-out.
520
+ const claimDir = makeDirClaimer();
521
+ const legs = [
522
+ { repo: job.repo, cloneBranch: job.branch, ghToken: job.ghToken },
523
+ ...peers.map((peer) => ({
524
+ repo: peer.repo,
525
+ cloneBranch: peer.repo.baseBranch,
526
+ ghToken: peer.ghToken ?? job.ghToken,
527
+ })),
528
+ ].map((leg) => ({ ...leg, dirName: claimDir(leg.repo) }));
529
+ return withWorkspace('explore-multi', async (root) => {
530
+ // Clone phase: every repo (read-only) into its sibling dir under the workspace root. No
531
+ // work branch, no resume — the investigator only reads — so the legs are independent and
532
+ // clone in parallel (wall-clock is the slowest single clone, not the sum).
533
+ opts.onPhase?.('clone');
534
+ await Promise.all(legs.map(async (leg) => {
535
+ const dir = join(root, leg.dirName);
536
+ await mkdir(dir, { recursive: true });
537
+ logger.info('multi-repo-explore: cloning', {
538
+ repo: leg.dirName,
539
+ cloneBranch: leg.cloneBranch,
540
+ });
541
+ await cloneRepo({
542
+ repo: { ...leg.repo, baseBranch: leg.cloneBranch },
543
+ ghToken: leg.ghToken,
544
+ dir,
545
+ signal: opts.signal,
546
+ });
547
+ }));
548
+ opts.onPhase?.('agent');
549
+ logger.info('multi-repo-explore: running agent', { repos: legs.map((l) => l.dirName) });
550
+ const run = await runAgentInWorkspace({
551
+ dir: root,
552
+ systemPrompt: job.systemPrompt,
553
+ userPrompt: job.userPrompt,
554
+ model: job.model,
555
+ harness: job.harness,
556
+ subscriptionToken: job.subscriptionToken,
557
+ subscriptionBaseUrl: job.subscriptionBaseUrl,
558
+ ambientAuth: job.ambientAuth,
559
+ proxyBaseUrl: job.proxyBaseUrl,
560
+ sessionToken: job.sessionToken,
561
+ // Read-only: no edits expected, so the no-progress guard's no-edit bound must not fire.
562
+ expectsEdits: false,
563
+ webToolsGuidance: job.webToolsGuidance,
564
+ webSearchProxy: job.webSearch,
565
+ ...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
566
+ guardLimits: job.guardLimits,
567
+ multiRepo: true,
568
+ }, opts);
569
+ return finalizeExploreResult(job, {
570
+ summary: run.summary,
571
+ stats: run.stats,
572
+ stderrTail: run.stderrTail,
573
+ usage: run.usage,
574
+ callMetrics: run.callMetrics,
575
+ runDiag: run.diagnostics,
576
+ }, { infraSetupFields: {}, logger, signal: opts.signal });
482
577
  });
483
578
  }
484
579
  /**
@@ -498,6 +593,12 @@ async function runCodingMode(job, opts) {
498
593
  // commit + push (no PR). Keyed off job DATA (`mergeBase`), not the agent kind.
499
594
  if (job.mergeBase)
500
595
  return runConflictResolution(job, opts);
596
+ // Multi-repo coding (service-connections phase 3): clone every connected peer repo as a
597
+ // sibling, run the agent once across all of them, and open one PR per changed repo. Keyed
598
+ // off job DATA (`peerRepos`), not the agent kind — the implementer sets it when the task
599
+ // has involved services in distinct repos.
600
+ if (job.peerRepos?.length)
601
+ return runMultiRepoCoding(job, opts);
501
602
  const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch;
502
603
  const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent({
503
604
  kind: 'agent',
package/dist/coding-agent.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import { mkdir } from 'node:fs/promises';
2
2
  import { join } from 'node:path';
3
- import { branchAheadOfBase, branchHasCommitsSince, cloneExistingBranch, cloneRepo, commitTrackedEdits, createBranch, excludeFromGit, headCommit, listUntrackedFiles, prepareExistingCheckout, pushBranch, refreshFromBaseIfClean, remoteBranchExists, } from './git.js';
3
+ import { branchAheadOfBase, branchHasCommitsSince, cloneExistingBranch, cloneRepo, commitTrackedEdits, createBranch, excludeFromGit, headCommit, listUntrackedFiles, openPullRequest, prepareExistingCheckout, pushBranch, refreshFromBaseIfClean, remoteBranchExists, } from './git.js';
4
4
  import { FOLLOW_UPS_FILENAME, FollowUpTailer } from './follow-ups.js';
5
- import { acquireRepoCheckout, agentNeverActed, agentOutputTail, runAgentInWorkspace, } from './pi-workspace.js';
5
+ import { acquireRepoCheckout, agentNeverActed, agentOutputTail, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
6
6
  import { log } from './logger.js';
7
7
  /**
8
8
  * How often the harness checkpoints the agent's work mid-run by pushing the branch.
@@ -292,6 +292,246 @@ export async function runCodingAgent(spec, opts = {}) {
292
292
  return outcome;
293
293
  });
294
294
  }
295
+ /** Sanitise an owner/name into a safe single path segment for a sibling checkout directory. */
296
+ export function safeDirSegment(value) {
297
+ return value.replace(/[^A-Za-z0-9._-]/g, '-') || '_';
298
+ }
299
+ /**
300
+ * A sibling-directory allocator for a multi-repo run: returns the checkout directory name for a
301
+ * repo under the workspace root. Deterministic (`owner__name`) and collision-free by construction
302
+ * — the checkout set is deduped by `owner/name` upstream and GitHub owners contain no `_`, so the
303
+ * `owner__name` join is unique per repo without a stateful collision dance. Kept as a factory so
304
+ * the coding + read-only explore fan-outs share ONE scheme, and it MUST stay byte-identical to the
305
+ * backend's `siblingCheckoutDir` / `renderMultiRepoWorkspaceSection` in `@cat-factory/server`
306
+ * (jobBody.ts), which names this exact directory in the agent's prompt — the two are computed
307
+ * independently, so a divergent rule would point the agent at a directory that does not exist.
308
+ */
309
+ export function makeDirClaimer() {
310
+ return (repo) => `${safeDirSegment(repo.owner)}__${safeDirSegment(repo.name)}`;
311
+ }
312
+ /**
313
+ * Multi-repo coding (service-connections phase 3): clone the primary repo AND every connected
314
+ * peer repo as SIBLING checkouts under one workspace root, run the agent ONCE with its cwd at
315
+ * that root (so it makes the cross-service change coherently across all of them), then commit +
316
+ * push each repo that actually changed and open one PR per dirty repo. The task's own-service PR
317
+ * is reported as `prUrl`/`branch`; the peer PRs as `peerPullRequests`.
318
+ *
319
+ * Deliberately simpler than the single-repo {@link runCodingAgent} for the first cut: NO mid-run
320
+ * checkpoint pushes (an evicted multi-repo run re-clones on retry — the deterministic work branch
321
+ * still lets it resume any commits it managed to push at the end), NO warm-pool persistent
322
+ * checkout (always ephemeral), and NO follow-up sentinel streaming. It reuses the SAME dir-scoped
323
+ * git helpers, so the per-repo clone/commit/push/PR mechanics match the single-repo path exactly.
324
+ */
325
+ export async function runMultiRepoCoding(job, opts = {}) {
326
+ const { signal } = opts;
327
+ const logger = (opts.log ?? log).child({ kind: 'multi-repo', jobId: job.jobId });
328
+ const peers = job.peerRepos ?? [];
329
+ const primaryWorkBranch = job.pushBranch ?? job.newBranch ?? job.branch;
330
+ // Assign the sibling directory per repo via the shared deterministic allocator (`owner__name`,
331
+ // matching the backend prompt's `siblingCheckoutDir`), shared with the read-only explore fan-out.
332
+ const claimDir = makeDirClaimer();
333
+ const legs = [
334
+ {
335
+ repo: job.repo,
336
+ dirName: claimDir(job.repo),
337
+ dir: '',
338
+ cloneBranch: job.branch,
339
+ workBranch: primaryWorkBranch,
340
+ ghToken: job.ghToken,
341
+ ...(job.pr ? { pr: job.pr } : {}),
342
+ primary: true,
343
+ baseSha: '',
344
+ resumed: false,
345
+ },
346
+ ...peers.map((peer) => ({
347
+ repo: peer.repo,
348
+ dirName: claimDir(peer.repo),
349
+ dir: '',
350
+ cloneBranch: peer.repo.baseBranch,
351
+ // Coding peers always carry `newBranch` (the backend sets the shared work branch);
352
+ // fall back to the primary's for the type (read-only peers never reach this path).
353
+ workBranch: peer.newBranch ?? primaryWorkBranch,
354
+ ghToken: peer.ghToken ?? job.ghToken,
355
+ ...(peer.pr ? { pr: peer.pr } : {}),
356
+ ...(peer.frameId ? { frameId: peer.frameId } : {}),
357
+ primary: false,
358
+ baseSha: '',
359
+ resumed: false,
360
+ })),
361
+ ];
362
+ return withWorkspace('multi', async (root) => {
363
+ // Clone phase: every repo into its sibling dir under the workspace root. Resume an
364
+ // existing remote work branch (an evicted retry) rather than branching off base again.
365
+ opts.onPhase?.('clone');
366
+ for (const leg of legs) {
367
+ const dir = join(root, leg.dirName);
368
+ await mkdir(dir, { recursive: true });
369
+ leg.resumed = await remoteBranchExists(leg.repo.cloneUrl, leg.workBranch, leg.ghToken, signal);
370
+ if (leg.resumed) {
371
+ logger.info('multi-repo: resuming existing branch', {
372
+ repo: leg.dirName,
373
+ branch: leg.workBranch,
374
+ });
375
+ await cloneExistingBranch({
376
+ cloneUrl: leg.repo.cloneUrl,
377
+ branch: leg.workBranch,
378
+ ghToken: leg.ghToken,
379
+ dir,
380
+ signal,
381
+ });
382
+ }
383
+ else {
384
+ logger.info('multi-repo: cloning', { repo: leg.dirName, cloneBranch: leg.cloneBranch });
385
+ await cloneRepo({
386
+ repo: { ...leg.repo, baseBranch: leg.cloneBranch },
387
+ ghToken: leg.ghToken,
388
+ dir,
389
+ signal,
390
+ });
391
+ await createBranch(dir, leg.workBranch, signal);
392
+ }
393
+ leg.dir = dir;
394
+ // The branch tip before the agent runs. Captured BEFORE the resume base refresh below so
395
+ // that refresh's merge commit counts as advancement and is pushed (as in the single-repo
396
+ // path). A fresh leg produced work iff its branch advances past this; a resumed leg already
397
+ // carries prior work.
398
+ leg.baseSha = await headCommit(dir, signal);
399
+ // A resumed branch was cut from an OLDER base; merge the latest base in when the two merge
400
+ // cleanly so the agent works against current base and the peer/own PRs stay current. On a
401
+ // conflict this is a best-effort no-op (the merge gate handles a conflicting PR downstream),
402
+ // mirroring the single-repo {@link runCodingAgent} resume refresh.
403
+ if (leg.resumed) {
404
+ const refreshed = await refreshFromBaseIfClean(dir, leg.cloneBranch, leg.ghToken, signal).catch(() => false);
405
+ if (!refreshed) {
406
+ logger.info('multi-repo: resume base refresh skipped (conflict or error)', {
407
+ repo: leg.dirName,
408
+ base: leg.cloneBranch,
409
+ });
410
+ }
411
+ }
412
+ }
413
+ // Run the agent ONCE with its cwd at the workspace root, so it sees every sibling checkout
414
+ // and can change them coherently. No monorepo/service-directory scoping — the multi-repo
415
+ // note + the backend system-prompt section explain the layout.
416
+ opts.onPhase?.('agent');
417
+ logger.info('multi-repo: running agent', { repos: legs.map((l) => l.dirName) });
418
+ const { summary, stats, stderrTail, usage, callMetrics } = await runAgentInWorkspace({
419
+ dir: root,
420
+ systemPrompt: job.systemPrompt,
421
+ userPrompt: job.userPrompt,
422
+ model: job.model,
423
+ harness: job.harness,
424
+ subscriptionToken: job.subscriptionToken,
425
+ subscriptionBaseUrl: job.subscriptionBaseUrl,
426
+ ambientAuth: job.ambientAuth,
427
+ proxyBaseUrl: job.proxyBaseUrl,
428
+ sessionToken: job.sessionToken,
429
+ webToolsGuidance: job.webToolsGuidance,
430
+ webSearchProxy: job.webSearch,
431
+ guardLimits: job.guardLimits,
432
+ ...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
433
+ multiRepo: true,
434
+ }, opts);
435
+ // Push phase: commit forgotten tracked edits, then push + open a PR for each repo the run
436
+ // actually changed. A repo the agent left untouched is skipped (no branch, no PR).
437
+ opts.onPhase?.('push');
438
+ let primaryPushed = false;
439
+ let primaryPrUrl;
440
+ const peerPullRequests = [];
441
+ for (const leg of legs) {
442
+ await commitTrackedEdits(leg.dir, job.commitMessage ?? leg.pr?.title ?? 'Agent changes', signal);
443
+ const advanced = await branchHasCommitsSince(leg.dir, leg.baseSha, signal);
444
+ let hasWork = advanced || leg.resumed;
445
+ if (leg.resumed && !advanced) {
446
+ const ahead = await branchAheadOfBase(leg.dir, leg.repo.baseBranch, leg.ghToken, signal);
447
+ if (ahead === false)
448
+ hasWork = false;
449
+ }
450
+ const leftover = await listUntrackedFiles(leg.dir, signal);
451
+ if (leftover.length > 0) {
452
+ logger.warn('multi-repo: uncommitted new files left behind (not pushed)', {
453
+ repo: leg.dirName,
454
+ count: leftover.length,
455
+ files: leftover.slice(0, 20),
456
+ });
457
+ }
458
+ if (!hasWork) {
459
+ logger.info('multi-repo: no changes for repo', { repo: leg.dirName });
460
+ continue;
461
+ }
462
+ await pushBranch(leg.dir, leg.workBranch, leg.ghToken, signal);
463
+ let prUrl = null;
464
+ if (leg.pr) {
465
+ prUrl = await openPullRequest({
466
+ owner: leg.repo.owner,
467
+ name: leg.repo.name,
468
+ ghToken: leg.ghToken,
469
+ head: leg.workBranch,
470
+ base: leg.repo.baseBranch,
471
+ pr: leg.pr,
472
+ apiBase: job.githubApiBase,
473
+ cloneUrl: leg.repo.cloneUrl,
474
+ ...(leg.repo.provider ? { provider: leg.repo.provider } : {}),
475
+ signal,
476
+ });
477
+ }
478
+ if (leg.primary) {
479
+ primaryPushed = true;
480
+ if (prUrl)
481
+ primaryPrUrl = prUrl;
482
+ }
483
+ else if (prUrl) {
484
+ peerPullRequests.push({
485
+ repo: `${leg.repo.owner}/${leg.repo.name}`,
486
+ ...(leg.frameId ? { frameId: leg.frameId } : {}),
487
+ prUrl,
488
+ branch: leg.workBranch,
489
+ });
490
+ }
491
+ }
492
+ const anyWork = primaryPushed || peerPullRequests.length > 0;
493
+ if (!anyWork) {
494
+ // Nothing changed in ANY repo. For the implementer this is a failure (as in the
495
+ // single-repo path); a caller that tolerates a no-op (never the implementer today)
496
+ // gets a clean non-event.
497
+ if (job.noChangesIsError === false) {
498
+ return {
499
+ pushed: false,
500
+ branch: primaryWorkBranch,
501
+ summary,
502
+ stats,
503
+ ...(usage ? { usage } : {}),
504
+ ...(callMetrics ? { callMetrics } : {}),
505
+ };
506
+ }
507
+ return {
508
+ pushed: false,
509
+ branch: primaryWorkBranch,
510
+ summary,
511
+ stats,
512
+ error: noChangesReason('the agent produced no file changes in any repository', stats, stderrTail),
513
+ failureCause: 'no-changes',
514
+ ...(usage ? { usage } : {}),
515
+ ...(callMetrics ? { callMetrics } : {}),
516
+ };
517
+ }
518
+ logger.info('multi-repo: complete', {
519
+ primaryPushed,
520
+ primaryPrUrl: primaryPrUrl ?? null,
521
+ peers: peerPullRequests.length,
522
+ });
523
+ return {
524
+ pushed: primaryPushed,
525
+ ...(primaryPrUrl ? { prUrl: primaryPrUrl } : {}),
526
+ branch: primaryWorkBranch,
527
+ ...(peerPullRequests.length ? { peerPullRequests } : {}),
528
+ summary,
529
+ stats,
530
+ ...(usage ? { usage } : {}),
531
+ ...(callMetrics ? { callMetrics } : {}),
532
+ };
533
+ });
534
+ }
295
535
  /**
296
536
  * The "no changes" reason both coding agents report: a caller-supplied lead phrase
297
537
  * plus the shared "never acted" cause and a credential-scrubbed tail of Pi's stderr.