@cat-factory/executor-harness 1.34.2 → 1.34.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/job.js CHANGED
@@ -114,6 +114,42 @@ function parseRepoSpec(repo) {
114
114
  spec.serviceDirectory = dir;
115
115
  return spec;
116
116
  }
/**
 * Parse the optional multi-repo peer list (service-connections phase 3).
 *
 * Each entry yields a peer spec carrying:
 *  - `repo`      — the entry's `repo` run through `parseRepoSpec` (the same parser the primary
 *                  repo uses); a missing `repo` is parsed as `{}` so `parseRepoSpec` decides
 *                  how to report it.
 *  - `newBranch` — only when the entry supplies one. It is required for a coding fan-out (the
 *                  branch that gets pushed) but ABSENT for a read-only explore fan-out
 *                  (bug-investigator), so it is validated only when present.
 *  - `frameId` / `ghToken` — kept only when they are non-empty strings.
 *  - `pr`        — only when the entry supplies an object; `title` is validated via `str`,
 *                  `body` falls back to `''` when it is not a string.
 *
 * @param value Untrusted `peerRepos` value from the job body.
 * @returns The parsed peer specs; `[]` when `value` is `undefined` or `null`.
 * @throws {Error} When `value` is not an array, or an entry is not a plain (non-array) object.
 *   Errors raised by `parseRepoSpec` / `str` propagate unchanged.
 */
function parsePeerRepos(value) {
  if (value === undefined || value === null) return [];
  if (!Array.isArray(value)) {
    throw new Error("Invalid job: 'peerRepos' must be an array");
  }
  return value.map((entry, i) => {
    // `typeof [] === 'object'`, so an array entry has to be rejected explicitly — otherwise
    // it would slip past this guard and be read as if it were a peer object.
    if (typeof entry !== 'object' || entry === null || Array.isArray(entry)) {
      throw new Error(`Invalid job: 'peerRepos[${i}]' must be an object`);
    }
    const spec = {
      repo: parseRepoSpec(entry.repo ?? {}),
    };
    if (entry.newBranch !== undefined) {
      spec.newBranch = str(entry.newBranch, `peerRepos[${i}].newBranch`);
    }
    if (typeof entry.frameId === 'string' && entry.frameId) {
      spec.frameId = entry.frameId;
    }
    if (typeof entry.ghToken === 'string' && entry.ghToken) {
      spec.ghToken = entry.ghToken;
    }
    if (typeof entry.pr === 'object' && entry.pr !== null) {
      const pr = entry.pr;
      spec.pr = {
        title: str(pr.title, `peerRepos[${i}].pr.title`),
        body: typeof pr.body === 'string' ? pr.body : '',
      };
    }
    return spec;
  });
}
117
153
  /** Parse the optional `repo.provider` discriminator (defaults to undefined ⇒ host inference). */
118
154
  function parseVcsProvider(value) {
119
155
  if (value === undefined || value === null)
@@ -295,8 +331,23 @@ function parseAgentInfraSpec(value) {
295
331
  ...(typeof o.environmentUrl === 'string' && o.environmentUrl
296
332
  ? { environmentUrl: o.environmentUrl }
297
333
  : {}),
334
+ ...(() => {
335
+ const peers = parseStringMap(o.peerEnvironments);
336
+ return peers ? { peerEnvironments: peers } : {};
337
+ })(),
298
338
  };
299
339
  }
/**
 * Parse a `Record<string, string>` from untrusted input, keeping only entries whose value is a
 * non-empty string.
 *
 * Arrays are rejected: `typeof [] === 'object'`, so without the explicit check a list such as
 * `['a']` would be accepted and come back as `{ '0': 'a' }`.
 *
 * The result is assembled with `Object.fromEntries`, which defines each key as an own data
 * property — a key named `__proto__` is therefore stored like any other key instead of being
 * routed through the prototype setter.
 *
 * @param value Untrusted input.
 * @returns The filtered map, or `undefined` when `value` is not a plain object or no entry
 *   survives the filter.
 */
function parseStringMap(value) {
  if (typeof value !== 'object' || value === null || Array.isArray(value)) {
    return undefined;
  }
  const kept = Object.entries(value).filter(([, val]) => typeof val === 'string' && val !== '');
  return kept.length > 0 ? Object.fromEntries(kept) : undefined;
}
300
351
  /**
301
352
  * Env-var names never injected from a frontend binding: spread over `process.env` at build
302
353
  * time, so any of these would break the toolchain (or enable code execution / cert overrides)
@@ -427,6 +478,7 @@ export function parseAgentJob(input) {
427
478
  })()
428
479
  : undefined;
429
480
  const infra = parseAgentInfraSpec(o.infra);
481
+ const peerRepos = parsePeerRepos(o.peerRepos);
430
482
  const bootstrap = parseAgentBootstrapSpec(o.bootstrap);
431
483
  const contextFiles = parseContextFiles(o.contextFiles);
432
484
  const packageRegistries = parsePackageRegistries(o.packageRegistries);
@@ -457,6 +509,7 @@ export function parseAgentJob(input) {
457
509
  ? { commitMessage: o.commitMessage }
458
510
  : {}),
459
511
  ...(pr ? { pr } : {}),
512
+ ...(peerRepos.length ? { peerRepos } : {}),
460
513
  ...(o.noChangesIsError === false ? { noChangesIsError: false } : {}),
461
514
  ...(o.persistentCheckout === true ? { persistentCheckout: true } : {}),
462
515
  ...(o.streamFollowUps === true ? { streamFollowUps: true } : {}),
@@ -469,5 +522,11 @@ export function parseAgentJob(input) {
469
522
  // allowed GitHub host too (the installation token is sent to it on the force-push).
470
523
  if (job.bootstrap)
471
524
  assertAllowedHost(job.bootstrap.target.cloneUrl, 'bootstrap.target.cloneUrl');
525
+ // Each peer repo's clone URL receives the installation token on clone/push, so it must be
526
+ // an allowed GitHub host too — a body-supplied peer pointing at an attacker host would
527
+ // exfiltrate the token exactly like a rogue primary clone URL.
528
+ for (const [i, peer] of (job.peerRepos ?? []).entries()) {
529
+ assertAllowedHost(peer.repo.cloneUrl, `peerRepos[${i}].repo.cloneUrl`);
530
+ }
472
531
  return job;
473
532
  }
@@ -165,6 +165,7 @@ export async function runAgentInWorkspace(spec, opts = {}) {
165
165
  guidance: spec.webToolsGuidance,
166
166
  serviceDirectory: spec.serviceDirectory,
167
167
  contextFiles,
168
+ ...(spec.multiRepo ? { multiRepo: true } : {}),
168
169
  });
169
170
  await writePiModelsConfig({ model: spec.model, proxyBaseUrl });
170
171
  const { signal, onActivity, onProgress, onSpan } = opts;
package/dist/pi.js CHANGED
@@ -131,13 +131,37 @@ export async function writeAgentsContext(systemPrompt, opts = {}) {
131
131
  const webTools = opts.webSearch ? (opts.guidance ?? WEB_TOOLS_GUIDANCE) : '';
132
132
  // Tell the agent it's in a monorepo and which subtree is its service, so it scopes
133
133
  // its work (and its build/test commands) there. Only present when the dispatcher
134
- // resolved a monorepo service directory; the agent's cwd already points at it.
135
- const monorepo = opts.serviceDirectory ? monorepoGuidance(opts.serviceDirectory) : '';
134
+ // resolved a monorepo service directory; the agent's cwd already points at it. A
135
+ // MULTI-REPO run runs at the workspace root (cwd spans sibling checkouts), so the
136
+ // monorepo note is suppressed there — the multi-repo mechanics note replaces it.
137
+ const monorepo = opts.serviceDirectory && !opts.multiRepo ? monorepoGuidance(opts.serviceDirectory) : '';
138
+ // Multi-repo mechanics note (service-connections phase 3): the concrete repo→role mapping
139
+ // is in the backend-composed system prompt above; this explains the shared MECHANICS (cwd
140
+ // is the workspace root, repos are sibling checkouts, one PR per dirty repo).
141
+ const multiRepo = opts.multiRepo ? MULTI_REPO_GUIDANCE : '';
136
142
  // Point the agent at any linked context the backend materialised into the checkout
137
143
  // (requirements / RFCs / PRDs / tracker issues) so it reads them on demand.
138
144
  const context = contextGuidance(opts.contextFiles ?? []);
139
- await writeFile(join(dir, 'AGENTS.md'), `${systemPrompt}${BLUEPRINT_GUIDANCE}${SPEC_GUIDANCE}${TODO_GUIDANCE}${monorepo}${webTools}${context}`, 'utf8');
145
+ await writeFile(join(dir, 'AGENTS.md'), `${systemPrompt}${BLUEPRINT_GUIDANCE}${SPEC_GUIDANCE}${TODO_GUIDANCE}${monorepo}${multiRepo}${webTools}${context}`, 'utf8');
140
146
  }
147
+ /** The MULTI-REPO mechanics note appended to AGENTS.md when a run spans sibling checkouts. */
148
+ const MULTI_REPO_GUIDANCE = `
149
+
150
+ ## Multi-repo workspace (work across sibling checkouts)
151
+
152
+ This task spans MORE THAN ONE repository. Your working directory is the WORKSPACE ROOT, and
153
+ each involved repository is checked out as a sibling directory directly under it. The workspace
154
+ root itself is NOT a git repository — run git INSIDE each repository's directory. The system
155
+ prompt above lists which repository is which and each one's role. Make the cross-service
156
+ change coherently across every repository the task requires — a provider's API and its
157
+ consumer's call site belong in the SAME piece of work. Run each repository's own build/test
158
+ commands inside that repository's directory.
159
+
160
+ Commit your own work inside each repository you change (\`cd\` into it, stage the files that
161
+ belong — INCLUDING any new files you added — and commit). The platform will NOT add untracked
162
+ files for you, so anything you leave uncommitted and untracked is lost. Each repository you
163
+ change is opened as a SEPARATE pull request; leave a repository untouched if the task does not
164
+ require changing it.`;
141
165
  /** Directory in the checkout where linked-context files are materialised (see CONTEXT_DIR in agents). */
142
166
  export const CONTEXT_DIR = '.cat-context';
143
167
  /** The AGENTS.md block enumerating the materialised linked-context files, or '' when none. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cat-factory/executor-harness",
3
- "version": "1.34.2",
3
+ "version": "1.34.8",
4
4
  "description": "Container payload: a thin TypeScript wrapper that runs the Pi coding agent against a cloned repo and opens a PR. Runs in the Cloudflare Container (and, in local native mode, as a host process); carries no secrets.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -26,8 +26,8 @@
26
26
  "hono": "^4.12.27",
27
27
  "typescript": "^6.0.3",
28
28
  "vitest": "^4.1.9",
29
- "@cat-factory/spend": "0.10.78",
30
- "@cat-factory/server": "0.69.1"
29
+ "@cat-factory/server": "0.81.0",
30
+ "@cat-factory/spend": "0.10.93"
31
31
  },
32
32
  "scripts": {
33
33
  "build": "tsc -p tsconfig.json",
package/src/agent.ts CHANGED
@@ -26,8 +26,13 @@ import {
26
26
  reinitAndPush,
27
27
  unmergedPaths,
28
28
  } from './git.js'
29
- import type { PiRunStats } from './pi.js'
30
- import { noChangesReason, runCodingAgent } from './coding-agent.js'
29
+ import type { PiRunStats, RunDiagnostics } from './pi.js'
30
+ import {
31
+ makeDirClaimer,
32
+ noChangesReason,
33
+ runCodingAgent,
34
+ runMultiRepoCoding,
35
+ } from './coding-agent.js'
31
36
  import {
32
37
  acquireRepoCheckout,
33
38
  agentNeverActed,
@@ -370,6 +375,14 @@ async function runPreviewMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
370
375
  */
371
376
  async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
372
377
  const logger = opts.log ?? log
378
+ // Multi-repo read-only exploration (service-connections phase 3): when the job carries peer
379
+ // repos, clone them all as siblings and run at the workspace root. Keyed off job DATA
380
+ // (`peerRepos`), not the agent kind — the backend sets it for the bug-investigator when the
381
+ // task has involved services in distinct repos. `runMultiRepoExplore` uses its own ephemeral
382
+ // `withWorkspace`, so a `persistentCheckout` flag (which a warm-pool dispatch injects on EVERY
383
+ // job) is harmlessly ignored — it must NOT suppress the fan-out, or a pooled bug-investigator
384
+ // would silently drop its peer repos and only ever see the primary one.
385
+ if (job.peerRepos?.length) return runMultiRepoExplore(job, opts)
373
386
  return acquireRepoCheckout(
374
387
  { persistent: job.persistentCheckout === true, prefix: 'agent-explore', repo: job.repo },
375
388
  async (dir) => {
@@ -453,117 +466,11 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
453
466
  opts,
454
467
  )
455
468
 
456
- if (!summary.trim()) {
457
- return {
458
- summary,
459
- stats,
460
- error: noOutputReason(stats, stderrTail),
461
- failureCause: 'no-usable-output',
462
- ...(usage ? { usage } : {}),
463
- ...(callMetrics ? { callMetrics } : {}),
464
- ...infraSetupFields,
465
- }
466
- }
467
-
468
- // Opt-in (document producers): a final answer cut off at the output ceiling — or empty —
469
- // must FAIL LOUDLY here, BEFORE the structured repair below could launder a truncated
470
- // reply into a half-baked doc the backend then shards/commits + hands onward. Mirrors the
471
- // bespoke `/spec` handler's `unusableFinalAnswerCause` gate (which drove the old loop).
472
- if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
473
- const unusable = unusableFinalAnswerCause(runDiag)
474
- if (unusable) {
475
- return {
476
- summary,
477
- stats,
478
- error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
479
- failureCause: 'no-usable-output',
480
- ...(usage ? { usage } : {}),
481
- ...(callMetrics ? { callMetrics } : {}),
482
- ...infraSetupFields,
483
- }
484
- }
485
- }
486
-
487
- // Prose: the summary IS the deliverable.
488
- if (job.output?.kind !== 'structured') {
489
- logger.info('agent(explore): done (prose)', { ...stats })
490
- return {
491
- summary,
492
- stats,
493
- ...(usage ? { usage } : {}),
494
- ...(callMetrics ? { callMetrics } : {}),
495
- ...infraSetupFields,
496
- }
497
- }
498
-
499
- // Structured: parse the agent's JSON. With repair enabled (default) a malformed
500
- // reply gets ONE structured repair call before giving up; with `repair:false` we
501
- // parse directly (no repair channel). The backend coerces/validates + renders from
502
- // the returned object in a post-op.
503
- let custom: unknown = null
504
- let diagnostics: StructuredOutputDiagnostics | undefined
505
- if (job.output.repair === false) {
506
- try {
507
- custom = extractJsonObject(summary)
508
- } catch {
509
- custom = null
510
- }
511
- } else {
512
- const resolved = await resolveStructuredOutput(
513
- {
514
- label: 'agent',
515
- shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
516
- parse: (text) => extractJsonObject(text),
517
- },
518
- summary,
519
- {
520
- harness: job.harness,
521
- subscriptionToken: job.subscriptionToken,
522
- subscriptionBaseUrl: job.subscriptionBaseUrl,
523
- proxyBaseUrl: job.proxyBaseUrl,
524
- sessionToken: job.sessionToken,
525
- model: job.model,
526
- jobId: job.jobId,
527
- signal: opts.signal,
528
- },
529
- )
530
- custom = resolved.value
531
- diagnostics = resolved.diagnostics
532
- }
533
- if (custom === undefined || custom === null) {
534
- return {
535
- summary,
536
- stats,
537
- error: noStructuredReason(stats, stderrTail, diagnostics),
538
- failureCause: 'no-usable-output',
539
- ...(usage ? { usage } : {}),
540
- ...(callMetrics ? { callMetrics } : {}),
541
- ...infraSetupFields,
542
- }
543
- }
544
- // Stamp the run's actual environment authoritatively onto the structured result when
545
- // infra was managed (the tester): which env the suite ran in is decided by the job's
546
- // infra spec, NOT the model, so the backend can echo it back to the UI deterministically
547
- // even when the model omits it from its JSON (or a structured repair drops it). A
548
- // frontend run tests the app against its live ephemeral backend(s), so it reports
549
- // `ephemeral` (the TestReport env vocabulary has no separate frontend value).
550
- const reportedEnvironment = infra
551
- ? infra.kind === 'frontend'
552
- ? 'ephemeral'
553
- : infra.environment
554
- : undefined
555
- if (reportedEnvironment && typeof custom === 'object') {
556
- ;(custom as Record<string, unknown>).environment = reportedEnvironment
557
- }
558
- logger.info('agent(explore): done (structured)', { ...stats })
559
- return {
560
- summary,
561
- custom,
562
- stats,
563
- ...(usage ? { usage } : {}),
564
- ...(callMetrics ? { callMetrics } : {}),
565
- ...infraSetupFields,
566
- }
469
+ return await finalizeExploreResult(
470
+ job,
471
+ { summary, stats, stderrTail, usage, callMetrics, runDiag },
472
+ { infra, infraSetupFields, logger, signal: opts.signal },
473
+ )
567
474
  } finally {
568
475
  if (managed) await managed.cleanup()
569
476
  }
@@ -571,6 +478,236 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
571
478
  )
572
479
  }
573
480
 
/**
 * The agent-run outputs the explore result-parsing reads (shared single-/multi-repo).
 * Consumed by `finalizeExploreResult`; only `summary` and `stats` are required.
 */
interface ExploreAgentRun {
  /** The agent's reply text: returned as-is for a prose job, parsed as JSON for a structured one. */
  summary: string
  /** Run statistics; echoed on the result and attached to the completion log line. */
  stats: PiRunStats
  /** Handed to the failure-reason builders when the run produced no usable output. */
  stderrTail?: string
  /** Copied onto the result only when present. */
  usage?: AgentResult['usage']
  /** Copied onto the result only when present. */
  callMetrics?: AgentResult['callMetrics']
  /** Run diagnostics inspected by the opt-in `failOnUnusableFinal` gate. */
  runDiag?: RunDiagnostics
}
490
+
/**
 * Convert an explore agent's raw run into the {@link AgentResult} handed back to the caller.
 *
 * Steps, in order:
 *  1. A blank summary fails with `no-usable-output`.
 *  2. For a structured job that opted into `failOnUnusableFinal`, an unusable final answer
 *     fails BEFORE any repair could launder a truncated reply into a half-baked document.
 *  3. A non-structured (prose) job returns the summary as the deliverable.
 *  4. A structured job parses the summary as a JSON object — through ONE repair-capable
 *     resolve by default, or directly when `repair` is `false` — and fails with
 *     `no-usable-output` when nothing parses.
 *  5. When `ctx.infra` is given, the environment the run used is stamped onto the parsed
 *     object (`'ephemeral'` for a frontend infra, otherwise `infra.environment`), so it does
 *     not depend on the model having echoed it.
 *
 * Shared by the single-repo {@link runExploreMode} and the read-only
 * {@link runMultiRepoExplore}; the latter passes no infra, so step 5 is a no-op there.
 *
 * @param job The job being finalised; `job.output` selects prose vs structured handling.
 * @param run The agent's outputs.
 * @param ctx Optional infra spec, infra-setup fields to echo, logger, and abort signal.
 */
async function finalizeExploreResult(
  job: AgentJob,
  run: ExploreAgentRun,
  ctx: {
    infra?: AgentInfraSpec | ServiceInfraSpec
    infraSetupFields: { infraSetup?: InfraSetupRecord }
    logger: Logger
    signal?: AbortSignal
  },
): Promise<AgentResult> {
  const { summary, stats, stderrTail, usage, callMetrics, runDiag } = run
  const { infra, infraSetupFields, logger, signal } = ctx

  // Optional trailing fields every outcome carries, in the order the result lists them.
  const trailing = () => ({
    ...(usage ? { usage } : {}),
    ...(callMetrics ? { callMetrics } : {}),
    ...infraSetupFields,
  })
  // Every failure here is the same shape: the run produced nothing the caller can use.
  const unusableResult = (error: string): AgentResult => ({
    summary,
    stats,
    error,
    failureCause: 'no-usable-output',
    ...trailing(),
  })

  if (summary.trim() === '') {
    return unusableResult(noOutputReason(stats, stderrTail))
  }

  // Opt-in (document producers): fail loudly on a cut-off/empty final answer, mirroring the
  // bespoke `/spec` handler's `unusableFinalAnswerCause` gate.
  if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
    const cause = unusableFinalAnswerCause(runDiag)
    if (cause) {
      return unusableResult(
        `the agent did not return a usable result: ${cause}.${agentOutputTail(stderrTail, summary)}`,
      )
    }
  }

  // Prose: the summary IS the deliverable.
  if (job.output?.kind !== 'structured') {
    logger.info('agent(explore): done (prose)', { ...stats })
    return { summary, stats, ...trailing() }
  }

  // Structured: the backend coerces/validates + renders from the returned object in a post-op.
  let custom: unknown = null
  let diagnostics: StructuredOutputDiagnostics | undefined
  if (job.output.repair !== false) {
    // Default: a malformed reply gets ONE structured repair call before giving up.
    const resolved = await resolveStructuredOutput(
      {
        label: 'agent',
        shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
        parse: (text) => extractJsonObject(text),
      },
      summary,
      {
        harness: job.harness,
        subscriptionToken: job.subscriptionToken,
        subscriptionBaseUrl: job.subscriptionBaseUrl,
        proxyBaseUrl: job.proxyBaseUrl,
        sessionToken: job.sessionToken,
        model: job.model,
        jobId: job.jobId,
        signal,
      },
    )
    custom = resolved.value
    diagnostics = resolved.diagnostics
  } else {
    // `repair: false`: parse directly; there is no repair channel.
    try {
      custom = extractJsonObject(summary)
    } catch {
      custom = null
    }
  }

  if (custom == null) {
    return unusableResult(noStructuredReason(stats, stderrTail, diagnostics))
  }

  // Which environment the suite ran in is decided by the job's infra spec, NOT the model. A
  // frontend run tests against its live ephemeral backend(s), so it reports `ephemeral` (the
  // TestReport env vocabulary has no separate frontend value).
  const reportedEnvironment = !infra
    ? undefined
    : infra.kind === 'frontend'
      ? 'ephemeral'
      : infra.environment
  if (reportedEnvironment && typeof custom === 'object') {
    ;(custom as Record<string, unknown>).environment = reportedEnvironment
  }

  logger.info('agent(explore): done (structured)', { ...stats })
  return { summary, custom, stats, ...trailing() }
}
623
+
/**
 * Read-only MULTI-REPO exploration (service-connections phase 3).
 *
 * Clones the primary repo and every peer repo as SIBLING directories under one workspace
 * root, runs the agent ONCE with its cwd at that root, and converts the run into a
 * prose/structured result via `finalizeExploreResult`. This function itself makes no commits,
 * pushes nothing and opens no PR — the counterpart of {@link runMultiRepoCoding} for the
 * `bug-investigator`, where peers exist only to be read.
 *
 * The primary is cloned at `job.branch`; each peer at its own `repo.baseBranch`, using the
 * peer's `ghToken` when it has one and the job's otherwise.
 *
 * @param job  The explore job; `job.peerRepos` lists the additional repos to clone.
 * @param opts Run options (logger, abort signal, phase callback) forwarded to the agent run.
 */
async function runMultiRepoExplore(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
  const logger = (opts.log ?? log).child({ kind: 'multi-repo-explore', jobId: job.jobId })

  // One unique sibling directory per repo, claimed primary-first. Per the shared claim scheme
  // (see `makeDirClaimer`), same-named repos are expected to get distinct directories.
  const claimDir = makeDirClaimer()
  const primaryLeg = { repo: job.repo, cloneBranch: job.branch, ghToken: job.ghToken }
  const peerLegs = (job.peerRepos ?? []).map((peer) => ({
    repo: peer.repo,
    cloneBranch: peer.repo.baseBranch,
    ghToken: peer.ghToken ?? job.ghToken,
  }))
  const legs = [primaryLeg, ...peerLegs].map((leg) => ({ ...leg, dirName: claimDir(leg.repo) }))

  return withWorkspace('explore-multi', async (root) => {
    // Clone one leg into its sibling directory under the workspace root.
    const cloneLeg = async (leg: (typeof legs)[number]): Promise<void> => {
      const dir = join(root, leg.dirName)
      await mkdir(dir, { recursive: true })
      logger.info('multi-repo-explore: cloning', {
        repo: leg.dirName,
        cloneBranch: leg.cloneBranch,
      })
      await cloneRepo({
        repo: { ...leg.repo, baseBranch: leg.cloneBranch },
        ghToken: leg.ghToken,
        dir,
        signal: opts.signal,
      })
    }

    // The legs are independent (no work branch, no resume), so they clone in parallel:
    // wall-clock is the slowest single clone, not the sum.
    opts.onPhase?.('clone')
    await Promise.all(legs.map((leg) => cloneLeg(leg)))

    opts.onPhase?.('agent')
    logger.info('multi-repo-explore: running agent', { repos: legs.map((l) => l.dirName) })
    const { summary, stats, stderrTail, usage, callMetrics, diagnostics } =
      await runAgentInWorkspace(
        {
          dir: root,
          systemPrompt: job.systemPrompt,
          userPrompt: job.userPrompt,
          model: job.model,
          harness: job.harness,
          subscriptionToken: job.subscriptionToken,
          subscriptionBaseUrl: job.subscriptionBaseUrl,
          ambientAuth: job.ambientAuth,
          proxyBaseUrl: job.proxyBaseUrl,
          sessionToken: job.sessionToken,
          // Read-only: no edits expected, so the no-progress guard's no-edit bound must not fire.
          expectsEdits: false,
          webToolsGuidance: job.webToolsGuidance,
          webSearchProxy: job.webSearch,
          ...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
          guardLimits: job.guardLimits,
          multiRepo: true,
        },
        opts,
      )

    // No infra is managed on this path, so only the shared result contract applies.
    return finalizeExploreResult(
      job,
      { summary, stats, stderrTail, usage, callMetrics, runDiag: diagnostics },
      { infraSetupFields: {}, logger, signal: opts.signal },
    )
  })
}
710
+
574
711
  /**
575
712
  * Edit-and-push coding: clone `branch` (or resume `newBranch`), run the agent, commit +
576
713
  * push to `pushBranch`, and open `pr` when one is set and the run produced changes. A
@@ -586,6 +723,11 @@ async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResu
586
723
  // clone full, merge the base in to surface the conflicts, then complete the merge
587
724
  // commit + push (no PR). Keyed off job DATA (`mergeBase`), not the agent kind.
588
725
  if (job.mergeBase) return runConflictResolution(job, opts)
726
+ // Multi-repo coding (service-connections phase 3): clone every connected peer repo as a
727
+ // sibling, run the agent once across all of them, and open one PR per changed repo. Keyed
728
+ // off job DATA (`peerRepos`), not the agent kind — the implementer sets it when the task
729
+ // has involved services in distinct repos.
730
+ if (job.peerRepos?.length) return runMultiRepoCoding(job, opts)
589
731
 
590
732
  const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch
591
733
  const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent(