@cat-factory/executor-harness 1.34.4 → 1.34.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/job.js CHANGED
@@ -114,6 +114,42 @@ function parseRepoSpec(repo) {
114
114
  spec.serviceDirectory = dir;
115
115
  return spec;
116
116
  }
117
+ /**
118
+ * Parse the optional multi-repo peer list (service-connections phase 3). Each entry carries a
119
+ * full {@link RepoSpec} (validated + sanitised like the primary), an optional work branch to push, and
120
+ * an optional PR + per-repo token. A malformed list throws; an absent one yields `[]`.
121
+ */
122
+ function parsePeerRepos(value) {
123
+ if (value === undefined || value === null)
124
+ return [];
125
+ if (!Array.isArray(value))
126
+ throw new Error("Invalid job: 'peerRepos' must be an array");
127
+ return value.map((entry, i) => {
128
+ if (typeof entry !== 'object' || entry === null) {
129
+ throw new Error(`Invalid job: 'peerRepos[${i}]' must be an object`);
130
+ }
131
+ const e = entry;
132
+ const spec = {
133
+ repo: parseRepoSpec((e.repo ?? {})),
134
+ };
135
+ // `newBranch` is required for a coding fan-out (it pushes to it) but ABSENT for a
136
+ // read-only explore fan-out (bug-investigator) — validate it only when present.
137
+ if (e.newBranch !== undefined)
138
+ spec.newBranch = str(e.newBranch, `peerRepos[${i}].newBranch`);
139
+ if (typeof e.frameId === 'string' && e.frameId)
140
+ spec.frameId = e.frameId;
141
+ if (typeof e.ghToken === 'string' && e.ghToken)
142
+ spec.ghToken = e.ghToken;
143
+ if (typeof e.pr === 'object' && e.pr !== null) {
144
+ const p = e.pr;
145
+ spec.pr = {
146
+ title: str(p.title, `peerRepos[${i}].pr.title`),
147
+ body: typeof p.body === 'string' ? p.body : '',
148
+ };
149
+ }
150
+ return spec;
151
+ });
152
+ }
117
153
  /** Parse the optional `repo.provider` discriminator (defaults to undefined ⇒ host inference). */
118
154
  function parseVcsProvider(value) {
119
155
  if (value === undefined || value === null)
@@ -442,6 +478,7 @@ export function parseAgentJob(input) {
442
478
  })()
443
479
  : undefined;
444
480
  const infra = parseAgentInfraSpec(o.infra);
481
+ const peerRepos = parsePeerRepos(o.peerRepos);
445
482
  const bootstrap = parseAgentBootstrapSpec(o.bootstrap);
446
483
  const contextFiles = parseContextFiles(o.contextFiles);
447
484
  const packageRegistries = parsePackageRegistries(o.packageRegistries);
@@ -472,6 +509,7 @@ export function parseAgentJob(input) {
472
509
  ? { commitMessage: o.commitMessage }
473
510
  : {}),
474
511
  ...(pr ? { pr } : {}),
512
+ ...(peerRepos.length ? { peerRepos } : {}),
475
513
  ...(o.noChangesIsError === false ? { noChangesIsError: false } : {}),
476
514
  ...(o.persistentCheckout === true ? { persistentCheckout: true } : {}),
477
515
  ...(o.streamFollowUps === true ? { streamFollowUps: true } : {}),
@@ -484,5 +522,11 @@ export function parseAgentJob(input) {
484
522
  // allowed GitHub host too (the installation token is sent to it on the force-push).
485
523
  if (job.bootstrap)
486
524
  assertAllowedHost(job.bootstrap.target.cloneUrl, 'bootstrap.target.cloneUrl');
525
+ // Each peer repo's clone URL receives the installation token on clone/push, so it must be
526
+ // an allowed GitHub host too — a body-supplied peer pointing at an attacker host would
527
+ // exfiltrate the token exactly like a rogue primary clone URL.
528
+ for (const [i, peer] of (job.peerRepos ?? []).entries()) {
529
+ assertAllowedHost(peer.repo.cloneUrl, `peerRepos[${i}].repo.cloneUrl`);
530
+ }
487
531
  return job;
488
532
  }
@@ -165,6 +165,7 @@ export async function runAgentInWorkspace(spec, opts = {}) {
165
165
  guidance: spec.webToolsGuidance,
166
166
  serviceDirectory: spec.serviceDirectory,
167
167
  contextFiles,
168
+ ...(spec.multiRepo ? { multiRepo: true } : {}),
168
169
  });
169
170
  await writePiModelsConfig({ model: spec.model, proxyBaseUrl });
170
171
  const { signal, onActivity, onProgress, onSpan } = opts;
package/dist/pi.js CHANGED
@@ -131,13 +131,37 @@ export async function writeAgentsContext(systemPrompt, opts = {}) {
131
131
  const webTools = opts.webSearch ? (opts.guidance ?? WEB_TOOLS_GUIDANCE) : '';
132
132
  // Tell the agent it's in a monorepo and which subtree is its service, so it scopes
133
133
  // its work (and its build/test commands) there. Only present when the dispatcher
134
- // resolved a monorepo service directory; the agent's cwd already points at it.
135
- const monorepo = opts.serviceDirectory ? monorepoGuidance(opts.serviceDirectory) : '';
134
+ // resolved a monorepo service directory; the agent's cwd already points at it. A
135
+ // MULTI-REPO run runs at the workspace root (cwd spans sibling checkouts), so the
136
+ // monorepo note is suppressed there — the multi-repo mechanics note replaces it.
137
+ const monorepo = opts.serviceDirectory && !opts.multiRepo ? monorepoGuidance(opts.serviceDirectory) : '';
138
+ // Multi-repo mechanics note (service-connections phase 3): the concrete repo→role mapping
139
+ // is in the backend-composed system prompt above; this explains the shared MECHANICS (cwd
140
+ // is the workspace root, repos are sibling checkouts, one PR per dirty repo).
141
+ const multiRepo = opts.multiRepo ? MULTI_REPO_GUIDANCE : '';
136
142
  // Point the agent at any linked context the backend materialised into the checkout
137
143
  // (requirements / RFCs / PRDs / tracker issues) so it reads them on demand.
138
144
  const context = contextGuidance(opts.contextFiles ?? []);
139
- await writeFile(join(dir, 'AGENTS.md'), `${systemPrompt}${BLUEPRINT_GUIDANCE}${SPEC_GUIDANCE}${TODO_GUIDANCE}${monorepo}${webTools}${context}`, 'utf8');
145
+ await writeFile(join(dir, 'AGENTS.md'), `${systemPrompt}${BLUEPRINT_GUIDANCE}${SPEC_GUIDANCE}${TODO_GUIDANCE}${monorepo}${multiRepo}${webTools}${context}`, 'utf8');
140
146
  }
147
+ /** The MULTI-REPO mechanics note appended to AGENTS.md when a run spans sibling checkouts. */
148
+ const MULTI_REPO_GUIDANCE = `
149
+
150
+ ## Multi-repo workspace (work across sibling checkouts)
151
+
152
+ This task spans MORE THAN ONE repository. Your working directory is the WORKSPACE ROOT, and
153
+ each involved repository is checked out as a sibling directory directly under it. The workspace
154
+ root itself is NOT a git repository — run git INSIDE each repository's directory. The system
155
+ prompt above lists which repository is which and each one's role. Make the cross-service
156
+ change coherently across every repository the task requires — a provider's API and its
157
+ consumer's call site belong in the SAME piece of work. Run each repository's own build/test
158
+ commands inside that repository's directory.
159
+
160
+ Commit your own work inside each repository you change (\`cd\` into it, stage the files that
161
+ belong — INCLUDING any new files you added — and commit). The platform will NOT add untracked
162
+ files for you, so anything you leave uncommitted and untracked is lost. Each repository you
163
+ change is opened as a SEPARATE pull request; leave a repository untouched if the task does not
164
+ require changing it.`;
141
165
  /** Directory in the checkout where linked-context files are materialised (see CONTEXT_DIR in agents). */
142
166
  export const CONTEXT_DIR = '.cat-context';
143
167
  /** The AGENTS.md block enumerating the materialised linked-context files, or '' when none. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cat-factory/executor-harness",
3
- "version": "1.34.4",
3
+ "version": "1.34.10",
4
4
  "description": "Container payload: a thin TypeScript wrapper that runs the Pi coding agent against a cloned repo and opens a PR. Runs in the Cloudflare Container (and, in local native mode, as a host process); carries no secrets.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -26,8 +26,8 @@
26
26
  "hono": "^4.12.27",
27
27
  "typescript": "^6.0.3",
28
28
  "vitest": "^4.1.9",
29
- "@cat-factory/server": "0.75.0",
30
- "@cat-factory/spend": "0.10.84"
29
+ "@cat-factory/spend": "0.10.95",
30
+ "@cat-factory/server": "0.82.0"
31
31
  },
32
32
  "scripts": {
33
33
  "build": "tsc -p tsconfig.json",
package/src/agent.ts CHANGED
@@ -26,8 +26,13 @@ import {
26
26
  reinitAndPush,
27
27
  unmergedPaths,
28
28
  } from './git.js'
29
- import type { PiRunStats } from './pi.js'
30
- import { noChangesReason, runCodingAgent } from './coding-agent.js'
29
+ import type { PiRunStats, RunDiagnostics } from './pi.js'
30
+ import {
31
+ makeDirClaimer,
32
+ noChangesReason,
33
+ runCodingAgent,
34
+ runMultiRepoCoding,
35
+ } from './coding-agent.js'
31
36
  import {
32
37
  acquireRepoCheckout,
33
38
  agentNeverActed,
@@ -214,6 +219,47 @@ async function tearDownInfra(dir: string, infra: ServiceInfraSpec): Promise<void
214
219
  }
215
220
  }
216
221
 
222
+ /**
223
+ * Parse an agent's final reply into the structured JSON `custom`, shared by the explore and
224
+ * coding structured-output paths. With repair enabled (default) a malformed reply gets ONE
225
+ * structured repair call before giving up; with `output.repair === false` it parses directly.
226
+ * Returns the parsed value (or null when unusable) plus the repair diagnostics. Never throws —
227
+ * a parse failure is a null value, and each caller decides whether that is fatal (explore: yes;
228
+ * coding: no, the pushed commits are the deliverable).
229
+ */
230
+ async function resolveReplyCustom(
231
+ job: AgentJob,
232
+ summary: string,
233
+ signal: AbortSignal | undefined,
234
+ ): Promise<{ value: unknown; diagnostics?: StructuredOutputDiagnostics }> {
235
+ if (job.output?.repair === false) {
236
+ try {
237
+ return { value: extractJsonObject(summary) }
238
+ } catch {
239
+ return { value: null }
240
+ }
241
+ }
242
+ const resolved = await resolveStructuredOutput(
243
+ {
244
+ label: 'agent',
245
+ shapeHint: job.output?.shapeHint ?? 'Expected a single JSON object.',
246
+ parse: (text) => extractJsonObject(text),
247
+ },
248
+ summary,
249
+ {
250
+ harness: job.harness,
251
+ subscriptionToken: job.subscriptionToken,
252
+ subscriptionBaseUrl: job.subscriptionBaseUrl,
253
+ proxyBaseUrl: job.proxyBaseUrl,
254
+ sessionToken: job.sessionToken,
255
+ model: job.model,
256
+ jobId: job.jobId,
257
+ signal,
258
+ },
259
+ )
260
+ return { value: resolved.value, diagnostics: resolved.diagnostics }
261
+ }
262
+
217
263
  /** Extract the first JSON object from an agent's final message (tolerating fences/prose). */
218
264
  function extractJsonObject(text: string): unknown {
219
265
  const trimmed = text.trim()
@@ -370,6 +416,14 @@ async function runPreviewMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
370
416
  */
371
417
  async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
372
418
  const logger = opts.log ?? log
419
+ // Multi-repo read-only exploration (service-connections phase 3): when the job carries peer
420
+ // repos, clone them all as siblings and run at the workspace root. Keyed off job DATA
421
+ // (`peerRepos`), not the agent kind — the backend sets it for the bug-investigator when the
422
+ // task has involved services in distinct repos. `runMultiRepoExplore` uses its own ephemeral
423
+ // `withWorkspace`, so a `persistentCheckout` flag (which a warm-pool dispatch injects on EVERY
424
+ // job) is harmlessly ignored — it must NOT suppress the fan-out, or a pooled bug-investigator
425
+ // would silently drop its peer repos and only ever see the primary one.
426
+ if (job.peerRepos?.length) return runMultiRepoExplore(job, opts)
373
427
  return acquireRepoCheckout(
374
428
  { persistent: job.persistentCheckout === true, prefix: 'agent-explore', repo: job.repo },
375
429
  async (dir) => {
@@ -453,117 +507,11 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
453
507
  opts,
454
508
  )
455
509
 
456
- if (!summary.trim()) {
457
- return {
458
- summary,
459
- stats,
460
- error: noOutputReason(stats, stderrTail),
461
- failureCause: 'no-usable-output',
462
- ...(usage ? { usage } : {}),
463
- ...(callMetrics ? { callMetrics } : {}),
464
- ...infraSetupFields,
465
- }
466
- }
467
-
468
- // Opt-in (document producers): a final answer cut off at the output ceiling — or empty —
469
- // must FAIL LOUDLY here, BEFORE the structured repair below could launder a truncated
470
- // reply into a half-baked doc the backend then shards/commits + hands onward. Mirrors the
471
- // bespoke `/spec` handler's `unusableFinalAnswerCause` gate (which drove the old loop).
472
- if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
473
- const unusable = unusableFinalAnswerCause(runDiag)
474
- if (unusable) {
475
- return {
476
- summary,
477
- stats,
478
- error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
479
- failureCause: 'no-usable-output',
480
- ...(usage ? { usage } : {}),
481
- ...(callMetrics ? { callMetrics } : {}),
482
- ...infraSetupFields,
483
- }
484
- }
485
- }
486
-
487
- // Prose: the summary IS the deliverable.
488
- if (job.output?.kind !== 'structured') {
489
- logger.info('agent(explore): done (prose)', { ...stats })
490
- return {
491
- summary,
492
- stats,
493
- ...(usage ? { usage } : {}),
494
- ...(callMetrics ? { callMetrics } : {}),
495
- ...infraSetupFields,
496
- }
497
- }
498
-
499
- // Structured: parse the agent's JSON. With repair enabled (default) a malformed
500
- // reply gets ONE structured repair call before giving up; with `repair:false` we
501
- // parse directly (no repair channel). The backend coerces/validates + renders from
502
- // the returned object in a post-op.
503
- let custom: unknown = null
504
- let diagnostics: StructuredOutputDiagnostics | undefined
505
- if (job.output.repair === false) {
506
- try {
507
- custom = extractJsonObject(summary)
508
- } catch {
509
- custom = null
510
- }
511
- } else {
512
- const resolved = await resolveStructuredOutput(
513
- {
514
- label: 'agent',
515
- shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
516
- parse: (text) => extractJsonObject(text),
517
- },
518
- summary,
519
- {
520
- harness: job.harness,
521
- subscriptionToken: job.subscriptionToken,
522
- subscriptionBaseUrl: job.subscriptionBaseUrl,
523
- proxyBaseUrl: job.proxyBaseUrl,
524
- sessionToken: job.sessionToken,
525
- model: job.model,
526
- jobId: job.jobId,
527
- signal: opts.signal,
528
- },
529
- )
530
- custom = resolved.value
531
- diagnostics = resolved.diagnostics
532
- }
533
- if (custom === undefined || custom === null) {
534
- return {
535
- summary,
536
- stats,
537
- error: noStructuredReason(stats, stderrTail, diagnostics),
538
- failureCause: 'no-usable-output',
539
- ...(usage ? { usage } : {}),
540
- ...(callMetrics ? { callMetrics } : {}),
541
- ...infraSetupFields,
542
- }
543
- }
544
- // Stamp the run's actual environment authoritatively onto the structured result when
545
- // infra was managed (the tester): which env the suite ran in is decided by the job's
546
- // infra spec, NOT the model, so the backend can echo it back to the UI deterministically
547
- // even when the model omits it from its JSON (or a structured repair drops it). A
548
- // frontend run tests the app against its live ephemeral backend(s), so it reports
549
- // `ephemeral` (the TestReport env vocabulary has no separate frontend value).
550
- const reportedEnvironment = infra
551
- ? infra.kind === 'frontend'
552
- ? 'ephemeral'
553
- : infra.environment
554
- : undefined
555
- if (reportedEnvironment && typeof custom === 'object') {
556
- ;(custom as Record<string, unknown>).environment = reportedEnvironment
557
- }
558
- logger.info('agent(explore): done (structured)', { ...stats })
559
- return {
560
- summary,
561
- custom,
562
- stats,
563
- ...(usage ? { usage } : {}),
564
- ...(callMetrics ? { callMetrics } : {}),
565
- ...infraSetupFields,
566
- }
510
+ return await finalizeExploreResult(
511
+ job,
512
+ { summary, stats, stderrTail, usage, callMetrics, runDiag },
513
+ { infra, infraSetupFields, logger, signal: opts.signal },
514
+ )
567
515
  } finally {
568
516
  if (managed) await managed.cleanup()
569
517
  }
@@ -571,22 +519,252 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
571
519
  )
572
520
  }
573
521
 
522
+ /** The agent-run outputs the explore result-parsing reads (shared single-/multi-repo). */
523
+ interface ExploreAgentRun {
524
+ summary: string
525
+ stats: PiRunStats
526
+ stderrTail?: string
527
+ usage?: AgentResult['usage']
528
+ callMetrics?: AgentResult['callMetrics']
529
+ runDiag?: RunDiagnostics
530
+ }
531
+
574
532
  /**
575
- * Edit-and-push coding: clone `branch` (or resume `newBranch`), run the agent, commit +
576
- * push to `pushBranch`, and open `pr` when one is set and the run produced changes. A
577
- * no-op is a failure for the implementer (`noChangesIsError` default) and a non-fatal
578
- * no-op for the in-place fixers.
533
+ * Turn an explore agent's raw run into an {@link AgentResult}: guard an empty/truncated reply,
534
+ * then either return the prose summary or parse (+ optionally repair) the structured JSON as
535
+ * `custom` — the backend renders any artifact files from it in a post-op. Extracted so the
536
+ * single-repo {@link runExploreMode} and the read-only {@link runMultiRepoExplore} share ONE
537
+ * result contract (the multi-repo path passes no infra, so the tester-only env stamping no-ops).
538
+ */
539
+ async function finalizeExploreResult(
540
+ job: AgentJob,
541
+ run: ExploreAgentRun,
542
+ ctx: {
543
+ infra?: AgentInfraSpec | ServiceInfraSpec
544
+ infraSetupFields: { infraSetup?: InfraSetupRecord }
545
+ logger: Logger
546
+ signal?: AbortSignal
547
+ },
548
+ ): Promise<AgentResult> {
549
+ const { summary, stats, stderrTail, usage, callMetrics, runDiag } = run
550
+ const { infra, infraSetupFields, logger, signal } = ctx
551
+
552
+ if (!summary.trim()) {
553
+ return {
554
+ summary,
555
+ stats,
556
+ error: noOutputReason(stats, stderrTail),
557
+ failureCause: 'no-usable-output',
558
+ ...(usage ? { usage } : {}),
559
+ ...(callMetrics ? { callMetrics } : {}),
560
+ ...infraSetupFields,
561
+ }
562
+ }
563
+
564
+ // Opt-in (document producers): a final answer cut off at the output ceiling — or empty —
565
+ // must FAIL LOUDLY here, BEFORE the structured repair below could launder a truncated
566
+ // reply into a half-baked doc the backend then shards/commits + hands onward. Mirrors the
567
+ // bespoke `/spec` handler's `unusableFinalAnswerCause` gate (which drove the old loop).
568
+ if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
569
+ const unusable = unusableFinalAnswerCause(runDiag)
570
+ if (unusable) {
571
+ return {
572
+ summary,
573
+ stats,
574
+ error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
575
+ failureCause: 'no-usable-output',
576
+ ...(usage ? { usage } : {}),
577
+ ...(callMetrics ? { callMetrics } : {}),
578
+ ...infraSetupFields,
579
+ }
580
+ }
581
+ }
582
+
583
+ // Prose: the summary IS the deliverable.
584
+ if (job.output?.kind !== 'structured') {
585
+ logger.info('agent(explore): done (prose)', { ...stats })
586
+ return {
587
+ summary,
588
+ stats,
589
+ ...(usage ? { usage } : {}),
590
+ ...(callMetrics ? { callMetrics } : {}),
591
+ ...infraSetupFields,
592
+ }
593
+ }
594
+
595
+ // Structured: parse the agent's JSON via the shared resolver. With repair enabled (default)
596
+ // a malformed reply gets ONE structured repair call before giving up; with `repair:false` it
597
+ // parses directly (no repair channel). The backend coerces/validates + renders from the
598
+ // returned object in a post-op. Unlike the coding path, an unparseable explore reply IS a
599
+ // failure — the report/JSON is the whole deliverable.
600
+ const { value: custom, diagnostics } = await resolveReplyCustom(job, summary, signal)
601
+ if (custom === undefined || custom === null) {
602
+ return {
603
+ summary,
604
+ stats,
605
+ error: noStructuredReason(stats, stderrTail, diagnostics),
606
+ failureCause: 'no-usable-output',
607
+ ...(usage ? { usage } : {}),
608
+ ...(callMetrics ? { callMetrics } : {}),
609
+ ...infraSetupFields,
610
+ }
611
+ }
612
+ // Stamp the run's actual environment authoritatively onto the structured result when
613
+ // infra was managed (the tester): which env the suite ran in is decided by the job's
614
+ // infra spec, NOT the model, so the backend can echo it back to the UI deterministically
615
+ // even when the model omits it from its JSON (or a structured repair drops it). A
616
+ // frontend run tests the app against its live ephemeral backend(s), so it reports
617
+ // `ephemeral` (the TestReport env vocabulary has no separate frontend value).
618
+ const reportedEnvironment = infra
619
+ ? infra.kind === 'frontend'
620
+ ? 'ephemeral'
621
+ : infra.environment
622
+ : undefined
623
+ if (reportedEnvironment && typeof custom === 'object') {
624
+ ;(custom as Record<string, unknown>).environment = reportedEnvironment
625
+ }
626
+ logger.info('agent(explore): done (structured)', { ...stats })
627
+ return {
628
+ summary,
629
+ custom,
630
+ stats,
631
+ ...(usage ? { usage } : {}),
632
+ ...(callMetrics ? { callMetrics } : {}),
633
+ ...infraSetupFields,
634
+ }
635
+ }
636
+
637
+ /**
638
+ * Read-only MULTI-REPO exploration (service-connections phase 3, read-only): clone the primary
639
+ * repo PLUS every connected peer repo as SIBLING checkouts under one workspace root, run the
640
+ * agent ONCE with its cwd at the root (so it can read across every repo the bug touches), and
641
+ * return its prose/structured result — making NO edits, NO commits and opening NO PR. The
642
+ * counterpart of {@link runMultiRepoCoding} for the `bug-investigator`, but strictly read-only:
643
+ * peers carry no `newBranch`/`pr`, nothing is pushed, and the peers exist only to be read. The
644
+ * multi-repo layout is explained to the agent by the backend-composed system-prompt section
645
+ * (which repo/subdir each service lives in) + the harness's own AGENTS.md multi-repo note.
646
+ */
647
+ async function runMultiRepoExplore(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
648
+ const logger = (opts.log ?? log).child({ kind: 'multi-repo-explore', jobId: job.jobId })
649
+ const peers = job.peerRepos ?? []
650
+
651
+ // Unique sibling directory per repo (owner-prefixed on a name collision), so two repos
652
+ // named the same never clobber each other — shared claim scheme with the coding fan-out.
653
+ const claimDir = makeDirClaimer()
654
+ const legs = [
655
+ { repo: job.repo, cloneBranch: job.branch, ghToken: job.ghToken },
656
+ ...peers.map((peer) => ({
657
+ repo: peer.repo,
658
+ cloneBranch: peer.repo.baseBranch,
659
+ ghToken: peer.ghToken ?? job.ghToken,
660
+ })),
661
+ ].map((leg) => ({ ...leg, dirName: claimDir(leg.repo) }))
662
+
663
+ return withWorkspace('explore-multi', async (root) => {
664
+ // Clone phase: every repo (read-only) into its sibling dir under the workspace root. No
665
+ // work branch, no resume — the investigator only reads — so the legs are independent and
666
+ // clone in parallel (wall-clock is the slowest single clone, not the sum).
667
+ opts.onPhase?.('clone')
668
+ await Promise.all(
669
+ legs.map(async (leg) => {
670
+ const dir = join(root, leg.dirName)
671
+ await mkdir(dir, { recursive: true })
672
+ logger.info('multi-repo-explore: cloning', {
673
+ repo: leg.dirName,
674
+ cloneBranch: leg.cloneBranch,
675
+ })
676
+ await cloneRepo({
677
+ repo: { ...leg.repo, baseBranch: leg.cloneBranch },
678
+ ghToken: leg.ghToken,
679
+ dir,
680
+ signal: opts.signal,
681
+ })
682
+ }),
683
+ )
684
+
685
+ opts.onPhase?.('agent')
686
+ logger.info('multi-repo-explore: running agent', { repos: legs.map((l) => l.dirName) })
687
+ const run = await runAgentInWorkspace(
688
+ {
689
+ dir: root,
690
+ systemPrompt: job.systemPrompt,
691
+ userPrompt: job.userPrompt,
692
+ model: job.model,
693
+ harness: job.harness,
694
+ subscriptionToken: job.subscriptionToken,
695
+ subscriptionBaseUrl: job.subscriptionBaseUrl,
696
+ ambientAuth: job.ambientAuth,
697
+ proxyBaseUrl: job.proxyBaseUrl,
698
+ sessionToken: job.sessionToken,
699
+ // Read-only: no edits expected, so the no-progress guard's no-edit bound must not fire.
700
+ expectsEdits: false,
701
+ webToolsGuidance: job.webToolsGuidance,
702
+ webSearchProxy: job.webSearch,
703
+ ...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
704
+ guardLimits: job.guardLimits,
705
+ multiRepo: true,
706
+ },
707
+ opts,
708
+ )
709
+ return finalizeExploreResult(
710
+ job,
711
+ {
712
+ summary: run.summary,
713
+ stats: run.stats,
714
+ stderrTail: run.stderrTail,
715
+ usage: run.usage,
716
+ callMetrics: run.callMetrics,
717
+ runDiag: run.diagnostics,
718
+ },
719
+ { infraSetupFields: {}, logger, signal: opts.signal },
720
+ )
721
+ })
722
+ }
723
+
724
+ /**
725
+ * Edit-and-push coding, dispatching on job DATA: repo-bootstrap (force-push a fresh history to a
726
+ * separate target repo), conflict-resolution (merge the base in, resolve, push back), multi-repo
727
+ * fan-out (sibling checkouts + one PR per changed repo), else the ordinary single-repo flow.
728
+ * After the flow, a STRUCTURED coding kind (e.g. `repro-test`, whose deliverable is BOTH a pushed
729
+ * commit AND a JSON outcome) parses its final reply into `custom` — best-effort, so an unparseable
730
+ * outcome degrades to no `custom` (the backend resolver then defaults) rather than failing the
731
+ * run, whose real deliverable is the pushed commits.
579
732
  */
580
733
  async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
581
734
  // Repo bootstrap is a coding run that force-pushes a fresh history to a SEPARATE target
582
735
  // repo (clone + adapt a reference, or scaffold from scratch). Keyed off job DATA
583
- // (`bootstrap`), not the agent kind.
736
+ // (`bootstrap`), not the agent kind. Bootstrap/conflict never carry a structured `output`.
584
737
  if (job.bootstrap) return runBootstrap(job, opts)
585
738
  // Conflict resolution is a coding run with a different pre/post around the agent:
586
739
  // clone full, merge the base in to surface the conflicts, then complete the merge
587
740
  // commit + push (no PR). Keyed off job DATA (`mergeBase`), not the agent kind.
588
741
  if (job.mergeBase) return runConflictResolution(job, opts)
742
+ // Multi-repo coding (service-connections phase 3): clone every connected peer repo as a
743
+ // sibling, run the agent once across all of them, and open one PR per changed repo. Keyed
744
+ // off job DATA (`peerRepos`), not the agent kind — the implementer sets it when the task
745
+ // has involved services in distinct repos.
746
+ const result = job.peerRepos?.length
747
+ ? await runMultiRepoCoding(job, opts)
748
+ : await runSingleRepoCoding(job, opts)
749
+
750
+ // Structured coding kind (repro-test): fold the final reply's JSON onto `custom` so the
751
+ // backend post-completion resolver records the outcome. Skipped on a failed run (its `error`
752
+ // is the signal) and when there is no reply to parse. Best-effort: a null parse leaves
753
+ // `custom` unset (the run still succeeds on its commits).
754
+ if (job.output?.kind === 'structured' && !result.error && result.summary) {
755
+ const { value } = await resolveReplyCustom(job, result.summary, opts.signal)
756
+ if (value !== null && value !== undefined) result.custom = value
757
+ }
758
+ return result
759
+ }
589
760
 
761
+ /**
762
+ * The ordinary single-repo coding flow: clone `branch` (or resume `newBranch`), run the agent,
763
+ * commit + push to `pushBranch`, and open `pr` when one is set and the run produced changes. A
764
+ * no-op is a failure for the implementer (`noChangesIsError` default) and a non-fatal no-op for
765
+ * the in-place fixers (and for a seed-only kind like `repro-test`).
766
+ */
767
+ async function runSingleRepoCoding(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
590
768
  const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch
591
769
  const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent(
592
770
  {