npm - @cat-factory/executor-harness - Versions diffs - 1.34.4 → 1.34.10 - Mend

@cat-factory/executor-harness 1.34.4 → 1.34.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/job.js CHANGED Viewed

@@ -114,6 +114,42 @@ function parseRepoSpec(repo) {
         spec.serviceDirectory = dir;
     return spec;
 }
+/**
+ * Parse the optional multi-repo peer list (service-connections phase 3). Each entry carries a
+ * full {@link RepoSpec} (validated + sanitised like the primary), the work branch to push, and
+ * an optional PR + per-repo token. A malformed list throws; an absent one yields `[]`.
+ *
+ * @param value - The raw `peerRepos` field of the job body; `undefined` and `null` both mean
+ *   "no peers".
+ * @returns One spec per entry, in input order. `repo` is always set; `newBranch`, `frameId`,
+ *   `ghToken` and `pr` are copied only when supplied in the accepted form (see the inline notes).
+ * @throws Error when `value` is not an array or an entry is not a non-null object. Whatever
+ *   `parseRepoSpec` / `str` (defined elsewhere in this file) throw propagates unchanged.
+ */
+function parsePeerRepos(value) {
+    if (value === undefined || value === null)
+        return [];
+    if (!Array.isArray(value))
+        throw new Error("Invalid job: 'peerRepos' must be an array");
+    return value.map((entry, i) => {
+        if (typeof entry !== 'object' || entry === null) {
+            throw new Error(`Invalid job: 'peerRepos[${i}]' must be an object`);
+        }
+        const e = entry;
+        const spec = {
+            repo: parseRepoSpec((e.repo ?? {})), // a missing `repo` is handed over as {}; validation is left to parseRepoSpec
+        };
+        // `newBranch` is required for a coding fan-out (it pushes to it) but ABSENT for a
+        // read-only explore fan-out (bug-investigator) — validate it only when present.
+        if (e.newBranch !== undefined) // only `undefined` counts as absent; a `null` is still passed to str()
+            spec.newBranch = str(e.newBranch, `peerRepos[${i}].newBranch`);
+        if (typeof e.frameId === 'string' && e.frameId) // non-string or empty values are dropped, not rejected
+            spec.frameId = e.frameId;
+        if (typeof e.ghToken === 'string' && e.ghToken) // same leniency as frameId
+            spec.ghToken = e.ghToken;
+        if (typeof e.pr === 'object' && e.pr !== null) { // a non-object `pr` is ignored rather than rejected
+            const p = e.pr;
+            spec.pr = {
+                title: str(p.title, `peerRepos[${i}].pr.title`),
+                body: typeof p.body === 'string' ? p.body : '', // a non-string body becomes ''
+            };
+        }
+        return spec;
+    });
+}
 /** Parse the optional `repo.provider` discriminator (defaults to undefined ⇒ host inference). */
 function parseVcsProvider(value) {
     if (value === undefined || value === null)
@@ -442,6 +478,7 @@ export function parseAgentJob(input) {
         })()
         : undefined;
     const infra = parseAgentInfraSpec(o.infra);
+    const peerRepos = parsePeerRepos(o.peerRepos);
     const bootstrap = parseAgentBootstrapSpec(o.bootstrap);
     const contextFiles = parseContextFiles(o.contextFiles);
     const packageRegistries = parsePackageRegistries(o.packageRegistries);
@@ -472,6 +509,7 @@ export function parseAgentJob(input) {
             ? { commitMessage: o.commitMessage }
             : {}),
         ...(pr ? { pr } : {}),
+        ...(peerRepos.length ? { peerRepos } : {}),
         ...(o.noChangesIsError === false ? { noChangesIsError: false } : {}),
         ...(o.persistentCheckout === true ? { persistentCheckout: true } : {}),
         ...(o.streamFollowUps === true ? { streamFollowUps: true } : {}),
@@ -484,5 +522,11 @@ export function parseAgentJob(input) {
     // allowed GitHub host too (the installation token is sent to it on the force-push).
     if (job.bootstrap)
         assertAllowedHost(job.bootstrap.target.cloneUrl, 'bootstrap.target.cloneUrl');
+    // Each peer repo's clone URL receives the installation token on clone/push, so it must be
+    // an allowed GitHub host too — a body-supplied peer pointing at an attacker host would
+    // exfiltrate the token exactly like a rogue primary clone URL.
+    for (const [i, peer] of (job.peerRepos ?? []).entries()) {
+        assertAllowedHost(peer.repo.cloneUrl, `peerRepos[${i}].repo.cloneUrl`);
+    }
     return job;
 }

package/dist/pi-workspace.js CHANGED Viewed

@@ -165,6 +165,7 @@ export async function runAgentInWorkspace(spec, opts = {}) {
         guidance: spec.webToolsGuidance,
         serviceDirectory: spec.serviceDirectory,
         contextFiles,
+        ...(spec.multiRepo ? { multiRepo: true } : {}),
     });
     await writePiModelsConfig({ model: spec.model, proxyBaseUrl });
     const { signal, onActivity, onProgress, onSpan } = opts;

package/dist/pi.js CHANGED Viewed

@@ -131,13 +131,37 @@ export async function writeAgentsContext(systemPrompt, opts = {}) {
     const webTools = opts.webSearch ? (opts.guidance ?? WEB_TOOLS_GUIDANCE) : '';
     // Tell the agent it's in a monorepo and which subtree is its service, so it scopes
     // its work (and its build/test commands) there. Only present when the dispatcher
-    // resolved a monorepo service directory; the agent's cwd already points at it.
-    const monorepo = opts.serviceDirectory ? monorepoGuidance(opts.serviceDirectory) : '';
+    // resolved a monorepo service directory; the agent's cwd already points at it. A
+    // MULTI-REPO run runs at the workspace root (cwd spans sibling checkouts), so the
+    // monorepo note is suppressed there — the multi-repo mechanics note replaces it.
+    const monorepo = opts.serviceDirectory && !opts.multiRepo ? monorepoGuidance(opts.serviceDirectory) : '';
+    // Multi-repo mechanics note (service-connections phase 3): the concrete repo→role mapping
+    // is in the backend-composed system prompt above; this explains the shared MECHANICS (cwd
+    // is the workspace root, repos are sibling checkouts, one PR per dirty repo).
+    const multiRepo = opts.multiRepo ? MULTI_REPO_GUIDANCE : '';
     // Point the agent at any linked context the backend materialised into the checkout
     // (requirements / RFCs / PRDs / tracker issues) so it reads them on demand.
     const context = contextGuidance(opts.contextFiles ?? []);
-    await writeFile(join(dir, 'AGENTS.md'), `${systemPrompt}${BLUEPRINT_GUIDANCE}${SPEC_GUIDANCE}${TODO_GUIDANCE}${monorepo}${webTools}${context}`, 'utf8');
+    await writeFile(join(dir, 'AGENTS.md'), `${systemPrompt}${BLUEPRINT_GUIDANCE}${SPEC_GUIDANCE}${TODO_GUIDANCE}${monorepo}${multiRepo}${webTools}${context}`, 'utf8');
 }
+/**
+ * The MULTI-REPO mechanics note appended to AGENTS.md when a run spans sibling checkouts.
+ *
+ * `writeAgentsContext` interpolates it into the AGENTS.md it writes — after the to-do guidance
+ * and ahead of the web-tools and linked-context blocks — only when `opts.multiRepo` is truthy;
+ * otherwise an empty string takes its place. The text opens with a newline so it starts on its
+ * own line after whatever precedes it.
+ *
+ * It is declared below the function that reads it, which is fine as long as that function is
+ * first called after this module has finished evaluating.
+ *
+ * NOTE(review): the wording tells the agent to commit its changes and mentions one pull request
+ * per changed repository. Confirm that is also the intended guidance for read-only multi-repo
+ * runs, if those set `multiRepo` as well.
+ */
+const MULTI_REPO_GUIDANCE = `
+## Multi-repo workspace (work across sibling checkouts)
+This task spans MORE THAN ONE repository. Your working directory is the WORKSPACE ROOT, and
+each involved repository is checked out as a sibling directory directly under it. The workspace
+root itself is NOT a git repository — run git INSIDE each repository's directory. The system
+prompt above lists which repository is which and each one's role. Make the cross-service
+change coherently across every repository the task requires — a provider's API and its
+consumer's call site belong in the SAME piece of work. Run each repository's own build/test
+commands inside that repository's directory.
+Commit your own work inside each repository you change (\`cd\` into it, stage the files that
+belong — INCLUDING any new files you added — and commit). The platform will NOT add untracked
+files for you, so anything you leave uncommitted and untracked is lost. Each repository you
+change is opened as a SEPARATE pull request; leave a repository untouched if the task does not
+require changing it.`;
 /** Directory in the checkout where linked-context files are materialised (see CONTEXT_DIR in agents). */
 export const CONTEXT_DIR = '.cat-context';
 /** The AGENTS.md block enumerating the materialised linked-context files, or '' when none. */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cat-factory/executor-harness",
-  "version": "1.34.4",
+  "version": "1.34.10",
   "description": "Container payload: a thin TypeScript wrapper that runs the Pi coding agent against a cloned repo and opens a PR. Runs in the Cloudflare Container (and, in local native mode, as a host process); carries no secrets.",
   "repository": {
     "type": "git",
@@ -26,8 +26,8 @@
     "hono": "^4.12.27",
     "typescript": "^6.0.3",
     "vitest": "^4.1.9",
-    "@cat-factory/server": "0.75.0",
-    "@cat-factory/spend": "0.10.84"
+    "@cat-factory/spend": "0.10.95",
+    "@cat-factory/server": "0.82.0"
   },
   "scripts": {
     "build": "tsc -p tsconfig.json",

package/src/agent.ts CHANGED Viewed

@@ -26,8 +26,13 @@ import {
   reinitAndPush,
   unmergedPaths,
 } from './git.js'
-import type { PiRunStats } from './pi.js'
-import { noChangesReason, runCodingAgent } from './coding-agent.js'
+import type { PiRunStats, RunDiagnostics } from './pi.js'
+import {
+  makeDirClaimer,
+  noChangesReason,
+  runCodingAgent,
+  runMultiRepoCoding,
+} from './coding-agent.js'
 import {
   acquireRepoCheckout,
   agentNeverActed,
@@ -214,6 +219,47 @@ async function tearDownInfra(dir: string, infra: ServiceInfraSpec): Promise<void
   }
 }
+/**
+ * Parse an agent's final reply into the structured JSON `custom`, shared by the explore and
+ * coding structured-output paths. With repair enabled (default) a malformed reply gets ONE
+ * structured repair call before giving up; with `output.repair === false` it parses directly.
+ * Returns the parsed value (or null when unusable) plus the repair diagnostics. Never throws —
+ * a parse failure is a null value, and each caller decides whether that is fatal (explore: yes;
+ * coding: no, the pushed commits are the deliverable).
+ *
+ * NOTE(review): only the direct-parse branch is wrapped in try/catch. The repair branch awaits
+ * `resolveStructuredOutput` as-is, so "never throws" holds there only if that helper never
+ * rejects — confirm against its implementation.
+ *
+ * @param job - The job; `output.repair` and `output.shapeHint` are read, and the
+ *   harness/model/credential fields are forwarded to the repair call.
+ * @param summary - The agent's final reply text to parse.
+ * @param signal - Optional abort signal forwarded to the repair call.
+ * @returns `value` is the parsed JSON, or `null` when direct parsing fails; `diagnostics` is
+ *   present only on the repair path.
+ */
+async function resolveReplyCustom(
+  job: AgentJob,
+  summary: string,
+  signal: AbortSignal | undefined,
+): Promise<{ value: unknown; diagnostics?: StructuredOutputDiagnostics }> {
+  if (job.output?.repair === false) {
+    try {
+      return { value: extractJsonObject(summary) }
+    } catch {
+      return { value: null } // unparseable reply: reported as null, not raised
+    }
+  }
+  const resolved = await resolveStructuredOutput(
+    {
+      label: 'agent',
+      shapeHint: job.output?.shapeHint ?? 'Expected a single JSON object.',
+      parse: (text) => extractJsonObject(text),
+    },
+    summary,
+    {
+      harness: job.harness,
+      subscriptionToken: job.subscriptionToken,
+      subscriptionBaseUrl: job.subscriptionBaseUrl,
+      proxyBaseUrl: job.proxyBaseUrl,
+      sessionToken: job.sessionToken,
+      model: job.model,
+      jobId: job.jobId,
+      signal,
+    },
+  )
+  return { value: resolved.value, diagnostics: resolved.diagnostics }
+}
 /** Extract the first JSON object from an agent's final message (tolerating fences/prose). */
 function extractJsonObject(text: string): unknown {
   const trimmed = text.trim()
@@ -370,6 +416,14 @@ async function runPreviewMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
  */
 async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
   const logger = opts.log ?? log
+  // Multi-repo read-only exploration (service-connections phase 3): when the job carries peer
+  // repos, clone them all as siblings and run at the workspace root. Keyed off job DATA
+  // (`peerRepos`), not the agent kind — the backend sets it for the bug-investigator when the
+  // task has involved services in distinct repos. `runMultiRepoExplore` uses its own ephemeral
+  // `withWorkspace`, so a `persistentCheckout` flag (which a warm-pool dispatch injects on EVERY
+  // job) is harmlessly ignored — it must NOT suppress the fan-out, or a pooled bug-investigator
+  // would silently drop its peer repos and only ever see the primary one.
+  if (job.peerRepos?.length) return runMultiRepoExplore(job, opts)
   return acquireRepoCheckout(
     { persistent: job.persistentCheckout === true, prefix: 'agent-explore', repo: job.repo },
     async (dir) => {
@@ -453,117 +507,11 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
           opts,
         )
-        if (!summary.trim()) {
-          return {
-            summary,
-            stats,
-            error: noOutputReason(stats, stderrTail),
-            failureCause: 'no-usable-output',
-            ...(usage ? { usage } : {}),
-            ...(callMetrics ? { callMetrics } : {}),
-            ...infraSetupFields,
-          }
-        }
-        // Opt-in (document producers): a final answer cut off at the output ceiling — or empty —
-        // must FAIL LOUDLY here, BEFORE the structured repair below could launder a truncated
-        // reply into a half-baked doc the backend then shards/commits + hands onward. Mirrors the
-        // bespoke `/spec` handler's `unusableFinalAnswerCause` gate (which drove the old loop).
-        if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
-          const unusable = unusableFinalAnswerCause(runDiag)
-          if (unusable) {
-            return {
-              summary,
-              stats,
-              error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
-              failureCause: 'no-usable-output',
-              ...(usage ? { usage } : {}),
-              ...(callMetrics ? { callMetrics } : {}),
-              ...infraSetupFields,
-            }
-          }
-        }
-        // Prose: the summary IS the deliverable.
-        if (job.output?.kind !== 'structured') {
-          logger.info('agent(explore): done (prose)', { ...stats })
-          return {
-            summary,
-            stats,
-            ...(usage ? { usage } : {}),
-            ...(callMetrics ? { callMetrics } : {}),
-            ...infraSetupFields,
-          }
-        }
-        // Structured: parse the agent's JSON. With repair enabled (default) a malformed
-        // reply gets ONE structured repair call before giving up; with `repair:false` we
-        // parse directly (no repair channel). The backend coerces/validates + renders from
-        // the returned object in a post-op.
-        let custom: unknown = null
-        let diagnostics: StructuredOutputDiagnostics | undefined
-        if (job.output.repair === false) {
-          try {
-            custom = extractJsonObject(summary)
-          } catch {
-            custom = null
-          }
-        } else {
-          const resolved = await resolveStructuredOutput(
-            {
-              label: 'agent',
-              shapeHint: job.output.shapeHint ?? 'Expected a single JSON object.',
-              parse: (text) => extractJsonObject(text),
-            },
-            summary,
-            {
-              harness: job.harness,
-              subscriptionToken: job.subscriptionToken,
-              subscriptionBaseUrl: job.subscriptionBaseUrl,
-              proxyBaseUrl: job.proxyBaseUrl,
-              sessionToken: job.sessionToken,
-              model: job.model,
-              jobId: job.jobId,
-              signal: opts.signal,
-            },
-          )
-          custom = resolved.value
-          diagnostics = resolved.diagnostics
-        }
-        if (custom === undefined || custom === null) {
-          return {
-            summary,
-            stats,
-            error: noStructuredReason(stats, stderrTail, diagnostics),
-            failureCause: 'no-usable-output',
-            ...(usage ? { usage } : {}),
-            ...(callMetrics ? { callMetrics } : {}),
-            ...infraSetupFields,
-          }
-        }
-        // Stamp the run's actual environment authoritatively onto the structured result when
-        // infra was managed (the tester): which env the suite ran in is decided by the job's
-        // infra spec, NOT the model, so the backend can echo it back to the UI deterministically
-        // even when the model omits it from its JSON (or a structured repair drops it). A
-        // frontend run tests the app against its live ephemeral backend(s), so it reports
-        // `ephemeral` (the TestReport env vocabulary has no separate frontend value).
-        const reportedEnvironment = infra
-          ? infra.kind === 'frontend'
-            ? 'ephemeral'
-            : infra.environment
-          : undefined
-        if (reportedEnvironment && typeof custom === 'object') {
-          ;(custom as Record<string, unknown>).environment = reportedEnvironment
-        }
-        logger.info('agent(explore): done (structured)', { ...stats })
-        return {
-          summary,
-          custom,
-          stats,
-          ...(usage ? { usage } : {}),
-          ...(callMetrics ? { callMetrics } : {}),
-          ...infraSetupFields,
-        }
+        return await finalizeExploreResult(
+          job,
+          { summary, stats, stderrTail, usage, callMetrics, runDiag },
+          { infra, infraSetupFields, logger, signal: opts.signal },
+        )
       } finally {
         if (managed) await managed.cleanup()
       }
@@ -571,22 +519,252 @@ async function runExploreMode(job: AgentJob, opts: RunOptions): Promise<AgentRes
   )
 }
+/**
+ * The agent-run outputs the explore result-parsing reads (shared single-/multi-repo).
+ *
+ * Only `summary` and `stats` are required; every optional field is simply left off the final
+ * result (or skipped by the check that would read it) when absent.
+ */
+interface ExploreAgentRun {
+  summary: string // the agent's final reply text; blank means "no usable output"
+  stats: PiRunStats // run statistics, logged and returned on the result
+  stderrTail?: string // used when building failure messages
+  usage?: AgentResult['usage'] // copied onto the result when present
+  callMetrics?: AgentResult['callMetrics'] // copied onto the result when present
+  runDiag?: RunDiagnostics // consulted by the opt-in unusable-final-answer check
+}
 /**
- * Edit-and-push coding: clone `branch` (or resume `newBranch`), run the agent, commit +
- * push to `pushBranch`, and open `pr` when one is set and the run produced changes. A
- * no-op is a failure for the implementer (`noChangesIsError` default) and a non-fatal
- * no-op for the in-place fixers.
+ * Turn an explore agent's raw run into an {@link AgentResult}: guard an empty/truncated reply,
+ * then either return the prose summary or parse (+ optionally repair) the structured JSON as
+ * `custom` — the backend renders any artifact files from it in a post-op. Extracted so the
+ * single-repo {@link runExploreMode} and the read-only {@link runMultiRepoExplore} share ONE
+ * result contract (the multi-repo path passes no infra, so the tester-only env stamping no-ops).
+ */
+async function finalizeExploreResult(
+  job: AgentJob,
+  run: ExploreAgentRun,
+  ctx: {
+    infra?: AgentInfraSpec | ServiceInfraSpec
+    infraSetupFields: { infraSetup?: InfraSetupRecord }
+    logger: Logger
+    signal?: AbortSignal
+  },
+): Promise<AgentResult> {
+  const { summary, stats, stderrTail, usage, callMetrics, runDiag } = run
+  const { infra, infraSetupFields, logger, signal } = ctx
+  if (!summary.trim()) { // blank or whitespace-only reply: nothing to deliver
+    return {
+      summary,
+      stats,
+      error: noOutputReason(stats, stderrTail),
+      failureCause: 'no-usable-output',
+      ...(usage ? { usage } : {}),
+      ...(callMetrics ? { callMetrics } : {}),
+      ...infraSetupFields,
+    }
+  }
+  // Opt-in (document producers): a final answer cut off at the output ceiling — or empty —
+  // must FAIL LOUDLY here, BEFORE the structured repair below could launder a truncated
+  // reply into a half-baked doc the backend then shards/commits + hands onward. Mirrors the
+  // bespoke `/spec` handler's `unusableFinalAnswerCause` gate (which drove the old loop).
+  if (job.output?.kind === 'structured' && job.output.failOnUnusableFinal) {
+    const unusable = unusableFinalAnswerCause(runDiag)
+    if (unusable) {
+      return {
+        summary,
+        stats,
+        error: `the agent did not return a usable result: ${unusable}.${agentOutputTail(stderrTail, summary)}`,
+        failureCause: 'no-usable-output',
+        ...(usage ? { usage } : {}),
+        ...(callMetrics ? { callMetrics } : {}),
+        ...infraSetupFields,
+      }
+    }
+  }
+  // Prose: the summary IS the deliverable.
+  if (job.output?.kind !== 'structured') {
+    logger.info('agent(explore): done (prose)', { ...stats })
+    return {
+      summary,
+      stats,
+      ...(usage ? { usage } : {}),
+      ...(callMetrics ? { callMetrics } : {}),
+      ...infraSetupFields,
+    }
+  }
+  // Structured: parse the agent's JSON via the shared resolver. With repair enabled (default)
+  // a malformed reply gets ONE structured repair call before giving up; with `repair:false` it
+  // parses directly (no repair channel). The backend coerces/validates + renders from the
+  // returned object in a post-op. Unlike the coding path, an unparseable explore reply IS a
+  // failure — the report/JSON is the whole deliverable.
+  const { value: custom, diagnostics } = await resolveReplyCustom(job, summary, signal)
+  if (custom === undefined || custom === null) { // no usable JSON, even after the repair attempt when enabled
+    return {
+      summary,
+      stats,
+      error: noStructuredReason(stats, stderrTail, diagnostics),
+      failureCause: 'no-usable-output',
+      ...(usage ? { usage } : {}),
+      ...(callMetrics ? { callMetrics } : {}),
+      ...infraSetupFields,
+    }
+  }
+  // Stamp the run's actual environment authoritatively onto the structured result when
+  // infra was managed (the tester): which env the suite ran in is decided by the job's
+  // infra spec, NOT the model, so the backend can echo it back to the UI deterministically
+  // even when the model omits it from its JSON (or a structured repair drops it). A
+  // frontend run tests the app against its live ephemeral backend(s), so it reports
+  // `ephemeral` (the TestReport env vocabulary has no separate frontend value).
+  const reportedEnvironment = infra
+    ? infra.kind === 'frontend'
+      ? 'ephemeral'
+      : infra.environment
+    : undefined
+  if (reportedEnvironment && typeof custom === 'object') { // custom is non-null here, so this admits objects and arrays alike
+    ;(custom as Record<string, unknown>).environment = reportedEnvironment // overwrites in place whatever the model reported
+  }
+  logger.info('agent(explore): done (structured)', { ...stats })
+  return {
+    summary,
+    custom,
+    stats,
+    ...(usage ? { usage } : {}),
+    ...(callMetrics ? { callMetrics } : {}),
+    ...infraSetupFields,
+  }
+}
+/**
+ * Read-only MULTI-REPO exploration (service-connections phase 3, read-only): clone the primary
+ * repo PLUS every connected peer repo as SIBLING checkouts under one workspace root, run the
+ * agent ONCE with its cwd at the root (so it can read across every repo the bug touches), and
+ * return its prose/structured result — making NO edits, NO commits and opening NO PR. The
+ * counterpart of {@link runMultiRepoCoding} for the `bug-investigator`, but strictly read-only:
+ * peers carry no `newBranch`/`pr`, nothing is pushed, and the peers exist only to be read. The
+ * multi-repo layout is explained to the agent by the backend-composed system-prompt section
+ * (which repo/subdir each service lives in) + the harness's own AGENTS.md multi-repo note.
+ *
+ * A failed clone rejects the `Promise.all` and with it the whole run; nothing in this function
+ * catches it, and the clones still in flight are not cancelled here.
+ *
+ * NOTE(review): `multiRepo: true` presumably causes the harness's multi-repo AGENTS.md note to
+ * be written. If that note talks about committing work and opening pull requests, confirm the
+ * wording suits this read-only path.
+ *
+ * @param job - The explore job; `repo`/`branch`/`ghToken` describe the primary checkout and
+ *   `peerRepos` the additional ones (an absent list is treated as empty).
+ * @param opts - Run options; `log`, `onPhase` and `signal` are read here and the whole object
+ *   is forwarded to the agent run.
+ * @returns The result built by `finalizeExploreResult` from the agent's reply.
+ */
+async function runMultiRepoExplore(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
+  const logger = (opts.log ?? log).child({ kind: 'multi-repo-explore', jobId: job.jobId })
+  const peers = job.peerRepos ?? []
+  // Unique sibling directory per repo (owner-prefixed on a name collision), so two repos
+  // named the same never clobber each other — shared claim scheme with the coding fan-out.
+  const claimDir = makeDirClaimer()
+  const legs = [
+    { repo: job.repo, cloneBranch: job.branch, ghToken: job.ghToken },
+    ...peers.map((peer) => ({
+      repo: peer.repo,
+      cloneBranch: peer.repo.baseBranch, // peers are cloned at their own base branch
+      ghToken: peer.ghToken ?? job.ghToken, // per-repo token when given, else the job's
+    })),
+  ].map((leg) => ({ ...leg, dirName: claimDir(leg.repo) }))
+  return withWorkspace('explore-multi', async (root) => {
+    // Clone phase: every repo (read-only) into its sibling dir under the workspace root. No
+    // work branch, no resume — the investigator only reads — so the legs are independent and
+    // clone in parallel (wall-clock is the slowest single clone, not the sum).
+    opts.onPhase?.('clone')
+    await Promise.all(
+      legs.map(async (leg) => {
+        const dir = join(root, leg.dirName)
+        await mkdir(dir, { recursive: true })
+        logger.info('multi-repo-explore: cloning', {
+          repo: leg.dirName,
+          cloneBranch: leg.cloneBranch,
+        })
+        await cloneRepo({
+          repo: { ...leg.repo, baseBranch: leg.cloneBranch }, // the branch to clone is passed as the spec's baseBranch
+          ghToken: leg.ghToken,
+          dir,
+          signal: opts.signal,
+        })
+      }),
+    )
+    opts.onPhase?.('agent')
+    logger.info('multi-repo-explore: running agent', { repos: legs.map((l) => l.dirName) })
+    const run = await runAgentInWorkspace(
+      {
+        dir: root, // the workspace root, not any single checkout
+        systemPrompt: job.systemPrompt,
+        userPrompt: job.userPrompt,
+        model: job.model,
+        harness: job.harness,
+        subscriptionToken: job.subscriptionToken,
+        subscriptionBaseUrl: job.subscriptionBaseUrl,
+        ambientAuth: job.ambientAuth,
+        proxyBaseUrl: job.proxyBaseUrl,
+        sessionToken: job.sessionToken,
+        // Read-only: no edits expected, so the no-progress guard's no-edit bound must not fire.
+        expectsEdits: false,
+        webToolsGuidance: job.webToolsGuidance,
+        webSearchProxy: job.webSearch,
+        ...(job.contextFiles ? { contextFiles: job.contextFiles } : {}),
+        guardLimits: job.guardLimits,
+        multiRepo: true,
+      },
+      opts,
+    )
+    return finalizeExploreResult(
+      job,
+      {
+        summary: run.summary,
+        stats: run.stats,
+        stderrTail: run.stderrTail,
+        usage: run.usage,
+        callMetrics: run.callMetrics,
+        runDiag: run.diagnostics,
+      },
+      { infraSetupFields: {}, logger, signal: opts.signal }, // no infra is passed, so no environment is stamped
+    )
+  })
+}
+/**
+ * Edit-and-push coding, dispatching on job DATA: repo-bootstrap (force-push a fresh history to a
+ * separate target repo), conflict-resolution (merge the base in, resolve, push back), multi-repo
+ * fan-out (sibling checkouts + one PR per changed repo), else the ordinary single-repo flow.
+ * After the flow, a STRUCTURED coding kind (e.g. `repro-test`, whose deliverable is BOTH a pushed
+ * commit AND a JSON outcome) parses its final reply into `custom` — best-effort, so an unparseable
+ * outcome degrades to no `custom` (the backend resolver then defaults) rather than failing the
+ * run, whose real deliverable is the pushed commits.
+ *
+ * Dispatch order is bootstrap, then conflict resolution, then multi-repo vs single-repo; the
+ * first two return early and so never reach the structured-output step.
+ *
+ * NOTE(review): the structured step is gated on `result.summary` being truthy, whereas the
+ * explore path checks `summary.trim()`. A whitespace-only reply would therefore still be sent to
+ * `resolveReplyCustom` here — confirm whether that is intended.
+ *
+ * @param job - The coding job; `bootstrap`, `mergeBase`, `peerRepos` and `output` select the path.
+ * @param opts - Run options, forwarded to the selected flow; `signal` is also used for the
+ *   structured-output parse.
+ * @returns The selected flow's result, with `custom` set when a structured reply parsed.
  */
 async function runCodingMode(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
   // Repo bootstrap is a coding run that force-pushes a fresh history to a SEPARATE target
   // repo (clone + adapt a reference, or scaffold from scratch). Keyed off job DATA
-  // (`bootstrap`), not the agent kind.
+  // (`bootstrap`), not the agent kind. Bootstrap/conflict never carry a structured `output`.
   if (job.bootstrap) return runBootstrap(job, opts)
   // Conflict resolution is a coding run with a different pre/post around the agent:
   // clone full, merge the base in to surface the conflicts, then complete the merge
   // commit + push (no PR). Keyed off job DATA (`mergeBase`), not the agent kind.
   if (job.mergeBase) return runConflictResolution(job, opts)
+  // Multi-repo coding (service-connections phase 3): clone every connected peer repo as a
+  // sibling, run the agent once across all of them, and open one PR per changed repo. Keyed
+  // off job DATA (`peerRepos`), not the agent kind — the implementer sets it when the task
+  // has involved services in distinct repos.
+  const result = job.peerRepos?.length
+    ? await runMultiRepoCoding(job, opts)
+    : await runSingleRepoCoding(job, opts)
+  // Structured coding kind (repro-test): fold the final reply's JSON onto `custom` so the
+  // backend post-completion resolver records the outcome. Skipped on a failed run (its `error`
+  // is the signal) and when there is no reply to parse. Best-effort: a null parse leaves
+  // `custom` unset (the run still succeeds on its commits).
+  if (job.output?.kind === 'structured' && !result.error && result.summary) {
+    const { value } = await resolveReplyCustom(job, result.summary, opts.signal)
+    if (value !== null && value !== undefined) result.custom = value // mutates the flow's result object in place
+  }
+  return result
+}
+/**
+ * The ordinary single-repo coding flow: clone `branch` (or resume `newBranch`), run the agent,
+ * commit + push to `pushBranch`, and open `pr` when one is set and the run produced changes. A
+ * no-op is a failure for the implementer (`noChangesIsError` default) and a non-fatal no-op for
+ * the in-place fixers (and for a seed-only kind like `repro-test`).
+ */
+async function runSingleRepoCoding(job: AgentJob, opts: RunOptions): Promise<AgentResult> {
   const pushBranch = job.pushBranch ?? job.newBranch ?? job.branch
   const { summary, stats, stderrTail, pushed, usage, callMetrics } = await runCodingAgent(
     {