@oisincoveney/pipeline 2.4.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,6 +70,18 @@ Whichever host you are on, run the same six steps:
70
70
  5. **Learn** — Once the gates pass, run `MoKa Learner` to store durable lessons from the run (qdrant memory) when there is something worth reusing. This mirrors the canonical pipeline's LEARN phase; skip it only when the run produced nothing reusable.
71
71
  6. **Synthesize** — Report only the evidence the agents actually returned: what passed, what the diff is, what the reviewers proved. Never fabricate or assume an outcome an agent did not report.
72
72
 
73
+ ## Task sizing, reliability & token budget
74
+
75
+ Token usage is the dominant cost and quality lever — it explains the bulk of agent performance variance, and context degrades well before a model's window fills. But the first job of sizing is **reliable completion**: a lane an agent can't finish is worthless however cheap. Size the work accordingly:
76
+
77
+ - **Size for reliable completion first.** Each lane must be small enough that a single agent session finishes it cleanly. If an agent times out, stalls, or returns having only *planned* without producing its artifact, the lane was **too big** — split it into smaller lanes (one file, section, or concern each); do **not** just raise the timeout, that re-runs the same flake. **Slow is fine; flaky is not** — many small lanes that each reliably complete beat one big lane that gambles. Lanes that share a file run sequentially; only truly independent lanes fan out. Treat repeated stalls as a decomposition bug, not bad luck.
78
+ - **Too-short timeouts and permission walls are the real flake sources — not a step cap.** Per opencode's docs, an agent with no `steps`/`maxSteps` set "will continue to iterate until the model chooses to stop or the user interrupts the session" — i.e. **no hard step budget by default** (the MoKa agents set none). So a Code Writer that returns having only *planned* was not hitting a step limit; it was killed by too short a dispatch timeout or blocked on a denied read (e.g. `external_directory: deny`). Fixes: give long multi-file authoring runs a **generous wall-clock timeout** (do not kill them early — they are slow, not capped); scope lanes so they don't need denied/external reads; and only if you must *bound* a runaway agent, set `steps` in its config. Smaller lanes still help (less work = faster, fewer surprises), but "multi-file authoring can't be delegated" is a timeout/scoping issue, not an opencode limit.
79
+ - **Scale fan-out to complexity, not ambition.** A trivial change is one agent (or just do it inline); a bounded change is 1–3 lanes; only go wide for genuinely independent breadth. Code parallelizes poorly — keep writer lanes narrow (the pipeline caps `green`/code fan-out at 2 for exactly this reason).
80
+ - **Keep each agent's context small and high-signal.** Pass context by path and hand over the distilled `research.json`, never raw repo dumps. A lane that needs half the repo in its context is mis-scoped — split it.
81
+ - **Distilled returns.** Expect each sub-agent to return a ~1–2k-token summary of its result, not its full transcript. Gather the summary; don't re-read the work.
82
+ - **Re-dispatch once, with evidence.** On a gate `FAIL`, re-dispatch the failing lane a *single* time with concentrated failure evidence — do not thrash. Each fresh `opencode run` re-pays the full cold-start context tax (~35k tokens of standup before any work), so a retry loop is expensive; fix the input, not the dice.
83
+ - **Smallest roster that covers the work.** Every extra lane is another cold standup. Default to the fewest specialists that close the task; add a lane only when it genuinely runs independently.
84
+
73
85
  ## Rules
74
86
 
75
87
  - **Doctrine is host-neutral; only the Dispatch section is host-specific.** Do not leak `opencode run` syntax into an OpenCode run or Task-tool talk into a Claude run.
@@ -344,8 +344,8 @@ declare const configSchema: z.ZodObject<{
344
344
  rules: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
345
345
  path: z.ZodString;
346
346
  source_root: z.ZodDefault<z.ZodEnum<{
347
- project: "project";
348
347
  package: "package";
348
+ project: "project";
349
349
  }>>;
350
350
  }, z.core.$strict>>>;
351
351
  runners: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
@@ -472,8 +472,8 @@ declare const configSchema: z.ZodObject<{
472
472
  schedules: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
473
473
  description: z.ZodOptional<z.ZodString>;
474
474
  baseline: z.ZodEnum<{
475
- quick: "quick";
476
475
  execute: "execute";
476
+ quick: "quick";
477
477
  }>;
478
478
  max_parallel_nodes: z.ZodOptional<z.ZodNumber>;
479
479
  node_catalog: z.ZodOptional<z.ZodString>;
@@ -485,13 +485,26 @@ declare const configSchema: z.ZodObject<{
485
485
  skills: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodObject<{
486
486
  path: z.ZodString;
487
487
  source_root: z.ZodDefault<z.ZodEnum<{
488
- project: "project";
489
488
  package: "package";
489
+ project: "project";
490
490
  }>>;
491
491
  }, z.core.$strict>>>;
492
492
  task_context: z.ZodOptional<z.ZodObject<{
493
493
  type: z.ZodString;
494
494
  }, z.core.$loose>>;
495
+ best_of_n: z.ZodOptional<z.ZodObject<{
496
+ categories: z.ZodDefault<z.ZodArray<z.ZodString>>;
497
+ enabled: z.ZodDefault<z.ZodBoolean>;
498
+ judge_model: z.ZodOptional<z.ZodString>;
499
+ n: z.ZodDefault<z.ZodNumber>;
500
+ }, z.core.$strict>>;
501
+ context_handoff: z.ZodOptional<z.ZodObject<{
502
+ enabled: z.ZodDefault<z.ZodBoolean>;
503
+ model: z.ZodOptional<z.ZodString>;
504
+ }, z.core.$strict>>;
505
+ parallel_worktrees: z.ZodOptional<z.ZodObject<{
506
+ enabled: z.ZodDefault<z.ZodBoolean>;
507
+ }, z.core.$strict>>;
495
508
  token_budget: z.ZodDefault<z.ZodObject<{
496
509
  default_context_window: z.ZodDefault<z.ZodNumber>;
497
510
  max_context_pct: z.ZodDefault<z.ZodNumber>;
@@ -461,6 +461,17 @@ const DEFAULT_TOKEN_BUDGET = {
461
461
  by_category: {}
462
462
  }
463
463
  };
464
/** `context_handoff` block: disabled unless `enabled` is set; `model` is optional. Unknown keys are rejected. */
const contextHandoffSchema = z
	.object({
		enabled: z.boolean().default(false),
		model: z.string().optional()
	})
	.strict();

/** `parallel_worktrees` block: a single opt-in flag. Unknown keys are rejected. */
const parallelWorktreesSchema = z
	.object({
		enabled: z.boolean().default(false)
	})
	.strict();

/**
 * `best_of_n` block: disabled by default, targets the "green" category by
 * default, and `n` must be a positive integer (default 1). Unknown keys are rejected.
 */
const bestOfNSchema = z
	.object({
		categories: z.array(z.string()).default(["green"]),
		enabled: z.boolean().default(false),
		judge_model: z.string().optional(),
		n: z.number().int().positive().default(1)
	})
	.strict();
464
475
  const pipelineFileSchema = z.object({
465
476
  default_workflow: z.string(),
466
477
  entrypoints: strictRecord(entrypointSchema).default({}),
@@ -482,6 +493,9 @@ const pipelineFileSchema = z.object({
482
493
  }),
483
494
  schedules: strictRecord(schedulePolicySchema).default({}),
484
495
  task_context: taskContextResolverSchema.optional(),
496
+ best_of_n: bestOfNSchema.optional(),
497
+ context_handoff: contextHandoffSchema.optional(),
498
+ parallel_worktrees: parallelWorktreesSchema.optional(),
485
499
  token_budget: tokenBudgetSchema.default(DEFAULT_TOKEN_BUDGET),
486
500
  workflows: strictRecord(workflowSchema).default({}),
487
501
  version: z.literal(1)
@@ -513,6 +527,9 @@ const configSchema = z.object({
513
527
  schedules: strictRecord(schedulePolicySchema).default({}),
514
528
  skills: strictRecord(pathRefSchema).default({}),
515
529
  task_context: taskContextResolverSchema.optional(),
530
+ best_of_n: bestOfNSchema.optional(),
531
+ context_handoff: contextHandoffSchema.optional(),
532
+ parallel_worktrees: parallelWorktreesSchema.optional(),
516
533
  token_budget: tokenBudgetSchema.default(DEFAULT_TOKEN_BUDGET),
517
534
  version: z.literal(1),
518
535
  workflows: strictRecord(workflowSchema).default({})
@@ -160,8 +160,8 @@ declare const mokaSubmitOptionsSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
160
160
  }, z.core.$strict>>;
161
161
  serviceAccountName: z.ZodOptional<z.ZodString>;
162
162
  mode: z.ZodEnum<{
163
- full: "full";
164
163
  quick: "quick";
164
+ full: "full";
165
165
  }>;
166
166
  schedulePath: z.ZodOptional<z.ZodString>;
167
167
  scheduleYaml: z.ZodOptional<z.ZodString>;
@@ -628,6 +628,7 @@ async function executeNodeAttemptCycle(node, context, attempt, previous) {
628
628
  const beforeSnapshot = context.nodeStateStore.getSnapshot(node.id);
629
629
  if (beforeSnapshot) context.nodeStateStore.setSnapshot(node.id, diffChangedFiles(beforeSnapshot, afterSnapshot, context.worktreePath));
630
630
  context.nodeStateStore.recordOutput(node.id, last.output);
631
+ context.nodeStateStore.recordHandoff(node.id, last.handoff);
631
632
  emitNodeOutputRecorded(context, node, attempt, last.output);
632
633
  recordNodeEvent(context, node.id, {
633
634
  at: now(),
@@ -5,6 +5,7 @@ import { createRunnerLaunchPlan, runLaunchPlan } from "../runner.js";
5
5
  import { normalizeRunnerOutput } from "../runner-output.js";
6
6
  import { loadBacklogPlanningContext } from "../schedule/backlog-context.js";
7
7
  import { baselineScheduleArtifact } from "../schedule/baseline.js";
8
+ import { expandBestOfNCandidates } from "../schedule/passes/candidates.js";
8
9
  import { dependentsByNeed, flattenNodes, hasReachableDependent } from "./graph.js";
9
10
  import { isCoverageNode, isImplementationNode } from "../schedule/scheduling-roles.js";
10
11
  import { addGeneratedImplementationCoverage } from "../schedule/passes/coverage.js";
@@ -91,7 +92,7 @@ async function generateScheduleArtifact(options) {
91
92
  const planningContext = { ...loadBacklogPlanningContext(options.task, options.worktreePath) };
92
93
  const generatedArtifact = await planScheduleArtifact(baseline, policy.planner_profile, options, planningContext);
93
94
  assertSchedulePassOrder();
94
- const artifact = hydrateScheduleTaskContexts(canonicalizeGeneratedScheduleIds(applyNodeCatalogModelFallbacks(options.config, policy.node_catalog, addGeneratedImplementationCoverage(options.config, generatedArtifact))), planningContext);
95
+ const artifact = hydrateScheduleTaskContexts(canonicalizeGeneratedScheduleIds(applyNodeCatalogModelFallbacks(options.config, policy.node_catalog, expandBestOfNCandidates(options.config, addGeneratedImplementationCoverage(options.config, generatedArtifact)))), planningContext);
95
96
  validateScheduleArtifact(options.config, artifact, planningContext);
96
97
  compileScheduleArtifact(options.config, artifact, options.worktreePath);
97
98
  return {
@@ -102,6 +103,7 @@ async function generateScheduleArtifact(options) {
102
103
  function assertSchedulePassOrder() {
103
104
  if (SCHEDULE_PASS_ORDER.join("\0") !== [
104
105
  "coverage",
106
+ "candidates",
105
107
  "models",
106
108
  "ids",
107
109
  "references"
@@ -43,8 +43,8 @@ declare const runnerDeliverySchema: z.ZodObject<{
43
43
  declare const mokaSubmissionSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
44
44
  kind: z.ZodLiteral<"graph">;
45
45
  mode: z.ZodEnum<{
46
- full: "full";
47
46
  quick: "quick";
47
+ full: "full";
48
48
  }>;
49
49
  }, z.core.$strict>, z.ZodObject<{
50
50
  argv: z.ZodArray<z.ZodString>;
@@ -104,8 +104,8 @@ declare const runnerCommandPayloadSchema: z.ZodObject<{
104
104
  submission: z.ZodDefault<z.ZodDiscriminatedUnion<[z.ZodObject<{
105
105
  kind: z.ZodLiteral<"graph">;
106
106
  mode: z.ZodEnum<{
107
- full: "full";
108
107
  quick: "quick";
108
+ full: "full";
109
109
  }>;
110
110
  }, z.core.$strict>, z.ZodObject<{
111
111
  argv: z.ZodArray<z.ZodString>;
@@ -9,6 +9,7 @@ import "../events/index.js";
9
9
  import { gatewayServerForProfile } from "../../mcp/gateway.js";
10
10
  import { selectNodeModel } from "../../model-resolver.js";
11
11
  import { estimateTokens } from "../../token-estimator.js";
12
+ import { handoffFinalizerPrompt, parseHandoff, renderHandoff, synthesizeMinimalHandoff } from "../handoff.js";
12
13
  import { readFileSync } from "node:fs";
13
14
  //#region src/runtime/agent-node/agent-node.ts
14
15
  async function executeAgentNode(node, context, attempt) {
@@ -63,7 +64,8 @@ async function executeAgentNode(node, context, attempt) {
63
64
  result,
64
65
  attempt
65
66
  });
66
- return {
67
+ const handoff = await maybeDeriveHandoff(context, node, finalized.output, attempt);
68
+ return withOptionalHandoff({
67
69
  evidence: [
68
70
  `agent boundary node=${node.id} profile=${node.profile} runner=${plan.runnerId}`,
69
71
  `estimated context tokens: ${decision.estimatedTokens}`,
@@ -76,7 +78,62 @@ async function executeAgentNode(node, context, attempt) {
76
78
  exitCode: result.exitCode,
77
79
  output: finalized.output,
78
80
  timedOut: result.timedOut
81
+ }, handoff);
82
+ }
83
/**
 * Attach `handoff` to a node result only when one exists.
 *
 * @param result  The node result object.
 * @param handoff The derived handoff, or a falsy value when none was derived.
 * @returns `result` itself (same reference) when `handoff` is falsy, otherwise
 *          a shallow copy of `result` with a `handoff` property added.
 */
function withOptionalHandoff(result, handoff) {
	if (!handoff) return result;
	return { ...result, handoff };
}
89
/**
 * Resolve the runner configured for a node's profile.
 *
 * @returns The `runner` of `context.config.profiles[node.profile]`, or
 *          undefined when the node has no profile or the profile is unknown.
 */
function profileRunner(context, node) {
	if (!node.profile) return void 0;
	const profile = context.config.profiles[node.profile];
	return profile?.runner;
}
92
/**
 * PIPE-83.1: derive a structured NodeHandoff for a node, but only when
 * `context_handoff.enabled` is set in the config.
 *
 * If the raw output already parses as a handoff it is used directly; otherwise
 * the handoff finalizer is run on the raw output.
 *
 * @returns The handoff, or undefined when the feature is disabled.
 */
async function maybeDeriveHandoff(context, node, rawOutput, attempt) {
	const enabled = context.config.context_handoff?.enabled;
	if (!enabled) return;
	const alreadyShaped = parseHandoff(rawOutput);
	if (alreadyShaped != null) return alreadyShaped;
	return await runHandoffFinalizer(context, node, rawOutput, attempt);
}
103
/**
 * Run the finalizer agent that turns a node's raw output into a handoff.
 *
 * Falls back to a synthesized minimal handoff when the node's profile has no
 * runner, when the raw output is blank, or when the finalizer's output does
 * not parse as a handoff. The finalizer plan is recorded in
 * `context.agentInvocations` and bracketed by agent start/finish events.
 */
async function runHandoffFinalizer(context, node, rawOutput, attempt) {
	const runner = profileRunner(context, node);
	if (!runner || !rawOutput.trim()) return synthesizeMinimalHandoff(rawOutput);

	const plan = createHandoffFinalizerPlan(context, node, runner, rawOutput);
	context.agentInvocations.push(plan);

	emitAgentStart(context, plan, attempt);
	const result = await context.executor(plan, { signal: context.signal });
	emitAgentFinish(context, plan, attempt, result);

	const normalized = normalizeAgentOutput(plan, result.stdout);
	const derived = parseHandoff(normalized.output);
	return derived ?? synthesizeMinimalHandoff(rawOutput);
}
113
/**
 * Build the launch plan for the handoff finalizer.
 *
 * A synthetic profile named `<node.id>:handoff` is layered onto a copy of the
 * config: read-only filesystem, network disabled, no tools, text output, and
 * the same runner as the node. `context_handoff.model`, when set, is passed
 * as the plan's model.
 */
function createHandoffFinalizerPlan(context, node, runner, rawOutput) {
	const finalizerProfileId = `${node.id}:handoff`;
	const finalizerProfile = {
		filesystem: { mode: "read-only" },
		instructions: { inline: "Summarize the agent output into a NodeHandoff JSON." },
		network: { mode: "disabled" },
		output: { format: "text" },
		runner,
		tools: []
	};
	const finalizerConfig = {
		...context.config,
		profiles: {
			...context.config.profiles,
			[finalizerProfileId]: finalizerProfile
		}
	};
	const model = context.config.context_handoff?.model;
	// Only include `model` in the request when one is configured.
	const modelOverride = model ? { model } : {};
	return createRunnerLaunchPlan(finalizerConfig, {
		nodeId: finalizerProfileId,
		profileId: finalizerProfileId,
		prompt: handoffFinalizerPrompt(rawOutput),
		worktreePath: context.worktreePath,
		...modelOverride
	});
}
81
138
  /**
82
139
  * Pure model-routing decision for a node: estimate the assembled prompt size and
@@ -274,9 +331,18 @@ function renderAgentPrompt(node, context) {
274
331
  "",
275
332
  ...inheritedOutputSections(node, context),
276
333
  "Dependency outputs:",
277
- ...node.needs.map((need) => `## ${need}\n${context.nodeStateStore.outputText(need)}`)
334
+ ...node.needs.map((need) => renderDependencySection(need, context))
278
335
  ].filter(Boolean).join("\n");
279
336
  }
337
/**
 * PIPE-83.5: render the prompt section for one dependency.
 *
 * Uses the dependency's recorded handoff when the state store has one;
 * otherwise emits a `## <nodeId>` heading followed by the raw output text.
 */
function renderDependencySection(nodeId, context) {
	const store = context.nodeStateStore;
	const handoff = store.handoff(nodeId);
	if (handoff) return renderHandoff(nodeId, handoff);
	return `## ${nodeId}\n${store.outputText(nodeId)}`;
}
280
346
  function renderGateOutputContract(node) {
281
347
  const gates = node.gates ?? [];
282
348
  const hasAcceptanceGate = gates.some((gate) => gate.kind === "acceptance" && (gate.target === void 0 || gate.target === "stdout"));
@@ -319,7 +385,7 @@ function inheritedOutputSections(node, context) {
319
385
  if (inherited.length === 0) return [];
320
386
  return [
321
387
  "Inherited dependency outputs:",
322
- ...inherited.map((id) => `## ${id}\n${context.nodeStateStore.outputText(id)}`),
388
+ ...inherited.map((id) => renderDependencySection(id, context)),
323
389
  ""
324
390
  ];
325
391
  }
@@ -1,10 +1,12 @@
1
1
  import { runFallow, runJscpd, runLint, runSemgrep, runTests, runTypecheck } from "../../gates.js";
2
2
  import { executeDrainMergeBuiltin } from "../drain-merge/drain-merge.js";
3
3
  import "../drain-merge/index.js";
4
+ import { executeSelectCandidateBuiltin } from "../select-candidate/select-candidate.js";
4
5
  //#region src/runtime/builtins/builtins.ts
5
6
  async function executeBuiltin(builtin, context, node) {
6
7
  switch (builtin) {
7
8
  case "drain-merge": return executeDrainMergeBuiltin(context, node);
9
+ case "select-candidate": return executeSelectCandidateBuiltin(context, node);
8
10
  case "test": {
9
11
  const result = await runTests(context.worktreePath, context.signal);
10
12
  return {
@@ -0,0 +1 @@
1
+ import { z } from "zod";
@@ -0,0 +1,91 @@
1
+ import { z } from "zod";
2
+ //#region src/runtime/handoff.ts
3
+ /**
4
+ * NodeHandoff (PIPE-83.1) — the curated, typed envelope a node hands to its
5
+ * dependents in place of its raw transcript. PIPE-83.5 makes renderAgentPrompt
6
+ * consume these instead of re-hydrating every upstream node's full output text;
7
+ * PIPE-83.10 persists them durably as the unit of cross-node state.
8
+ *
9
+ * Produced by DERIVING from a node's raw output via a cheap finalizer (see
10
+ * agent-node), with a synthesized minimal fallback when no structured handoff
11
+ * is available so existing consumers keep working unchanged.
12
+ */
13
/** Matches text that is exactly one Markdown code fence (optionally tagged `json`); group 1 is the fenced body. */
const MARKDOWN_JSON_FENCE_RE = /^\s*```(?:json)?\s*\r?\n([\s\S]*?)\r?\n```\s*$/i;

/** Upper bound, in UTF-16 code units, for a fallback summary. */
const SUMMARY_FALLBACK_MAX_CHARS = 600;

/** One touched file, optionally narrowed to a pair of non-negative integer line numbers. */
const handoffArtifactSchema = z.object({
	lineRange: z
		.tuple([
			z.number().int().nonnegative(),
			z.number().int().nonnegative()
		])
		.optional(),
	path: z.string().min(1)
});

/** The handoff envelope: `summary` is required, every list defaults to empty. */
const nodeHandoffSchema = z.object({
	artifacts: z.array(handoffArtifactSchema).default([]),
	decisions: z.array(z.string()).default([]),
	openQuestions: z.array(z.string()).default([]),
	summary: z.string(),
	testNames: z.array(z.string()).default([])
});
26
/**
 * Try to read a handoff out of a string.
 *
 * The input is trimmed; if it is a single Markdown code fence, the fenced body
 * is used instead. The text is then parsed as JSON and validated against the
 * handoff schema.
 *
 * @returns The validated handoff, or null when the text is not JSON or fails
 *          validation (never throws for bad JSON).
 */
function parseHandoff(raw) {
	const text = raw.trim();
	const fenced = MARKDOWN_JSON_FENCE_RE.exec(text);
	const source = fenced ? fenced[1].trim() : text;

	let candidate;
	try {
		candidate = JSON.parse(source);
	} catch {
		return null;
	}

	const validation = nodeHandoffSchema.safeParse(candidate);
	if (!validation.success) return null;
	return validation.data;
}
42
/**
 * Build a minimal handoff from a node's raw output text, for use when no
 * structured handoff could be derived.
 *
 * The summary is the trimmed output, truncated to at most
 * SUMMARY_FALLBACK_MAX_CHARS UTF-16 code units; all list fields are empty.
 * Truncation never splits a surrogate pair: if the cut would land between the
 * two halves of an astral character (e.g. an emoji), the dangling high
 * surrogate is dropped so the summary stays well-formed text.
 *
 * @param outputText Raw node output.
 * @returns A handoff object with only `summary` populated.
 */
function synthesizeMinimalHandoff(outputText) {
	const trimmed = outputText.trim();
	let end = Math.min(trimmed.length, SUMMARY_FALLBACK_MAX_CHARS);
	if (end > 0 && end < trimmed.length) {
		const lastUnit = trimmed.charCodeAt(end - 1);
		// 0xD800–0xDBFF is a high surrogate whose low half would be cut off.
		if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
	}
	return {
		artifacts: [],
		decisions: [],
		openQuestions: [],
		summary: trimmed.slice(0, end),
		testNames: []
	};
}
56
/**
 * Render a handoff as the text block a dependent node receives (PIPE-83.5).
 *
 * Output is a `## <nodeId>` heading, the summary, then one bulleted section
 * per non-empty list, in the order Decisions, Artifacts, Tests, Open
 * questions. Artifacts render as `path` or `path:start-end`.
 */
function renderHandoff(nodeId, handoff) {
	const describeArtifact = (artifact) => {
		if (!artifact.lineRange) return artifact.path;
		return `${artifact.path}:${artifact.lineRange[0]}-${artifact.lineRange[1]}`;
	};
	// An empty list contributes no lines at all (not even its heading).
	const section = (heading, items) => items.length > 0 ? [heading, ...items.map((item) => `- ${item}`)] : [];
	return [
		`## ${nodeId}`,
		handoff.summary,
		...section("Decisions:", handoff.decisions),
		...section("Artifacts:", handoff.artifacts.map((artifact) => describeArtifact(artifact))),
		...section("Tests:", handoff.testNames),
		...section("Open questions:", handoff.openQuestions)
	].join("\n");
}
71
/**
 * Build the prompt for the finalizer that derives a handoff from raw node
 * output: fixed instructions describing the JSON fields, followed by the raw
 * output on the final line(s).
 */
function handoffFinalizerPrompt(rawOutput) {
	const instructions = [
		"You are a handoff summarizer for a pipeline node.",
		"Read the agent output below and return ONLY a JSON object describing what a",
		"downstream node needs to continue — no Markdown fences, no prose outside JSON.",
		"",
		"Fields:",
		'- "summary": string — concise description of what this node accomplished.',
		'- "decisions": string[] — explicit choices made (libraries, APIs, approaches).',
		'- "artifacts": {"path": string, "lineRange"?: [number, number]}[] — files touched.',
		'- "testNames": string[] — tests added or changed.',
		'- "openQuestions": string[] — unresolved items the next node should know.',
		"Use empty arrays where nothing applies. Preserve facts; do not invent.",
		"",
		"Agent output:"
	];
	return [...instructions, rawOutput].join("\n");
}
90
+ //#endregion
91
+ export { handoffFinalizerPrompt, parseHandoff, renderHandoff, synthesizeMinimalHandoff };
@@ -1,11 +1,13 @@
1
1
  //#region src/runtime/node-state-store.ts
2
2
  var NodeStateStore = class NodeStateStore {
3
+ handoffByNode;
3
4
  inheritedOutputNodeIds;
4
5
  lastOutputByNode;
5
6
  nodeSnapshots;
6
7
  nodeStates;
7
8
  structuredOutputs;
8
9
  constructor(input = {}) {
10
+ this.handoffByNode = input.handoffByNode ?? /* @__PURE__ */ new Map();
9
11
  this.inheritedOutputNodeIds = input.inheritedOutputNodeIds ?? /* @__PURE__ */ new Set();
10
12
  this.lastOutputByNode = input.lastOutputByNode ?? /* @__PURE__ */ new Map();
11
13
  this.nodeSnapshots = input.nodeSnapshots ?? /* @__PURE__ */ new Map();
@@ -14,6 +16,7 @@ var NodeStateStore = class NodeStateStore {
14
16
  }
15
17
  forkForParallelChildren(children) {
16
18
  return new NodeStateStore({
19
+ handoffByNode: new Map(this.handoffByNode),
17
20
  inheritedOutputNodeIds: new Set(this.lastOutputByNode.keys()),
18
21
  lastOutputByNode: new Map(this.lastOutputByNode),
19
22
  nodeSnapshots: /* @__PURE__ */ new Map(),
@@ -34,6 +37,9 @@ var NodeStateStore = class NodeStateStore {
34
37
  getOutput(nodeId) {
35
38
  return this.lastOutputByNode.get(nodeId);
36
39
  }
40
+ handoff(nodeId) {
41
+ return this.handoffByNode.get(nodeId);
42
+ }
37
43
  outputText(nodeId) {
38
44
  return this.lastOutputByNode.get(nodeId) ?? "";
39
45
  }
@@ -47,6 +53,9 @@ var NodeStateStore = class NodeStateStore {
47
53
  markInheritedOutput(nodeId) {
48
54
  this.inheritedOutputNodeIds.add(nodeId);
49
55
  }
56
+ recordHandoff(nodeId, handoff) {
57
+ if (handoff) this.handoffByNode.set(nodeId, handoff);
58
+ }
50
59
  recordOutput(nodeId, output) {
51
60
  this.lastOutputByNode.set(nodeId, output);
52
61
  }
@@ -26,6 +26,9 @@ function createOpencodeExecutor(deps) {
26
26
  }
27
27
  };
28
28
  }
29
/**
 * Directory a session should run in: the plan's own `cwd` when it is set
 * (not null/undefined), otherwise the executor-wide `deps.directory`.
 */
function sessionDirectory(deps, plan) {
	const { cwd } = plan;
	return cwd ?? deps.directory;
}
29
32
  async function driveSession(deps, plan, options) {
30
33
  const sessionId = await resolveSessionId(deps, plan);
31
34
  deps.onSession?.(plan.nodeId, sessionId);
@@ -34,7 +37,7 @@ async function driveSession(deps, plan, options) {
34
37
  const data = unwrap(await deps.client.session.prompt({
35
38
  body: promptBody(plan),
36
39
  path: { id: sessionId },
37
- query: { directory: deps.directory }
40
+ query: { directory: sessionDirectory(deps, plan) }
38
41
  }));
39
42
  return {
40
43
  ...data.info ? { assistant: data.info } : {},
@@ -50,7 +53,7 @@ async function resolveSessionId(deps, plan) {
50
53
  if (existing) return existing;
51
54
  const session = unwrap(await deps.client.session.create({
52
55
  body: { title: `moka:${plan.nodeId}` },
53
- query: { directory: deps.directory }
56
+ query: { directory: plan.cwd ?? deps.directory }
54
57
  }));
55
58
  deps.registry.sessions.set(plan.nodeId, session.id);
56
59
  return session.id;
@@ -1,5 +1,6 @@
1
1
  import { childReporter } from "../events/events.js";
2
2
  import "../events/index.js";
3
+ import { createChildWorktree, gcParallelWorktrees } from "../parallel-worktrees/parallel-worktrees.js";
3
4
  import pLimit from "p-limit";
4
5
  //#region src/runtime/parallel-node/parallel-node.ts
5
6
  async function executeParallelNode(node, context, runtime) {
@@ -9,6 +10,7 @@ async function executeParallelNode(node, context, runtime) {
9
10
  exitCode: 1,
10
11
  output: ""
11
12
  };
13
+ gcStaleWorktrees(context);
12
14
  const linkedAbort = createLinkedAbortController(context.signal);
13
15
  const childContext = createParallelChildContext(context, node.id, children, context.plan.execution.failFast ? linkedAbort.controller.signal : context.signal);
14
16
  try {
@@ -23,6 +25,37 @@ async function executeParallelNode(node, context, runtime) {
23
25
  linkedAbort.cleanup();
24
26
  }
25
27
  }
28
/** Run worktree garbage collection for the current worktree path, only when `parallel_worktrees` is enabled. */
function gcStaleWorktrees(context) {
	const enabled = context.config.parallel_worktrees?.enabled;
	if (!enabled) return;
	gcParallelWorktrees(context.worktreePath);
}
31
/**
 * PIPE-83.4: run a parallel child in its own git worktree when
 * `parallel_worktrees` is enabled, so concurrent candidate edits can't
 * collide; otherwise run it directly in the shared context.
 *
 * The lease is created here, inside the per-child call (not before
 * scheduling), so children that are never started never allocate a worktree.
 *
 * This function is `async` on purpose: lease creation is synchronous and can
 * throw. Callers invoke this inside `children.map(...)` feeding `Promise.all`;
 * a synchronous throw there would escape the `map`, abandoning the promises
 * of children that were already started. Being async turns that failure into
 * a rejection of this child's promise instead.
 *
 * @returns A promise for the child's execution result.
 */
async function runChildInWorktree(child, context, runtime) {
	if (!context.config.parallel_worktrees?.enabled) return runtime.executeNode(child, context);
	return runInLease(child, context, runtime, createChildLease(child, context));
}
41
/**
 * Create the worktree lease for one parallel child, rooted at the current
 * worktree path. The parent id falls back to "parallel" when the context has
 * no `parentParallelNodeId`; `runId` is passed only when the context has one.
 */
function createChildLease(child, context) {
	const parentNodeId = context.parentParallelNodeId ?? "parallel";
	const runIdPart = context.runId ? { runId: context.runId } : {};
	return createChildWorktree({
		childNodeId: child.id,
		parentNodeId,
		repoRoot: context.worktreePath,
		...runIdPart
	});
}
49
/**
 * Execute `child` with `worktreePath` pointed at the leased worktree, and
 * release the lease afterwards whether execution succeeds or throws.
 * NOTE(review): if `lease.release()` itself throws, that error replaces the
 * child's result or original error — confirm this is acceptable.
 */
async function runInLease(child, context, runtime, lease) {
	const leasedContext = {
		...context,
		worktreePath: lease.path
	};
	try {
		const outcome = await runtime.executeNode(child, leasedContext);
		return outcome;
	} finally {
		lease.release();
	}
}
26
59
  function createParallelChildContext(context, parentNodeId, children, signal) {
27
60
  return {
28
61
  ...context,
@@ -60,9 +93,9 @@ function createLinkedAbortController(signal) {
60
93
  }
61
94
  function executeParallelChildren(children, context, runtime) {
62
95
  for (const child of children) runtime.markNodeReady(context, child.id);
63
- if (!context.maxParallelNodes) return Promise.all(children.map((child) => runtime.executeNode(child, context)));
96
+ if (!context.maxParallelNodes) return Promise.all(children.map((child) => runChildInWorktree(child, context, runtime)));
64
97
  const limit = pLimit(context.maxParallelNodes);
65
- return Promise.all(children.map((child) => limit(() => runtime.executeNode(child, context))));
98
+ return Promise.all(children.map((child) => limit(() => runChildInWorktree(child, context, runtime))));
66
99
  }
67
100
  async function executeFailFastParallelChildren(children, context, abortController, runtime) {
68
101
  for (const child of children) runtime.markNodeReady(context, child.id);
@@ -71,7 +104,7 @@ async function executeFailFastParallelChildren(children, context, abortControlle
71
104
  rejectOnClear: true
72
105
  });
73
106
  return (await Promise.allSettled(children.map((child) => limit(async () => {
74
- const result = await runtime.executeNode(child, context);
107
+ const result = await runChildInWorktree(child, context, runtime);
75
108
  if (result.status === "failed") {
76
109
  abortController.abort();
77
110
  limit.clearQueue();
@@ -0,0 +1,132 @@
1
+ import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import { execFileSync } from "node:child_process";
4
+ //#region src/runtime/parallel-worktrees/parallel-worktrees.ts
5
+ /**
6
+ * PIPE-83.4: git-worktree isolation for parallel candidate nodes. Each parallel
7
+ * child runs in its own worktree on an auto-named branch so concurrent edits do
8
+ * not collide. Teardown is idempotent and crash-safe: a worktree with dirty or
9
+ * unpushed work is RETAINED (never deleted), and orphaned worktrees are GC'd on
10
+ * startup using the same safety guard. A worktree is NOT a sandbox — node_modules
11
+ * and build state are shared; real isolation remains k8s mode.
12
+ */
13
+ const WORKTREE_ROOT = ".pipeline/worktrees";
14
+ const REGISTRY_DIR = join(WORKTREE_ROOT, "registry");
15
+ const OWNER = "oisin-pipeline";
16
/**
 * Run `git` synchronously with `args` in `cwd` and return its stdout as a
 * trimmed UTF-8 string. Errors from `execFileSync` propagate to the caller.
 */
function git(cwd, args) {
	const stdout = execFileSync("git", args, { cwd, encoding: "utf8" });
	return stdout.trim();
}
22
/**
 * Turn an arbitrary id into a single segment that is safe to use both as a
 * directory name and as one component of a git branch name.
 *
 * Ids made only of `A-Z a-z 0-9 . _ -` that contain no "..", no leading or
 * trailing dot and no ".lock" suffix are returned unchanged. Otherwise:
 * - any other character becomes "-";
 * - runs of two or more dots become "-" ("..": path traversal, and forbidden
 *   in git ref names);
 * - a leading or trailing dot becomes "-" (git ref components may not start
 *   with a dot, and a ref may not end with one);
 * - a trailing ".lock" becomes "-lock" (forbidden ref component suffix);
 * - an empty result becomes "-" so the segment is never empty.
 *
 * @param id Raw identifier (run id, node id).
 * @returns A non-empty, filesystem- and ref-safe segment.
 */
function sanitize(id) {
	const safe = id
		.replace(/[^A-Za-z0-9._-]/g, "-")
		.replace(/\.{2,}/g, "-")
		.replace(/^\./, "-")
		.replace(/\.$/, "-")
		.replace(/\.lock$/, "-lock");
	return safe.length > 0 ? safe : "-";
}
25
/** Serialize `manifest` as 2-space-indented JSON plus a trailing newline and write it to `path` as UTF-8. */
function writeManifest(path, manifest) {
	const serialized = JSON.stringify(manifest, null, 2);
	writeFileSync(path, `${serialized}\n`, "utf8");
}
28
/** Read `path` as UTF-8 and parse it as JSON. Throws if the file is missing or is not valid JSON. */
function readManifest(path) {
	const text = readFileSync(path, "utf8");
	return JSON.parse(text);
}
31
/**
 * Create (or reuse) the git worktree for one parallel child and register a
 * lease manifest for it.
 *
 * Steps, in order:
 * 1. derive sanitized run/parent/child segments (`runId` defaults to "local");
 * 2. capture the repo's current HEAD as `baseSha`;
 * 3. write the manifest with state "creating";
 * 4. if the worktree directory does not exist yet, `git worktree add -b`
 *    a new branch at `baseSha`;
 * 5. rewrite the manifest with state "active".
 *
 * @param opts `{ repoRoot, parentNodeId, childNodeId, runId? }`.
 * @returns `{ baseSha, branch, leaseId, path, release }` where `path` is the
 *          absolute worktree path and `release()` tears the lease down.
 */
function createChildWorktree(opts) {
	const { repoRoot } = opts;
	const runSeg = sanitize(opts.runId ?? "local");
	const parentSeg = sanitize(opts.parentNodeId);
	const childSeg = sanitize(opts.childNodeId);
	const baseSha = git(repoRoot, ["rev-parse", "HEAD"]);

	const relPath = join(WORKTREE_ROOT, "trees", runSeg, parentSeg, childSeg);
	const absPath = join(repoRoot, relPath);
	const branch = `pipeline/worktrees/${runSeg}/${parentSeg}/${childSeg}`;
	const leaseId = `${runSeg}__${parentSeg}__${childSeg}`;

	const registryAbs = join(repoRoot, REGISTRY_DIR);
	mkdirSync(registryAbs, { recursive: true });
	const manifestPath = join(registryAbs, `${leaseId}.json`);

	const manifest = {
		baseSha,
		branch,
		childNodeId: opts.childNodeId,
		leaseId,
		owner: OWNER,
		parentNodeId: opts.parentNodeId,
		path: relPath,
		runId: opts.runId,
		schemaVersion: 1,
		state: "creating"
	};
	// Record intent first so the lease is in the registry before the worktree is created.
	writeManifest(manifestPath, manifest);

	const alreadyPresent = existsSync(absPath);
	if (!alreadyPresent) {
		git(repoRoot, ["worktree", "add", "-b", branch, absPath, baseSha]);
	}

	writeManifest(manifestPath, { ...manifest, state: "active" });

	return {
		baseSha,
		branch,
		leaseId,
		path: absPath,
		release: () => releaseWorktree(repoRoot, manifestPath)
	};
}
76
/**
 * Tear down the lease described by `manifestPath`.
 *
 * - No manifest file: nothing to do, returns "removed".
 * - Worktree directory already gone (after `git worktree prune`): manifest is
 *   marked "removed".
 * - Worktree has uncommitted changes or its HEAD moved off the recorded base:
 *   it is kept, and the manifest is marked with the retention reason.
 * - Otherwise the worktree and its branch are deleted and the manifest is
 *   marked "removed".
 *
 * @returns The final state string written to the manifest (or "removed").
 */
function releaseWorktree(repoRoot, manifestPath) {
	if (!existsSync(manifestPath)) return "removed";

	const manifest = readManifest(manifestPath);
	// Persist the new state and hand it back as the function result.
	const settle = (state) => {
		writeManifest(manifestPath, { ...manifest, state });
		return state;
	};

	const absPath = join(repoRoot, manifest.path);
	git(repoRoot, ["worktree", "prune"]);
	if (!existsSync(absPath)) return settle("removed");

	const retention = retentionState(absPath, manifest.baseSha);
	if (retention) return settle(retention);

	git(repoRoot, ["worktree", "remove", "--force", absPath]);
	git(repoRoot, ["branch", "-D", manifest.branch]);
	return settle("removed");
}
114
/**
 * Decide whether a worktree must be kept.
 *
 * @returns "retained-dirty" when `git status --porcelain` (including
 *          untracked files) reports anything, "retained-unpushed" when the
 *          worktree's HEAD differs from `baseSha`, otherwise undefined.
 */
function retentionState(absPath, baseSha) {
	const pendingChanges = git(absPath, ["status", "--porcelain", "--untracked-files=all"]);
	if (pendingChanges.length > 0) return "retained-dirty";

	const head = git(absPath, ["rev-parse", "HEAD"]);
	if (head !== baseSha) return "retained-unpushed";

	return void 0;
}
123
/**
 * Startup GC: release every pipeline-owned lease in the registry, applying
 * the same retention guard as a normal release.
 *
 * Manifests are processed in sorted filename order. Files that are not
 * `.json`, and manifests whose `owner` is not this pipeline, are ignored.
 * A manifest that cannot be read or parsed (for example one truncated by a
 * crash mid-write) is skipped rather than aborting the whole collection, so
 * a single corrupt file cannot block later runs. Failures raised while
 * releasing a valid lease still propagate.
 *
 * @param repoRoot Repository root containing the worktree registry.
 * @returns The release result for each lease that was processed; an empty
 *          array when the registry directory does not exist.
 */
function gcParallelWorktrees(repoRoot) {
	const registryAbs = join(repoRoot, REGISTRY_DIR);
	if (!existsSync(registryAbs)) return [];

	const results = [];
	for (const file of readdirSync(registryAbs).sort()) {
		if (!file.endsWith(".json")) continue;
		const manifestPath = join(registryAbs, file);

		let owner;
		try {
			owner = readManifest(manifestPath)?.owner;
		} catch {
			// Unreadable or corrupt manifest: leave it in place and move on.
			continue;
		}
		if (owner !== OWNER) continue;

		results.push(releaseWorktree(repoRoot, manifestPath));
	}

	git(repoRoot, ["worktree", "prune"]);
	return results;
}
131
+ //#endregion
132
+ export { createChildWorktree, gcParallelWorktrees };
@@ -0,0 +1,116 @@
1
+ import { createRunnerLaunchPlan } from "../../runner.js";
2
+ import { normalizeRunnerOutput } from "../../runner-output.js";
3
+ import { parseJsonObject } from "../json-validation/json-validation.js";
4
+ import "../json-validation/index.js";
5
+ //#region src/runtime/select-candidate/select-candidate.ts
6
+ const SCORE_RE = /-?\d+(?:\.\d+)?/;
7
/**
 * Picks the winning candidate among those whose status is "PASS".
 *
 * A missing or null `judgeScore` counts as 0. A later candidate only replaces
 * the current winner when its score is strictly higher, so ties go to the
 * earliest passing candidate.
 *
 * @param candidates Candidates carrying `status` and an optional `judgeScore`.
 * @returns The best passing candidate, or null when none passed.
 */
function selectBestCandidate(candidates) {
  let winner = null;
  for (const entry of candidates) {
    if (entry.status !== "PASS") continue;
    if (winner === null) {
      winner = entry;
      continue;
    }
    if ((entry.judgeScore ?? 0) > (winner.judgeScore ?? 0)) winner = entry;
  }
  return winner;
}
12
/**
 * Runs the `select-candidate` builtin: reads the candidates produced by the
 * node's first dependency, optionally scores them with the judge, and returns
 * the winner's output.
 *
 * @param context Execution context handed through to `readCandidates` and `scoreCandidates`.
 * @param node The builtin node; only its first `needs` entry is consulted.
 * @returns `{ evidence, exitCode, output }`. `exitCode` is 1 with empty output
 *   when no candidate passed, otherwise 0 with the selected candidate's output.
 */
async function executeSelectCandidateBuiltin(context, node) {
  // `needs` is optional on nodes, so guard it as well as `node`; a node without
  // dependencies then yields zero candidates instead of a TypeError.
  const upstreamNodeId = node?.needs?.at(0) ?? null;
  const candidates = await scoreCandidates(context, readCandidates(context, upstreamNodeId));
  const selected = selectBestCandidate(candidates);
  if (!selected) return {
    // No selection means no candidate had status PASS, so each one is listed as FAIL.
    evidence: [`select-candidate: no passing candidate among ${candidates.length}`, ...candidates.map((candidate) => `- ${candidate.nodeId}: FAIL`)],
    exitCode: 1,
    output: ""
  };
  return {
    evidence: [`select-candidate: selected '${selected.nodeId}' (judge=${selected.judgeScore ?? "n/a"}) from ${candidates.length} candidates`],
    exitCode: 0,
    output: selected.output
  };
}
26
/**
 * Scores every candidate with the judge when judging is configured.
 *
 * Judging needs both `config.best_of_n.judge_model` and at least one entry in
 * `config.runners` (the first key is used). Without either, the candidates are
 * returned untouched.
 *
 * @param context Execution context providing `config`.
 * @param candidates Candidates to score.
 * @returns The candidates, each possibly augmented with a `judgeScore`.
 */
async function scoreCandidates(context, candidates) {
  const judgeModel = context.config.best_of_n?.judge_model;
  const [firstRunner] = Object.keys(context.config.runners);
  if (!judgeModel || !firstRunner) return candidates;
  // All judge calls are started together and awaited as a group.
  const pending = candidates.map((candidate) => scoreCandidate(context, candidate, firstRunner, judgeModel));
  return await Promise.all(pending);
}
32
/**
 * Asks the judge to score a single candidate.
 *
 * The launch plan is appended to `context.agentInvocations` before it is
 * executed. When no score can be parsed from the judge's output, the candidate
 * is returned unchanged.
 *
 * @param context Execution context providing `executor`, `signal` and `agentInvocations`.
 * @param candidate Candidate to score.
 * @param runner Runner id used for the judge profile.
 * @param model Judge model id.
 * @returns The candidate, with `judgeScore` set when a score was parsed.
 */
async function scoreCandidate(context, candidate, runner, model) {
  const plan = judgePlan(context, candidate, runner, model);
  context.agentInvocations.push(plan);
  const execution = await context.executor(plan, { signal: context.signal });
  const normalized = normalizeRunnerOutput(plan, execution.stdout);
  const judgeScore = parseScore(normalized.output);
  if (judgeScore === null) return candidate;
  return { ...candidate, judgeScore };
}
41
/**
 * Builds the runner launch plan for judging one candidate.
 *
 * A one-off profile keyed `select-candidate:judge:<nodeId>` is layered on top
 * of the configured profiles: read-only filesystem, network disabled, text
 * output, no tools.
 *
 * @param context Execution context providing `config`, `task` and `worktreePath`.
 * @param candidate Candidate whose `output` is placed in the judge prompt.
 * @param runner Runner id for the judge profile.
 * @param model Judge model id.
 * @returns The value produced by `createRunnerLaunchPlan`.
 */
function judgePlan(context, candidate, runner, model) {
  const profileId = `select-candidate:judge:${candidate.nodeId}`;
  const judgeProfile = {
    filesystem: { mode: "read-only" },
    instructions: { inline: "Score the candidate implementation." },
    network: { mode: "disabled" },
    output: { format: "text" },
    runner,
    tools: []
  };
  const judgeConfig = {
    ...context.config,
    profiles: {
      ...context.config.profiles,
      [profileId]: judgeProfile
    }
  };
  const launch = {
    model,
    nodeId: profileId,
    profileId,
    prompt: judgePrompt(context.task, candidate.output),
    worktreePath: context.worktreePath
  };
  return createRunnerLaunchPlan(judgeConfig, launch);
}
64
/**
 * Renders the prompt sent to the judge model.
 *
 * @param task Task description the candidate was meant to satisfy.
 * @param output Candidate result text; null or undefined renders as an empty line.
 * @returns The newline-separated prompt.
 */
function judgePrompt(task, output) {
  const instruction = "Score how well this candidate implementation satisfies the task.";
  const format = "Return ONLY a number between 0 and 1 (1 = best). No prose, no fences.";
  const taskLine = `Task: ${task}`;
  // A nullish result contributes an empty final line, never the text "undefined".
  const result = output ?? "";
  return `${instruction}\n${format}\n\n${taskLine}\n\nCandidate result:\n${result}`;
}
75
/**
 * First signed decimal in a judge reply. Unlike the module-level SCORE_RE this
 * also accepts a bare leading dot (".8"), which would otherwise be read as "8".
 */
const JUDGE_SCORE_RE = /-?(?:\d+(?:\.\d+)?|\.\d+)/;
/**
 * Extracts the judge's score from free-form reply text.
 *
 * The first number found is used and clamped into [0, 1], so out-of-range
 * replies saturate instead of being rejected.
 *
 * @param text Normalized judge output.
 * @returns The clamped score, or null when the text contains no number.
 */
function parseScore(text) {
  const match = JUDGE_SCORE_RE.exec(text);
  if (!match) return null;
  const value = Number(match[0]);
  return Number.isFinite(value) ? Math.max(0, Math.min(1, value)) : null;
}
81
/**
 * Collects the candidates recorded under an upstream node.
 *
 * The upstream node's stored output is parsed, its `children` entry is parsed
 * again, and one candidate is built for each child of the graph node that has
 * an entry there. Children without recorded output are skipped.
 *
 * @param context Execution context providing `plan.graph` and `nodeStateStore`.
 * @param upstreamNodeId Id of the node holding the candidates; falsy yields [].
 * @returns Parsed candidates in the order of the upstream node's children.
 */
function readCandidates(context, upstreamNodeId) {
  if (!upstreamNodeId) return [];
  const upstream = context.plan.graph.node(upstreamNodeId);
  const upstreamOutput = parseJsonObject(context.nodeStateStore.getOutput(upstreamNodeId));
  const childrenOutput = parseJsonObject(upstreamOutput.children);
  const children = upstream?.children ?? [];
  return children
    .filter((child) => childrenOutput[child.id] !== void 0)
    .map((child) => parseCandidate(child.id, childrenOutput[child.id]));
}
90
/**
 * Turns one child's raw output into a candidate record.
 *
 * Non-string output is serialized with JSON.stringify. The text is then parsed
 * on a best-effort basis to read a verdict/status and an embedded judge score.
 *
 * @param nodeId Id of the child node that produced the output.
 * @param raw Recorded output: a string or any JSON-serializable value.
 * @returns `{ judgeScore, nodeId, output, status }`.
 */
function parseCandidate(nodeId, raw) {
  let output = raw;
  if (typeof raw !== "string") output = JSON.stringify(raw);
  const parsed = safeParseObject(output);
  const judgeScore = candidateJudgeScore(parsed);
  const status = candidateStatus(parsed);
  return { judgeScore, nodeId, output, status };
}
100
/**
 * Derives a candidate's status from its parsed output.
 *
 * Only an explicit "FAIL" in `verdict` or `status` fails a candidate; output
 * that could not be parsed into an object counts as "PASS".
 *
 * @param parsed Parsed output object, or null.
 * @returns "FAIL" or "PASS".
 */
function candidateStatus(parsed) {
  if (parsed) {
    if (parsed.verdict === "FAIL") return "FAIL";
    if (parsed.status === "FAIL") return "FAIL";
  }
  return "PASS";
}
104
/**
 * Reads a judge score embedded in a candidate's parsed output.
 *
 * @param parsed Parsed output object, or null.
 * @returns `parsed.judge_score` when it is a number, otherwise null.
 */
function candidateJudgeScore(parsed) {
  const embedded = parsed?.judge_score;
  if (typeof embedded === "number") return embedded;
  return null;
}
107
/**
 * Parses JSON text without throwing.
 *
 * @param text Text that may or may not be JSON.
 * @returns The parsed value when it is a non-null object (arrays included);
 *   null for invalid JSON and for primitives.
 */
function safeParseObject(text) {
  let decoded;
  try {
    decoded = JSON.parse(text);
  } catch {
    return null;
  }
  if (decoded !== null && typeof decoded === "object") return decoded;
  return null;
}
115
+ //#endregion
116
+ export { executeSelectCandidateBuiltin };
@@ -0,0 +1,42 @@
1
+ //#region src/schedule/passes/candidates.ts
2
/**
 * PIPE-83.7: best-of-N candidate generation.
 *
 * When `config.best_of_n` is enabled with n > 1, every workflow's top-level
 * nodes are passed through `expandNode`, which replaces each matching agent
 * node with a parallel wrapper of N candidate copies followed by a
 * `select-candidate` builtin that keeps the original id. Downstream consumers
 * and the PIPE-83.9 selector therefore still see a single dependency.
 *
 * With best-of-N off or n <= 1 the artifact is returned as-is, so generated
 * schedules and the PIPE-57 goldens are unchanged.
 *
 * @param config Pipeline config; only `best_of_n` is read.
 * @param artifact Schedule artifact holding a `workflows` map.
 * @returns The same artifact when disabled, otherwise a copy with expanded workflows.
 */
function expandBestOfNCandidates(config, artifact) {
  const bestOfN = config.best_of_n;
  if (!bestOfN?.enabled) return artifact;
  if (bestOfN.n <= 1) return artifact;
  const { categories, n } = bestOfN;
  const expandWorkflow = (workflow) => ({
    ...workflow,
    nodes: workflow.nodes.flatMap((node) => expandNode(node, categories, n))
  });
  const expandedEntries = Object.entries(artifact.workflows).map(([id, workflow]) => [id, expandWorkflow(workflow)]);
  return {
    ...artifact,
    workflows: Object.fromEntries(expandedEntries)
  };
}
22
/**
 * Expands one schedule node into its best-of-N form.
 *
 * Nodes that are not agents, or whose id contains none of the categories, are
 * returned as a single-element array holding the same node object.
 *
 * @param node Schedule node to inspect.
 * @param categories Substrings matched against `node.id`.
 * @param n Number of candidate copies to create.
 * @returns `[node]` when untouched; otherwise a `<id>--candidates` parallel
 *   wrapper (inheriting the node's `needs`, if any) whose children are copies
 *   with ids `<id>--c1..cN` and empty `needs`, followed by a `select-candidate`
 *   builtin that takes over the original id and depends on the wrapper.
 */
function expandNode(node, categories, n) {
  if (node.kind !== "agent") return [node];
  const inCategory = categories.some((category) => node.id.includes(category));
  if (!inCategory) return [node];
  const candidatesId = `${node.id}--candidates`;
  // Candidates are full copies that run independently of each other.
  const makeCandidate = (_, index) => ({
    ...node,
    id: `${node.id}--c${index + 1}`,
    needs: []
  });
  const wrapper = {
    id: candidatesId,
    kind: "parallel",
    nodes: Array.from({ length: n }, makeCandidate),
    ...(node.needs ? { needs: node.needs } : {})
  };
  const selector = {
    builtin: "select-candidate",
    id: node.id,
    kind: "builtin",
    needs: [candidatesId]
  };
  return [wrapper, selector];
}
41
+ //#endregion
42
+ export { expandBestOfNCandidates };
@@ -1,6 +1,7 @@
1
1
  //#region src/schedule/passes/index.ts
2
2
  const SCHEDULE_PASS_ORDER = [
3
3
  "coverage",
4
+ "candidates",
4
5
  "models",
5
6
  "ids",
6
7
  "references"
@@ -1,13 +1,14 @@
1
1
  import { getEncoding } from "js-tiktoken";
2
2
  //#region src/token-estimator.ts
3
3
  /**
4
- * Token estimation for node sizing. Uses the `o200k_base` encoding (the GPT-5.5
5
- * family the MoKa agents run on).
4
+ * Token estimation for node sizing. Uses the `o200k_base` BPE as a
5
+ * model-agnostic heuristic, NOT a guarantee of any specific model's tokenizer.
6
6
  *
7
7
  * This is a cross-model ESTIMATE, not a billing-accurate count: the pipeline
8
- * routes nodes across OpenAI/Kimi/Qwen models whose tokenizers differ, so the
9
- * value is a sizing heuristic for budget/routing decisions. For exact counts on
10
- * Anthropic runners, use the Anthropic `count_tokens` API instead.
8
+ * routes nodes across OpenAI/Kimi/Qwen models whose exact tokenizers differ (and
9
+ * are not all known here), so treat the value as a sizing heuristic for
10
+ * budget/routing decisions only. For exact counts on Anthropic runners, use the
11
+ * Anthropic `count_tokens` API.
11
12
  */
12
13
  let encoder;
13
14
  function encoding() {
package/package.json CHANGED
@@ -121,7 +121,7 @@
121
121
  "prepack": "bun run build:cli"
122
122
  },
123
123
  "type": "module",
124
- "version": "2.4.0",
124
+ "version": "2.6.0",
125
125
  "description": "Config-driven multi-agent pipeline runner for repository work",
126
126
  "main": "./dist/index.js",
127
127
  "types": "./dist/index.d.ts",