pi-pipelines 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1346 @@
1
+ /**
2
+ * Pipeline Runner — core orchestration engine
3
+ *
4
+ * Executes pipeline stages by dispatching subagent commands through
5
+ * pi-subagents' event bridge. This means:
6
+ * - No child Pi processes needed
7
+ * - pi-subagents handles all agent execution natively
8
+ * - Chain and parallel execution are native to pi-subagents
9
+ * - Review gates add the iterative scoring loop on top
10
+ *
11
+ * For simple linear pipelines, the runner emits a single chain request.
12
+ * For pipelines with gates, it runs stages one-by-one with programmatic
13
+ * control over the review loop.
14
+ */
15
+
16
+ import * as path from "node:path";
17
+ import yaml from "js-yaml";
18
+ import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
19
+ import type { PipelineDef, Stage, StageResult, PipelineResult } from "./types.ts";
20
+ import { findPipelineFile, listPipelines, loadPipeline } from "./config-loader.ts";
21
+ import { executeSubagent, extractResponseText } from "./subagent-bridge.ts";
22
+ import { formatDuration } from "./utils.ts";
23
+
24
/**
 * Upper bound on how long the worker agent may run in a single review-gate round.
 * Keeps one hung worker from stalling the whole pipeline.
 * @internal Exported for testing.
 */
export const GATE_WORKER_TIMEOUT_MS = 10 * 60 * 1_000; // 10 min

/**
 * Upper bound on how long the reviewer agents may run in a single review-gate round.
 * Keeps hung reviewers from stalling the whole pipeline.
 * @internal Exported for testing.
 */
export const GATE_REVIEWER_TIMEOUT_MS = 5 * 60 * 1_000; // 5 min

/** Where pipeline definitions live, relative to the working directory. */
const PIPELINES_DIR = ".pi/pipelines";
40
+
41
/**
 * Derive the AbortSignal used while a stage runs.
 *
 * - Parent signal and a positive timeout: a signal that aborts as soon as either does.
 * - Only one of them: that signal on its own (the parent is returned as-is).
 * - Neither (a missing, zero or negative timeout counts as "none"): `undefined`.
 *
 * @param parentSignal Caller-supplied cancellation signal, if any.
 * @param timeoutMs Timeout in milliseconds; ignored unless greater than zero.
 */
function createStageSignal(
  parentSignal?: AbortSignal,
  timeoutMs?: number,
): AbortSignal | undefined {
  const timeoutSignal =
    timeoutMs && timeoutMs > 0 ? AbortSignal.timeout(timeoutMs) : undefined;

  if (parentSignal && timeoutSignal) {
    return AbortSignal.any([parentSignal, timeoutSignal]);
  }
  return parentSignal ?? timeoutSignal;
}
57
+
58
/**
 * Render pipeline metadata and all stage results as the text block that is
 * embedded in the report synthesizer's prompt.
 *
 * Layout: a header (name, description, task, optional focus, overall status)
 * followed by a "Stage Results" section with one numbered entry per stage.
 * Each entry shows pass/fail, optional round count and scores, the duration,
 * and — when present — the stage's error and its output, included verbatim.
 *
 * @param pipelineName Name shown in the heading.
 * @param pipelineDescription Description line.
 * @param task Task the pipeline was run with.
 * @param stages Stage results, in execution order.
 * @param focus Optional focus line; omitted when empty or undefined.
 * @returns The newline-joined context text.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export function buildReportContext(
  pipelineName: string,
  pipelineDescription: string,
  task: string,
  stages: StageResult[],
  focus?: string,
): string {
  const total = stages.length;
  const passed = stages.reduce((count, s) => (s.success ? count + 1 : count), 0);
  const totalMs = stages.reduce((sum, s) => sum + s.durationMs, 0);
  const verdict = passed === total ? "PASSED" : "FAILED";

  const header: string[] = [
    `# Pipeline: ${pipelineName}`,
    `Description: ${pipelineDescription}`,
    `Task: ${task}`,
    ...(focus ? [`Focus: ${focus}`] : []),
    `Status: ${verdict} (${passed}/${total} stages passed, ${formatDuration(totalMs)})`,
    "",
    "## Stage Results",
    "",
  ];

  const entries = stages.flatMap((s, idx) => {
    const icon = s.success ? "✅ PASS" : "❌ FAIL";
    const rounds = s.rounds ? ` (${s.rounds} round${s.rounds > 1 ? "s" : ""})` : "";
    const scores = s.scores?.length ? ` scores=[${s.scores.join(", ")}]` : "";

    const entry = [
      `### ${idx + 1}. ${s.stageId} ${icon}${rounds}${scores}`,
      `Duration: ${formatDuration(s.durationMs)}`,
    ];
    if (s.error) entry.push(`Error: ${s.error}`);
    if (s.output) entry.push(`Output:\n${s.output}`);
    entry.push("");
    return entry;
  });

  return [...header, ...entries].join("\n");
}
103
+
104
/**
 * Character budget for a stage summary when the stage does not set its own.
 */
const STAGE_SUMMARY_MAX_LENGTH = 500;

/**
 * Condense a stage's output before it is handed to later stages.
 *
 * When the stage has no `report` config, or its mode is "full" or unset, the
 * raw output is returned untouched. Otherwise the "worker" agent is asked (in
 * a fresh context) to summarize the raw output, and the summary is cut to the
 * configured maximum length with "..." appended when it was longer.
 *
 * @param pi Extension API used to dispatch the summarizer agent.
 * @param stage Stage whose `report` settings drive the behavior.
 * @param rawOutput The stage's unprocessed output.
 * @param task The overall pipeline task, included in the prompt for context.
 * @param signal Optional abort signal forwarded to the agent call.
 * @returns The raw output, the summary, or "(summary unavailable)" when the
 *   agent produced no text.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function applyStageReport(
  pi: ExtensionAPI,
  stage: Stage,
  rawOutput: string,
  task: string,
  signal?: AbortSignal,
): Promise<string> {
  const reportCfg = stage.report;
  if (!reportCfg) return rawOutput;
  if (reportCfg.mode === undefined || reportCfg.mode === "full") return rawOutput;

  const maxLen = reportCfg.maxLength ?? STAGE_SUMMARY_MAX_LENGTH;
  const instruction =
    reportCfg.instruction ?? "Summarize the key findings and results concisely.";

  const prompt = [
    "You are a stage output summarizer. A pipeline stage has just completed and its output",
    "needs to be condensed so the next stage does not receive the full raw text.",
    "",
    `Stage: ${stage.id}`,
    `Full task: ${task}`,
    "",
    `Summarization instruction: ${instruction}`,
    "",
    `Raw output (${rawOutput.length} chars):`,
    rawOutput,
    "",
    "---",
    `Produce a concise summary of the stage output, maximum ${maxLen} characters.`,
    "Focus on key facts, decisions, and results relevant to downstream stages.",
    "Ignore verbose reasoning, iterative exploration, and internal commentary.",
  ].join("\n");

  const response = await executeSubagent(
    pi,
    { agent: "worker", task: prompt, context: "fresh" },
    signal,
  );

  const summary = extractResponseText(response) || "(summary unavailable)";
  if (summary.length <= maxLen) {
    return summary;
  }
  return summary.slice(0, maxLen) + "...";
}
159
+
160
/**
 * Run the post-pipeline report synthesizer.
 *
 * Builds a context block from all stage results and asks an agent (in a fresh
 * context) to turn it into a concise report.
 *
 * @param pi Extension API used to dispatch the synthesis agent.
 * @param pipeline Pipeline definition; its name and description go into the context.
 * @param task Task the pipeline was run with.
 * @param stages Results of the executed stages.
 * @param agentName Agent to use; falls back to "planner" when empty or omitted.
 * @param focus Optional focus line added to the context.
 * @param signal Optional abort signal forwarded to the agent call.
 * @returns The report text, or a placeholder when the agent returned no text.
 * @throws Error when the agent call reports an error. Synthesis is best-effort
 *   at the pipeline level: the caller is expected to catch this and record it
 *   rather than fail the pipeline.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function runReportSynthesis(
  pi: ExtensionAPI,
  pipeline: PipelineDef,
  task: string,
  stages: StageResult[],
  agentName?: string,
  focus?: string,
  signal?: AbortSignal,
): Promise<string> {
  const context = buildReportContext(pipeline.name, pipeline.description, task, stages, focus);

  const synthesisAgent = agentName || "planner";

  const prompt = `You are a pipeline report synthesizer. A pipeline has just completed its execution.

Below is the execution summary of all stages. Based on this, produce a concise report covering:
1. **What was achieved** — what the pipeline accomplished
2. **Key findings** — important results, decisions, or artifacts produced
3. **Issues** — any failures, problems, or areas needing attention
4. **Next steps / recommendations** — what should be done next

Keep the report concise and actionable. Focus on the substance of what happened.

${context}

---
Generate the pipeline report now.`;

  const response = await executeSubagent(
    pi,
    {
      agent: synthesisAgent,
      task: prompt,
      context: "fresh",
    },
    signal,
  );

  // A failed agent run must not be passed off as the report; surface it so the
  // caller can record it as a synthesis error (same check as runSingleStage).
  if (response.isError) {
    throw new Error(
      `Agent "${synthesisAgent}" failed: ${response.errorText ?? "(unknown error)"}`,
    );
  }

  return extractResponseText(response) || "(report synthesis produced no output)";
}
206
+
207
/**
 * Options passed to the pipeline runner.
 */
export interface RunOptions {
  /** Name of the pipeline to run; looked up as a file in the pipelines directory. */
  pipeline: string;
  /** Task text handed to the pipeline's stages. */
  task: string;
  /** Directory to search for pipeline files (default: `.pi/pipelines` under the context's cwd). */
  pipelinesDir?: string;
  /** Optional AbortSignal to cancel the pipeline mid-execution */
  signal?: AbortSignal;
  /**
   * Optional timeout in milliseconds applied to stage execution.
   * Defaults to 30 minutes (1_800_000 ms) when omitted; a value of 0 or less
   * disables the timeout.
   */
  stageTimeoutMs?: number;
}
219
+
220
/**
 * Main entry point: run a pipeline by name.
 *
 * Flow:
 *   1. Locate and load the pipeline definition; a missing or invalid pipeline
 *      yields a failed result (no exception).
 *   2. Execute the stages strictly in order. Each stage is dispatched by kind:
 *      parallel, expand, review gate, or plain single-agent stage.
 *   3. Unless disabled (`report: false`), run the best-effort report synthesis.
 *   4. Clear the UI status and emit the completion notification.
 *
 * Failure semantics:
 *   - A stage that throws stops the pipeline and returns the partial results.
 *   - Parallel, expand and gate stages that return `success: false` are
 *     recorded and execution continues with the next stage.
 *   - An abort (parent signal or stage timeout) returns the stages completed
 *     so far with a cancellation error.
 *
 * @param pi Extension API used to dispatch agents.
 * @param ctx Extension context (cwd and optional UI).
 * @param options Pipeline name, task, and optional directory/signal/timeout.
 * @returns The pipeline result; this function reports failures in the result
 *   rather than throwing for stage errors.
 */
export async function runPipeline(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  options: RunOptions,
): Promise<PipelineResult> {
  const startTime = Date.now();
  const pipelinesDir = options.pipelinesDir ?? path.join(ctx.cwd, PIPELINES_DIR);

  // Resolve pipeline file
  const filePath = findPipelineFile(pipelinesDir, options.pipeline);
  if (!filePath) {
    const available = listPipelines(pipelinesDir);
    const names = available.map((p) => `  - ${p.name}`).join("\n");
    const msg =
      available.length > 0
        ? `Pipeline "${options.pipeline}" not found.\nAvailable pipelines:\n${names}`
        : `Pipeline "${options.pipeline}" not found.\nNo pipelines defined in ${pipelinesDir}/`;
    return failResult(options.pipeline, options.task, msg, startTime);
  }

  // Load and validate pipeline
  let pipeline: PipelineDef;
  try {
    pipeline = loadPipeline(filePath);
  } catch (err) {
    return failResult(
      options.pipeline,
      options.task,
      `Failed to load pipeline: ${(err as Error).message}`,
      startTime,
    );
  }

  const parentSignal = options.signal;
  const stageTimeoutMs = options.stageTimeoutMs ?? 1_800_000; // default 30 min

  // Check for pre-existing cancellation
  if (parentSignal?.aborted) {
    return failResult(
      options.pipeline,
      options.task,
      "Pipeline cancelled before execution",
      startTime,
    );
  }

  // Execution context
  const outputs = new Map<string, string>();
  let lastFeedback: string | undefined;
  const stages: StageResult[] = [];

  if (ctx.hasUI) {
    ctx.ui.setStatus("pipeline", `🚀 Pipeline: ${pipeline.name}`);
    ctx.ui.notify(
      `🧪 Running pipeline "${pipeline.name}" (${pipeline.stages.length} stages)`,
      "info",
    );
  }

  // Execute each stage sequentially
  for (let i = 0; i < pipeline.stages.length; i++) {
    const stage = pipeline.stages[i]!;
    const stageStart = Date.now();
    const stageLabel = `Stage ${i + 1}/${pipeline.stages.length}: ${stage.id}`;

    // The timeout is per stage, so the signal (and its timer) must be created
    // anew for every stage. A single signal created before the loop would
    // start its timer once and cap the *whole pipeline* at stageTimeoutMs.
    const stageSignal = createStageSignal(parentSignal, stageTimeoutMs);

    try {
      if (stage.parallel && stage.parallel.length > 0) {
        // === PARALLEL STAGE ===
        if (ctx.hasUI) {
          ctx.ui.setStatus(
            "pipeline",
            `⚡ ${stageLabel} (parallel, ${stage.parallel.length} agents)`,
          );
        }

        const parallelResult = await runParallelStage(
          pi,
          ctx,
          stage,
          options.task,
          outputs,
          stageSignal,
        );
        const rawOutput = parallelResult.output;
        const stageOutput = await applyStageReport(pi, stage, rawOutput, options.task, stageSignal);
        outputs.set(stage.id, stageOutput);

        stages.push({
          ...parallelResult,
          stageId: stage.id,
          output: stageOutput,
          rawOutput,
        });
      } else if (stage.expand) {
        // === EXPAND STAGE (dynamic stage expansion) ===
        if (ctx.hasUI) {
          ctx.ui.setStatus("pipeline", `✦ ${stageLabel} (expand from "${stage.expand.from}")`);
        }

        const expandResult = await runExpandStage(
          pi,
          ctx,
          stage,
          options.task,
          outputs,
          stageSignal,
        );

        // Only a successful expansion becomes referenceable by later stages.
        if (expandResult.success) {
          outputs.set(stage.id, expandResult.output);
        }

        stages.push({
          stageId: stage.id,
          success: expandResult.success,
          output: expandResult.output,
          error: expandResult.error,
          durationMs: Date.now() - stageStart,
        });
      } else if (stage.gate) {
        // === REVIEW GATE STAGE ===
        if (ctx.hasUI) {
          ctx.ui.setStatus(
            "pipeline",
            `🔍 ${stageLabel} (gate, max ${stage.gate.maxRounds} rounds)`,
          );
        }

        const gateResult = await runReviewGate(
          pi,
          ctx,
          stage,
          options.task,
          outputs,
          lastFeedback,
          stageSignal,
          pipeline.judgeModel,
        );
        const rawOutput = gateResult.output;
        const stageOutput = await applyStageReport(pi, stage, rawOutput, options.task, stageSignal);
        outputs.set(stage.id, stageOutput);
        // Carries reviewer feedback of a failed gate forward; a passed gate
        // returns no feedback, which clears it.
        lastFeedback = gateResult.lastFeedback;

        stages.push({
          stageId: stage.id,
          success: gateResult.success,
          output: stageOutput,
          rawOutput,
          error: gateResult.error,
          durationMs: Date.now() - stageStart,
          rounds: gateResult.rounds,
          scores: gateResult.scores,
        });
      } else {
        // === SIMPLE AGENT STAGE ===
        if (ctx.hasUI) {
          ctx.ui.setStatus("pipeline", `▶ ${stageLabel} (${stage.agent})`);
        }

        const rawOutput = await runSingleStage(
          pi,
          ctx,
          stage,
          options.task,
          outputs,
          lastFeedback,
          stageSignal,
        );
        const stageOutput = await applyStageReport(pi, stage, rawOutput, options.task, stageSignal);
        outputs.set(stage.id, stageOutput);

        stages.push({
          stageId: stage.id,
          success: true,
          output: stageOutput,
          rawOutput,
          durationMs: Date.now() - stageStart,
        });
      }
    } catch (err) {
      // If the pipeline was aborted (parent cancellation or this stage's
      // timeout), return partial results gracefully. The name is read
      // defensively because an abort reason may be any thrown value.
      const errName = (err as { name?: unknown } | null | undefined)?.name;
      if (errName === "AbortError" || stageSignal?.aborted) {
        if (ctx.hasUI) {
          ctx.ui.setStatus("pipeline", "");
          ctx.ui.notify(`⏹ Pipeline "${pipeline.name}" cancelled`, "warning");
        }
        return {
          pipelineName: pipeline.name,
          task: options.task,
          success: false,
          stages,
          totalDurationMs: Date.now() - startTime,
          error: `Pipeline cancelled at stage "${stage.id}" after ${stages.length} completed stages`,
        };
      }
      const errorMsg = `Stage "${stage.id}" failed: ${(err as Error).message}`;
      stages.push({
        stageId: stage.id,
        success: false,
        output: "",
        error: errorMsg,
        durationMs: Date.now() - stageStart,
      });

      if (ctx.hasUI) {
        ctx.ui.setStatus("pipeline", "");
        ctx.ui.notify(`❌ ${errorMsg}`, "error");
      }

      return {
        pipelineName: pipeline.name,
        task: options.task,
        success: false,
        stages,
        totalDurationMs: Date.now() - startTime,
        error: errorMsg,
      };
    }
  }

  // --- Report synthesis ---
  // Run BEFORE the pipeline-complete notification to avoid the appearance
  // of completion while synthesis is still running.
  const pipelineSuccess = stages.every((s) => s.success);
  const result: PipelineResult = {
    pipelineName: pipeline.name,
    task: options.task,
    success: pipelineSuccess,
    stages,
    totalDurationMs: Date.now() - startTime,
  };

  // Run the report synthesizer unless explicitly disabled
  const reportCfg = pipeline.report !== false ? (pipeline.report ?? {}) : null;
  if (reportCfg) {
    try {
      // Synthesis gets the parent signal without the stage timeout,
      // but with a 2-minute self-imposed limit so it doesn't hang
      const synthSignal = parentSignal
        ? AbortSignal.any([parentSignal, AbortSignal.timeout(120_000)])
        : AbortSignal.timeout(120_000);

      const synthesis = await runReportSynthesis(
        pi,
        pipeline,
        options.task,
        stages,
        reportCfg.agent,
        reportCfg.focus,
        synthSignal,
      );
      result.synthesis = synthesis;
    } catch (synthErr) {
      // Synthesis failure does NOT fail the pipeline — it's best-effort
      result.synthesisError = `Report synthesis failed: ${(synthErr as Error).message}`;
    }
  }

  // Pipeline complete (after synthesis to avoid showing "done" while synthesis runs)
  if (ctx.hasUI) {
    ctx.ui.setStatus("pipeline", "");
    if (result.synthesisError) {
      ctx.ui.notify(
        `✅ Pipeline "${pipeline.name}" complete (report synthesis: ⚠️ ${result.synthesisError})`,
        "info",
      );
    } else {
      ctx.ui.notify(
        `✅ Pipeline "${pipeline.name}" complete: ${stages.filter((s) => s.success).length}/${stages.length} stages passed`,
        "info",
      );
    }
  }

  return result;
}
501
+
502
/**
 * Execute one plain agent stage through pi-subagents and return its text.
 *
 * The stage's task template is resolved against the pipeline task, the outputs
 * collected so far and the most recent reviewer feedback, then sent to the
 * stage's agent with clarification disabled.
 *
 * @param pi Extension API used to dispatch the agent.
 * @param ctx Extension context; its cwd is passed to the agent.
 * @param stage Stage to run (its `agent` is expected to be set).
 * @param task The overall pipeline task.
 * @param outputs Outputs of earlier stages, available to the template.
 * @param lastFeedback Latest reviewer feedback, if any, available to the template.
 * @param signal Optional abort signal forwarded to the agent call.
 * @returns The text extracted from the agent's response.
 * @throws Error when the agent response is flagged as an error.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function runSingleStage(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  stage: Stage,
  task: string,
  outputs: Map<string, string>,
  lastFeedback: string | undefined,
  signal?: AbortSignal,
): Promise<string> {
  const prompt = resolveTemplate(stage.task ?? "", task, outputs, lastFeedback);
  const agent = stage.agent!;

  const reply = await executeSubagent(
    pi,
    {
      agent,
      task: prompt,
      clarify: false,
      model: stage.model,
      agentScope: "both",
      cwd: ctx.cwd,
    },
    signal,
  );

  if (!reply.isError) {
    return extractResponseText(reply);
  }
  throw new Error(`Agent "${agent}" failed: ${reply.errorText ?? "(unknown error)"}`);
}
537
+
538
/**
 * Fan a parallel stage out to several agents in one pi-subagents request.
 *
 * Each child's task template is resolved (without reviewer feedback) and all
 * children are submitted together. On success, every child's share of the
 * combined response is stored in `outputs` under the child's id so later
 * stages can reference `{outputs.<childId>}`. On an error response nothing is
 * stored and a failed result carrying the combined text is returned.
 *
 * @param pi Extension API used to dispatch the agents.
 * @param ctx Extension context (cwd, optional UI).
 * @param stage Stage whose `parallel` children are run.
 * @param task The overall pipeline task.
 * @param outputs Stage outputs map; read for templates and extended with child outputs.
 * @param signal Optional abort signal forwarded to the agent call.
 * @returns The stage result with the combined output of all children.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function runParallelStage(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  stage: Stage,
  task: string,
  outputs: Map<string, string>,
  signal?: AbortSignal,
): Promise<StageResult> {
  const startedAt = Date.now();
  const children = stage.parallel!;

  if (ctx.hasUI) {
    ctx.ui.notify(`⚡ Running ${children.length} agents in parallel`, "info");
  }

  // One task descriptor per child; children without an agent use "worker".
  const tasks = children.map((child) => {
    const resolved = resolveTemplate(child.task ?? "", task, outputs, undefined);
    return { agent: child.agent ?? "worker", task: resolved, model: child.model };
  });

  // A single bridge call runs all children concurrently.
  const response = await executeSubagent(
    pi,
    { tasks, clarify: false, agentScope: "both", cwd: ctx.cwd },
    signal,
  );
  const combined = extractResponseText(response);

  if (response.isError) {
    return {
      stageId: stage.id,
      success: false,
      output: combined,
      error: response.errorText,
      durationMs: Date.now() - startedAt,
    };
  }

  // Keep each child's slice addressable for downstream templates.
  for (const [index, child] of children.entries()) {
    outputs.set(child.id, extractTaskOutput(combined, index, children.length));
  }

  return {
    stageId: stage.id,
    success: true,
    output: combined,
    durationMs: Date.now() - startedAt,
  };
}
600
+
601
/**
 * Run an expand stage: turn the items found in an earlier stage's output into
 * one task each, run those tasks in parallel, and merge their outputs.
 *
 * Steps:
 *   1. Read the source stage's output from `outputs` (missing or empty → failure).
 *   2. Parse it into items (parse errors → failure).
 *   3. Keep at most `expand.maxItems` items (default 10); zero items is a
 *      successful no-op.
 *   4. Build one dynamic stage per item and submit them as a single parallel
 *      pi-subagents request.
 *   5. Join the per-task outputs, each under a `### <stage id>` heading.
 *
 * NOTE: Gates on expand templates are NOT executed in v1. Quality checks
 * should use a separate parallel/review stage after the expand stage.
 *
 * @param pi Extension API used to dispatch the agents.
 * @param ctx Extension context (cwd, optional UI).
 * @param stage Stage carrying the `expand` configuration.
 * @param task The overall pipeline task.
 * @param outputs Outputs of earlier stages.
 * @param signal Optional abort signal forwarded to the agent call.
 * @returns The stage result; non-abort errors are reported in the result.
 * @throws Re-throws abort errors so the caller can handle cancellation.
 * @internal Exported for testing. See pipeline-expand.test.ts
 */
export async function runExpandStage(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  stage: Stage,
  task: string,
  outputs: Map<string, string>,
  signal?: AbortSignal,
): Promise<StageResult> {
  const startedAt = Date.now();
  const expandCfg = stage.expand!;

  // Shared shape for every unsuccessful outcome.
  const failed = (error: string | undefined, output = ""): StageResult => ({
    stageId: stage.id,
    success: false,
    output,
    error,
    durationMs: Date.now() - startedAt,
  });

  // 1. Source output
  const source = outputs.get(expandCfg.from);
  if (!source) {
    return failed(
      `Expand source stage "${expandCfg.from}" has no output. Stages before "${stage.id}": ${[...outputs.keys()].join(", ")}`,
    );
  }

  // 2. Items
  let parsed: StageItem[];
  try {
    parsed = parseItems(source);
  } catch (err) {
    return failed(
      `Failed to parse items from stage "${expandCfg.from}": ${(err as Error).message}`,
    );
  }

  // 3. Cap the fan-out
  const selected = parsed.slice(0, expandCfg.maxItems ?? 10);
  if (selected.length === 0) {
    // Nothing to expand is not an error.
    if (ctx.hasUI) {
      ctx.ui.notify(`✦ Stage "${stage.id}": no items to expand`, "info");
    }
    return {
      stageId: stage.id,
      success: true,
      output: "(no items to expand)",
      durationMs: Date.now() - startedAt,
    };
  }

  // 4. One dynamic stage per item
  const generated = buildExpandStages(stage, selected, task, outputs);
  if (ctx.hasUI) {
    ctx.ui.notify(
      `✦ Stage "${stage.id}": expanding into ${generated.length} parallel tasks`,
      "info",
    );
  }

  // 5. Plain agent calls only (no gates in v1), submitted together.
  const tasks = generated.map((g) => ({
    agent: g.agent ?? "worker",
    task: g.task ?? "",
    model: g.model,
  }));

  try {
    const response = await executeSubagent(
      pi,
      { tasks, clarify: false, agentScope: "both", cwd: ctx.cwd },
      signal,
    );
    const combined = extractResponseText(response);

    if (response.isError) {
      return failed(response.errorText, combined);
    }

    // Label each task's slice of the combined output with its stage id.
    const sections = generated.map(
      (g, idx) => `### ${g.id}\n${extractTaskOutput(combined, idx, generated.length)}`,
    );

    return {
      stageId: stage.id,
      success: true,
      output: sections.join("\n\n---\n\n"),
      durationMs: Date.now() - startedAt,
    };
  } catch (err) {
    // Cancellation is the caller's concern; everything else becomes a failed result.
    if ((err as Error).name === "AbortError" || signal?.aborted) {
      throw err;
    }
    return failed(`Expand stage "${stage.id}" failed: ${(err as Error).message}`);
  }
}
741
+
742
/**
 * Pick one task's portion out of the combined text of a parallel run.
 *
 * This is a heuristic, not a parser:
 *   - With one task (or fewer) the whole text, trimmed, is returned.
 *   - Otherwise the text is split on runs of two or more newlines. If that
 *     yields at least `totalTasks` non-empty chunks, the chunk at `taskIndex`
 *     is returned (empty string when the index is out of range).
 *   - Otherwise the text is cut into `totalTasks` equal-width character
 *     ranges, with the last task also receiving the remainder.
 *
 * @param combined Combined response text of all tasks.
 * @param taskIndex Zero-based index of the wanted task.
 * @param totalTasks Number of tasks that contributed to `combined`.
 * @returns The trimmed portion attributed to the task.
 * @internal Exported for testing. See pipeline-expand.test.ts
 */
export function extractTaskOutput(combined: string, taskIndex: number, totalTasks: number): string {
  if (totalTasks <= 1) {
    return combined.trim();
  }

  // Blank-line separated chunks are taken as task boundaries when there are enough of them.
  const chunks = combined.split(/\n{2,}/).filter((chunk) => chunk.length > 0);
  if (chunks.length >= totalTasks) {
    const picked = chunks[taskIndex];
    return picked === undefined ? "" : picked.trim();
  }

  // Too few chunks: fall back to equal-width character ranges.
  const width = Math.floor(combined.length / totalTasks);
  const from = taskIndex * width;
  const isLastTask = taskIndex === totalTasks - 1;
  return combined.slice(from, isLastTask ? combined.length : from + width).trim();
}
766
+
767
/**
 * Run a review gate stage.
 *
 * Pattern (repeated for at most `gate.maxRounds` rounds):
 *   1. Worker executes the task (template resolved with the current feedback)
 *   2. N reviewers evaluate the result (one parallel request)
 *   3. Scores are averaged
 *   4. If avg >= targetScore → pass; the output gets a review summary appended
 *   5. If avg < targetScore → reviewer feedback is fed into the next round
 *
 * Worker and reviewer calls each get their own per-round timeout, combined
 * with the caller's signal when one is given.
 *
 * @param pi Extension API used to dispatch agents.
 * @param ctx Extension context (cwd, optional UI).
 * @param stage Stage to run; `agent` and `gate` are expected to be set.
 * @param task The overall pipeline task.
 * @param outputs Outputs of earlier stages, available to the task template.
 * @param lastFeedback Feedback carried in from an earlier stage, used in round 1.
 * @param signal Optional abort signal from the caller.
 * @param pipelineJudgeModel Pipeline-level reviewer model, used when the gate
 *   sets none; the stage's model is the final fallback.
 * @returns On pass: success with the round count and that round's scores.
 *   On exhaustion: failure with the last worker output, the last round's
 *   scores, and the feedback prepared for a further attempt.
 * @throws The abort reason when `signal` is already aborted at the start of a
 *   round; Error when the worker or the reviewers report an error.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function runReviewGate(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  stage: Stage,
  task: string,
  outputs: Map<string, string>,
  lastFeedback: string | undefined,
  signal?: AbortSignal,
  pipelineJudgeModel?: string,
): Promise<StageResult & { lastFeedback?: string }> {
  const stageStart = Date.now();
  const agentName = stage.agent!;
  const gate = stage.gate!;
  let currentFeedback = lastFeedback;
  let lastOutput = "";
  // Scores of the most recent round, so a failed gate reports what the
  // reviewers actually gave instead of placeholder values.
  let lastScores: number[] = [];

  for (let round = 1; round <= gate.maxRounds; round++) {
    if (ctx.hasUI) {
      ctx.ui.notify(`🔄 Stage "${stage.id}": round ${round}/${gate.maxRounds}`, "info");
    }

    // 1. Worker executes the task (with per-round timeout)
    const resolvedTask = resolveTemplate(stage.task ?? "", task, outputs, currentFeedback);

    if (signal?.aborted) throw signal.reason ?? new DOMException("Aborted", "AbortError");

    const workerSignal = signal
      ? AbortSignal.any([signal, AbortSignal.timeout(GATE_WORKER_TIMEOUT_MS)])
      : AbortSignal.timeout(GATE_WORKER_TIMEOUT_MS);

    const workerResponse = await executeSubagent(
      pi,
      {
        agent: agentName,
        task: resolvedTask,
        clarify: false,
        model: stage.model,
        agentScope: "both",
        cwd: ctx.cwd,
      },
      workerSignal,
    );

    if (workerResponse.isError) {
      throw new Error(
        `Worker round ${round} failed: ${workerResponse.errorText ?? "(unknown error)"}`,
      );
    }

    lastOutput = extractResponseText(workerResponse);

    // 2. Run reviewers in parallel (with per-round timeout)
    const reviewerTasks = gate.reviewers.map((reviewer) => ({
      agent: reviewer.agent ?? "reviewer",
      task: buildReviewerTask(reviewer.focus, lastOutput, stage.id),
      model: gate.judgeModel ?? pipelineJudgeModel ?? stage.model,
    }));

    const reviewerSignal = signal
      ? AbortSignal.any([signal, AbortSignal.timeout(GATE_REVIEWER_TIMEOUT_MS)])
      : AbortSignal.timeout(GATE_REVIEWER_TIMEOUT_MS);

    const reviewsResponse = await executeSubagent(
      pi,
      {
        tasks: reviewerTasks,
        clarify: false,
        agentScope: "both",
        cwd: ctx.cwd,
      },
      reviewerSignal,
    );

    // A failed reviewer run carries no verdict. Without this check its error
    // text would be parsed as a review, scored 0, and sent to the worker as
    // "feedback" for another round.
    if (reviewsResponse.isError) {
      throw new Error(
        `Reviewers round ${round} failed: ${reviewsResponse.errorText ?? "(unknown error)"}`,
      );
    }

    // 3. Parse review outputs to extract scores
    const rawReviewOutput = extractResponseText(reviewsResponse);
    const reviews = parseReviewOutputs(rawReviewOutput, reviewerTasks.length);

    const scores = reviews.map((r) => r.score);
    lastScores = scores;
    const average = scores.reduce((a, b) => a + b, 0) / scores.length;
    const allFeedback = reviews
      .map((r) => r.feedback)
      .filter(Boolean)
      .join("\n\n");

    if (ctx.hasUI) {
      ctx.ui.notify(
        `📊 Stage "${stage.id}" round ${round}: scores [${scores.join(", ")}] avg=${average.toFixed(1)} target=${gate.targetScore}`,
        average >= gate.targetScore ? "info" : "warning",
      );
    }

    // 4. Check if passed
    if (average >= gate.targetScore) {
      if (ctx.hasUI) {
        ctx.ui.notify(
          `✅ Stage "${stage.id}" passed gate (${average.toFixed(1)}≥${gate.targetScore})`,
          "info",
        );
      }

      // Each reviewer's feedback is capped at 500 characters in the summary.
      const reviewSummary = reviews
        .map(
          (r, i) => `### Reviewer ${i + 1}\n- Score: ${r.score}/10\n- ${r.feedback.slice(0, 500)}`,
        )
        .join("\n\n");

      const finalOutput = `${lastOutput}\n\n---\n\n## Gate Review Passed (Round ${round})\n\n${reviewSummary}`;

      return {
        stageId: stage.id,
        success: true,
        output: finalOutput,
        rounds: round,
        scores,
        durationMs: Date.now() - stageStart,
      };
    }

    // 5. Not passed — prepare feedback for next round
    if (ctx.hasUI) {
      ctx.ui.notify(
        `⚠ Stage "${stage.id}" round ${round}: score ${average.toFixed(1)} < ${gate.targetScore}, retrying...`,
        "warning",
      );
    }

    currentFeedback = [
      `## Reviewer Feedback (Round ${round})`,
      "",
      `Average Score: ${average.toFixed(1)}/${gate.targetScore}`,
      "",
      "Issues to fix:",
      allFeedback,
      "",
      "Please address ALL of the above issues in your next attempt.",
    ].join("\n");
  }

  // Exhausted max rounds — gate failed
  return {
    stageId: stage.id,
    success: false,
    output: lastOutput,
    error: `Failed to pass review gate after ${gate.maxRounds} rounds. Scores: [${lastScores.join(", ")}]`,
    rounds: gate.maxRounds,
    scores: lastScores,
    durationMs: Date.now() - stageStart,
    lastFeedback: currentFeedback,
  };
}
926
+
927
/**
 * Compose the prompt sent to a gate reviewer.
 *
 * The prompt names the stage, states the review focus, embeds the work product
 * verbatim, and instructs the reviewer to finish with a `SCORE: <0-10>` line.
 *
 * @param focus What the reviewer should concentrate on.
 * @param output The work product under review.
 * @param stageId Id of the stage that produced the work product.
 * @returns The complete reviewer prompt.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export function buildReviewerTask(focus: string, output: string, stageId: string): string {
  const promptLines = [
    `You are a reviewer for stage "${stageId}" of a multi-agent pipeline.`,
    "",
    "## Review Focus",
    focus,
    "",
    "## Work Product to Review",
    "",
    output,
    "",
    "## Instructions",
    "1. Analyze the work product critically against the review focus.",
    "2. Write your detailed review analysis.",
    "3. On the VERY LAST LINE of your response, output exactly:",
    "",
    "SCORE: <number between 0 and 10>",
    "",
    "Where 10 = perfect, 8+ = acceptable, 5+ = needs improvement, <5 = unacceptable.",
    "Be honest and critical. Do NOT inflate scores.",
  ];
  return promptLines.join("\n");
}
951
+
952
/**
 * Parse the combined reviewer response into one `{ score, feedback }` per reviewer.
 *
 * Every `SCORE: <number>` marker (case-insensitive) counts as one reviewer's
 * verdict; scores are clamped to the 0–10 range.
 *
 * When at least `expectedCount` markers are present, the first `expectedCount`
 * are used and feedback is attributed by layout:
 *   - Score-last (the layout reviewers are instructed to use — the score is
 *     the final line of each review): a score is paired with the text that
 *     precedes it, back to the previous marker. Text after the final marker is
 *     appended to the final review.
 *   - Score-first (the response starts with a marker): a score is paired with
 *     the text that follows it, up to the next marker.
 *
 * With fewer markers than expected, the whole response is treated as a single
 * review: the last score found (0 when there is none) is replicated for every
 * reviewer and the marker-stripped text becomes the first reviewer's feedback.
 *
 * @param rawOutput Combined text of all reviewer responses.
 * @param expectedCount Number of reviewers that were asked.
 * @returns Exactly `expectedCount` entries.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export function parseReviewOutputs(
  rawOutput: string,
  expectedCount: number,
): Array<{ score: number; feedback: string }> {
  const clampScore = (raw: string): number => Math.max(0, Math.min(10, parseFloat(raw)));
  const scoreMatches = [...rawOutput.matchAll(/SCORE:\s*(\d+(?:\.\d+)?)/gi)];

  if (scoreMatches.length >= expectedCount) {
    // segments[i] is the text before marker i; the final entry is the text
    // after the last marker. Always scoreMatches.length + 1 entries.
    const segments: string[] = [];
    let cursor = 0;
    for (const match of scoreMatches) {
      const start = match.index ?? cursor;
      segments.push(rawOutput.slice(cursor, start).trim());
      cursor = start + match[0].length;
    }
    segments.push(rawOutput.slice(cursor).trim());

    // No text ahead of the first marker means the scores lead their reviews.
    const scoreLeads = segments[0] === "";
    const finalMarker = scoreMatches.length - 1;

    return scoreMatches.slice(0, expectedCount).map((match, i) => {
      let feedback: string;
      if (scoreLeads) {
        feedback = segments[i + 1] ?? "";
      } else {
        const preceding = segments[i] ?? "";
        // Only the very last marker can own the text that trails it; between
        // markers that text belongs to the next review.
        const trailing = i === finalMarker ? (segments[i + 1] ?? "") : "";
        feedback = [preceding, trailing].filter(Boolean).join("\n\n");
      }
      return { score: clampScore(match[1]!), feedback: feedback || "(no feedback)" };
    });
  }

  // Last resort: treat entire output as one reviewer's response
  const lastMatch = scoreMatches[scoreMatches.length - 1];
  const score = lastMatch ? clampScore(lastMatch[1]!) : 0;
  const feedback = rawOutput.replace(/SCORE:\s*\d+(?:\.\d+)?/gi, "").trim();

  // Replicate for expected count
  return Array.from({ length: expectedCount }, (_, i) => ({
    score,
    feedback: i === 0 ? feedback || "(no feedback)" : "(combined response)",
  }));
}
996
+
997
/**
 * One entry parsed out of a source stage's output, used to expand a template
 * stage into concrete stages. Every property can be referenced from the
 * template as {item.<property>}.
 */
export interface StageItem {
  [field: string]: unknown;
}
1004
+
1005
/**
 * Parse a source stage's output into an array of items for dynamic expansion.
 *
 * If the whole output is wrapped in a single markdown code fence
 * (``` … ```), the fence is removed first so fenced JSON/YAML is parsed
 * like unfenced output.
 *
 * Strategy:
 * 1. Try JSON — expects an array, or an object with an "items" array
 * 2. Try YAML — expects a sequence
 * 3. Fallback — parse as markdown list (-, *, `1.` or `1)`)
 *
 * Entries are normalised: strings become `{ value }`, non-null objects are
 * used as-is, every other value becomes `{ value: String(entry) }`.
 *
 * @param output — raw text produced by the source stage
 * @returns the parsed items; `[]` when the output is empty or whitespace-only
 * @throws Error if none of the strategies yields any item
 * @internal Exported for testing. See pipeline-expand.test.ts
 */
export function parseItems(output: string): StageItem[] {
  if (!output.trim()) return [];

  // Shared normalisation for entries coming from either JSON or YAML.
  const toItem = (entry: unknown): StageItem => {
    if (typeof entry === "string") return { value: entry };
    if (typeof entry === "object" && entry !== null) return entry as StageItem;
    return { value: String(entry) };
  };

  // Unwrap a single surrounding code fence; otherwise parse the text unchanged.
  const fenced = /^```[^\n]*\n([\s\S]*?)\n?```$/.exec(output.trim());
  const text = fenced ? (fenced[1] ?? "") : output;

  // Strategy 1: JSON
  try {
    const parsed: unknown = JSON.parse(text);
    if (Array.isArray(parsed)) {
      return parsed.map(toItem);
    }
    if (typeof parsed === "object" && parsed !== null) {
      const items = (parsed as Record<string, unknown>).items;
      if (Array.isArray(items)) {
        return items.map(toItem);
      }
    }
  } catch {
    // Not JSON, continue to next strategy
  }

  // Strategy 2: YAML list
  // NOTE(review): the input is agent-generated text. js-yaml >= 4 `load` uses a
  // safe default schema — confirm the installed version before relying on that.
  try {
    const parsed: unknown = yaml.load(text);
    if (Array.isArray(parsed)) {
      return parsed.map(toItem);
    }
  } catch {
    // Not YAML, continue to fallback
  }

  // Strategy 3: markdown list fallback.
  // Strip exactly ONE leading marker so content that itself starts with a
  // number ("- 2024. notes") is kept intact.
  const listMarker = /^(?:[-*] |\d+[.)]\s)\s*/;
  const values = text
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => listMarker.test(line))
    .map((line) => line.replace(listMarker, ""));

  if (values.length > 0) {
    return values.map((value) => ({ value }));
  }

  throw new Error(
    "Cannot parse items from output. Expected JSON array, YAML list, or markdown list.",
  );
}
1078
+
1079
/**
 * Resolve {item.key} and {item} variables in an already-resolved task template.
 *
 * Substitution happens in a single pass over the template, so placeholder-like
 * text inside an item's own values (e.g. a value containing "{item}") is
 * inserted literally and never expanded again.
 *
 * Rendering rules:
 * - {item.key}: null/undefined → "", plain objects and arrays →
 *   JSON.stringify, everything else → String(value). Placeholders whose key
 *   is not a property of the item are left untouched.
 * - {item}: for string-valued items (`{ value: "..." }`, e.g. from the
 *   markdown fallback) the value itself; otherwise JSON.stringify(item).
 *
 * @param template — task text containing {item…} placeholders
 * @param item — the item whose values are substituted
 * @returns the template with item placeholders resolved
 * @internal Exported for testing. See pipeline-expand.test.ts
 */
export function expandItemTemplate(template: string, item: StageItem): string {
  const render = (value: unknown): string => {
    if (value === null || value === undefined) return "";
    // String() on a plain object yields "[object Object]"; emit JSON instead.
    const isPlainObject = Object.prototype.toString.call(value) === "[object Object]";
    if (Array.isArray(value) || isPlainObject) return JSON.stringify(value);
    return String(value);
  };

  // A lone `value` property marks an item that came from a plain string.
  const isBareValue = Object.keys(item).length === 1 && "value" in item;

  return template.replace(
    /\{item(?:\.([^{}]*))?\}/g,
    (placeholder: string, key: string | undefined): string => {
      if (key === undefined) {
        return isBareValue ? render(item.value) : JSON.stringify(item);
      }
      // Only own enumerable properties count, matching Object.entries semantics.
      return Object.prototype.propertyIsEnumerable.call(item, key)
        ? render(item[key])
        : placeholder;
    },
  );
}
1110
+
1111
/**
 * Turn one template stage plus a list of parsed items into concrete stages.
 *
 * Every generated stage:
 * - is named "{template.id}-{n}" with n counting from 1
 * - copies agent, model, gate, output, reads and maxSubagentDepth from the template
 * - carries the template task with {task}/{outputs.*} resolved once up front
 *   and the {item…} variables resolved per item
 *
 * NOTE: Gates are propagated on the Stage type for schema compatibility
 * but are NOT executed by the runner in v1 (see runPipeline).
 *
 * @param template — the expand stage template from YAML
 * @param items — parsed items from the source stage
 * @param task — original user task
 * @param outputs — outputs from all previous stages (for {outputs.*} resolution)
 * @returns one stage per item, in item order; empty when there are no items
 * @internal Exported for testing. See pipeline-expand.test.ts
 */
export function buildExpandStages(
  template: Stage,
  items: StageItem[],
  task: string,
  outputs: Map<string, string>,
): Stage[] {
  const expanded: Stage[] = [];
  if (items.length === 0) {
    return expanded;
  }

  // {task} and {outputs.*} do not depend on the item, so resolve them once.
  // No feedback is passed, so {lastFeedback} is left as-is.
  const sharedTask = resolveTemplate(template.task ?? "", task, outputs, undefined);

  for (const [index, item] of items.entries()) {
    expanded.push({
      id: `${template.id}-${index + 1}`,
      agent: template.agent,
      task: expandItemTemplate(sharedTask, item),
      model: template.model,
      gate: template.gate,
      output: template.output,
      reads: template.reads,
      maxSubagentDepth: template.maxSubagentDepth,
    });
  }

  return expanded;
}
1153
+
1154
/**
 * Resolve template variables in a task string.
 *
 * Supported:
 *   {task}          — original user task
 *   {outputs.<id>}  — output from a previous stage; `<id>` may contain word
 *                     characters and hyphens. Unknown ids resolve to a
 *                     `[No output from stage "<id>"]` marker.
 *   {lastFeedback}  — last review feedback (inside gate retries); left
 *                     untouched when `lastFeedback` is undefined
 *
 * All variables are substituted in ONE pass with replacer callbacks, which
 * guarantees that inserted text is taken literally: `$`-sequences such as
 * `$$` or `$&` are not interpreted as replacement patterns, and placeholders
 * that happen to appear inside the task, an output or the feedback are not
 * expanded a second time.
 *
 * @param template — task text containing placeholders
 * @param task — original user task
 * @param outputs — outputs of previous stages keyed by stage id
 * @param lastFeedback — feedback of the previous review round, if any
 * @returns the template with all resolvable placeholders substituted
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export function resolveTemplate(
  template: string,
  task: string,
  outputs: Map<string, string>,
  lastFeedback: string | undefined,
): string {
  return template.replace(
    /\{(task|lastFeedback|outputs\.([\w-]+))\}/g,
    (placeholder: string, name: string, stageId: string | undefined): string => {
      if (name === "task") return task;
      if (name === "lastFeedback") return lastFeedback ?? placeholder;
      return outputs.get(stageId ?? "") ?? `[No output from stage "${stageId}"]`;
    },
  );
}
1181
+
1182
/**
 * Render a pipeline result as a human-readable markdown summary.
 *
 * Layout: header (name, task, status line), optional synthesis report as a
 * blockquote, optional synthesis note, one "###" section per stage, and a
 * trailing "Fatal" line when the pipeline has an error that no stage reports.
 *
 * @param result — the finished pipeline result
 * @returns the summary as a single newline-joined string
 */
export function formatPipelineResult(result: PipelineResult): string {
  const passedStages = result.stages.filter((s) => s.success).length;
  const statusLabel = result.success ? "✅ PASSED" : "❌ FAILED";

  // --- Pipeline header ---
  const header: string[] = [
    `# ${result.success ? "✅" : "❌"} Pipeline: ${result.pipelineName}`,
    `Task: ${result.task}`,
    `Status: ${statusLabel} · ${passedStages}/${result.stages.length} stages passed · ${formatDuration(result.totalDurationMs)}`,
    "",
  ];

  // --- Synthesis report (prominently at the top), quoted line by line ---
  const report: string[] = [];
  if (result.synthesis) {
    const quoted = result.synthesis
      .trim()
      .split("\n")
      .map((synLine) => `> ${synLine}`);
    report.push("> 📋 **Pipeline Report**", ">", ...quoted, "");
  }
  if (result.synthesisError) {
    report.push(`> ⚠️ *Report synthesis note: ${result.synthesisError}*`, "");
  }

  // --- Stage details ---
  const stageDetails: string[] = result.stages.flatMap((stage) => {
    const marker = stage.success ? "✅" : "❌";
    const roundsLabel = stage.rounds
      ? ` (${stage.rounds} round${stage.rounds > 1 ? "s" : ""})`
      : "";
    const scoresLabel = stage.scores?.length ? ` scores=[${stage.scores.join(", ")}]` : "";

    const section: string[] = [
      `### ${marker} ${stage.stageId}${roundsLabel}${scoresLabel}`,
      `Duration: ${formatDuration(stage.durationMs)}`,
    ];
    if (stage.error) {
      section.push(`Error: ${stage.error}`);
    }
    if (stage.output) {
      section.push(`Output:\n${stage.output}`);
    }
    section.push("");
    return section;
  });

  // A pipeline-level error is only shown when no stage already carries one.
  const noStageHasError = result.stages.every((s) => !s.error);
  const footer: string[] =
    result.error && noStageHasError ? [`---\n**Fatal:** ${result.error}`] : [];

  return [...header, ...report, "## Stages", "", ...stageDetails, ...footer].join("\n");
}
1237
+
1238
/**
 * Build a structured context message for LLM injection.
 *
 * Used after pipeline completion to give the agent a clear picture of the
 * run: a header and status line, a per-stage results table, per-stage output
 * excerpts (last 500 characters), the synthesis (if any), and a closing
 * instruction asking for a narrative summary.
 *
 * The header icon follows `result.success`, matching the status line below it.
 *
 * @param result — the finished pipeline result
 * @returns the context message as a single newline-joined markdown string
 */
export function buildPipelineContextMessage(result: PipelineResult): string {
  const lines: string[] = [];

  const icon = result.success ? "\u2705" : "\u274C";
  lines.push(`## ${icon} Pipeline Result: ${result.pipelineName}`);
  lines.push("");
  lines.push(
    `**Status:** ${icon} ${result.success ? "PASSED" : "FAILED"} \u00B7 ${result.stages.filter((s) => s.success).length}/${result.stages.length} stages passed \u00B7 ${formatDuration(result.totalDurationMs)}`,
  );
  if (result.task) lines.push(`**Task:** ${result.task}`);
  lines.push("");

  // Stage results table
  lines.push("### Stage Results");
  lines.push("");
  lines.push("| Stage | Agent | Result | Duration |");
  lines.push("|-------|-------|--------|----------|");
  for (const stage of result.stages) {
    const stageIcon = stage.success ? "\u2705" : "\u274C";
    const duration = formatDuration(stage.durationMs);
    // NOTE(review): the Agent column is guessed from substrings of the stage
    // id, not read from the stage's configured agent — confirm this is intended.
    lines.push(
      `| ${stage.stageId} | ${stage.stageId.includes("review") ? "reviewer" : stage.stageId.includes("stability") ? "worker" : "agent"} | ${stageIcon} ${stage.success ? "Pass" : "Fail"}${stage.rounds ? ` (${stage.rounds}r)` : ""}${stage.scores?.length ? ` [${stage.scores.join(", ")}]` : ""} | ${duration} |`,
    );
  }
  lines.push("");

  // Stage output highlights (truncated)
  lines.push("### Stage Output Highlights");
  lines.push("");
  for (const stage of result.stages) {
    lines.push(`**${stage.stageId}**`);
    if (stage.error) {
      lines.push("```");
      lines.push(`Error: ${stage.error}`);
      lines.push("```");
    }
    if (stage.output) {
      // Keep the tail of long outputs; "..." marks the cut-off beginning.
      const truncated = stage.output.length > 500 ? "..." + stage.output.slice(-500) : stage.output;
      lines.push("```");
      lines.push(truncated);
      lines.push("```");
    }
    lines.push("");
  }

  // Pipeline synthesis
  if (result.synthesis) {
    lines.push("### Pipeline Synthesis");
    lines.push("> \uD83D\uDCCB " + result.pipelineName + " synthesis");
    lines.push(">");
    for (const synLine of result.synthesis.trim().split("\n")) {
      lines.push(`> ${synLine}`);
    }
    lines.push("");
  }
  if (result.synthesisError) {
    lines.push(`> \u26A0\uFE0F *Synthesis note: ${result.synthesisError}*`);
    lines.push("");
  }

  // Instruction for the LLM
  lines.push("---");
  lines.push("");
  lines.push("**Instructions for the Agent:**");
  lines.push("");
  lines.push("Please analyze the pipeline results above and provide a narrative summary. Cover:");
  lines.push("");
  lines.push("1. **Overall outcome** \u2014 did the pipeline pass or fail? What does this mean?");
  lines.push(
    "2. **Stage breakdown** \u2014 which stages succeeded, which failed, and their key findings",
  );
  lines.push(
    "3. **Key issues** \u2014 blocking issues, review scores, recommendations from each stage",
  );
  lines.push("4. **Synthesis** \u2014 the pipeline\u2019s own assessment (if available)");
  lines.push("5. **Next steps** \u2014 what should be done next based on the results");
  lines.push("");
  lines.push(
    "Write your summary in the same language as this conversation. Be concise but informative.",
  );
  lines.push("");
  lines.push(
    "If all stages passed, state that clearly. If any failed, explain what failed and why.",
  );

  return lines.join("\n");
}
1330
+
1331
/**
 * Create the result for a pipeline that failed before producing any stage
 * results: `success` is false, `stages` is empty, and the duration is the
 * time elapsed since `startTime`.
 *
 * @param pipelineName — name of the pipeline that failed
 * @param task — original user task
 * @param error — description of what went wrong
 * @param startTime — epoch milliseconds at which the run started
 * @returns a failed PipelineResult carrying the error
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export function failResult(
  pipelineName: string,
  task: string,
  error: string,
  startTime: number,
): PipelineResult {
  const elapsedMs = Date.now() - startTime;
  const failure: PipelineResult = {
    pipelineName,
    task,
    success: false,
    stages: [],
    totalDurationMs: elapsedMs,
    error,
  };
  return failure;
}