pi-pipelines 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/LICENSE +21 -0
- package/README.md +367 -0
- package/extensions/config-loader.ts +444 -0
- package/extensions/index.ts +447 -0
- package/extensions/pipeline-runner.ts +1346 -0
- package/extensions/subagent-bridge.ts +291 -0
- package/extensions/tui-widgets.ts +68 -0
- package/extensions/types.ts +153 -0
- package/extensions/utils.ts +15 -0
- package/package.json +79 -0
- package/pipelines/dev-sprint.pipeline.yaml +104 -0
- package/pipelines/hello-world.pipeline.yaml +25 -0
- package/pipelines/refactor.pipeline.yaml +59 -0
- package/pipelines/release-check.pipeline.yaml +60 -0
- package/pipelines/tdd-review.pipeline.yaml +78 -0
- package/skills/pi-pipelines/SKILL.md +575 -0
|
@@ -0,0 +1,1346 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline Runner — core orchestration engine
|
|
3
|
+
*
|
|
4
|
+
* Executes pipeline stages by dispatching subagent commands through
|
|
5
|
+
* pi-subagents' event bridge. This means:
|
|
6
|
+
* - No child Pi processes needed
|
|
7
|
+
* - pi-subagents handles all agent execution natively
|
|
8
|
+
* - Chain and parallel execution are native to pi-subagents
|
|
9
|
+
* - Review gates add the iterative scoring loop on top
|
|
10
|
+
*
|
|
11
|
+
* For simple linear pipelines, the runner emits a single chain request.
|
|
12
|
+
* For pipelines with gates, it runs stages one-by-one with programmatic
|
|
13
|
+
* control over the review loop.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import * as path from "node:path";
|
|
17
|
+
import yaml from "js-yaml";
|
|
18
|
+
import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
19
|
+
import type { PipelineDef, Stage, StageResult, PipelineResult } from "./types.ts";
|
|
20
|
+
import { findPipelineFile, listPipelines, loadPipeline } from "./config-loader.ts";
|
|
21
|
+
import { executeSubagent, extractResponseText } from "./subagent-bridge.ts";
|
|
22
|
+
import { formatDuration } from "./utils.ts";
|
|
23
|
+
|
|
24
|
+
/**
 * Upper bound, per review round, on how long the worker agent of a review
 * gate may run. Keeps a hung worker from stalling the whole pipeline.
 * @internal Exported for testing.
 */
export const GATE_WORKER_TIMEOUT_MS = 600_000; // 10 minutes

/**
 * Upper bound, per review round, on how long the reviewer agents of a review
 * gate may run. Keeps hung reviewers from stalling the whole pipeline.
 * @internal Exported for testing.
 */
export const GATE_REVIEWER_TIMEOUT_MS = 300_000; // 5 minutes

/** Where pipeline definitions live, relative to the working directory. */
const PIPELINES_DIR = ".pi/pipelines";
|
|
40
|
+
|
|
41
|
+
/**
 * Derive the abort signal a unit of work should observe.
 *
 * @param parentSignal - Caller-supplied cancellation signal, if any.
 * @param timeoutMs - Timeout in milliseconds; ignored unless it is a positive number.
 * @returns `undefined` when there is neither a parent signal nor a usable
 *   timeout, the lone signal when only one source exists, and otherwise a
 *   signal that aborts as soon as either the parent aborts or the timeout fires.
 */
function createStageSignal(
  parentSignal?: AbortSignal,
  timeoutMs?: number,
): AbortSignal | undefined {
  const timeoutSignal =
    timeoutMs && timeoutMs > 0 ? AbortSignal.timeout(timeoutMs) : undefined;

  if (parentSignal && timeoutSignal) {
    return AbortSignal.any([parentSignal, timeoutSignal]);
  }

  // At most one of the two exists here; hand it back as-is (or undefined).
  return parentSignal ?? timeoutSignal;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Render a pipeline run as a markdown-style text block for the report synthesizer.
 *
 * The text opens with a header (pipeline name, description, task, optional
 * focus, and an overall PASSED/FAILED status line with pass count and summed
 * duration), followed by one section per stage listing its duration, its
 * error (when set) and its output (when non-empty). Stage output is included
 * verbatim; no truncation happens in this function.
 *
 * @param pipelineName - Name shown in the header.
 * @param pipelineDescription - Description shown in the header.
 * @param task - The task the pipeline was run with.
 * @param stages - Stage results, rendered in array order.
 * @param focus - Optional focus line; omitted from the header when falsy.
 * @returns The assembled text, lines joined with `\n`.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export function buildReportContext(
  pipelineName: string,
  pipelineDescription: string,
  task: string,
  stages: StageResult[],
  focus?: string,
): string {
  const passedCount = stages.filter((stage) => stage.success).length;
  const stageCount = stages.length;
  const elapsedMs = stages.reduce((acc, stage) => acc + stage.durationMs, 0);
  const verdict = passedCount === stageCount ? "PASSED" : "FAILED";

  const header: string[] = [
    `# Pipeline: ${pipelineName}`,
    `Description: ${pipelineDescription}`,
    `Task: ${task}`,
    ...(focus ? [`Focus: ${focus}`] : []),
    `Status: ${verdict} (${passedCount}/${stageCount} stages passed, ${formatDuration(elapsedMs)})`,
    "",
    "## Stage Results",
    "",
  ];

  const sections = stages.flatMap((stage, index) => {
    const verdictTag = stage.success ? "✅ PASS" : "❌ FAIL";
    const roundsNote = stage.rounds
      ? ` (${stage.rounds} round${stage.rounds > 1 ? "s" : ""})`
      : "";
    const scoresNote = stage.scores?.length ? ` scores=[${stage.scores.join(", ")}]` : "";

    const section: string[] = [
      `### ${index + 1}. ${stage.stageId} ${verdictTag}${roundsNote}${scoresNote}`,
      `Duration: ${formatDuration(stage.durationMs)}`,
    ];
    if (stage.error) section.push(`Error: ${stage.error}`);
    if (stage.output) section.push(`Output:\n${stage.output}`);
    section.push(""); // blank separator after every stage section
    return section;
  });

  return [...header, ...sections].join("\n");
}
|
|
103
|
+
|
|
104
|
+
/**
 * Fallback cap, in characters, for a stage summary when the stage's report
 * config does not set `maxLength`.
 */
const STAGE_SUMMARY_MAX_LENGTH = 500;

/**
 * Condense a stage's raw output before it is handed to later stages.
 *
 * When the stage has no `report` config, or its mode is `"full"` or unset,
 * the raw output is returned untouched. Otherwise the `"worker"` agent is
 * asked (with a fresh context) to summarize the output, and the reply is cut
 * to the configured maximum length with `"..."` appended when it was longer.
 *
 * @param pi - Extension API handle forwarded to the subagent bridge.
 * @param stage - Stage whose `report` config and `id` are used.
 * @param rawOutput - Unprocessed output of the stage.
 * @param task - The overall pipeline task, quoted in the summarizer prompt.
 * @param signal - Optional abort signal forwarded to the subagent call.
 * @returns The raw output, the summary, or `"(summary unavailable)"` when the
 *   agent reply contains no text.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function applyStageReport(
  pi: ExtensionAPI,
  stage: Stage,
  rawOutput: string,
  task: string,
  signal?: AbortSignal,
): Promise<string> {
  const config = stage.report;
  if (!config) return rawOutput;
  if (config.mode === undefined || config.mode === "full") return rawOutput;

  const limit = config.maxLength ?? STAGE_SUMMARY_MAX_LENGTH;
  const guidance = config.instruction ?? "Summarize the key findings and results concisely.";

  const prompt = `You are a stage output summarizer. A pipeline stage has just completed and its output
needs to be condensed so the next stage does not receive the full raw text.

Stage: ${stage.id}
Full task: ${task}

Summarization instruction: ${guidance}

Raw output (${rawOutput.length} chars):
${rawOutput}

---
Produce a concise summary of the stage output, maximum ${limit} characters.
Focus on key facts, decisions, and results relevant to downstream stages.
Ignore verbose reasoning, iterative exploration, and internal commentary.`;

  const reply = await executeSubagent(
    pi,
    {
      agent: "worker",
      task: prompt,
      context: "fresh",
    },
    signal,
  );

  const summary = extractResponseText(reply) || "(summary unavailable)";
  if (summary.length > limit) {
    // The agent is only asked to respect the limit; enforce it here.
    return summary.slice(0, limit) + "...";
  }
  return summary;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Ask an agent to write the end-of-run report for a pipeline.
 *
 * Builds the execution summary with `buildReportContext`, embeds it in a
 * fixed prompt and sends it, with a fresh context, to `agentName` (or
 * `"planner"` when no name is given). This function does not catch errors
 * itself; the caller decides how a failed synthesis is treated.
 *
 * @param pi - Extension API handle forwarded to the subagent bridge.
 * @param pipeline - Pipeline definition; its name and description go into the summary.
 * @param task - The task the pipeline was run with.
 * @param stages - Results of the executed stages.
 * @param agentName - Agent to use; falls back to `"planner"` when falsy.
 * @param focus - Optional focus hint included in the summary.
 * @param signal - Optional abort signal forwarded to the subagent call.
 * @returns The agent's reply text, or a placeholder when the reply has no text.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function runReportSynthesis(
  pi: ExtensionAPI,
  pipeline: PipelineDef,
  task: string,
  stages: StageResult[],
  agentName?: string,
  focus?: string,
  signal?: AbortSignal,
): Promise<string> {
  const executionSummary = buildReportContext(
    pipeline.name,
    pipeline.description,
    task,
    stages,
    focus,
  );

  const prompt = `You are a pipeline report synthesizer. A pipeline has just completed its execution.

Below is the execution summary of all stages. Based on this, produce a concise report covering:
1. **What was achieved** — what the pipeline accomplished
2. **Key findings** — important results, decisions, or artifacts produced
3. **Issues** — any failures, problems, or areas needing attention
4. **Next steps / recommendations** — what should be done next

Keep the report concise and actionable. Focus on the substance of what happened.

${executionSummary}

---
Generate the pipeline report now.`;

  const reply = await executeSubagent(
    pi,
    {
      agent: agentName || "planner",
      task: prompt,
      context: "fresh",
    },
    signal,
  );

  const report = extractResponseText(reply);
  return report || "(report synthesis produced no output)";
}
|
|
206
|
+
|
|
207
|
+
/**
 * Options passed to the pipeline runner (`runPipeline`).
 */
export interface RunOptions {
  /** Name of the pipeline to run; looked up inside the pipelines directory. */
  pipeline: string;
  /** Task text handed to the pipeline's stages. */
  task: string;
  /** Directory to search for pipeline files. Defaults to `.pi/pipelines` under the context's cwd. */
  pipelinesDir?: string;
  /** Optional AbortSignal to cancel the pipeline mid-execution */
  signal?: AbortSignal;
  /**
   * Optional stage timeout in milliseconds. When omitted, `runPipeline` falls
   * back to 1_800_000 ms (30 minutes). A value of 0 or less disables the timeout.
   */
  stageTimeoutMs?: number;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Main entry point: run a pipeline by name.
 *
 * Resolves the pipeline file inside the pipelines directory, loads it, then
 * runs its stages one after another. Parallel, expand, review-gate and plain
 * agent stages are each dispatched to their dedicated runner, and stage output
 * is stored under the stage id so later stages can reference it. After the
 * last stage an optional report synthesis runs, and only then is the
 * completion notification shown.
 *
 * Every stage gets its own abort signal, so the stage timeout budget restarts
 * for each stage instead of being shared by the whole run.
 *
 * @param pi - Extension API handle forwarded to the subagent bridge.
 * @param ctx - Extension context; supplies `cwd` and, when `hasUI` is set, the UI.
 * @param options - Pipeline name, task, and optional directory/signal/timeout.
 * @returns The pipeline result. A missing or unloadable pipeline and a
 *   pre-aborted signal are reported through `failResult`; a cancellation or a
 *   thrown stage error ends the run early with `success: false`, `error` set
 *   and the stages completed so far.
 */
export async function runPipeline(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  options: RunOptions,
): Promise<PipelineResult> {
  const startTime = Date.now();
  const pipelinesDir = options.pipelinesDir ?? path.join(ctx.cwd, PIPELINES_DIR);

  // Resolve pipeline file
  const filePath = findPipelineFile(pipelinesDir, options.pipeline);
  if (!filePath) {
    const available = listPipelines(pipelinesDir);
    const names = available.map((p) => `  - ${p.name}`).join("\n");
    const msg =
      available.length > 0
        ? `Pipeline "${options.pipeline}" not found.\nAvailable pipelines:\n${names}`
        : `Pipeline "${options.pipeline}" not found.\nNo pipelines defined in ${pipelinesDir}/`;
    return failResult(options.pipeline, options.task, msg, startTime);
  }

  // Load and validate pipeline
  let pipeline: PipelineDef;
  try {
    pipeline = loadPipeline(filePath);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return failResult(
      options.pipeline,
      options.task,
      `Failed to load pipeline: ${reason}`,
      startTime,
    );
  }

  const parentSignal = options.signal;
  const stageTimeoutMs = options.stageTimeoutMs ?? 1_800_000; // default 30 min

  // Check for pre-existing cancellation
  if (parentSignal?.aborted) {
    return failResult(
      options.pipeline,
      options.task,
      "Pipeline cancelled before execution",
      startTime,
    );
  }

  // Execution context
  const outputs = new Map<string, string>();
  let lastFeedback: string | undefined;
  const stages: StageResult[] = [];

  if (ctx.hasUI) {
    ctx.ui.setStatus("pipeline", `🚀 Pipeline: ${pipeline.name}`);
    ctx.ui.notify(
      `🧪 Running pipeline "${pipeline.name}" (${pipeline.stages.length} stages)`,
      "info",
    );
  }

  // Execute each stage sequentially. A stage that reports failure without
  // throwing is recorded and the loop moves on; a thrown error ends the run.
  for (let i = 0; i < pipeline.stages.length; i++) {
    const stage = pipeline.stages[i]!;
    const stageStart = Date.now();
    const stageLabel = `Stage ${i + 1}/${pipeline.stages.length}: ${stage.id}`;

    // Build the signal here, not once before the loop: the timeout timer
    // starts when the signal is created, so a single shared signal would
    // make the "stage" timeout a budget for the entire pipeline.
    const stageSignal = createStageSignal(parentSignal, stageTimeoutMs);

    try {
      if (stage.parallel && stage.parallel.length > 0) {
        // === PARALLEL STAGE ===
        if (ctx.hasUI) {
          ctx.ui.setStatus(
            "pipeline",
            `⚡ ${stageLabel} (parallel, ${stage.parallel.length} agents)`,
          );
        }

        const parallelResult = await runParallelStage(
          pi,
          ctx,
          stage,
          options.task,
          outputs,
          stageSignal,
        );
        const rawOutput = parallelResult.output;
        const stageOutput = await applyStageReport(pi, stage, rawOutput, options.task, stageSignal);
        outputs.set(stage.id, stageOutput);

        stages.push({
          ...parallelResult,
          stageId: stage.id,
          output: stageOutput,
          rawOutput,
        });
      } else if (stage.expand) {
        // === EXPAND STAGE (dynamic stage expansion) ===
        if (ctx.hasUI) {
          ctx.ui.setStatus("pipeline", `✦ ${stageLabel} (expand from "${stage.expand.from}")`);
        }

        const expandResult = await runExpandStage(
          pi,
          ctx,
          stage,
          options.task,
          outputs,
          stageSignal,
        );

        // Only a successful expansion is made available to later stages.
        if (expandResult.success) {
          outputs.set(stage.id, expandResult.output);
        }

        stages.push({
          stageId: stage.id,
          success: expandResult.success,
          output: expandResult.output,
          error: expandResult.error,
          durationMs: Date.now() - stageStart,
        });
      } else if (stage.gate) {
        // === REVIEW GATE STAGE ===
        if (ctx.hasUI) {
          ctx.ui.setStatus(
            "pipeline",
            `🔍 ${stageLabel} (gate, max ${stage.gate.maxRounds} rounds)`,
          );
        }

        const gateResult = await runReviewGate(
          pi,
          ctx,
          stage,
          options.task,
          outputs,
          lastFeedback,
          stageSignal,
          pipeline.judgeModel,
        );
        const rawOutput = gateResult.output;
        const stageOutput = await applyStageReport(pi, stage, rawOutput, options.task, stageSignal);
        outputs.set(stage.id, stageOutput);
        // Reviewer feedback carries over to the stages that follow.
        lastFeedback = gateResult.lastFeedback;

        stages.push({
          stageId: stage.id,
          success: gateResult.success,
          output: stageOutput,
          rawOutput,
          error: gateResult.error,
          durationMs: Date.now() - stageStart,
          rounds: gateResult.rounds,
          scores: gateResult.scores,
        });
      } else {
        // === SIMPLE AGENT STAGE ===
        if (ctx.hasUI) {
          ctx.ui.setStatus("pipeline", `▶ ${stageLabel} (${stage.agent})`);
        }

        const rawOutput = await runSingleStage(
          pi,
          ctx,
          stage,
          options.task,
          outputs,
          lastFeedback,
          stageSignal,
        );
        const stageOutput = await applyStageReport(pi, stage, rawOutput, options.task, stageSignal);
        outputs.set(stage.id, stageOutput);

        stages.push({
          stageId: stage.id,
          success: true,
          output: stageOutput,
          rawOutput,
          durationMs: Date.now() - stageStart,
        });
      }
    } catch (err) {
      const errName = (err as { name?: unknown } | null | undefined)?.name;

      // If the pipeline was aborted (caller cancellation or stage timeout),
      // return partial results gracefully
      if (errName === "AbortError" || stageSignal?.aborted) {
        if (ctx.hasUI) {
          ctx.ui.setStatus("pipeline", "");
          ctx.ui.notify(`⏹ Pipeline "${pipeline.name}" cancelled`, "warning");
        }
        return {
          pipelineName: pipeline.name,
          task: options.task,
          success: false,
          stages,
          totalDurationMs: Date.now() - startTime,
          error: `Pipeline cancelled at stage "${stage.id}" after ${stages.length} completed stages`,
        };
      }

      // Non-Error throwables are stringified rather than reported as "undefined".
      const reason = err instanceof Error ? err.message : String(err);
      const errorMsg = `Stage "${stage.id}" failed: ${reason}`;
      stages.push({
        stageId: stage.id,
        success: false,
        output: "",
        error: errorMsg,
        durationMs: Date.now() - stageStart,
      });

      if (ctx.hasUI) {
        ctx.ui.setStatus("pipeline", "");
        ctx.ui.notify(`❌ ${errorMsg}`, "error");
      }

      return {
        pipelineName: pipeline.name,
        task: options.task,
        success: false,
        stages,
        totalDurationMs: Date.now() - startTime,
        error: errorMsg,
      };
    }
  }

  // --- Report synthesis ---
  // Run BEFORE the pipeline-complete notification to avoid the appearance
  // of completion while synthesis is still running.
  const pipelineSuccess = stages.every((s) => s.success);
  const result: PipelineResult = {
    pipelineName: pipeline.name,
    task: options.task,
    success: pipelineSuccess,
    stages,
    totalDurationMs: Date.now() - startTime,
  };

  // Run the report synthesizer unless explicitly disabled
  const reportCfg = pipeline.report !== false ? (pipeline.report ?? {}) : null;
  if (reportCfg) {
    try {
      // Synthesis gets the parent signal without the stage timeout,
      // but with a 2-minute self-imposed limit so it doesn't hang
      const synthSignal = createStageSignal(parentSignal, 120_000);

      const synthesis = await runReportSynthesis(
        pi,
        pipeline,
        options.task,
        stages,
        reportCfg.agent,
        reportCfg.focus,
        synthSignal,
      );
      result.synthesis = synthesis;
    } catch (synthErr) {
      // Synthesis failure does NOT fail the pipeline — it's best-effort
      const reason = synthErr instanceof Error ? synthErr.message : String(synthErr);
      result.synthesisError = `Report synthesis failed: ${reason}`;
    }
  }

  // Pipeline complete (after synthesis to avoid showing "done" while synthesis runs)
  if (ctx.hasUI) {
    ctx.ui.setStatus("pipeline", "");
    if (result.synthesisError) {
      ctx.ui.notify(
        `✅ Pipeline "${pipeline.name}" complete (report synthesis: ⚠️ ${result.synthesisError})`,
        "info",
      );
    } else {
      ctx.ui.notify(
        `✅ Pipeline "${pipeline.name}" complete: ${stages.filter((s) => s.success).length}/${stages.length} stages passed`,
        "info",
      );
    }
  }

  return result;
}
|
|
501
|
+
|
|
502
|
+
/**
 * Execute one plain agent stage through the subagent bridge.
 *
 * The stage's task template is resolved against the pipeline task, the
 * outputs collected so far and the latest reviewer feedback, then sent to the
 * stage's agent with clarification disabled.
 *
 * @param pi - Extension API handle forwarded to the subagent bridge.
 * @param ctx - Extension context; its `cwd` is passed to the agent.
 * @param stage - Stage to run; `agent` is assumed to be set.
 * @param task - The overall pipeline task.
 * @param outputs - Outputs of earlier stages, keyed by stage id.
 * @param lastFeedback - Most recent review feedback, if any.
 * @param signal - Optional abort signal forwarded to the subagent call.
 * @returns The text extracted from the agent's response.
 * @throws Error when the response is flagged as an error.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function runSingleStage(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  stage: Stage,
  task: string,
  outputs: Map<string, string>,
  lastFeedback: string | undefined,
  signal?: AbortSignal,
): Promise<string> {
  const prompt = resolveTemplate(stage.task ?? "", task, outputs, lastFeedback);
  const agent = stage.agent!;

  const reply = await executeSubagent(
    pi,
    {
      agent,
      task: prompt,
      clarify: false,
      model: stage.model,
      agentScope: "both",
      cwd: ctx.cwd,
    },
    signal,
  );

  if (!reply.isError) {
    return extractResponseText(reply);
  }

  throw new Error(`Agent "${agent}" failed: ${reply.errorText ?? "(unknown error)"}`);
}
|
|
537
|
+
|
|
538
|
+
/**
 * Execute a parallel stage: all child stages are sent to the subagent bridge
 * in a single request as a task list.
 *
 * Each child's task template is resolved against the pipeline task and the
 * outputs collected so far (no review feedback is passed); a child without an
 * agent uses `"worker"`. On success, a slice of the combined response is
 * stored in `outputs` under every child id (split via `extractTaskOutput`),
 * so later stages can reference `{outputs.<childId>}`.
 *
 * @param pi - Extension API handle forwarded to the subagent bridge.
 * @param ctx - Extension context; used for `cwd` and UI notifications.
 * @param stage - Stage whose `parallel` children are run; assumed to be set.
 * @param task - The overall pipeline task.
 * @param outputs - Outputs of earlier stages; mutated with the child outputs on success.
 * @param signal - Optional abort signal forwarded to the subagent call.
 * @returns A stage result carrying the combined response text; `success` is
 *   `false` and `error` is set when the response is flagged as an error.
 * @internal Exported for testing. See pipeline-runner.test.ts
 */
export async function runParallelStage(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  stage: Stage,
  task: string,
  outputs: Map<string, string>,
  signal?: AbortSignal,
): Promise<StageResult> {
  const startedAt = Date.now();
  const children = stage.parallel!;

  if (ctx.hasUI) {
    ctx.ui.notify(`⚡ Running ${children.length} agents in parallel`, "info");
  }

  // One task entry per child stage, in declaration order.
  const childTasks = children.map((child) => ({
    agent: child.agent ?? "worker",
    task: resolveTemplate(child.task ?? "", task, outputs, undefined),
    model: child.model,
  }));

  const reply = await executeSubagent(
    pi,
    {
      tasks: childTasks,
      clarify: false,
      agentScope: "both",
      cwd: ctx.cwd,
    },
    signal,
  );

  const combined = extractResponseText(reply);

  if (reply.isError) {
    return {
      stageId: stage.id,
      success: false,
      output: combined,
      error: reply.errorText,
      durationMs: Date.now() - startedAt,
    };
  }

  // Record each child's share of the response under its own id.
  for (const [position, child] of children.entries()) {
    outputs.set(child.id, extractTaskOutput(combined, position, children.length));
  }

  return {
    stageId: stage.id,
    success: true,
    output: combined,
    durationMs: Date.now() - startedAt,
  };
}
|
|
600
|
+
|
|
601
|
+
/**
 * Execute an expand stage: turn one template stage into several parallel
 * tasks, one per item parsed from an earlier stage's output, and merge the
 * results.
 *
 * Steps:
 *   1. Look up the output of the source stage (`expand.from`) in `outputs`.
 *   2. Parse it into items with `parseItems`.
 *   3. Keep at most `expand.maxItems` items (10 when unset).
 *   4. Build one dynamic stage per item with `buildExpandStages`.
 *   5. Send all dynamic stages to the subagent bridge as one task list.
 *   6. Merge the response into `### <id>` sections separated by `---`.
 *
 * Dynamic stages are dispatched with only agent, task and model, so a gate
 * declared on the expand template is not executed here; run quality checks in
 * a separate stage after the expansion.
 *
 * @param pi - Extension API handle forwarded to the subagent bridge.
 * @param ctx - Extension context; used for `cwd` and UI notifications.
 * @param stage - Stage whose `expand` config is used; assumed to be set.
 * @param task - The overall pipeline task.
 * @param outputs - Outputs of earlier stages, keyed by stage id.
 * @param signal - Optional abort signal forwarded to the subagent call.
 * @returns A stage result. A missing source output, a parse failure, an error
 *   response or a thrown non-abort error all yield `success: false`; an empty
 *   item list yields success with a placeholder output.
 * @throws Rethrows the caught error when it is an `AbortError` or `signal` is aborted.
 * @internal Exported for testing. See pipeline-expand.test.ts
 */
export async function runExpandStage(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  stage: Stage,
  task: string,
  outputs: Map<string, string>,
  signal?: AbortSignal,
): Promise<StageResult> {
  const stageStart = Date.now();
  const expand = stage.expand!;

  // Result builders; the duration is measured at the moment of the call.
  const failed = (error: StageResult["error"], output = ""): StageResult => ({
    stageId: stage.id,
    success: false,
    output,
    error,
    durationMs: Date.now() - stageStart,
  });
  const succeeded = (output: string): StageResult => ({
    stageId: stage.id,
    success: true,
    output,
    durationMs: Date.now() - stageStart,
  });

  // 1. Source output (an empty string counts as missing)
  const sourceOutput = outputs.get(expand.from);
  if (!sourceOutput) {
    return failed(
      `Expand source stage "${expand.from}" has no output. Stages before "${stage.id}": ${[...outputs.keys()].join(", ")}`,
    );
  }

  // 2. Items
  let items: StageItem[];
  try {
    items = parseItems(sourceOutput);
  } catch (err) {
    return failed(
      `Failed to parse items from stage "${expand.from}": ${(err as Error).message}`,
    );
  }

  // 3. Cap the fan-out
  const limitedItems = items.slice(0, expand.maxItems ?? 10);
  if (limitedItems.length === 0) {
    // Nothing to expand is not an error; there is simply no work to do.
    if (ctx.hasUI) {
      ctx.ui.notify(`✦ Stage "${stage.id}": no items to expand`, "info");
    }
    return succeeded("(no items to expand)");
  }

  // 4. One dynamic stage per item
  const dynamicStages = buildExpandStages(stage, limitedItems, task, outputs);

  if (ctx.hasUI) {
    ctx.ui.notify(
      `✦ Stage "${stage.id}": expanding into ${dynamicStages.length} parallel tasks`,
      "info",
    );
  }

  // 5. Dispatch every dynamic stage as a plain agent task (no gates)
  const fanOut = dynamicStages.map((dynamicStage) => ({
    agent: dynamicStage.agent ?? "worker",
    task: dynamicStage.task ?? "",
    model: dynamicStage.model,
  }));

  try {
    const reply = await executeSubagent(
      pi,
      {
        tasks: fanOut,
        clarify: false,
        agentScope: "both",
        cwd: ctx.cwd,
      },
      signal,
    );

    const combined = extractResponseText(reply);
    if (reply.isError) {
      return failed(reply.errorText, combined);
    }

    // 6. Head each task's share of the response with its stage id
    const sections = dynamicStages.map(
      (dynamicStage, position) =>
        `### ${dynamicStage.id}\n${extractTaskOutput(combined, position, dynamicStages.length)}`,
    );
    return succeeded(sections.join("\n\n---\n\n"));
  } catch (err) {
    // Cancellation must reach the caller untouched.
    const wasAborted = (err as Error).name === "AbortError" || signal?.aborted;
    if (wasAborted) {
      throw err;
    }
    return failed(`Expand stage "${stage.id}" failed: ${(err as Error).message}`);
  }
}
|
|
741
|
+
|
|
742
|
+
/**
 * Pick one task's share out of the combined text of a parallel response.
 *
 * This is a heuristic and assumes the combined text lists task outputs in
 * task order:
 * - With at most one task, the whole text (trimmed) is returned.
 * - Otherwise the text is split on runs of two or more newlines. If that
 *   yields at least `totalTasks` non-empty chunks, the chunk at `taskIndex` is
 *   returned (an empty string when there is no such chunk).
 * - Otherwise the text is cut into `totalTasks` equal character ranges, the
 *   last task also receiving the remainder.
 *
 * @param combined - Combined response text of all tasks.
 * @param taskIndex - Zero-based position of the task of interest.
 * @param totalTasks - Number of tasks that produced `combined`.
 * @returns The trimmed share attributed to the task.
 * @internal Exported for testing. See pipeline-expand.test.ts
 */
export function extractTaskOutput(combined: string, taskIndex: number, totalTasks: number): string {
  if (totalTasks <= 1) {
    return combined.trim();
  }

  // Blank-line separated chunks are taken as task boundaries.
  const chunks = combined.split(/\n{2,}/).filter((chunk) => chunk.length > 0);
  if (chunks.length >= totalTasks) {
    const chunk = chunks[taskIndex];
    return chunk === undefined ? "" : chunk.trim();
  }

  // Too few chunks: fall back to equal character ranges.
  const share = Math.floor(combined.length / totalTasks);
  const from = taskIndex * share;
  const isLastTask = taskIndex === totalTasks - 1;
  const to = isLastTask ? combined.length : from + share;
  return combined.slice(from, to).trim();
}
|
|
766
|
+
|
|
767
|
+
/**
 * Combine the caller's optional abort signal with a fresh timeout signal.
 * A new timeout is created on every call so each round gets its own budget.
 */
function gateRoundSignal(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal {
  const timeout = AbortSignal.timeout(timeoutMs);
  return signal ? AbortSignal.any([signal, timeout]) : timeout;
}

/**
 * Run a review gate stage.
 *
 * Pattern (repeated for at most `gate.maxRounds` rounds):
 * 1. Worker executes the task
 * 2. N reviewers evaluate the result (parallel)
 * 3. Scores are averaged
 * 4. If avg >= targetScore → pass
 * 5. If avg < targetScore → worker receives feedback → repeat
 *
 * @param pi - Extension API handle, passed through to `executeSubagent`.
 * @param ctx - Extension context; supplies `cwd` and, when `hasUI` is set, progress notifications.
 * @param stage - Stage to run. `stage.agent` and `stage.gate` are asserted non-null, so callers must set both.
 * @param task - Original user task, substituted into the stage's task template.
 * @param outputs - Outputs of earlier stages, passed to the template resolver.
 * @param lastFeedback - Feedback carried into the first round, if any.
 * @param signal - Optional abort signal; combined with a per-round timeout for worker and reviewers.
 * @param pipelineJudgeModel - Reviewer model used when the gate does not set `judgeModel`.
 * @returns On pass: the worker output with a review summary appended, the round that passed and its scores.
 *   On exhaustion: `success: false` with the last worker output, the final round's scores, and the
 *   feedback prepared for a further attempt.
 * @throws The signal's reason (or an `AbortError`) when `signal` is already aborted before a round's
 *   worker starts; an `Error` when the worker response is flagged as an error.
 */
/** @internal Exported for testing. See pipeline-runner.test.ts */
export async function runReviewGate(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  stage: Stage,
  task: string,
  outputs: Map<string, string>,
  lastFeedback: string | undefined,
  signal?: AbortSignal,
  pipelineJudgeModel?: string,
): Promise<StageResult & { lastFeedback?: string }> {
  const stageStart = Date.now();
  const agentName = stage.agent!;
  const gate = stage.gate!;
  let currentFeedback = lastFeedback;
  let lastOutput = "";
  // Scores of the most recent round, so a failed gate can report what the reviewers actually said.
  let lastScores: number[] = [];

  for (let round = 1; round <= gate.maxRounds; round++) {
    if (ctx.hasUI) {
      ctx.ui.notify(`🔄 Stage "${stage.id}": round ${round}/${gate.maxRounds}`, "info");
    }

    // 1. Worker executes the task (with per-round timeout)
    const resolvedTask = resolveTemplate(stage.task ?? "", task, outputs, currentFeedback);

    if (signal?.aborted) throw signal.reason ?? new DOMException("Aborted", "AbortError");

    const workerResponse = await executeSubagent(
      pi,
      {
        agent: agentName,
        task: resolvedTask,
        clarify: false,
        model: stage.model,
        agentScope: "both",
        cwd: ctx.cwd,
      },
      gateRoundSignal(signal, GATE_WORKER_TIMEOUT_MS),
    );

    if (workerResponse.isError) {
      throw new Error(`Worker round ${round} failed: ${workerResponse.errorText}`);
    }

    lastOutput = extractResponseText(workerResponse);

    // 2. Run reviewers in parallel (with per-round timeout)
    const reviewerTasks = gate.reviewers.map((reviewer) => ({
      agent: reviewer.agent ?? "reviewer",
      task: buildReviewerTask(reviewer.focus, lastOutput, stage.id),
      model: gate.judgeModel ?? pipelineJudgeModel ?? stage.model,
    }));

    // NOTE(review): unlike the worker call, an error flag on the reviewers' response is not
    // checked here; its text is parsed like any other review. Confirm whether that is intended.
    const reviewsResponse = await executeSubagent(
      pi,
      {
        tasks: reviewerTasks,
        clarify: false,
        agentScope: "both",
        cwd: ctx.cwd,
      },
      gateRoundSignal(signal, GATE_REVIEWER_TIMEOUT_MS),
    );

    // 3. Parse review outputs to extract scores
    const rawReviewOutput = extractResponseText(reviewsResponse);
    const reviews = parseReviewOutputs(rawReviewOutput, reviewerTasks.length);

    const scores = reviews.map((r) => r.score);
    lastScores = scores;
    // Guard the division: a gate without reviewers would otherwise average to NaN.
    const average = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
    const allFeedback = reviews
      .map((r) => r.feedback)
      .filter(Boolean)
      .join("\n\n");

    if (ctx.hasUI) {
      ctx.ui.notify(
        `📊 Stage "${stage.id}" round ${round}: scores [${scores.join(", ")}] avg=${average.toFixed(1)} target=${gate.targetScore}`,
        average >= gate.targetScore ? "info" : "warning",
      );
    }

    // 4. Check if passed
    if (average >= gate.targetScore) {
      if (ctx.hasUI) {
        ctx.ui.notify(
          `✅ Stage "${stage.id}" passed gate (${average.toFixed(1)}≥${gate.targetScore})`,
          "info",
        );
      }

      // Each reviewer's feedback is capped at 500 characters in the appended summary.
      const reviewSummary = reviews
        .map(
          (r, i) => `### Reviewer ${i + 1}\n- Score: ${r.score}/10\n- ${r.feedback.slice(0, 500)}`,
        )
        .join("\n\n");

      const finalOutput = `${lastOutput}\n\n---\n\n## Gate Review Passed (Round ${round})\n\n${reviewSummary}`;

      return {
        stageId: stage.id,
        success: true,
        output: finalOutput,
        rounds: round,
        scores,
        durationMs: Date.now() - stageStart,
      };
    }

    // 5. Not passed — prepare feedback for next round
    if (ctx.hasUI) {
      ctx.ui.notify(
        `⚠ Stage "${stage.id}" round ${round}: score ${average.toFixed(1)} < ${gate.targetScore}, retrying...`,
        "warning",
      );
    }

    currentFeedback = [
      `## Reviewer Feedback (Round ${round})`,
      "",
      `Average Score: ${average.toFixed(1)}/${gate.targetScore}`,
      "",
      "Issues to fix:",
      allFeedback,
      "",
      "Please address ALL of the above issues in your next attempt.",
    ].join("\n");
  }

  // Exhausted max rounds — gate failed. Report the final round's real scores.
  return {
    stageId: stage.id,
    success: false,
    output: lastOutput,
    error: `Failed to pass review gate after ${gate.maxRounds} rounds. Scores: [${lastScores.join(", ")}]`,
    rounds: gate.maxRounds,
    scores: lastScores,
    durationMs: Date.now() - stageStart,
    lastFeedback: currentFeedback,
  };
}
|
|
926
|
+
|
|
927
|
+
/**
 * Assemble the prompt handed to a reviewer agent.
 *
 * The prompt names the stage, states the review focus, embeds the work product
 * verbatim, and instructs the reviewer to end with a `SCORE: <0-10>` line.
 *
 * @param focus - What this reviewer should concentrate on.
 * @param output - The work product under review.
 * @param stageId - Identifier of the stage being reviewed.
 * @returns The complete reviewer prompt.
 */
/** @internal Exported for testing. See pipeline-runner.test.ts */
export function buildReviewerTask(focus: string, output: string, stageId: string): string {
  const promptLines: string[] = [
    `You are a reviewer for stage "${stageId}" of a multi-agent pipeline.`,
    "",
    "## Review Focus",
    focus,
    "",
    "## Work Product to Review",
    "",
    output,
    "",
    "## Instructions",
    "1. Analyze the work product critically against the review focus.",
    "2. Write your detailed review analysis.",
    "3. On the VERY LAST LINE of your response, output exactly:",
    "",
    "SCORE: <number between 0 and 10>",
    "",
    "Where 10 = perfect, 8+ = acceptable, 5+ = needs improvement, <5 = unacceptable.",
    "Be honest and critical. Do NOT inflate scores.",
  ];
  return promptLines.join("\n");
}
|
|
951
|
+
|
|
952
|
+
/**
 * Parse review outputs to extract scores and feedback.
 * Handles the case where all reviewers come back in one combined response.
 *
 * Reviewers are instructed to finish with a `SCORE: <n>` line, so the text
 * *preceding* each score is taken as that reviewer's feedback. Any text after
 * the very last score in the output is appended to the final reviewer's feedback.
 *
 * When fewer score lines than `expectedCount` are found, the whole output is
 * treated as a single review: the last score found (or 0 when there is none)
 * is replicated for every expected reviewer.
 *
 * @param rawOutput - Combined text returned by the reviewer run.
 * @param expectedCount - Number of reviewers that were asked.
 * @returns Exactly `expectedCount` entries; scores are clamped to the range 0–10.
 */
/** @internal Exported for testing. See pipeline-runner.test.ts */
export function parseReviewOutputs(
  rawOutput: string,
  expectedCount: number,
): Array<{ score: number; feedback: string }> {
  const clampScore = (raw: string): number => Math.max(0, Math.min(10, parseFloat(raw)));
  const scoreMatches = [...rawOutput.matchAll(/SCORE:\s*(\d+(?:\.\d+)?)/gi)];

  if (scoreMatches.length >= expectedCount) {
    // Walk the score lines in order; `cursor` marks where the previous review ended.
    let cursor = 0;
    return scoreMatches.slice(0, expectedCount).map((match, i) => {
      const scoreStart = match.index ?? cursor;
      const scoreEnd = scoreStart + match[0]!.length;
      let feedback = rawOutput.slice(cursor, scoreStart).trim();
      cursor = scoreEnd;

      // Keep trailing remarks that follow the last score line of the whole output.
      if (i === scoreMatches.length - 1) {
        const trailing = rawOutput.slice(scoreEnd).trim();
        if (trailing) feedback = feedback ? `${feedback}\n\n${trailing}` : trailing;
      }

      return { score: clampScore(match[1]!), feedback: feedback || "(no feedback)" };
    });
  }

  // Last resort: treat entire output as one reviewer's response
  const lastScoreMatch = scoreMatches[scoreMatches.length - 1];
  const score = lastScoreMatch ? clampScore(lastScoreMatch[1]!) : 0;
  const feedback = rawOutput.replace(/SCORE:\s*\d+(?:\.\d+)?/gi, "").trim();

  // Replicate for expected count
  return Array.from({ length: expectedCount }, (_, i) => ({
    score,
    feedback: i === 0 ? feedback || "(no feedback)" : "(combined response)",
  }));
}
|
|
996
|
+
|
|
997
|
+
/**
 * One entry taken from a source stage's output, used to expand a template
 * stage into several concrete stages.
 *
 * Every property can be referenced from the template as `{item.<property>}`.
 */
export interface StageItem {
  [field: string]: unknown;
}
|
|
1004
|
+
|
|
1005
|
+
/**
 * Convert raw list entries into stage items: strings and other primitives are
 * wrapped as `{ value }`, non-null objects are passed through unchanged.
 */
function normalizeStageItems(entries: unknown[]): StageItem[] {
  return entries.map((entry) => {
    if (typeof entry === "string") return { value: entry };
    if (typeof entry === "object" && entry !== null) return entry as StageItem;
    return { value: String(entry) };
  });
}

/**
 * Try the structured formats on `text`: JSON first (an array, or an object
 * with an `items` array), then a YAML sequence. Returns `undefined` when
 * neither yields a list.
 */
function parseStructuredItems(text: string): StageItem[] | undefined {
  try {
    const parsed: unknown = JSON.parse(text);
    if (Array.isArray(parsed)) return normalizeStageItems(parsed);
    if (typeof parsed === "object" && parsed !== null) {
      const nested = (parsed as Record<string, unknown>).items;
      if (Array.isArray(nested)) return normalizeStageItems(nested);
    }
  } catch {
    // Not JSON, continue to next strategy
  }

  try {
    // NOTE(review): presumably js-yaml v4+, where `load` uses the safe default schema — confirm.
    const parsed: unknown = yaml.load(text);
    if (Array.isArray(parsed)) return normalizeStageItems(parsed);
  } catch {
    // Not YAML either
  }

  return undefined;
}

/**
 * Parse a source stage's output into an array of items for dynamic expansion.
 *
 * Strategy:
 * 1. Try JSON — expects an array, or an object with "items" key
 * 2. Try YAML — expects a sequence
 * 3. Fallback — parse as markdown list (-, *, 1. or 1))
 * 4. Last resort — repeat steps 1–2 on the body of the first Markdown code
 *    fence, so JSON wrapped in a fenced block is still recognised
 *
 * @param output - Raw text produced by the source stage.
 * @returns The parsed items; an empty array when `output` is blank.
 * @throws Error if the output cannot be parsed into items.
 */
/** @internal Exported for testing. See pipeline-expand.test.ts */
export function parseItems(output: string): StageItem[] {
  if (!output.trim()) return [];

  // Strategies 1 + 2: structured formats on the raw output
  const structured = parseStructuredItems(output);
  if (structured !== undefined) return structured;

  // Strategy 3: markdown list fallback
  const bullets = output
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.startsWith("- ") || line.startsWith("* ") || /^\d+[.)]\s/.test(line))
    .map((line) => line.replace(/^[-*]\s+/, "").replace(/^\d+[.)]\s+/, ""));

  if (bullets.length > 0) {
    return bullets.map((value) => ({ value }));
  }

  // Strategy 4: structured content inside a Markdown code fence. Runs last so
  // that every input the earlier strategies accept keeps its previous result.
  const fencedBody = /```[^\n`]*\n([\s\S]*?)```/.exec(output)?.[1];
  if (fencedBody !== undefined && fencedBody.trim()) {
    const fromFence = parseStructuredItems(fencedBody);
    if (fromFence !== undefined) return fromFence;
  }

  throw new Error(
    "Cannot parse items from output. Expected JSON array, YAML list, or markdown list.",
  );
}
|
|
1078
|
+
|
|
1079
|
+
/**
 * Resolve {item.key} and {item} variables in an already-resolved task template.
 *
 * - `{item.key}` is replaced by the item's own enumerable property `key`.
 *   `null`/`undefined` become an empty string; plain objects and arrays are
 *   rendered as JSON rather than "[object Object]". Placeholders for keys the
 *   item does not have are left untouched.
 * - `{item}` resolves to the value directly for string-valued items (an item
 *   whose only key is `value`), and to `JSON.stringify(item)` otherwise.
 *
 * Substitution happens in a single pass, so placeholder-like text inside a
 * substituted value is not expanded again. Keys containing `{` or `}` cannot
 * be referenced.
 *
 * @param template - Task text that may contain item placeholders.
 * @param item - The item whose fields fill the placeholders.
 * @returns The template with all resolvable item placeholders replaced.
 */
/** @internal Exported for testing. See pipeline-expand.test.ts */
export function expandItemTemplate(template: string, item: StageItem): string {
  // Render one field value as text for insertion into the prompt.
  const render = (value: unknown): string => {
    if (value === null || value === undefined) return "";
    if (typeof value === "object") {
      const proto: unknown = Object.getPrototypeOf(value);
      // Only plain objects and arrays go through JSON; other objects keep String().
      if (Array.isArray(value) || proto === Object.prototype || proto === null) {
        return JSON.stringify(value);
      }
    }
    return String(value);
  };

  // For string items ({ value: "..." }), {item} is the value itself.
  const isStringItem = Object.keys(item).length === 1 && "value" in item;
  const wholeItem = isStringItem ? render(item.value) : JSON.stringify(item);

  // A replacer function is used so "$" sequences in values are inserted literally.
  return template.replace(
    /\{item(?:\.([^{}]+))?\}/g,
    (placeholder: string, key: string | undefined) => {
      if (key === undefined) return wholeItem;
      // Own enumerable properties only.
      return Object.prototype.propertyIsEnumerable.call(item, key) ? render(item[key]) : placeholder;
    },
  );
}
|
|
1110
|
+
|
|
1111
|
+
/**
 * Turn one template stage into a list of concrete stages, one per item.
 *
 * Every generated stage:
 * - is named "{template.id}-{n}" with n counting from 1
 * - copies agent, model, gate, output, reads and maxSubagentDepth from the template
 * - carries the template task with pipeline and item variables filled in
 *
 * NOTE: Gates are propagated on the Stage type for schema compatibility
 * but are NOT executed by the runner in v1 (see runPipeline).
 *
 * @param template — the expand stage template from YAML
 * @param items — parsed items from the source stage
 * @param task — original user task
 * @param outputs — outputs from all previous stages (for {outputs.*} resolution)
 * @returns one stage per item, in item order; empty when there are no items
 */
/** @internal Exported for testing. See pipeline-expand.test.ts */
export function buildExpandStages(
  template: Stage,
  items: StageItem[],
  task: string,
  outputs: Map<string, string>,
): Stage[] {
  const expanded: Stage[] = [];
  if (items.length === 0) {
    return expanded;
  }

  // {task} and {outputs.*} do not depend on the item, so resolve them once up front.
  // No feedback is supplied, which leaves any {lastFeedback} placeholder in place.
  const sharedTask = resolveTemplate(template.task ?? "", task, outputs, undefined);

  for (const [index, item] of items.entries()) {
    expanded.push({
      id: `${template.id}-${index + 1}`,
      agent: template.agent,
      task: expandItemTemplate(sharedTask, item),
      model: template.model,
      gate: template.gate,
      output: template.output,
      reads: template.reads,
      maxSubagentDepth: template.maxSubagentDepth,
    });
  }

  return expanded;
}
|
|
1153
|
+
|
|
1154
|
+
/**
 * Resolve template variables in a task string.
 *
 * Supported:
 *   {task}          — original user task
 *   {outputs.<id>}  — output from a previous stage (id made of word characters);
 *                     an unknown id becomes `[No output from stage "<id>"]`
 *   {lastFeedback}  — last review feedback (inside gate retries); left
 *                     untouched when no feedback is supplied
 *
 * Substitutions are applied in the order listed above. All values are inserted
 * literally: "$" sequences such as `$&` or `$1` in the task or feedback are
 * not interpreted as replacement patterns.
 *
 * @param template - Task text containing the placeholders.
 * @param task - Original user task.
 * @param outputs - Stage outputs keyed by stage id.
 * @param lastFeedback - Reviewer feedback, or `undefined` when there is none.
 * @returns The template with the placeholders substituted.
 */
/** @internal Exported for testing. See pipeline-runner.test.ts */
export function resolveTemplate(
  template: string,
  task: string,
  outputs: Map<string, string>,
  lastFeedback: string | undefined,
): string {
  // Replacer functions (not replacement strings) keep "$" in free text literal.
  let result = template.replace(/\{task\}/g, () => task);

  result = result.replace(/\{outputs\.(\w+)\}/g, (_match: string, stageId: string) => {
    return outputs.get(stageId) ?? `[No output from stage "${stageId}"]`;
  });

  if (lastFeedback !== undefined) {
    result = result.replace(/\{lastFeedback\}/g, () => lastFeedback);
  }

  return result;
}
|
|
1181
|
+
|
|
1182
|
+
/**
 * Render a pipeline result as a human-readable text report.
 *
 * Layout: a header with task and pass/fail status, the synthesis report as a
 * blockquote (when present), a synthesis note (when present), one section per
 * stage, and a closing "Fatal" line when the pipeline carries an error that no
 * stage reported.
 *
 * @param result - The finished pipeline result.
 * @returns The report as a single newline-joined string.
 */
export function formatPipelineResult(result: PipelineResult): string {
  const mark = (ok: boolean): string => (ok ? "✅" : "❌");
  const passedCount = result.stages.filter((s) => s.success).length;
  const verdict = result.success ? "✅ PASSED" : "❌ FAILED";

  // --- Pipeline header ---
  const report: string[] = [
    `# ${mark(result.success)} Pipeline: ${result.pipelineName}`,
    `Task: ${result.task}`,
    `Status: ${verdict} · ${passedCount}/${result.stages.length} stages passed · ${formatDuration(result.totalDurationMs)}`,
    "",
  ];

  // --- Synthesis report (prominently at the top) ---
  if (result.synthesis) {
    // Each synthesis line is prefixed so the whole report renders as a blockquote.
    const quoted = result.synthesis
      .trim()
      .split("\n")
      .map((synLine) => `> ${synLine}`);
    report.push("> 📋 **Pipeline Report**", ">", ...quoted, "");
  }

  if (result.synthesisError) {
    report.push(`> ⚠️ *Report synthesis note: ${result.synthesisError}*`, "");
  }

  // --- Stage details ---
  report.push("## Stages", "");

  for (const stage of result.stages) {
    let heading = `### ${mark(stage.success)} ${stage.stageId}`;
    if (stage.rounds) {
      heading += ` (${stage.rounds} round${stage.rounds > 1 ? "s" : ""})`;
    }
    if (stage.scores?.length) {
      heading += ` scores=[${stage.scores.join(", ")}]`;
    }
    report.push(heading);
    report.push(`Duration: ${formatDuration(stage.durationMs)}`);
    if (stage.error) {
      report.push(`Error: ${stage.error}`);
    }
    if (stage.output) {
      report.push(`Output:\n${stage.output}`);
    }
    report.push("");
  }

  // A pipeline-level error is shown only when no stage already reported one.
  const anyStageError = result.stages.some((s) => Boolean(s.error));
  if (result.error && !anyStageError) {
    report.push(`---\n**Fatal:** ${result.error}`);
  }

  return report.join("\n");
}
|
|
1237
|
+
|
|
1238
|
+
/**
 * Build a structured context message for LLM injection.
 * Used after pipeline completion to give the agent a clear picture
 * with stage excerpts, synthesis, and an instruction to write a narrative summary.
 *
 * Sections, in order: a header and status line (both carrying the pass/fail
 * icon), a stage results table, per-stage output highlights (errors in full,
 * outputs limited to their last 500 characters), the synthesis and synthesis
 * note when present, and the instructions for the agent.
 *
 * @param result - The finished pipeline result.
 * @returns The message as a single newline-joined Markdown string.
 */
export function buildPipelineContextMessage(result: PipelineResult): string {
  const lines: string[] = [];

  // The header uses the same icon as the status line, so a failed pipeline is
  // not announced with a check mark.
  const icon = result.success ? "\u2705" : "\u274C";
  lines.push(`## ${icon} Pipeline Result: ${result.pipelineName}`);
  lines.push("");
  lines.push(
    `**Status:** ${icon} ${result.success ? "PASSED" : "FAILED"} \u00B7 ${result.stages.filter((s) => s.success).length}/${result.stages.length} stages passed \u00B7 ${formatDuration(result.totalDurationMs)}`,
  );
  if (result.task) lines.push(`**Task:** ${result.task}`);
  lines.push("");

  // Stage results table
  lines.push("### Stage Results");
  lines.push("");
  lines.push("| Stage | Agent | Result | Duration |");
  lines.push("|-------|-------|--------|----------|");
  for (const stage of result.stages) {
    const stageIcon = stage.success ? "\u2705" : "\u274C";
    const duration = formatDuration(stage.durationMs);
    // The Agent column is derived from the stage id alone ("review" → reviewer,
    // "stability" → worker, otherwise "agent"), not from the agent that ran.
    const agentLabel = stage.stageId.includes("review")
      ? "reviewer"
      : stage.stageId.includes("stability")
        ? "worker"
        : "agent";
    const roundsNote = stage.rounds ? ` (${stage.rounds}r)` : "";
    const scoresNote = stage.scores?.length ? ` [${stage.scores.join(", ")}]` : "";
    lines.push(
      `| ${stage.stageId} | ${agentLabel} | ${stageIcon} ${stage.success ? "Pass" : "Fail"}${roundsNote}${scoresNote} | ${duration} |`,
    );
  }
  lines.push("");

  // Stage output highlights (truncated)
  lines.push("### Stage Output Highlights");
  lines.push("");
  for (const stage of result.stages) {
    lines.push(`**${stage.stageId}**`);
    if (stage.error) {
      lines.push("```");
      lines.push(`Error: ${stage.error}`);
      lines.push("```");
    }
    if (stage.output) {
      // Keep the tail of long outputs.
      const truncated = stage.output.length > 500 ? "..." + stage.output.slice(-500) : stage.output;
      lines.push("```");
      lines.push(truncated);
      lines.push("```");
    }
    lines.push("");
  }

  // Pipeline synthesis
  if (result.synthesis) {
    lines.push("### Pipeline Synthesis");
    lines.push("> \uD83D\uDCCB " + result.pipelineName + " synthesis");
    lines.push(">");
    for (const synLine of result.synthesis.trim().split("\n")) {
      lines.push(`> ${synLine}`);
    }
    lines.push("");
  }
  if (result.synthesisError) {
    lines.push(`> \u26A0\uFE0F *Synthesis note: ${result.synthesisError}*`);
    lines.push("");
  }

  // Instruction for the LLM
  lines.push("---");
  lines.push("");
  lines.push("**Instructions for the Agent:**");
  lines.push("");
  lines.push("Please analyze the pipeline results above and provide a narrative summary. Cover:");
  lines.push("");
  lines.push("1. **Overall outcome** \u2014 did the pipeline pass or fail? What does this mean?");
  lines.push(
    "2. **Stage breakdown** \u2014 which stages succeeded, which failed, and their key findings",
  );
  lines.push(
    "3. **Key issues** \u2014 blocking issues, review scores, recommendations from each stage",
  );
  lines.push("4. **Synthesis** \u2014 the pipeline\u2019s own assessment (if available)");
  lines.push("5. **Next steps** \u2014 what should be done next based on the results");
  lines.push("");
  lines.push(
    "Write your summary in the same language as this conversation. Be concise but informative.",
  );
  lines.push("");
  lines.push(
    "If all stages passed, state that clearly. If any failed, explain what failed and why.",
  );

  return lines.join("\n");
}
|
|
1330
|
+
|
|
1331
|
+
/**
 * Create the result for a pipeline that failed without any recorded stage.
 *
 * @param pipelineName - Name of the pipeline.
 * @param task - The user task the pipeline was given.
 * @param error - Description of the failure.
 * @param startTime - Epoch milliseconds at which the run started; used to compute the elapsed time.
 * @returns A result with `success: false`, no stages, the elapsed time and the error.
 */
/** @internal Exported for testing. See pipeline-runner.test.ts */
export function failResult(
  pipelineName: string,
  task: string,
  error: string,
  startTime: number,
): PipelineResult {
  const elapsedMs = Date.now() - startTime;
  const failed: PipelineResult = {
    pipelineName,
    task,
    success: false,
    stages: [],
    totalDurationMs: elapsedMs,
    error,
  };
  return failed;
}
|