pi-taskflow 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -13
- package/examples/conditional-research.json +1 -1
- package/examples/guarded-refactor.json +2 -2
- package/extensions/agents.ts +54 -34
- package/extensions/index.ts +19 -7
- package/extensions/interpolate.ts +25 -4
- package/extensions/render.ts +41 -36
- package/extensions/runner.ts +97 -15
- package/extensions/runs-view.ts +3 -0
- package/extensions/runtime.ts +216 -28
- package/extensions/schema.ts +151 -5
- package/extensions/store.ts +77 -7
- package/package.json +1 -1
- package/skills/taskflow/SKILL.md +112 -1
- package/skills/taskflow/configuration.md +0 -2
package/extensions/runner.ts
CHANGED
|
@@ -48,12 +48,67 @@ export function isFailed(r: RunResult): boolean {
|
|
|
48
48
|
return r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted";
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
/** Placeholder written to a failed phase's `output` so downstream interpolation
|
|
52
|
+
* can detect "upstream failed" without being polluted by raw HTML/JSON. */
|
|
53
|
+
export const TRANSPORT_ERROR_PLACEHOLDER = "(upstream error: subagent failed; see error)";
|
|
54
|
+
|
|
55
|
+
/** Hard cap on the errorMessage field stored in PhaseState (≈ 4 KB). */
export const ERROR_MESSAGE_MAX_LEN = 4096;

/**
 * Cheap detector for error payloads that are really an HTML document
 * (Cloudflare challenge pages, proxy/gateway error pages, ...).
 *
 * Despite the name, JSON is deliberately never flagged: a JSON error envelope
 * is worth keeping verbatim, and an oversized one is handled by the length cap
 * in `sanitizeErrorMessage`.
 *
 * @param s Candidate error text; leading whitespace is ignored.
 * @returns `true` only when the text starts with one of a small set of
 *   document-level HTML tags.
 */
export function looksLikeHtmlOrJson(s: string): boolean {
	// The pattern is anchored on "<", so empty input and JSON ("{...") both
	// fall through to `false` without a separate check.
	return /^<(?:!doctype\s+html|html|head|body|script|svg|div|iframe|span|p)\b/i.test(s.trimStart());
}

/**
 * Truncate and (when obviously HTML) summarize an errorMessage before it is
 * persisted.
 *
 * Order matters: HTML detection runs BEFORE the size cap. Upstream error pages
 * are routinely larger than the cap, and a head/tail excerpt of such a page
 * would still be raw markup.
 *
 * @param raw The unsanitized error text, possibly `undefined`.
 * @returns The cleaned message. Empty, `undefined` or whitespace-only input
 *   returns `""`. The result never exceeds `ERROR_MESSAGE_MAX_LEN` characters.
 */
export function sanitizeErrorMessage(raw: string | undefined): string {
	if (!raw) return "";
	const cleaned = raw.replace(/\s+/g, " ").trim();
	if (!cleaned) return "";

	// Number of characters kept from each end when truncating, and the cap on
	// the hint extracted from an HTML page.
	const keep = 200;
	const rawLen = raw.length;

	if (looksLikeHtmlOrJson(cleaned)) {
		// Any document-like HTML is a strong signal the upstream returned a page
		// instead of JSON. Summarize it instead of letting HTML pollute the
		// phase's error and downstream interpolation contexts.
		const title = cleaned.match(/<title[^>]*>([^<]*)<\/title>/i)?.[1]?.trim();
		const stripped = cleaned.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
		const m = stripped.match(/(?:Unable to load site|Ray ID[: ]+([A-Za-z0-9]+)|[A-Z][a-z]+Error[: ]+(.{0,200}))/i);
		const hint = title || (m ? (m[1] || m[0]).trim() : stripped.slice(0, keep));
		// Bound the hint so an enormous <title> cannot defeat the cap.
		return `Upstream returned non-JSON response (${rawLen} chars). Hint: ${hint.slice(0, keep)}`;
	}

	// The cap is decided on the RAW length so whitespace padding cannot hide an
	// oversized payload.
	if (rawLen > ERROR_MESSAGE_MAX_LEN) {
		// If the collapsed text already fits in the head+tail window, slicing
		// would overlap and duplicate the message; return it whole instead.
		if (cleaned.length <= keep * 2) return cleaned;
		const head = cleaned.slice(0, keep);
		const tail = cleaned.slice(-keep);
		return `${head} ... [truncated ${rawLen - keep * 2} chars] ... ${tail}`;
	}

	return cleaned;
}
|
|
105
|
+
|
|
51
106
|
function getFinalOutput(messages: Message[]): string {
|
|
52
107
|
for (let i = messages.length - 1; i >= 0; i--) {
|
|
53
108
|
const msg = messages[i];
|
|
54
109
|
if (msg.role === "assistant") {
|
|
55
110
|
for (const part of msg.content) {
|
|
56
|
-
if (part.type === "text") return part.text;
|
|
111
|
+
if (part.type === "text" && part.text.trim()) return part.text;
|
|
57
112
|
}
|
|
58
113
|
}
|
|
59
114
|
}
|
|
@@ -148,14 +203,10 @@ function summarizeToolCall(name: string, args: Record<string, unknown>): string
|
|
|
148
203
|
}
|
|
149
204
|
}
|
|
150
205
|
|
|
151
|
-
async function writePromptToTempFile(
|
|
152
|
-
const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
|
|
153
|
-
const safeName = agentName.replace(/[^\w.-]+/g, "_");
|
|
154
|
-
const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
|
|
206
|
+
async function writePromptToTempFile(filePath: string, prompt: string): Promise<void> {
|
|
155
207
|
await withFileMutationQueue(filePath, async () => {
|
|
156
208
|
await fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 });
|
|
157
209
|
});
|
|
158
|
-
return { dir: tmpDir, filePath };
|
|
159
210
|
}
|
|
160
211
|
|
|
161
212
|
function getPiInvocation(args: string[]): { command: string; args: string[] } {
|
|
@@ -229,9 +280,13 @@ export async function runAgentTask(
|
|
|
229
280
|
|
|
230
281
|
try {
|
|
231
282
|
if (agent.systemPrompt.trim()) {
|
|
232
|
-
|
|
233
|
-
tmpPromptDir
|
|
234
|
-
|
|
283
|
+
// Allocate the temp dir + path BEFORE any fallible I/O so that if
|
|
284
|
+
// writeFile throws, tmpPromptDir/tmpPromptPath are already set and
|
|
285
|
+
// the finally block can clean up the directory (F-004).
|
|
286
|
+
tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
|
|
287
|
+
const safeName = agent.name.replace(/[^\w.-]+/g, "_");
|
|
288
|
+
tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
|
|
289
|
+
await writePromptToTempFile(tmpPromptPath, agent.systemPrompt);
|
|
235
290
|
args.push("--append-system-prompt", tmpPromptPath);
|
|
236
291
|
}
|
|
237
292
|
args.push(`Task: ${task}`);
|
|
@@ -264,15 +319,25 @@ export async function runAgentTask(
|
|
|
264
319
|
if (buffer.trim()) processLine(buffer);
|
|
265
320
|
resolve(code ?? 0);
|
|
266
321
|
});
|
|
267
|
-
proc.on("error", () =>
|
|
322
|
+
proc.on("error", (err) => {
|
|
323
|
+
if (!result.stderr) result.stderr = err.message;
|
|
324
|
+
if (!result.errorMessage) result.errorMessage = err.message;
|
|
325
|
+
resolve(1);
|
|
326
|
+
});
|
|
268
327
|
|
|
269
328
|
if (opts.signal) {
|
|
270
329
|
const kill = () => {
|
|
271
330
|
wasAborted = true;
|
|
272
331
|
proc.kill("SIGTERM");
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
332
|
+
// Force-kill fallback. proc.kill("SIGKILL") is idempotent if
|
|
333
|
+
// the process already exited, and `proc.killed` is set true
|
|
334
|
+
// synchronously by the SIGTERM above — so the previous
|
|
335
|
+
// `if (!proc.killed)` guard would skip SIGKILL entirely,
|
|
336
|
+
// hanging forever on a child that ignores SIGTERM.
|
|
337
|
+
// .unref() keeps the timer from holding the event loop open
|
|
338
|
+
// after the process is gone.
|
|
339
|
+
const forceKill = setTimeout(() => proc.kill("SIGKILL"), 5000);
|
|
340
|
+
forceKill.unref();
|
|
276
341
|
};
|
|
277
342
|
if (opts.signal.aborted) kill();
|
|
278
343
|
else opts.signal.addEventListener("abort", kill, { once: true });
|
|
@@ -289,8 +354,25 @@ export async function runAgentTask(
|
|
|
289
354
|
result.stopReason = "aborted";
|
|
290
355
|
result.errorMessage = "Subagent was aborted";
|
|
291
356
|
}
|
|
292
|
-
|
|
293
|
-
|
|
357
|
+
// On failure, build a short, structured errorMessage + a placeholder
|
|
358
|
+
// output. We deliberately do NOT copy the raw errorMessage into
|
|
359
|
+
// `output`: upstream providers (e.g. a Cloudflare challenge page) can
|
|
360
|
+
// surface huge HTML/JSON in errorMessage, and that garbage would
|
|
361
|
+
// otherwise flow into downstream phase interpolations.
|
|
362
|
+
// Sanitization must run whenever the run failed, even if some output
|
|
363
|
+
// was already emitted (e.g. crash mid-stream with a partial result):
|
|
364
|
+
// an unsanitized errorMessage would still leak into PhaseState and
|
|
365
|
+
// downstream interpolation contexts. (F-013)
|
|
366
|
+
if (isFailed(result)) {
|
|
367
|
+
if (!result.output) {
|
|
368
|
+
result.output = TRANSPORT_ERROR_PLACEHOLDER;
|
|
369
|
+
if (!result.errorMessage) {
|
|
370
|
+
result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
if (result.errorMessage) {
|
|
374
|
+
result.errorMessage = sanitizeErrorMessage(result.errorMessage);
|
|
375
|
+
}
|
|
294
376
|
}
|
|
295
377
|
return result;
|
|
296
378
|
} finally {
|
package/extensions/runs-view.ts
CHANGED
|
@@ -50,6 +50,9 @@ export class RunHistoryComponent {
|
|
|
50
50
|
private cachedLines?: string[];
|
|
51
51
|
|
|
52
52
|
constructor(runs: RunState[], theme: Theme, onDone: (result?: RunHistoryResult) => void) {
|
|
53
|
+
if (!runs.length) {
|
|
54
|
+
throw new Error("RunHistoryComponent requires at least one run");
|
|
55
|
+
}
|
|
53
56
|
this.runs = runs;
|
|
54
57
|
this.theme = theme;
|
|
55
58
|
this.onDone = onDone;
|
package/extensions/runtime.ts
CHANGED
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
* result are skipped.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
+
import * as path from "node:path";
|
|
14
|
+
import * as fs from "node:fs";
|
|
13
15
|
import type { AgentConfig } from "./agents.ts";
|
|
14
16
|
import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
|
|
15
17
|
import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
|
|
@@ -147,6 +149,9 @@ function mergePhaseState(
|
|
|
147
149
|
const ran = results.filter((r) => r.stopReason !== "budget-skipped");
|
|
148
150
|
const anyFailed = ran.some(isFailed);
|
|
149
151
|
const usage = aggregateUsage(results.map((r) => r.usage));
|
|
152
|
+
// B12: surface the model(s) used in the fan-out so consumers can show
|
|
153
|
+
// which model produced the merged output.
|
|
154
|
+
const model = ran.find((r) => r.model !== undefined)?.model;
|
|
150
155
|
// Combine outputs as a labelled list; also expose a JSON array of outputs.
|
|
151
156
|
const combinedText = ran
|
|
152
157
|
.map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
|
|
@@ -163,6 +168,7 @@ function mergePhaseState(
|
|
|
163
168
|
output: combinedText,
|
|
164
169
|
json: jsonArray,
|
|
165
170
|
usage,
|
|
171
|
+
model,
|
|
166
172
|
attempts: attempts > results.length ? attempts : undefined,
|
|
167
173
|
budgetTruncated: budgetSkips.length > 0 || undefined,
|
|
168
174
|
subProgress: { done: ran.length, total: results.length, running: 0, failed: failedCount },
|
|
@@ -188,6 +194,89 @@ function liveSink(state: RunState, phaseId: string, emitProgress: () => void): (
|
|
|
188
194
|
};
|
|
189
195
|
}
|
|
190
196
|
|
|
197
|
+
|
|
198
|
+
/** Files larger than this many bytes are skipped outright instead of being read and truncated. */
const CONTEXT_MAX_FILE_BYTES = 10 * 1024 * 1024; // 10 MB
/** Upper bound, in characters, on the combined pre-read text across all context files. */
const MAX_TOTAL_CONTEXT_CHARS = 200_000;

/**
 * Pre-read files listed in a phase's `context` field and return them as
 * markdown code blocks. Handles:
 *   - literal paths
 *   - interpolation refs (e.g. `{steps.scout.json}` resolving to `["a.ts"]`)
 *   - per-file truncation via `contextLimit` (default 8000 characters)
 *
 * The result is a single string that should be prepended to the phase task so
 * the subagent never needs to spend turns on file exploration.
 *
 * @param phase Phase whose `context` / `contextLimit` fields are read.
 * @param ctx Interpolation context used to resolve placeholder entries.
 * @returns The joined blocks followed by a blank line, or `""` when the phase
 *   lists no context entries or none of them could be read.
 */
async function resolvePhaseContext(
	phase: Phase,
	ctx: InterpolationContext,
): Promise<string> {
	const entries = phase.context;
	if (!entries || entries.length === 0) return "";
	const limit = phase.contextLimit ?? 8000;

	// Expand every entry into zero or more candidate paths.
	const paths: string[] = [];
	for (const entry of entries) {
		const r = interpolate(entry, ctx);
		if (r.text === entry) {
			// Unchanged — literal path
			paths.push(entry);
			continue;
		}
		// Resolved — may be a JSON array from {steps.X.json}
		const parsed = safeParse(r.text);
		if (Array.isArray(parsed)) {
			for (const item of parsed) {
				if (typeof item === "string" && item.trim()) paths.push(item.trim());
			}
		} else if (typeof r.text === "string" && r.text.trim()) {
			paths.push(r.text.trim());
		}
	}

	const blocks: string[] = [];
	for (const p of Array.from(new Set(paths))) {
		// Diagnose JSON blobs masquerading as file paths — common when a context
		// entry like {steps.discover.output} resolves to {"files":[...]} instead
		// of a flat path or JSON array. The author should use {steps.discover.json.files}.
		if (p.startsWith("{")) {
			console.warn(
				`[taskflow] Context entry "${p.slice(0, 80)}…" looks like a JSON object, not a file path. ` +
					`Use {steps.<id>.json.<field>} to extract a specific field.`,
			);
			continue;
		}
		try {
			// NOTE(review): relative paths resolve against process.cwd(); no
			// phase/run cwd is available in this function — confirm that is intended.
			const abs = path.resolve(p);
			// Files are read one at a time so at most one (≤ 10 MB) file is held
			// in memory, and the async API keeps the event loop free meanwhile.
			const stat = await fs.promises.stat(abs);
			if (!stat.isFile()) {
				console.warn(`[taskflow] Skipped context entry (not a regular file): ${p}`);
				continue;
			}
			if (stat.size > CONTEXT_MAX_FILE_BYTES) {
				console.warn(`[taskflow] Skipped context file larger than ${CONTEXT_MAX_FILE_BYTES} bytes: ${p}`);
				continue;
			}
			const content = await fs.promises.readFile(abs, "utf-8");
			const truncated =
				content.length > limit
					? content.slice(0, limit) + `\n... [truncated ${content.length - limit} chars]`
					: content;
			const ext = path.extname(p).slice(1) || "txt";
			blocks.push(`## File: ${p}\n\n\`\`\`${ext}\n${truncated}\n\`\`\``);
		} catch {
			console.warn(`[taskflow] Skipped unreadable context file: ${p}`);
		}
	}

	// Nothing readable: return "" rather than a bare "\n\n" that would be
	// prepended to the task for no reason.
	if (blocks.length === 0) return "";

	// Safety cap: truncate total context when too many files are listed.
	const joined = blocks.join("\n\n") + "\n\n";
	if (joined.length <= MAX_TOTAL_CONTEXT_CHARS) return joined;
	return (
		joined.slice(0, MAX_TOTAL_CONTEXT_CHARS) +
		`\n\n... [truncated ${joined.length - MAX_TOTAL_CONTEXT_CHARS} total chars]`
	);
}
|
|
278
|
+
|
|
279
|
+
|
|
191
280
|
async function executePhase(
|
|
192
281
|
phase: Phase,
|
|
193
282
|
state: RunState,
|
|
@@ -200,6 +289,12 @@ async function executePhase(
|
|
|
200
289
|
const previousOutput = lastCompletedOutput(state, phase);
|
|
201
290
|
const run = deps.runTask ?? runAgentTask;
|
|
202
291
|
|
|
292
|
+
// Resolve context pre-read files once, before any type branching.
|
|
293
|
+
// The content is prepended to every task so the subagent never spends
|
|
294
|
+
// turns on file exploration for files the flow author already knows.
|
|
295
|
+
const ctx = buildInterpolationContext(state, previousOutput);
|
|
296
|
+
const preRead = await resolvePhaseContext(phase, ctx);
|
|
297
|
+
|
|
203
298
|
const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
|
|
204
299
|
run(
|
|
205
300
|
deps.cwd,
|
|
@@ -228,6 +323,10 @@ async function executePhase(
|
|
|
228
323
|
if (deps.signal?.aborted) break;
|
|
229
324
|
last = await baseRun(agentName, task, onLive);
|
|
230
325
|
usages.push(last.usage);
|
|
326
|
+
// B6: aggregate and surface cumulative usage before the retry decision,
|
|
327
|
+
// so the TUI / budget guard see the in-flight spend on every attempt.
|
|
328
|
+
const liveRetry = state.phases[phase.id];
|
|
329
|
+
if (liveRetry) liveRetry.usage = aggregateUsage(usages);
|
|
231
330
|
if (!isFailed(last)) break;
|
|
232
331
|
// Stop retrying on abort or once the run is over budget.
|
|
233
332
|
if (deps.signal?.aborted || overBudget(state).over) break;
|
|
@@ -313,24 +412,26 @@ async function executePhase(
|
|
|
313
412
|
// interpolated task. gate additionally parses a verdict; reduce simply pulls
|
|
314
413
|
// its inputs from `from` phases (already exposed via interpolation).
|
|
315
414
|
if (type === "agent" || type === "gate" || type === "reduce") {
|
|
316
|
-
const ctx = buildInterpolationContext(state, previousOutput);
|
|
317
415
|
const { text } = interpolate(phase.task ?? "", ctx);
|
|
318
|
-
const
|
|
416
|
+
const fullTask = preRead + text;
|
|
417
|
+
const inputHash = hashInput(phase.id, phase.agent ?? "", fullTask);
|
|
319
418
|
const cached = cachedPhase(prior, inputHash);
|
|
320
419
|
if (cached) return cached;
|
|
321
420
|
|
|
322
|
-
const r = await runOne(phase.agent ?? defaultAgent(deps),
|
|
421
|
+
const r = await runOne(phase.agent ?? defaultAgent(deps), fullTask, liveSink(state, phase.id, emitProgress));
|
|
323
422
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
324
423
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
325
424
|
return ps;
|
|
326
425
|
}
|
|
327
426
|
|
|
328
427
|
if (type === "parallel") {
|
|
329
|
-
const
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
428
|
+
const branches = (phase.branches ?? []).map((b) => {
|
|
429
|
+
const r = interpolate(b.task, ctx);
|
|
430
|
+
return {
|
|
431
|
+
agent: b.agent ?? phase.agent ?? defaultAgent(deps),
|
|
432
|
+
task: preRead + r.text,
|
|
433
|
+
};
|
|
434
|
+
});
|
|
334
435
|
const inputHash = hashInput(phase.id, JSON.stringify(branches));
|
|
335
436
|
const cached = cachedPhase(prior, inputHash);
|
|
336
437
|
if (cached) return cached;
|
|
@@ -340,7 +441,6 @@ async function executePhase(
|
|
|
340
441
|
}
|
|
341
442
|
|
|
342
443
|
if (type === "map") {
|
|
343
|
-
const ctx = buildInterpolationContext(state, previousOutput);
|
|
344
444
|
const overResolved = interpolate(phase.over ?? "", ctx).text;
|
|
345
445
|
// `over` may itself be a placeholder that resolved to a JSON string.
|
|
346
446
|
const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
|
|
@@ -359,7 +459,7 @@ async function executePhase(
|
|
|
359
459
|
const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
|
|
360
460
|
return {
|
|
361
461
|
agent: phase.agent ?? defaultAgent(deps),
|
|
362
|
-
task: interpolate(phase.task ?? "", localCtx).text,
|
|
462
|
+
task: preRead + interpolate(phase.task ?? "", localCtx).text,
|
|
363
463
|
};
|
|
364
464
|
});
|
|
365
465
|
const inputHash = hashInput(phase.id, JSON.stringify(tasks));
|
|
@@ -424,7 +524,7 @@ async function executePhase(
|
|
|
424
524
|
provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
|
|
425
525
|
}
|
|
426
526
|
const subArgs = resolveArgs(subDef, provided);
|
|
427
|
-
const inputHash = hashInput(phase.id, `flow:${name}`, JSON.stringify(subArgs));
|
|
527
|
+
const inputHash = hashInput(phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs));
|
|
428
528
|
const cached = cachedPhase(prior, inputHash);
|
|
429
529
|
if (cached) return cached;
|
|
430
530
|
|
|
@@ -442,17 +542,29 @@ async function executePhase(
|
|
|
442
542
|
phases: {},
|
|
443
543
|
createdAt: Date.now(),
|
|
444
544
|
updatedAt: Date.now(),
|
|
445
|
-
cwd: deps.cwd,
|
|
545
|
+
cwd: phase.cwd ?? deps.cwd,
|
|
446
546
|
};
|
|
547
|
+
// B8: pass this flow phase's preRead content to every sub-flow phase by
|
|
548
|
+
// wrapping runTask — sub-phase preRead still gets prepended on top of it.
|
|
549
|
+
const baseRunTask = deps.runTask ?? runAgentTask;
|
|
550
|
+
const subRunTask: typeof runAgentTask = (cwd, agents, agentName, subTask, opts, globalThinking) =>
|
|
551
|
+
baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
|
|
447
552
|
const subResult = await executeTaskflow(subState, {
|
|
448
553
|
...deps,
|
|
554
|
+
// Override deps.cwd with the flow phase's own cwd so that sub-flow
|
|
555
|
+
// phases without an explicit cwd derive their subagents from the
|
|
556
|
+
// flow's cwd (not the caller's cwd).
|
|
557
|
+
cwd: phase.cwd ?? deps.cwd,
|
|
558
|
+
runTask: subRunTask,
|
|
449
559
|
_stack: [...stack, state.flowName],
|
|
450
560
|
persist: undefined,
|
|
451
561
|
onProgress: () => {
|
|
452
562
|
if (live) {
|
|
453
563
|
const ph = Object.values(subState.phases);
|
|
564
|
+
// B-F015: `done` must include both success and failure so the
|
|
565
|
+
// renderer's `done - failed` shows the true success count.
|
|
454
566
|
live.subProgress = {
|
|
455
|
-
done: ph.filter((p) => p.status === "done").length,
|
|
567
|
+
done: ph.filter((p) => p.status === "done" || p.status === "failed").length,
|
|
456
568
|
total: subDef.phases.length,
|
|
457
569
|
running: ph.filter((p) => p.status === "running").length,
|
|
458
570
|
failed: ph.filter((p) => p.status === "failed").length,
|
|
@@ -471,8 +583,11 @@ async function executePhase(
|
|
|
471
583
|
output: subResult.finalOutput,
|
|
472
584
|
json: parseJson ? safeParse(subResult.finalOutput) : undefined,
|
|
473
585
|
usage: subResult.totalUsage,
|
|
586
|
+
// B-F015: include failed in `done` so the renderer's
|
|
587
|
+
// `done - failed` formula gives the success count (matches the
|
|
588
|
+
// map/parallel runner's overlapping-counter convention).
|
|
474
589
|
subProgress: {
|
|
475
|
-
done: sp.filter((p) => p.status === "done").length,
|
|
590
|
+
done: sp.filter((p) => p.status === "done" || p.status === "failed").length,
|
|
476
591
|
total: subDef.phases.length,
|
|
477
592
|
running: 0,
|
|
478
593
|
failed: sp.filter((p) => p.status === "failed").length,
|
|
@@ -494,7 +609,7 @@ async function executePhase(
|
|
|
494
609
|
|
|
495
610
|
/** Resolve a `{steps.x.json}`-style ref directly to its parsed value (bypassing stringify). */
|
|
496
611
|
function directRef(over: string, state: RunState): unknown {
|
|
497
|
-
const m = over.match(/^\{steps\.([a-zA-Z0-9_]+)\.(output|json)(?:\.([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*))?\}$/);
|
|
612
|
+
const m = over.match(/^\{steps\.([a-zA-Z0-9_-]+)\.(output|json)(?:\.([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*))?\}$/);
|
|
498
613
|
if (!m) return undefined;
|
|
499
614
|
const step = state.phases[m[1]];
|
|
500
615
|
if (!step || step.status !== "done") return undefined;
|
|
@@ -543,7 +658,10 @@ export function parseGateVerdict(output: string): { verdict: "pass" | "block"; r
|
|
|
543
658
|
if (typeof o.continue === "boolean") return { verdict: o.continue ? "pass" : "block", reason: asReason(o.reason) };
|
|
544
659
|
if (typeof o.pass === "boolean") return { verdict: o.pass ? "pass" : "block", reason: asReason(o.reason) };
|
|
545
660
|
if (typeof o.verdict === "string") {
|
|
546
|
-
|
|
661
|
+
// Note: do NOT include standalone "no" — natural-language verdicts like
|
|
662
|
+
// "No issues found" / "no errors" would otherwise be false-positive BLOCK.
|
|
663
|
+
// Fail-open covers any ambiguous text.
|
|
664
|
+
const block = /block|fail|stop|reject|halt/i.test(o.verdict);
|
|
547
665
|
return { verdict: block ? "block" : "pass", reason: asReason(o.reason) };
|
|
548
666
|
}
|
|
549
667
|
}
|
|
@@ -560,11 +678,86 @@ function asReason(v: unknown): string | undefined {
|
|
|
560
678
|
return typeof v === "string" && v.trim() ? v.trim() : undefined;
|
|
561
679
|
}
|
|
562
680
|
|
|
681
|
+
/**
 * Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
 *
 * A throw from a host-supplied callback must NEVER replace the runtime's
 * outcome — neither the original crash message in `executeTaskflow`'s catch
 * block, nor the final output of a successful run. Callbacks are observability
 * hooks; the run survives their failure.
 *
 * Both callbacks are always attempted, `persist` first: a failure in one does
 * not prevent the other from running.
 *
 * Used at every "checkpoint" call site (phase start, phase end, terminal state).
 * For high-frequency live updates inside a phase, see `safeProgress` below.
 *
 * @param deps Runtime dependencies carrying the optional callbacks.
 * @param state Run state handed to each callback.
 */
function safeEmit(deps: RuntimeDeps, state: RunState): void {
	runHostCallback(() => deps.persist?.(state));
	runHostCallback(() => deps.onProgress?.(state));
}

/**
 * Invoke one host callback and swallow whatever it throws.
 *
 * If the callback returns a promise, its rejection is swallowed too.
 * NOTE(review): the callback signatures are declared outside this view; the
 * promise guard is a no-op for synchronous callbacks and prevents an unhandled
 * rejection if a host passes an async one.
 */
function runHostCallback(invoke: () => unknown): void {
	try {
		const outcome = invoke();
		if (outcome instanceof Promise) outcome.catch(() => undefined);
	} catch {
		// user callback — must not break the run
	}
}
|
|
704
|
+
|
|
705
|
+
/**
 * Like `safeEmit` but for the high-frequency live-update channel only.
 * Skips `persist` (which is intentionally checkpoint-only) and swallows any
 * throw from the user-supplied `onProgress` so a misbehaving TUI sink cannot
 * disrupt an in-flight phase.
 *
 * @param deps Runtime dependencies carrying the optional `onProgress` callback.
 * @param state Run state handed to the callback.
 */
function safeProgress(deps: RuntimeDeps, state: RunState): void {
	try {
		// Typed as unknown so an async sink can be detected below.
		// NOTE(review): the callback signature is declared outside this view;
		// the guard is a no-op when the sink is synchronous.
		const outcome: unknown = deps.onProgress?.(state);
		if (outcome instanceof Promise) outcome.catch(() => undefined);
	} catch {
		// user callback — must not break the run
	}
}
|
|
718
|
+
|
|
563
719
|
/**
 * Append a trailing review gate to a taskflow that has none.
 *
 * Does nothing when the definition sets `implicitGate: false`, when it has no
 * phases, or when it already contains a `gate` / `approval` phase or a phase
 * with id `_implicit-gate` (which makes repeated calls on the same definition
 * a no-op).
 *
 * Otherwise, mutates `def` in place:
 *   - if no phase is marked `final`, the current last phase is marked `final`
 *     so the injected gate does not become the finalOutput;
 *   - a `gate` phase with id `_implicit-gate`, agent `reviewer`, depending on
 *     every existing phase, is pushed onto `def.phases`.
 *
 * @param def Taskflow definition to amend.
 */
function ensureImplicitGate(def: Taskflow): void {
	// Respect explicit opt-out. The intersection cast reads the flag without
	// resorting to `any`, whether or not Taskflow declares the field.
	if ((def as Taskflow & { implicitGate?: boolean }).implicitGate === false) return;
	if (def.phases.length === 0) return;

	const hasGate = def.phases.some(
		(p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
	);
	if (hasGate) return;

	// The last existing phase is the effective "final" phase — pin it so the
	// injected gate doesn't become the finalOutput. Skipped when the author
	// already pinned a phase (including the last one).
	if (!def.phases.some((p) => p.final)) {
		def.phases[def.phases.length - 1].final = true;
	}

	def.phases.push({
		id: "_implicit-gate",
		type: "gate",
		// Snapshot of the ids present before the gate itself is appended.
		dependsOn: def.phases.map((p) => p.id),
		agent: "reviewer",
		task: `Review all phase outputs from this taskflow for accuracy and consistency.

For each upstream phase, scan its output for:
1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
2. **Internal contradictions**: Do any phases contradict each other?
3. **Completeness**: Is any output truncated, empty, or anomalously short?
4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.

Output:
- If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
- If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
	});
}

/**
 * Execute a full taskflow. Mutates and persists `state` as it progresses.
 */
|
|
757
|
+
|
|
566
758
|
export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
|
|
567
759
|
const def: Taskflow = state.def;
|
|
760
|
+
ensureImplicitGate(def);
|
|
568
761
|
try {
|
|
569
762
|
return await runTaskflowLayers(state, deps);
|
|
570
763
|
} catch (e) {
|
|
@@ -579,8 +772,7 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
|
|
|
579
772
|
}
|
|
580
773
|
}
|
|
581
774
|
state.status = "failed";
|
|
582
|
-
deps
|
|
583
|
-
deps.onProgress?.(state);
|
|
775
|
+
safeEmit(deps, state);
|
|
584
776
|
const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
|
|
585
777
|
return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
|
|
586
778
|
}
|
|
@@ -591,8 +783,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
591
783
|
const layers = topoLayers(def.phases);
|
|
592
784
|
|
|
593
785
|
state.status = "running";
|
|
594
|
-
deps
|
|
595
|
-
deps.onProgress?.(state);
|
|
786
|
+
safeEmit(deps, state);
|
|
596
787
|
|
|
597
788
|
let aborted = false;
|
|
598
789
|
let gateBlocked = false;
|
|
@@ -650,8 +841,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
650
841
|
endedAt: Date.now(),
|
|
651
842
|
usage: emptyUsage(),
|
|
652
843
|
};
|
|
653
|
-
deps
|
|
654
|
-
deps.onProgress?.(state);
|
|
844
|
+
safeEmit(deps, state);
|
|
655
845
|
return;
|
|
656
846
|
}
|
|
657
847
|
|
|
@@ -662,9 +852,9 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
662
852
|
status: "running",
|
|
663
853
|
startedAt,
|
|
664
854
|
};
|
|
665
|
-
deps
|
|
855
|
+
safeProgress(deps, state);
|
|
666
856
|
|
|
667
|
-
const ps = await executePhase(phase, state, deps, prior, () => deps
|
|
857
|
+
const ps = await executePhase(phase, state, deps, prior, () => safeProgress(deps, state));
|
|
668
858
|
// Preserve the phase start time: executePhase returns a fresh PhaseState
|
|
669
859
|
// that omits startedAt (cached/resumed results carry their own).
|
|
670
860
|
state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
|
|
@@ -687,8 +877,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
687
877
|
budgetBlocked = true;
|
|
688
878
|
budgetReason = ob.reason;
|
|
689
879
|
}
|
|
690
|
-
deps
|
|
691
|
-
deps.onProgress?.(state);
|
|
880
|
+
safeEmit(deps, state);
|
|
692
881
|
});
|
|
693
882
|
}
|
|
694
883
|
|
|
@@ -712,8 +901,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
712
901
|
: anyFailed
|
|
713
902
|
? "failed"
|
|
714
903
|
: "completed";
|
|
715
|
-
deps
|
|
716
|
-
deps.onProgress?.(state);
|
|
904
|
+
safeEmit(deps, state);
|
|
717
905
|
|
|
718
906
|
let finalOutput = finalState?.output ?? "(no output)";
|
|
719
907
|
if (gateBlocked) {
|