pi-taskflow 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,12 +48,67 @@ export function isFailed(r: RunResult): boolean {
48
48
  return r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted";
49
49
  }
50
50
 
51
/**
 * Placeholder written to a failed phase's `output` so downstream interpolation
 * can detect "upstream failed" without being polluted by raw HTML/JSON.
 */
export const TRANSPORT_ERROR_PLACEHOLDER = "(upstream error: subagent failed; see error)";

/** Hard cap on the length of a sanitized errorMessage (4096 chars, about 4 KB). */
export const ERROR_MESSAGE_MAX_LEN = 4096;

/**
 * Cheap detector for "the upstream handed us an HTML page".
 *
 * Despite the name, JSON is deliberately never flagged: a genuine JSON error
 * envelope is worth keeping, and an oversized one is handled by the length cap
 * in `sanitizeErrorMessage`.
 *
 * @param s Candidate error text; leading whitespace is ignored.
 * @returns `true` only when the text starts with a well-known HTML/SVG tag.
 */
export function looksLikeHtmlOrJson(s: string): boolean {
  const t = s.trimStart();
  if (!t.startsWith("<")) return false;
  // HTML/XML/Cloudflare challenge pages
  return /^<(?:!doctype\s+html|html|head|body|script|svg|div|iframe|span|p)\b/i.test(t);
}

/**
 * Normalize an errorMessage before it is persisted.
 *
 * Whitespace is collapsed first. HTML-looking input is then replaced by a
 * one-line summary, and any other text longer than `ERROR_MESSAGE_MAX_LEN`
 * is reduced to its first and last 200 characters.
 *
 * The HTML check runs BEFORE the size cap so that large error pages are
 * summarized rather than stored as a slice of raw markup. The size cap is
 * applied to the whitespace-collapsed text, which is the text actually
 * returned, so a short message padded with whitespace is returned intact.
 *
 * @param raw Raw error text, possibly undefined or empty.
 * @returns The cleaned message; `""` for empty or whitespace-only input.
 */
export function sanitizeErrorMessage(raw: string | undefined): string {
  if (!raw) return "";
  const cleaned = raw.replace(/\s+/g, " ").trim();
  if (!cleaned) return "";

  const edgeLen = 200;
  const hintMaxLen = 200;

  if (looksLikeHtmlOrJson(cleaned)) {
    // Any document-like HTML (Cloudflare challenge pages, proxy error pages,
    // gateway error pages) is a strong signal the upstream returned a page
    // instead of JSON. Summarize it instead of letting HTML pollute the
    // phase's error and downstream interpolation contexts.
    const title = cleaned.match(/<title[^>]*>([^<]*)<\/title>/i)?.[1]?.trim();
    const stripped = cleaned.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
    const m = stripped.match(/(?:Unable to load site|Ray ID[: ]+([A-Za-z0-9]+)|[A-Z][a-z]+Error[: ]+(.{0,200}))/i);
    const hint = (title || (m ? (m[1] || m[0]).trim() : stripped)).slice(0, hintMaxLen);
    // Report the raw size so the reader knows how much was summarized away.
    return `Upstream returned non-JSON response (${raw.length} chars). Hint: ${hint}`;
  }

  if (cleaned.length > ERROR_MESSAGE_MAX_LEN) {
    const head = cleaned.slice(0, edgeLen);
    const tail = cleaned.slice(-edgeLen);
    return `${head} ... [truncated ${cleaned.length - 2 * edgeLen} chars] ... ${tail}`;
  }

  return cleaned;
}
105
+
51
106
  function getFinalOutput(messages: Message[]): string {
52
107
  for (let i = messages.length - 1; i >= 0; i--) {
53
108
  const msg = messages[i];
54
109
  if (msg.role === "assistant") {
55
110
  for (const part of msg.content) {
56
- if (part.type === "text") return part.text;
111
+ if (part.type === "text" && part.text.trim()) return part.text;
57
112
  }
58
113
  }
59
114
  }
@@ -148,14 +203,10 @@ function summarizeToolCall(name: string, args: Record<string, unknown>): string
148
203
  }
149
204
  }
150
205
 
151
- async function writePromptToTempFile(agentName: string, prompt: string): Promise<{ dir: string; filePath: string }> {
152
- const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
153
- const safeName = agentName.replace(/[^\w.-]+/g, "_");
154
- const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
206
+ async function writePromptToTempFile(filePath: string, prompt: string): Promise<void> {
155
207
  await withFileMutationQueue(filePath, async () => {
156
208
  await fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 });
157
209
  });
158
- return { dir: tmpDir, filePath };
159
210
  }
160
211
 
161
212
  function getPiInvocation(args: string[]): { command: string; args: string[] } {
@@ -229,9 +280,13 @@ export async function runAgentTask(
229
280
 
230
281
  try {
231
282
  if (agent.systemPrompt.trim()) {
232
- const tmp = await writePromptToTempFile(agent.name, agent.systemPrompt);
233
- tmpPromptDir = tmp.dir;
234
- tmpPromptPath = tmp.filePath;
283
+ // Allocate the temp dir + path BEFORE any fallible I/O so that if
284
+ // writeFile throws, tmpPromptDir/tmpPromptPath are already set and
285
+ // the finally block can clean up the directory (F-004).
286
+ tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
287
+ const safeName = agent.name.replace(/[^\w.-]+/g, "_");
288
+ tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
289
+ await writePromptToTempFile(tmpPromptPath, agent.systemPrompt);
235
290
  args.push("--append-system-prompt", tmpPromptPath);
236
291
  }
237
292
  args.push(`Task: ${task}`);
@@ -264,15 +319,25 @@ export async function runAgentTask(
264
319
  if (buffer.trim()) processLine(buffer);
265
320
  resolve(code ?? 0);
266
321
  });
267
- proc.on("error", () => resolve(1));
322
+ proc.on("error", (err) => {
323
+ if (!result.stderr) result.stderr = err.message;
324
+ if (!result.errorMessage) result.errorMessage = err.message;
325
+ resolve(1);
326
+ });
268
327
 
269
328
  if (opts.signal) {
270
329
  const kill = () => {
271
330
  wasAborted = true;
272
331
  proc.kill("SIGTERM");
273
- setTimeout(() => {
274
- if (!proc.killed) proc.kill("SIGKILL");
275
- }, 5000);
332
+ // Force-kill fallback. proc.kill("SIGKILL") is idempotent if
333
+ // the process already exited, and `proc.killed` is set true
334
+ // synchronously by the SIGTERM above — so the previous
335
+ // `if (!proc.killed)` guard would skip SIGKILL entirely,
336
+ // hanging forever on a child that ignores SIGTERM.
337
+ // .unref() keeps the timer from holding the event loop open
338
+ // after the process is gone.
339
+ const forceKill = setTimeout(() => proc.kill("SIGKILL"), 5000);
340
+ forceKill.unref();
276
341
  };
277
342
  if (opts.signal.aborted) kill();
278
343
  else opts.signal.addEventListener("abort", kill, { once: true });
@@ -289,8 +354,25 @@ export async function runAgentTask(
289
354
  result.stopReason = "aborted";
290
355
  result.errorMessage = "Subagent was aborted";
291
356
  }
292
- if (isFailed(result) && !result.output) {
293
- result.output = result.errorMessage || result.stderr || "(no output)";
357
+ // On failure, build a short, structured errorMessage + a placeholder
358
+ // output. We deliberately do NOT copy the raw errorMessage into
359
+ // `output`: upstream providers (e.g. a Cloudflare challenge page) can
360
+ // surface huge HTML/JSON in errorMessage, and that garbage would
361
+ // otherwise flow into downstream phase interpolations.
362
+ // Sanitization must run whenever the run failed, even if some output
363
+ // was already emitted (e.g. crash mid-stream with a partial result):
364
+ // an unsanitized errorMessage would still leak into PhaseState and
365
+ // downstream interpolation contexts. (F-013)
366
+ if (isFailed(result)) {
367
+ if (!result.output) {
368
+ result.output = TRANSPORT_ERROR_PLACEHOLDER;
369
+ if (!result.errorMessage) {
370
+ result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
371
+ }
372
+ }
373
+ if (result.errorMessage) {
374
+ result.errorMessage = sanitizeErrorMessage(result.errorMessage);
375
+ }
294
376
  }
295
377
  return result;
296
378
  } finally {
@@ -50,6 +50,9 @@ export class RunHistoryComponent {
50
50
  private cachedLines?: string[];
51
51
 
52
52
  constructor(runs: RunState[], theme: Theme, onDone: (result?: RunHistoryResult) => void) {
53
+ if (!runs.length) {
54
+ throw new Error("RunHistoryComponent requires at least one run");
55
+ }
53
56
  this.runs = runs;
54
57
  this.theme = theme;
55
58
  this.onDone = onDone;
@@ -10,6 +10,8 @@
10
10
  * result are skipped.
11
11
  */
12
12
 
13
+ import * as path from "node:path";
14
+ import * as fs from "node:fs";
13
15
  import type { AgentConfig } from "./agents.ts";
14
16
  import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
15
17
  import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
@@ -147,6 +149,9 @@ function mergePhaseState(
147
149
  const ran = results.filter((r) => r.stopReason !== "budget-skipped");
148
150
  const anyFailed = ran.some(isFailed);
149
151
  const usage = aggregateUsage(results.map((r) => r.usage));
152
+ // B12: surface the model(s) used in the fan-out so consumers can show
153
+ // which model produced the merged output.
154
+ const model = ran.find((r) => r.model !== undefined)?.model;
150
155
  // Combine outputs as a labelled list; also expose a JSON array of outputs.
151
156
  const combinedText = ran
152
157
  .map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
@@ -163,6 +168,7 @@ function mergePhaseState(
163
168
  output: combinedText,
164
169
  json: jsonArray,
165
170
  usage,
171
+ model,
166
172
  attempts: attempts > results.length ? attempts : undefined,
167
173
  budgetTruncated: budgetSkips.length > 0 || undefined,
168
174
  subProgress: { done: ran.length, total: results.length, running: 0, failed: failedCount },
@@ -188,6 +194,89 @@ function liveSink(state: RunState, phaseId: string, emitProgress: () => void): (
188
194
  };
189
195
  }
190
196
 
197
+
198
/** Files larger than this many bytes are skipped rather than read. */
const CONTEXT_MAX_FILE_BYTES = 10 * 1024 * 1024; // 10 MB
/** Upper bound, in characters, on the combined rendered context. */
const MAX_TOTAL_CONTEXT_CHARS = 200_000;

/**
 * Pre-read files listed in a phase's `context` field and return them as
 * markdown code blocks. Handles:
 *   - literal paths
 *   - interpolation refs (e.g. `{steps.scout.json}` resolving to `["a.ts"]`)
 *   - per-file truncation via `contextLimit` (default 8000 chars)
 *
 * The result is a single string that should be prepended to the phase task so
 * the subagent never needs to spend turns on file exploration.
 *
 * @param phase Phase whose `context` / `contextLimit` fields are read.
 * @param ctx Interpolation context used to resolve placeholder entries.
 * @returns The rendered blocks followed by a blank line, or `""` when the
 *   phase lists no context or none of the listed files could be read.
 */
async function resolvePhaseContext(
  phase: Phase,
  ctx: InterpolationContext,
): Promise<string> {
  const entries = phase.context;
  if (!entries || entries.length === 0) return "";
  const limit = phase.contextLimit ?? 8000;

  const paths: string[] = [];
  for (const entry of entries) {
    const r = interpolate(entry, ctx);
    if (r.text === entry) {
      // Unchanged — literal path
      paths.push(entry);
      continue;
    }
    // Resolved — may be a JSON array from {steps.X.json}
    const parsed = safeParse(r.text);
    if (Array.isArray(parsed)) {
      for (const item of parsed) {
        if (typeof item === "string" && item.trim()) paths.push(item.trim());
      }
    } else if (typeof r.text === "string" && r.text.trim()) {
      paths.push(r.text.trim());
    }
  }

  // Diagnose JSON blobs masquerading as file paths — common when a context
  // entry like {steps.discover.output} resolves to {"files":[...]} instead
  // of a flat path or JSON array. The author should use {steps.discover.json.files}.
  const candidates: string[] = [];
  for (const p of new Set(paths)) {
    if (p.startsWith("{")) {
      console.warn(
        `[taskflow] Context entry "${p.slice(0, 80)}…" looks like a JSON object, not a file path. ` +
          `Use {steps.<id>.json.<field>} to extract a specific field.`,
      );
    } else {
      candidates.push(p);
    }
  }

  const blocks: string[] = [];
  for (const p of candidates) {
    try {
      const abs = path.resolve(p);
      // Non-blocking I/O: this runs on the same event loop that services
      // live progress updates, so avoid the synchronous fs variants.
      const stat = await fs.promises.stat(abs);
      if (!stat.isFile()) continue;
      if (stat.size > CONTEXT_MAX_FILE_BYTES) continue;
      const content = await fs.promises.readFile(abs, "utf-8");
      const truncated =
        content.length > limit
          ? content.slice(0, limit) + `\n... [truncated ${content.length - limit} chars]`
          : content;
      const ext = path.extname(p).slice(1) || "txt";
      blocks.push(`## File: ${p}\n\n\`\`\`${ext}\n${truncated}\n\`\`\``);
    } catch {
      console.warn(`[taskflow] Skipped unreadable context file: ${p}`);
    }
  }

  // Nothing readable: return the same empty string as "no context listed" so
  // the task text (and any hash derived from it) is not padded with blank lines.
  if (blocks.length === 0) return "";

  // Safety cap: truncate total context when too many files are listed.
  const joined = blocks.join("\n\n") + "\n\n";
  if (joined.length <= MAX_TOTAL_CONTEXT_CHARS) return joined;
  const overflow = joined.length - MAX_TOTAL_CONTEXT_CHARS;
  return joined.slice(0, MAX_TOTAL_CONTEXT_CHARS) + `\n\n... [truncated ${overflow} total chars]`;
}
278
+
279
+
191
280
  async function executePhase(
192
281
  phase: Phase,
193
282
  state: RunState,
@@ -200,6 +289,12 @@ async function executePhase(
200
289
  const previousOutput = lastCompletedOutput(state, phase);
201
290
  const run = deps.runTask ?? runAgentTask;
202
291
 
292
+ // Resolve context pre-read files once, before any type branching.
293
+ // The content is prepended to every task so the subagent never spends
294
+ // turns on file exploration for files the flow author already knows.
295
+ const ctx = buildInterpolationContext(state, previousOutput);
296
+ const preRead = await resolvePhaseContext(phase, ctx);
297
+
203
298
  const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
204
299
  run(
205
300
  deps.cwd,
@@ -228,6 +323,10 @@ async function executePhase(
228
323
  if (deps.signal?.aborted) break;
229
324
  last = await baseRun(agentName, task, onLive);
230
325
  usages.push(last.usage);
326
+ // B6: aggregate and surface cumulative usage before the retry decision,
327
+ // so the TUI / budget guard see the in-flight spend on every attempt.
328
+ const liveRetry = state.phases[phase.id];
329
+ if (liveRetry) liveRetry.usage = aggregateUsage(usages);
231
330
  if (!isFailed(last)) break;
232
331
  // Stop retrying on abort or once the run is over budget.
233
332
  if (deps.signal?.aborted || overBudget(state).over) break;
@@ -313,24 +412,26 @@ async function executePhase(
313
412
  // interpolated task. gate additionally parses a verdict; reduce simply pulls
314
413
  // its inputs from `from` phases (already exposed via interpolation).
315
414
  if (type === "agent" || type === "gate" || type === "reduce") {
316
- const ctx = buildInterpolationContext(state, previousOutput);
317
415
  const { text } = interpolate(phase.task ?? "", ctx);
318
- const inputHash = hashInput(phase.id, phase.agent ?? "", text);
416
+ const fullTask = preRead + text;
417
+ const inputHash = hashInput(phase.id, phase.agent ?? "", fullTask);
319
418
  const cached = cachedPhase(prior, inputHash);
320
419
  if (cached) return cached;
321
420
 
322
- const r = await runOne(phase.agent ?? defaultAgent(deps), text, liveSink(state, phase.id, emitProgress));
421
+ const r = await runOne(phase.agent ?? defaultAgent(deps), fullTask, liveSink(state, phase.id, emitProgress));
323
422
  const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
324
423
  if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
325
424
  return ps;
326
425
  }
327
426
 
328
427
  if (type === "parallel") {
329
- const ctx = buildInterpolationContext(state, previousOutput);
330
- const branches = (phase.branches ?? []).map((b) => ({
331
- agent: b.agent ?? phase.agent ?? defaultAgent(deps),
332
- task: interpolate(b.task, ctx).text,
333
- }));
428
+ const branches = (phase.branches ?? []).map((b) => {
429
+ const r = interpolate(b.task, ctx);
430
+ return {
431
+ agent: b.agent ?? phase.agent ?? defaultAgent(deps),
432
+ task: preRead + r.text,
433
+ };
434
+ });
334
435
  const inputHash = hashInput(phase.id, JSON.stringify(branches));
335
436
  const cached = cachedPhase(prior, inputHash);
336
437
  if (cached) return cached;
@@ -340,7 +441,6 @@ async function executePhase(
340
441
  }
341
442
 
342
443
  if (type === "map") {
343
- const ctx = buildInterpolationContext(state, previousOutput);
344
444
  const overResolved = interpolate(phase.over ?? "", ctx).text;
345
445
  // `over` may itself be a placeholder that resolved to a JSON string.
346
446
  const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
@@ -359,7 +459,7 @@ async function executePhase(
359
459
  const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
360
460
  return {
361
461
  agent: phase.agent ?? defaultAgent(deps),
362
- task: interpolate(phase.task ?? "", localCtx).text,
462
+ task: preRead + interpolate(phase.task ?? "", localCtx).text,
363
463
  };
364
464
  });
365
465
  const inputHash = hashInput(phase.id, JSON.stringify(tasks));
@@ -424,7 +524,7 @@ async function executePhase(
424
524
  provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
425
525
  }
426
526
  const subArgs = resolveArgs(subDef, provided);
427
- const inputHash = hashInput(phase.id, `flow:${name}`, JSON.stringify(subArgs));
527
+ const inputHash = hashInput(phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs));
428
528
  const cached = cachedPhase(prior, inputHash);
429
529
  if (cached) return cached;
430
530
 
@@ -442,17 +542,29 @@ async function executePhase(
442
542
  phases: {},
443
543
  createdAt: Date.now(),
444
544
  updatedAt: Date.now(),
445
- cwd: deps.cwd,
545
+ cwd: phase.cwd ?? deps.cwd,
446
546
  };
547
+ // B8: pass this flow phase's preRead content to every sub-flow phase by
548
+ // wrapping runTask — sub-phase preRead still gets prepended on top of it.
549
+ const baseRunTask = deps.runTask ?? runAgentTask;
550
+ const subRunTask: typeof runAgentTask = (cwd, agents, agentName, subTask, opts, globalThinking) =>
551
+ baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
447
552
  const subResult = await executeTaskflow(subState, {
448
553
  ...deps,
554
+ // Override deps.cwd with the flow phase's own cwd so that sub-flow
555
+ // phases without an explicit cwd derive their subagents from the
556
+ // flow's cwd (not the caller's cwd).
557
+ cwd: phase.cwd ?? deps.cwd,
558
+ runTask: subRunTask,
449
559
  _stack: [...stack, state.flowName],
450
560
  persist: undefined,
451
561
  onProgress: () => {
452
562
  if (live) {
453
563
  const ph = Object.values(subState.phases);
564
+ // B-F015: `done` must include both success and failure so the
565
+ // renderer's `done - failed` shows the true success count.
454
566
  live.subProgress = {
455
- done: ph.filter((p) => p.status === "done").length,
567
+ done: ph.filter((p) => p.status === "done" || p.status === "failed").length,
456
568
  total: subDef.phases.length,
457
569
  running: ph.filter((p) => p.status === "running").length,
458
570
  failed: ph.filter((p) => p.status === "failed").length,
@@ -471,8 +583,11 @@ async function executePhase(
471
583
  output: subResult.finalOutput,
472
584
  json: parseJson ? safeParse(subResult.finalOutput) : undefined,
473
585
  usage: subResult.totalUsage,
586
+ // B-F015: include failed in `done` so the renderer's
587
+ // `done - failed` formula gives the success count (matches the
588
+ // map/parallel runner's overlapping-counter convention).
474
589
  subProgress: {
475
- done: sp.filter((p) => p.status === "done").length,
590
+ done: sp.filter((p) => p.status === "done" || p.status === "failed").length,
476
591
  total: subDef.phases.length,
477
592
  running: 0,
478
593
  failed: sp.filter((p) => p.status === "failed").length,
@@ -494,7 +609,7 @@ async function executePhase(
494
609
 
495
610
  /** Resolve a `{steps.x.json}`-style ref directly to its parsed value (bypassing stringify). */
496
611
  function directRef(over: string, state: RunState): unknown {
497
- const m = over.match(/^\{steps\.([a-zA-Z0-9_]+)\.(output|json)(?:\.([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*))?\}$/);
612
+ const m = over.match(/^\{steps\.([a-zA-Z0-9_-]+)\.(output|json)(?:\.([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*))?\}$/);
498
613
  if (!m) return undefined;
499
614
  const step = state.phases[m[1]];
500
615
  if (!step || step.status !== "done") return undefined;
@@ -543,7 +658,10 @@ export function parseGateVerdict(output: string): { verdict: "pass" | "block"; r
543
658
  if (typeof o.continue === "boolean") return { verdict: o.continue ? "pass" : "block", reason: asReason(o.reason) };
544
659
  if (typeof o.pass === "boolean") return { verdict: o.pass ? "pass" : "block", reason: asReason(o.reason) };
545
660
  if (typeof o.verdict === "string") {
546
- const block = /block|fail|stop|reject|halt|\bno\b/i.test(o.verdict);
661
+ // Note: do NOT include standalone "no" — natural-language verdicts like
662
+ // "No issues found" / "no errors" would otherwise be false-positive BLOCK.
663
+ // Fail-open covers any ambiguous text.
664
+ const block = /block|fail|stop|reject|halt/i.test(o.verdict);
547
665
  return { verdict: block ? "block" : "pass", reason: asReason(o.reason) };
548
666
  }
549
667
  }
@@ -560,11 +678,86 @@ function asReason(v: unknown): string | undefined {
560
678
  return typeof v === "string" && v.trim() ? v.trim() : undefined;
561
679
  }
562
680
 
681
/**
 * Neutralize an asynchronous failure from a host callback.
 *
 * A `try/catch` around the call only sees synchronous throws. If the callback
 * is `async` (or otherwise returns a thenable), a rejection would surface as
 * an unhandled rejection instead, so attach a no-op handler to it.
 */
function swallowAsyncFailure(outcome: unknown): void {
  if (
    outcome !== null &&
    typeof outcome === "object" &&
    typeof (outcome as { then?: unknown }).then === "function"
  ) {
    Promise.resolve(outcome as PromiseLike<unknown>).catch(() => {
      // user callback — must not break the run
    });
  }
}

/**
 * Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
 *
 * A throw from a host-supplied callback must NEVER replace the runtime's
 * outcome — neither the original crash message in `executeTaskflow`'s catch
 * block, nor the final output of a successful run. Callbacks are observability
 * hooks; the run survives their failure, whether the callback throws
 * synchronously or returns a promise that rejects.
 *
 * Used at every "checkpoint" call site (phase start, phase end, terminal state).
 * For high-frequency live updates inside a phase, see `safeProgress` below.
 *
 * @param deps Runtime dependencies carrying the optional callbacks.
 * @param state Run state handed to each callback.
 */
function safeEmit(deps: RuntimeDeps, state: RunState): void {
  try {
    swallowAsyncFailure(deps.persist?.(state));
  } catch {
    // user callback — must not break the run
  }
  try {
    swallowAsyncFailure(deps.onProgress?.(state));
  } catch {
    // user callback — must not break the run
  }
}

/**
 * Like `safeEmit` but for the high-frequency live-update channel only.
 * Skips `persist` (which is intentionally checkpoint-only) and swallows any
 * throw or rejection from the user-supplied `onProgress` so a misbehaving TUI
 * sink cannot disrupt an in-flight phase.
 *
 * @param deps Runtime dependencies carrying the optional `onProgress` callback.
 * @param state Run state handed to the callback.
 */
function safeProgress(deps: RuntimeDeps, state: RunState): void {
  try {
    swallowAsyncFailure(deps.onProgress?.(state));
  } catch {
    // user callback — must not break the run
  }
}
718
+
563
719
/**
 * Append an automatic review gate to a taskflow that has none.
 *
 * Does nothing when the definition sets `implicitGate: false`, when it has no
 * phases, or when it already contains a `gate` / `approval` phase or a phase
 * with id `_implicit-gate` (so calling this twice on the same definition adds
 * only one gate).
 *
 * Otherwise it mutates `def` in place: if no phase is marked `final`, the
 * current last phase is marked `final` so the injected gate does not become
 * the flow's final output; then a `gate` phase depending on every existing
 * phase is pushed onto `def.phases`.
 *
 * @param def Taskflow definition to amend in place.
 */
function ensureImplicitGate(def: Taskflow): void {
  // Respect explicit opt-out. The flag is read through a narrow structural
  // type rather than `any` so the rest of `def` stays type-checked.
  if ((def as Taskflow & { implicitGate?: boolean }).implicitGate === false) return;

  const hasGate = def.phases.some(
    (p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
  );
  if (hasGate || def.phases.length === 0) return;

  // The last existing phase is the effective "final" phase — pin it so the
  // injected gate doesn't become the finalOutput.
  if (!def.phases.some((p) => p.final)) {
    def.phases[def.phases.length - 1].final = true;
  }

  const allIds = def.phases.map((p) => p.id);
  def.phases.push({
    id: "_implicit-gate",
    type: "gate",
    dependsOn: allIds,
    agent: "reviewer",
    task: `Review all phase outputs from this taskflow for accuracy and consistency.

For each upstream phase, scan its output for:
1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
2. **Internal contradictions**: Do any phases contradict each other?
3. **Completeness**: Is any output truncated, empty, or anomalously short?
4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.

Output:
- If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
- If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
  });
}

/**
 * Execute a full taskflow. Mutates and persists `state` as it progresses.
 */
757
+
566
758
  export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
567
759
  const def: Taskflow = state.def;
760
+ ensureImplicitGate(def);
568
761
  try {
569
762
  return await runTaskflowLayers(state, deps);
570
763
  } catch (e) {
@@ -579,8 +772,7 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
579
772
  }
580
773
  }
581
774
  state.status = "failed";
582
- deps.persist?.(state);
583
- deps.onProgress?.(state);
775
+ safeEmit(deps, state);
584
776
  const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
585
777
  return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
586
778
  }
@@ -591,8 +783,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
591
783
  const layers = topoLayers(def.phases);
592
784
 
593
785
  state.status = "running";
594
- deps.persist?.(state);
595
- deps.onProgress?.(state);
786
+ safeEmit(deps, state);
596
787
 
597
788
  let aborted = false;
598
789
  let gateBlocked = false;
@@ -650,8 +841,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
650
841
  endedAt: Date.now(),
651
842
  usage: emptyUsage(),
652
843
  };
653
- deps.persist?.(state);
654
- deps.onProgress?.(state);
844
+ safeEmit(deps, state);
655
845
  return;
656
846
  }
657
847
 
@@ -662,9 +852,9 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
662
852
  status: "running",
663
853
  startedAt,
664
854
  };
665
- deps.onProgress?.(state);
855
+ safeProgress(deps, state);
666
856
 
667
- const ps = await executePhase(phase, state, deps, prior, () => deps.onProgress?.(state));
857
+ const ps = await executePhase(phase, state, deps, prior, () => safeProgress(deps, state));
668
858
  // Preserve the phase start time: executePhase returns a fresh PhaseState
669
859
  // that omits startedAt (cached/resumed results carry their own).
670
860
  state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
@@ -687,8 +877,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
687
877
  budgetBlocked = true;
688
878
  budgetReason = ob.reason;
689
879
  }
690
- deps.persist?.(state);
691
- deps.onProgress?.(state);
880
+ safeEmit(deps, state);
692
881
  });
693
882
  }
694
883
 
@@ -712,8 +901,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
712
901
  : anyFailed
713
902
  ? "failed"
714
903
  : "completed";
715
- deps.persist?.(state);
716
- deps.onProgress?.(state);
904
+ safeEmit(deps, state);
717
905
 
718
906
  let finalOutput = finalState?.output ?? "(no output)";
719
907
  if (gateBlocked) {