npm - pi-taskflow - Versions diffs - 0.0.6 → 0.0.8 - Mend

pi-taskflow 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +77 -13
package/examples/conditional-research.json +1 -1
package/examples/guarded-refactor.json +2 -2
package/extensions/agents.ts +54 -34
package/extensions/index.ts +19 -7
package/extensions/interpolate.ts +25 -4
package/extensions/render.ts +41 -36
package/extensions/runner.ts +97 -15
package/extensions/runs-view.ts +3 -0
package/extensions/runtime.ts +216 -28
package/extensions/schema.ts +151 -5
package/extensions/store.ts +77 -7
package/package.json +1 -1
package/skills/taskflow/SKILL.md +112 -1
package/skills/taskflow/configuration.md +0 -2

package/extensions/runner.ts CHANGED Viewed

@@ -48,12 +48,67 @@ export function isFailed(r: RunResult): boolean {
 	return r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted";
 }
+/** Placeholder written to a failed phase's `output` so downstream interpolation
+ *  can detect "upstream failed" without being polluted by raw HTML/JSON. */
+export const TRANSPORT_ERROR_PLACEHOLDER = "(upstream error: subagent failed; see error)";
+/** Hard cap on the errorMessage field stored in PhaseState (≈ 4 KB). */
+export const ERROR_MESSAGE_MAX_LEN = 4096;
+/**
+ * Cheap detector for HTML-looking text so upstream garbage can be summarized.
+ *
+ * Despite the "OrJson" in the name, this returns `true` only for input whose
+ * first non-whitespace characters open one of a fixed set of HTML tags
+ * (`<!doctype html`, `<html`, `<head`, `<body`, `<script`, `<svg`, `<div`,
+ * `<iframe`, `<span`, `<p`), matched case-insensitively. Input that starts
+ * with `{`, and everything else (including empty or whitespace-only strings),
+ * yields `false`.
+ *
+ * @param s Text to classify; leading whitespace is ignored.
+ * @returns `true` when `s` starts with one of the recognized HTML tags.
+ */
+export function looksLikeHtmlOrJson(s: string): boolean {
+	const t = s.trimStart();
+	if (!t) return false;
+	if (t.startsWith("<")) {
+		// Known HTML openers only (e.g. challenge/error pages); other markup such as `<?xml` does not match.
+		return /^<(?:!doctype\s+html|html|head|body|script|svg|div|iframe|span|p)\b/i.test(t);
+	}
+	if (t.startsWith("{")) {
+		// JSON-looking input is deliberately never flagged: a genuine JSON envelope
+		// is fine to keep, and an unwrapped {error: "..."} from an SDK is short.
+		// Oversized payloads are left to the length cap in sanitizeErrorMessage.
+		return false;
+	}
+	return false;
+}
+/**
+ * Normalize an error message before it is persisted.
+ *
+ * Steps, in order:
+ * 1. Collapse every whitespace run to a single space and trim.
+ * 2. If the text looks like an HTML page, replace it with a one-line summary
+ *    (`<title>` if present, otherwise a recognizable marker such as a Ray ID
+ *    or an `XxxError: ...` fragment, otherwise the first 200 characters of
+ *    the tag-stripped text). The hint is always capped at 200 characters.
+ * 3. Otherwise, if the RAW input was longer than `ERROR_MESSAGE_MAX_LEN`,
+ *    keep the first and last 200 characters of the collapsed text and report
+ *    how many characters between them were dropped.
+ * 4. Otherwise return the collapsed text unchanged.
+ *
+ * @param raw Error text as reported by the subagent/provider; may be undefined.
+ * @returns The cleaned message, or `""` for undefined, empty or
+ *          whitespace-only input.
+ */
+export function sanitizeErrorMessage(raw: string | undefined): string {
+	if (!raw) return "";
+	const cleaned = raw.replace(/\s+/g, " ").trim();
+	if (!cleaned) return "";
+	const rawLen = raw.length;
+	// The HTML check runs BEFORE the size cap. Challenge/gateway pages are
+	// usually larger than the cap, and truncating them head/tail would still
+	// persist raw markup; summarizing first keeps HTML out of the phase error
+	// and out of downstream interpolation contexts regardless of size.
+	if (looksLikeHtmlOrJson(cleaned)) {
+		const title = cleaned.match(/<title[^>]*>([^<]*)<\/title>/i)?.[1]?.trim();
+		const stripped = cleaned.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
+		const m = stripped.match(/(?:Unable to load site|Ray ID[: ]+([A-Za-z0-9]+)|[A-Z][a-z]+Error[: ]+(.{0,200}))/i);
+		// Cap the hint whatever its origin so an oversized <title> cannot
+		// reintroduce an unbounded message.
+		const hint = (title || (m ? (m[1] || m[0]).trim() : stripped)).slice(0, 200);
+		return `Upstream returned non-JSON response (${rawLen} chars). Hint: ${hint}`;
+	}
+	// The truncation decision is still made on the RAW length, but it is only
+	// applied when the collapsed text is long enough for head and tail not to
+	// overlap; otherwise the collapsed text is already short and is returned
+	// whole instead of being duplicated around a bogus "truncated" marker.
+	const edge = 200;
+	if (rawLen > ERROR_MESSAGE_MAX_LEN && cleaned.length > edge * 2) {
+		const head = cleaned.slice(0, edge);
+		const tail = cleaned.slice(-edge);
+		// Count what was actually dropped from the collapsed text.
+		return `${head} ... [truncated ${cleaned.length - edge * 2} chars] ... ${tail}`;
+	}
+	return cleaned;
+}
 function getFinalOutput(messages: Message[]): string {
 	for (let i = messages.length - 1; i >= 0; i--) {
 		const msg = messages[i];
 		if (msg.role === "assistant") {
 			for (const part of msg.content) {
-				if (part.type === "text") return part.text;
+				if (part.type === "text" && part.text.trim()) return part.text;
 			}
 		}
 	}
@@ -148,14 +203,10 @@ function summarizeToolCall(name: string, args: Record<string, unknown>): string
 	}
 }
-async function writePromptToTempFile(agentName: string, prompt: string): Promise<{ dir: string; filePath: string }> {
-	const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
-	const safeName = agentName.replace(/[^\w.-]+/g, "_");
-	const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
+async function writePromptToTempFile(filePath: string, prompt: string): Promise<void> {
 	await withFileMutationQueue(filePath, async () => {
 		await fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 });
 	});
-	return { dir: tmpDir, filePath };
 }
 function getPiInvocation(args: string[]): { command: string; args: string[] } {
@@ -229,9 +280,13 @@ export async function runAgentTask(
 	try {
 		if (agent.systemPrompt.trim()) {
-			const tmp = await writePromptToTempFile(agent.name, agent.systemPrompt);
-			tmpPromptDir = tmp.dir;
-			tmpPromptPath = tmp.filePath;
+			// Allocate the temp dir + path BEFORE any fallible I/O so that if
+			// writeFile throws, tmpPromptDir/tmpPromptPath are already set and
+			// the finally block can clean up the directory (F-004).
+			tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
+			const safeName = agent.name.replace(/[^\w.-]+/g, "_");
+			tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
+			await writePromptToTempFile(tmpPromptPath, agent.systemPrompt);
 			args.push("--append-system-prompt", tmpPromptPath);
 		}
 		args.push(`Task: ${task}`);
@@ -264,15 +319,25 @@ export async function runAgentTask(
 				if (buffer.trim()) processLine(buffer);
 				resolve(code ?? 0);
 			});
-			proc.on("error", () => resolve(1));
+			proc.on("error", (err) => {
+				if (!result.stderr) result.stderr = err.message;
+				if (!result.errorMessage) result.errorMessage = err.message;
+				resolve(1);
+			});
 			if (opts.signal) {
 				const kill = () => {
 					wasAborted = true;
 					proc.kill("SIGTERM");
-					setTimeout(() => {
-						if (!proc.killed) proc.kill("SIGKILL");
-					}, 5000);
+					// Force-kill fallback. proc.kill("SIGKILL") is idempotent if
+					// the process already exited, and `proc.killed` is set true
+					// synchronously by the SIGTERM above — so the previous
+					// `if (!proc.killed)` guard would skip SIGKILL entirely,
+					// hanging forever on a child that ignores SIGTERM.
+					// .unref() keeps the timer from holding the event loop open
+					// after the process is gone.
+					const forceKill = setTimeout(() => proc.kill("SIGKILL"), 5000);
+					forceKill.unref();
 				};
 				if (opts.signal.aborted) kill();
 				else opts.signal.addEventListener("abort", kill, { once: true });
@@ -289,8 +354,25 @@ export async function runAgentTask(
 			result.stopReason = "aborted";
 			result.errorMessage = "Subagent was aborted";
 		}
-		if (isFailed(result) && !result.output) {
-			result.output = result.errorMessage || result.stderr || "(no output)";
+		// On failure, build a short, structured errorMessage + a placeholder
+		// output. We deliberately do NOT copy the raw errorMessage into
+		// `output`: upstream providers (e.g. a Cloudflare challenge page) can
+		// surface huge HTML/JSON in errorMessage, and that garbage would
+		// otherwise flow into downstream phase interpolations.
+		// Sanitization must run whenever the run failed, even if some output
+		// was already emitted (e.g. crash mid-stream with a partial result):
+		// an unsanitized errorMessage would still leak into PhaseState and
+		// downstream interpolation contexts. (F-013)
+		if (isFailed(result)) {
+			if (!result.output) {
+				result.output = TRANSPORT_ERROR_PLACEHOLDER;
+				if (!result.errorMessage) {
+					result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
+				}
+			}
+			if (result.errorMessage) {
+				result.errorMessage = sanitizeErrorMessage(result.errorMessage);
+			}
 		}
 		return result;
 	} finally {

package/extensions/runs-view.ts CHANGED Viewed

@@ -50,6 +50,9 @@ export class RunHistoryComponent {
 	private cachedLines?: string[];
 	constructor(runs: RunState[], theme: Theme, onDone: (result?: RunHistoryResult) => void) {
+		if (!runs.length) {
+			throw new Error("RunHistoryComponent requires at least one run");
+		}
 		this.runs = runs;
 		this.theme = theme;
 		this.onDone = onDone;

package/extensions/runtime.ts CHANGED Viewed

@@ -10,6 +10,8 @@
  * result are skipped.
  */
+import * as path from "node:path";
+import * as fs from "node:fs";
 import type { AgentConfig } from "./agents.ts";
 import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
 import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
@@ -147,6 +149,9 @@ function mergePhaseState(
 	const ran = results.filter((r) => r.stopReason !== "budget-skipped");
 	const anyFailed = ran.some(isFailed);
 	const usage = aggregateUsage(results.map((r) => r.usage));
+	// B12: surface the model(s) used in the fan-out so consumers can show
+	// which model produced the merged output.
+	const model = ran.find((r) => r.model !== undefined)?.model;
 	// Combine outputs as a labelled list; also expose a JSON array of outputs.
 	const combinedText = ran
 		.map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
@@ -163,6 +168,7 @@ function mergePhaseState(
 		output: combinedText,
 		json: jsonArray,
 		usage,
+		model,
 		attempts: attempts > results.length ? attempts : undefined,
 		budgetTruncated: budgetSkips.length > 0 || undefined,
 		subProgress: { done: ran.length, total: results.length, running: 0, failed: failedCount },
@@ -188,6 +194,89 @@ function liveSink(state: RunState, phaseId: string, emitProgress: () => void): (
 	};
 }
+/** Files whose size exceeds this many bytes are skipped rather than read (10 MB). */
+const CONTEXT_MAX_FILE_BYTES = 10 * 1024 * 1024; // 10 MB
+/** Upper bound, in characters, on the combined pre-read text returned for one phase. */
+const MAX_TOTAL_CONTEXT_CHARS = 200_000;
+/**
+ * Pre-read files listed in a phase's `context` field and return them as
+ * markdown code blocks. Handles:
+ * - literal paths (entries that interpolation leaves unchanged)
+ * - interpolation refs (e.g. `{steps.scout.json}` resolving to `["a.ts"]`);
+ *   a resolved JSON array contributes each non-empty string item, any other
+ *   resolved text contributes itself as a single path
+ * - per-file truncation via `contextLimit` (default 8000 characters)
+ *
+ * Duplicate paths are read once. Resolved entries that start with `{` are
+ * reported with a warning and dropped. Non-files, files larger than
+ * `CONTEXT_MAX_FILE_BYTES`, and unreadable paths are skipped (the latter
+ * with a warning). The combined text is cut at `MAX_TOTAL_CONTEXT_CHARS`.
+ *
+ * The result is a single string that should be prepended to the phase task so
+ * the subagent never needs to spend turns on file exploration.
+ *
+ * NOTE(review): relative entries go through `path.resolve(p)` with no base
+ * directory, i.e. they are resolved against the process working directory —
+ * confirm that this, rather than the phase/run cwd, is intended.
+ *
+ * @param phase Phase whose `context` / `contextLimit` fields are read.
+ * @param ctx   Interpolation context used to resolve placeholder entries.
+ * @returns The concatenated blocks followed by a blank line, or `""` when the
+ *          phase lists no context or none of the listed files could be read.
+ */
+async function resolvePhaseContext(
+	phase: Phase,
+	ctx: InterpolationContext,
+): Promise<string> {
+	const entries = phase.context;
+	if (!entries || entries.length === 0) return "";
+	const limit = phase.contextLimit ?? 8000;
+	// Expand every entry into zero or more candidate paths.
+	const paths: string[] = [];
+	for (const entry of entries) {
+		const resolved = interpolate(entry, ctx).text;
+		if (resolved === entry) {
+			// Unchanged — literal path
+			paths.push(entry);
+			continue;
+		}
+		// Resolved — may be a JSON array from {steps.X.json}
+		const parsed = safeParse(resolved);
+		if (Array.isArray(parsed)) {
+			for (const item of parsed) {
+				if (typeof item === "string" && item.trim()) paths.push(item.trim());
+			}
+		} else if (resolved.trim()) {
+			paths.push(resolved.trim());
+		}
+	}
+	// Diagnose JSON blobs masquerading as file paths — common when a context
+	// entry like {steps.discover.output} resolves to {"files":[...]} instead
+	// of a flat path or JSON array. The author should use {steps.discover.json.files}.
+	const filtered: string[] = [];
+	for (const p of new Set(paths)) {
+		if (p.startsWith("{")) {
+			console.warn(
+				`[taskflow] Context entry "${p.slice(0, 80)}…" looks like a JSON object, not a file path. ` +
+					`Use {steps.<id>.json.<field>} to extract a specific field.`,
+			);
+			continue;
+		}
+		filtered.push(p);
+	}
+	const blocks: string[] = [];
+	for (const p of filtered) {
+		try {
+			const abs = path.resolve(p);
+			const stat = fs.statSync(abs);
+			if (!stat.isFile()) continue;
+			if (stat.size > CONTEXT_MAX_FILE_BYTES) continue;
+			const content = fs.readFileSync(abs, "utf-8");
+			const truncated =
+				content.length > limit
+					? content.slice(0, limit) + `\n... [truncated ${content.length - limit} chars]`
+					: content;
+			// The file extension doubles as the fence language; "txt" when there is none.
+			const ext = path.extname(p).slice(1) || "txt";
+			blocks.push(`## File: ${p}\n\n\`\`\`${ext}\n${truncated}\n\`\`\``);
+		} catch {
+			console.warn(`[taskflow] Skipped unreadable context file: ${p}`);
+		}
+	}
+	// Nothing could be read: return "" so the task is not prefixed with stray
+	// blank lines (which would also change the phase's input hash).
+	if (blocks.length === 0) return "";
+	// Safety cap: truncate total context when too many files are listed.
+	let result = blocks.join("\n\n") + "\n\n";
+	if (result.length > MAX_TOTAL_CONTEXT_CHARS) {
+		result = result.slice(0, MAX_TOTAL_CONTEXT_CHARS) + `\n\n... [truncated ${result.length - MAX_TOTAL_CONTEXT_CHARS} total chars]`;
+	}
+	return result;
+}
 async function executePhase(
 	phase: Phase,
 	state: RunState,
@@ -200,6 +289,12 @@ async function executePhase(
 	const previousOutput = lastCompletedOutput(state, phase);
 	const run = deps.runTask ?? runAgentTask;
+	// Resolve context pre-read files once, before any type branching.
+	// The content is prepended to every task so the subagent never spends
+	// turns on file exploration for files the flow author already knows.
+	const ctx = buildInterpolationContext(state, previousOutput);
+	const preRead = await resolvePhaseContext(phase, ctx);
 	const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
 		run(
 			deps.cwd,
@@ -228,6 +323,10 @@ async function executePhase(
 			if (deps.signal?.aborted) break;
 			last = await baseRun(agentName, task, onLive);
 			usages.push(last.usage);
+			// B6: aggregate and surface cumulative usage before the retry decision,
+			// so the TUI / budget guard see the in-flight spend on every attempt.
+			const liveRetry = state.phases[phase.id];
+			if (liveRetry) liveRetry.usage = aggregateUsage(usages);
 			if (!isFailed(last)) break;
 			// Stop retrying on abort or once the run is over budget.
 			if (deps.signal?.aborted || overBudget(state).over) break;
@@ -313,24 +412,26 @@ async function executePhase(
 	// interpolated task. gate additionally parses a verdict; reduce simply pulls
 	// its inputs from `from` phases (already exposed via interpolation).
 	if (type === "agent" || type === "gate" || type === "reduce") {
-		const ctx = buildInterpolationContext(state, previousOutput);
 		const { text } = interpolate(phase.task ?? "", ctx);
-		const inputHash = hashInput(phase.id, phase.agent ?? "", text);
+		const fullTask = preRead + text;
+		const inputHash = hashInput(phase.id, phase.agent ?? "", fullTask);
 		const cached = cachedPhase(prior, inputHash);
 		if (cached) return cached;
-		const r = await runOne(phase.agent ?? defaultAgent(deps), text, liveSink(state, phase.id, emitProgress));
+		const r = await runOne(phase.agent ?? defaultAgent(deps), fullTask, liveSink(state, phase.id, emitProgress));
 		const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
 		if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
 		return ps;
 	}
 	if (type === "parallel") {
-		const ctx = buildInterpolationContext(state, previousOutput);
-		const branches = (phase.branches ?? []).map((b) => ({
-			agent: b.agent ?? phase.agent ?? defaultAgent(deps),
-			task: interpolate(b.task, ctx).text,
-		}));
+		const branches = (phase.branches ?? []).map((b) => {
+			const r = interpolate(b.task, ctx);
+			return {
+				agent: b.agent ?? phase.agent ?? defaultAgent(deps),
+				task: preRead + r.text,
+			};
+		});
 		const inputHash = hashInput(phase.id, JSON.stringify(branches));
 		const cached = cachedPhase(prior, inputHash);
 		if (cached) return cached;
@@ -340,7 +441,6 @@ async function executePhase(
 	}
 	if (type === "map") {
-		const ctx = buildInterpolationContext(state, previousOutput);
 		const overResolved = interpolate(phase.over ?? "", ctx).text;
 		// `over` may itself be a placeholder that resolved to a JSON string.
 		const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
@@ -359,7 +459,7 @@ async function executePhase(
 			const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
 			return {
 				agent: phase.agent ?? defaultAgent(deps),
-				task: interpolate(phase.task ?? "", localCtx).text,
+				task: preRead + interpolate(phase.task ?? "", localCtx).text,
 			};
 		});
 		const inputHash = hashInput(phase.id, JSON.stringify(tasks));
@@ -424,7 +524,7 @@ async function executePhase(
 			provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
 		}
 		const subArgs = resolveArgs(subDef, provided);
-		const inputHash = hashInput(phase.id, `flow:${name}`, JSON.stringify(subArgs));
+		const inputHash = hashInput(phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs));
 		const cached = cachedPhase(prior, inputHash);
 		if (cached) return cached;
@@ -442,17 +542,29 @@ async function executePhase(
 			phases: {},
 			createdAt: Date.now(),
 			updatedAt: Date.now(),
-			cwd: deps.cwd,
+			cwd: phase.cwd ?? deps.cwd,
 		};
+		// B8: pass this flow phase's preRead content to every sub-flow phase by
+		// wrapping runTask — sub-phase preRead still gets prepended on top of it.
+		const baseRunTask = deps.runTask ?? runAgentTask;
+		const subRunTask: typeof runAgentTask = (cwd, agents, agentName, subTask, opts, globalThinking) =>
+			baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
 		const subResult = await executeTaskflow(subState, {
 			...deps,
+			// Override deps.cwd with the flow phase's own cwd so that sub-flow
+			// phases without an explicit cwd derive their subagents from the
+			// flow's cwd (not the caller's cwd).
+			cwd: phase.cwd ?? deps.cwd,
+			runTask: subRunTask,
 			_stack: [...stack, state.flowName],
 			persist: undefined,
 			onProgress: () => {
 				if (live) {
 					const ph = Object.values(subState.phases);
+					// B-F015: `done` must include both success and failure so the
+					// renderer's `done - failed` shows the true success count.
 					live.subProgress = {
-						done: ph.filter((p) => p.status === "done").length,
+						done: ph.filter((p) => p.status === "done" || p.status === "failed").length,
 						total: subDef.phases.length,
 						running: ph.filter((p) => p.status === "running").length,
 						failed: ph.filter((p) => p.status === "failed").length,
@@ -471,8 +583,11 @@ async function executePhase(
 			output: subResult.finalOutput,
 			json: parseJson ? safeParse(subResult.finalOutput) : undefined,
 			usage: subResult.totalUsage,
+			// B-F015: include failed in `done` so the renderer's
+			// `done - failed` formula gives the success count (matches the
+			// map/parallel runner's overlapping-counter convention).
 			subProgress: {
-				done: sp.filter((p) => p.status === "done").length,
+				done: sp.filter((p) => p.status === "done" || p.status === "failed").length,
 				total: subDef.phases.length,
 				running: 0,
 				failed: sp.filter((p) => p.status === "failed").length,
@@ -494,7 +609,7 @@ async function executePhase(
 /** Resolve a `{steps.x.json}`-style ref directly to its parsed value (bypassing stringify). */
 function directRef(over: string, state: RunState): unknown {
-	const m = over.match(/^\{steps\.([a-zA-Z0-9_]+)\.(output|json)(?:\.([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*))?\}$/);
+	const m = over.match(/^\{steps\.([a-zA-Z0-9_-]+)\.(output|json)(?:\.([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*))?\}$/);
 	if (!m) return undefined;
 	const step = state.phases[m[1]];
 	if (!step || step.status !== "done") return undefined;
@@ -543,7 +658,10 @@ export function parseGateVerdict(output: string): { verdict: "pass" | "block"; r
 		if (typeof o.continue === "boolean") return { verdict: o.continue ? "pass" : "block", reason: asReason(o.reason) };
 		if (typeof o.pass === "boolean") return { verdict: o.pass ? "pass" : "block", reason: asReason(o.reason) };
 		if (typeof o.verdict === "string") {
-			const block = /block|fail|stop|reject|halt|\bno\b/i.test(o.verdict);
+			// Note: do NOT include standalone "no" — natural-language verdicts like
+			// "No issues found" / "no errors" would otherwise be false-positive BLOCK.
+			// Fail-open covers any ambiguous text.
+			const block = /block|fail|stop|reject|halt/i.test(o.verdict);
 			return { verdict: block ? "block" : "pass", reason: asReason(o.reason) };
 		}
 	}
@@ -560,11 +678,86 @@ function asReason(v: unknown): string | undefined {
 	return typeof v === "string" && v.trim() ? v.trim() : undefined;
 }
+/**
+ * Best-effort invocation of the host-supplied `persist` and `onProgress`
+ * callbacks, in that order, each inside its own try/catch so that a throw
+ * from `persist` does not prevent `onProgress` from being called.
+ *
+ * A throw from a host-supplied callback must NEVER replace the runtime's
+ * outcome — neither the original crash message in `executeTaskflow`'s catch
+ * block, nor the final output of a successful run. Callbacks are observability
+ * hooks; the run survives their failure.
+ *
+ * Used at checkpoint call sites (run start, phase completion, terminal state,
+ * crash handling). High-frequency live updates inside a phase go through
+ * `safeProgress` instead, which does not persist.
+ *
+ * NOTE(review): only synchronous throws are caught here. If `persist` or
+ * `onProgress` may return a promise, a rejection would not be caught —
+ * confirm against the `RuntimeDeps` callback signatures.
+ *
+ * @param deps  Runtime dependencies carrying the optional callbacks.
+ * @param state Run state passed to both callbacks.
+ */
+function safeEmit(deps: RuntimeDeps, state: RunState): void {
+	try {
+		deps.persist?.(state);
+	} catch {
+		// host callback threw — ignored so the run's own outcome is preserved
+	}
+	try {
+		deps.onProgress?.(state);
+	} catch {
+		// host callback threw — ignored so the run's own outcome is preserved
+	}
+}
+/**
+ * Invoke only the host-supplied `onProgress` callback, swallowing any
+ * synchronous throw so a misbehaving progress sink (e.g. a TUI renderer)
+ * cannot disrupt an in-flight phase.
+ *
+ * Unlike `safeEmit`, this never calls `persist`, which is intentionally
+ * checkpoint-only; it is meant for the high-frequency live-update channel.
+ *
+ * @param deps  Runtime dependencies carrying the optional `onProgress` callback.
+ * @param state Run state passed to the callback.
+ */
+function safeProgress(deps: RuntimeDeps, state: RunState): void {
+	try {
+		deps.onProgress?.(state);
+	} catch {
+		// host callback threw — ignored so the in-flight phase is not disrupted
+	}
+}
 /**
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
  */
+/**
+ * Append a synthetic review gate (`_implicit-gate`) to a taskflow that has no
+ * gate of its own. Mutates `def` in place.
+ *
+ * Nothing is added when the definition sets `implicitGate: false`, when it has
+ * no phases, or when any phase is already of type `gate` or `approval` or is
+ * itself the `_implicit-gate` (so calling this again on the same definition
+ * does not add a second gate).
+ *
+ * Before the gate is appended, the current last phase is marked `final`
+ * unless some phase is already marked so — otherwise the injected gate,
+ * being last, would become the flow's final output.
+ *
+ * @param def Taskflow definition to amend.
+ */
+function ensureImplicitGate(def: Taskflow): void {
+	// Respect explicit opt-out. A narrow assertion is used instead of `any` so
+	// only this one optional flag is read outside the declared Taskflow shape.
+	if ((def as Taskflow & { implicitGate?: boolean }).implicitGate === false) return;
+	const hasGate = def.phases.some(
+		(p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
+	);
+	if (hasGate || def.phases.length === 0) return;
+	// The last existing phase is the effective "final" phase — pin it so the
+	// injected gate doesn't become the finalOutput. (If the last phase were
+	// already final, `some` below would be true, so no separate check is needed.)
+	const lastPhase = def.phases[def.phases.length - 1];
+	if (!def.phases.some((p) => p.final)) {
+		lastPhase.final = true;
+	}
+	// The gate depends on every existing phase, so it runs after all of them.
+	const allIds = def.phases.map((p) => p.id);
+	def.phases.push({
+		id: "_implicit-gate",
+		type: "gate",
+		dependsOn: allIds,
+		agent: "reviewer",
+		task: `Review all phase outputs from this taskflow for accuracy and consistency.
+For each upstream phase, scan its output for:
+1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
+2. **Internal contradictions**: Do any phases contradict each other?
+3. **Completeness**: Is any output truncated, empty, or anomalously short?
+4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
+Output:
+- If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
+- If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
+	});
+}
 export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
 	const def: Taskflow = state.def;
+	ensureImplicitGate(def);
 	try {
 		return await runTaskflowLayers(state, deps);
 	} catch (e) {
@@ -579,8 +772,7 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
 			}
 		}
 		state.status = "failed";
-		deps.persist?.(state);
-		deps.onProgress?.(state);
+		safeEmit(deps, state);
 		const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
 		return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
 	}
@@ -591,8 +783,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 	const layers = topoLayers(def.phases);
 	state.status = "running";
-	deps.persist?.(state);
-	deps.onProgress?.(state);
+	safeEmit(deps, state);
 	let aborted = false;
 	let gateBlocked = false;
@@ -650,8 +841,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 					endedAt: Date.now(),
 					usage: emptyUsage(),
 				};
-				deps.persist?.(state);
-				deps.onProgress?.(state);
+				safeEmit(deps, state);
 				return;
 			}
@@ -662,9 +852,9 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 				status: "running",
 				startedAt,
 			};
-			deps.onProgress?.(state);
+			safeProgress(deps, state);
-			const ps = await executePhase(phase, state, deps, prior, () => deps.onProgress?.(state));
+			const ps = await executePhase(phase, state, deps, prior, () => safeProgress(deps, state));
 			// Preserve the phase start time: executePhase returns a fresh PhaseState
 			// that omits startedAt (cached/resumed results carry their own).
 			state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
@@ -687,8 +877,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 				budgetBlocked = true;
 				budgetReason = ob.reason;
 			}
-			deps.persist?.(state);
-			deps.onProgress?.(state);
+			safeEmit(deps, state);
 		});
 	}
@@ -712,8 +901,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 			: anyFailed
 				? "failed"
 				: "completed";
-	deps.persist?.(state);
-	deps.onProgress?.(state);
+	safeEmit(deps, state);
 	let finalOutput = finalState?.output ?? "(no output)";
 	if (gateBlocked) {