npm - pi-taskflow - Versions diffs - 0.0.7 → 0.0.9 - Mend

pi-taskflow 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/examples/conditional-research.json +1 -1
package/examples/guarded-refactor.json +2 -2
package/extensions/agents.ts +54 -34
package/extensions/index.ts +25 -7
package/extensions/interpolate.ts +24 -3
package/extensions/render.ts +7 -3
package/extensions/runner.ts +35 -17
package/extensions/runs-view.ts +3 -0
package/extensions/runtime.ts +123 -19
package/extensions/schema.ts +22 -5
package/extensions/store.ts +30 -6
package/package.json +1 -1
package/skills/taskflow/SKILL.md +42 -10

package/examples/conditional-research.json CHANGED Viewed

@@ -47,7 +47,7 @@
       "id": "report",
       "type": "reduce",
       "from": ["review"],
-      "dependsOn": ["review"],
+      "dependsOn": ["review", "deep", "quick"],
       "agent": "doc-writer",
       "task": "Write a clean markdown brief on \"{args.topic}\" from the validated research:\n\n{steps.deep.output}{steps.quick.output}",
       "final": true

package/examples/guarded-refactor.json CHANGED Viewed

@@ -26,7 +26,7 @@
       "id": "implement",
       "type": "agent",
       "agent": "executor_code",
-      "dependsOn": ["approve"],
+      "dependsOn": ["approve", "plan"],
       "task": "Implement the approved plan for {args.target}.\nPlan:\n{steps.plan.output}\nExtra human guidance (if any):\n{steps.approve.output}",
       "retry": { "max": 1, "backoffMs": 1000 }
     },
@@ -41,7 +41,7 @@
       "id": "summary",
       "type": "reduce",
       "from": ["review"],
-      "dependsOn": ["review"],
+      "dependsOn": ["review", "implement"],
       "agent": "doc-writer",
       "task": "Write a short changelog entry summarizing what was done:\n\n{steps.implement.output}",
       "final": true

package/extensions/agents.ts CHANGED Viewed

@@ -44,42 +44,56 @@ function loadAgentsFromDir(dir: string, source: "user" | "project"): AgentConfig
 	}
 	for (const entry of entries) {
-		if (!entry.name.endsWith(".md")) continue;
-		if (!entry.isFile() && !entry.isSymbolicLink()) continue;
-		const filePath = path.join(dir, entry.name);
-		let content: string;
 		try {
-			content = fs.readFileSync(filePath, "utf-8");
-		} catch {
-			continue;
-		}
+			if (!entry.name.endsWith(".md")) continue;
+			if (!entry.isFile() && !entry.isSymbolicLink()) continue;
-		const { frontmatter, body } = (() => {
+			const filePath = path.join(dir, entry.name);
+			let content: string;
 			try {
-				return parseFrontmatter<Record<string, string>>(content);
+				content = fs.readFileSync(filePath, "utf-8");
 			} catch {
-				// A single malformed agent file must not break discovery for every flow.
-				return { frontmatter: {} as Record<string, string>, body: "" };
+				continue;
 			}
-		})();
-		if (!frontmatter.name || !frontmatter.description) continue;
-		const tools = frontmatter.tools
-			?.split(",")
-			.map((t) => t.trim())
-			.filter(Boolean);
-		agents.push({
-			name: frontmatter.name,
-			description: frontmatter.description,
-			tools: tools && tools.length > 0 ? tools : undefined,
-			model: frontmatter.model,
-			thinking: frontmatter.thinking,
-			systemPrompt: body,
-			source,
-			filePath,
-		});
+			const { frontmatter, body } = (() => {
+				try {
+					return parseFrontmatter<Record<string, unknown>>(content);
+				} catch {
+					// A single malformed agent file must not break discovery for every flow.
+					return { frontmatter: {} as Record<string, unknown>, body: "" };
+				}
+			})();
+			if (!frontmatter.name || !frontmatter.description) continue;
+			// frontmatter is YAML-parsed: tools may be a comma-separated string ("a, b")
+			// OR a YAML sequence ([a, b]). Handle both forms.
+			const rawTools = frontmatter.tools;
+			const tools: string[] | undefined = Array.isArray(rawTools)
+				? rawTools.map((t) => String(t).trim()).filter(Boolean)
+				: rawTools !== undefined && rawTools !== null
+					? String(rawTools)
+							.split(",")
+							.map((t) => t.trim())
+							.filter(Boolean)
+					: undefined;
+			agents.push({
+				name: String(frontmatter.name),
+				description: String(frontmatter.description),
+				tools: tools && tools.length > 0 ? tools : undefined,
+				model: frontmatter.model === undefined ? undefined : String(frontmatter.model),
+				thinking: frontmatter.thinking === undefined ? undefined : String(frontmatter.thinking),
+				systemPrompt: body,
+				source,
+				filePath,
+			});
+		} catch {
+			// Defense-in-depth: a single bad agent file must not break discovery
+			// for the entire flow (e.g. exotic YAML shapes, runtime errors in
+			// field access, symlink races, etc.).
+			continue;
+		}
 	}
 	return agents;
 }
@@ -128,9 +142,15 @@ export function discoverAgents(
 		for (const [name, override] of Object.entries(overrides)) {
 			const agent = agentMap.get(name);
 			if (agent) {
-				if (override.model !== undefined) agent.model = override.model;
-				if (override.thinking !== undefined) agent.thinking = override.thinking;
-				if (override.tools !== undefined) agent.tools = override.tools;
+				// Clone before mutating: agentMap owns the original AgentConfig
+				// (loaded from disk in loadAgentsFromDir). Mutating it in place
+				// would cause cross-contamination for any caller that retains a
+				// reference and invokes discoverAgents again with different overrides.
+				const mutated: AgentConfig = { ...agent };
+				if (override.model !== undefined) mutated.model = override.model;
+				if (override.thinking !== undefined) mutated.thinking = override.thinking;
+				if (override.tools !== undefined) mutated.tools = override.tools;
+				agentMap.set(name, mutated);
 			}
 		}
 	}

package/extensions/index.ts CHANGED Viewed

@@ -50,8 +50,8 @@ const ShorthandStep = Type.Object(
 );
 const TaskflowParams = Type.Object({
-	action: StringEnum(["run", "save", "resume", "list"] as const, {
-		description: "What to do: run a flow, save a definition, resume a paused run, or list saved flows",
+	action: StringEnum(["run", "save", "resume", "list", "agents"] as const, {
+		description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, or list available agents you can use in phases",
 		default: "run",
 	}),
 	name: Type.Optional(Type.String({ description: "Name of a saved flow (for run/save without inline define)" })),
@@ -108,10 +108,6 @@ async function runFlow(
 	onUpdate: ((p: AgentToolResult<TaskflowDetails>) => void) | undefined,
 	existing?: RunState,
 ): Promise<RuntimeResult> {
-	const settings = readSubagentSettings();
-	const scope: AgentScope = def.agentScope ?? "user";
-	const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
 	const state = existing ?? makeRunState(def, args, ctx.cwd);
 	const emit = (s: RunState, finalOutput?: string) => {
@@ -166,6 +162,13 @@ async function runFlow(
 		: undefined;
 	try {
+		// Discover settings/agents inside try so a YAML/IO crash in
+		// discoverAgents or readSubagentSettings (F-001) is caught and
+		// the heartbeat timer is cleared by the finally block below.
+		const settings = readSubagentSettings();
+		const scope: AgentScope = def.agentScope ?? "user";
+		const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
 		const result = await executeTaskflow(state, {
 			cwd: ctx.cwd,
 			agents,
@@ -216,7 +219,7 @@ export default function (pi: ExtensionAPI) {
 			"Phases (agent, parallel, map, gate, reduce, approval, flow) form a DAG; intermediate outputs stay out of your context — only the final phase output is returned.",
 			"Use action=run with an inline `define` (you write the DSL) or a saved `name`.",
 			"For simple non-DAG delegations (like the subagent tool) skip the DSL: pass `task` (+optional `agent`) for one task, `tasks:[{task,agent?}]` to run in parallel, or `chain:[{task,agent?}]` to run sequentially (reference the prior step with {previous.output}).",
-			"Use action=save to persist a definition as a reusable /tf:<name> command. action=resume continues a paused run. action=list shows saved flows.",
+			"Use action=save to persist a definition as a reusable /tf:<name> command. action=resume continues a paused run. action=list shows saved flows. Use action=agents to list available agents — do NOT invent agent names; either use an agent from that list or omit the 'agent' field to auto-select the default agent.",
 			"DSL: {name, args?, concurrency?, budget?:{maxUSD,maxTokens}, phases:[{id, type, agent, task, dependsOn?, join?:'all'|'any', when?, retry?:{max,backoffMs,factor}, over?(map), as?(map), branches?(parallel), from?(reduce), use?(flow), with?(flow), output?:'json', final?}]}.",
 			"Phase types: agent (one subagent), parallel (static branches), map (dynamic fan-out over an array), gate (VERDICT: PASS/BLOCK quality gate), reduce (aggregate from N phases), approval (human-in-the-loop pause), flow (run a saved sub-flow). join:'any' is an OR-join; when is a conditional guard; retry adds backoff; budget caps run cost.",
 			"Interpolation: {args.X}, {steps.ID.output}, {steps.ID.json}, {item} (map), {previous.output}.",
@@ -232,6 +235,21 @@ export default function (pi: ExtensionAPI) {
 		async execute(_id, params, signal, onUpdate, ctx) {
 			const action = params.action ?? "run";
+			// agents — list available agents the LLM can use in phase definitions
+			if (action === "agents") {
+				const scope = params.scope ?? "both";
+				const { agents } = discoverAgents(ctx.cwd, scope as AgentScope, undefined);
+				const text = agents.length
+					? agents
+							.map(
+								(a) =>
+									`- ${a.name} (${a.source}): ${a.description}${a.model ? ` [model: ${a.model}]` : ""}${a.tools?.length ? ` [tools: ${a.tools.join(", ")}]` : ""}`,
+							)
+							.join("\n")
+					: "No agents found. Use the default agent by omitting the 'agent' field in phases.";
+				return { content: [{ type: "text", text }], details: { action } satisfies TaskflowDetails };
+			}
 			// list
 			if (action === "list") {
 				const flows = listFlows(ctx.cwd);

package/extensions/interpolate.ts CHANGED Viewed

@@ -27,10 +27,13 @@ export interface InterpolationResult {
 	missing: string[];
 }
-export function interpolate(template: string, ctx: InterpolationContext): InterpolationResult {
+export function interpolate(
+	template: string | null | undefined,
+	ctx: InterpolationContext,
+): InterpolationResult {
 	const missing: string[] = [];
-	const text = template.replace(PLACEHOLDER, (whole, path: string) => {
+	const text = String(template ?? "").replace(PLACEHOLDER, (whole, path: string) => {
 		const value = resolvePath(path, ctx);
 		if (value === undefined) {
 			missing.push(path);
@@ -134,6 +137,24 @@ export function safeParse(text: string): unknown {
 			}
 		}
 	}
+	// Anti-pattern detection (v0.0.8.1): array followed by a stray top-level
+	// "key": value. A common LLM mistake — the model appends
+	// `"deferred": [...]` after a JSON array, producing a non-JSON hybrid that
+	// none of the above strategies can recover. We surface a diagnostic hint
+	// so flow authors can spot the bug fast.
+	//
+	// We check the original (trimmed) input rather than the slice tail,
+	// because `lastIndexOf(close)` lands on the *last* bracket — for the
+	// anti-pattern the stray key is between the array's `]` and the trailing
+	// `]`, not after the last one.
+	if (/]\s*[\},]?\s*"[^"\n]+"\s*:/.test(trimmed)) {
+		console.warn(
+			"[pi-taskflow safeParse] input looks like a JSON array followed by a stray top-level key " +
+				`(pattern: [{...}], "key": ...). This is not valid JSON. ` +
+				`Hint: put extra data as array members (e.g. {"id":"D-001","status":"deferred",...}) ` +
+				`or split into a separate phase.`,
+		);
+	}
 	return undefined;
 }
@@ -142,7 +163,7 @@ export function coerceArray(value: unknown): unknown[] | null {
 	if (Array.isArray(value)) return value;
 	if (value && typeof value === "object") {
 		// {items: [...]} or {results: [...]} convenience
-		for (const key of ["items", "results", "list", "data"]) {
+		for (const key of ["items", "results", "list", "data", "findings"]) {
 			const v = (value as Record<string, unknown>)[key];
 			if (Array.isArray(v)) return v;
 		}

package/extensions/render.ts CHANGED Viewed

@@ -71,7 +71,10 @@ function agentRole(phase: Phase, ps: PhaseState | undefined, theme: Theme): stri
 function costStr(usage: UsageStats | undefined, theme: Theme): string {
 	if (!usage?.cost) return "";
-	return theme.fg("muted", `$${usage.cost.toFixed(3)}`);
+	const c = usage.cost;
+	return c >= 0.01
+		? theme.fg("muted", `$${c.toFixed(2)}`)
+		: theme.fg("muted", `$${c.toFixed(4)}`);
 }
 function aggregateCost(state: RunState): number {
@@ -174,6 +177,7 @@ function phaseDetail(phase: Phase, ps: PhaseState | undefined, theme: Theme): st
 		const color = d === "reject" ? "error" : d === "edit" ? "warning" : "success";
 		let a = theme.fg("warning", "⚠") + " " + theme.fg(color as Parameters<typeof theme.fg>[0], theme.bold(d.toUpperCase()));
 		if (ps.approval.auto) a += theme.fg("dim", " auto");
+		if (cost) a += `  ${cost}`;
 		if (time) a += `  ${time}`;
 		if (ps.warnings?.length) a += theme.fg("warning", `  ⚠${ps.warnings.length}`);
 		return a;
@@ -228,8 +232,8 @@ function headerLine(state: RunState, theme: Theme): string {
 	if (state.status === "blocked") line += theme.fg("error", " · blocked");
 	const cost = aggregateCost(state);
 	const budget = state.def.budget;
-	if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost.toFixed(3)}/$${budget.maxUSD}`);
-	else if (cost) line += theme.fg("muted", ` · $${cost.toFixed(3)}`);
+	if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}/$${budget.maxUSD}`);
+	else if (cost) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}`);
 	const el = runElapsed(state);
 	if (el) line += theme.fg("dim", ` · ${elapsed(el)}`);
 	return line;

package/extensions/runner.ts CHANGED Viewed

@@ -203,14 +203,10 @@ function summarizeToolCall(name: string, args: Record<string, unknown>): string
 	}
 }
-async function writePromptToTempFile(agentName: string, prompt: string): Promise<{ dir: string; filePath: string }> {
-	const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
-	const safeName = agentName.replace(/[^\w.-]+/g, "_");
-	const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
+async function writePromptToTempFile(filePath: string, prompt: string): Promise<void> {
 	await withFileMutationQueue(filePath, async () => {
 		await fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 });
 	});
-	return { dir: tmpDir, filePath };
 }
 function getPiInvocation(args: string[]): { command: string; args: string[] } {
@@ -284,9 +280,13 @@ export async function runAgentTask(
 	try {
 		if (agent.systemPrompt.trim()) {
-			const tmp = await writePromptToTempFile(agent.name, agent.systemPrompt);
-			tmpPromptDir = tmp.dir;
-			tmpPromptPath = tmp.filePath;
+			// Allocate the temp dir + path BEFORE any fallible I/O so that if
+			// writeFile throws, tmpPromptDir/tmpPromptPath are already set and
+			// the finally block can clean up the directory (F-004).
+			tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
+			const safeName = agent.name.replace(/[^\w.-]+/g, "_");
+			tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
+			await writePromptToTempFile(tmpPromptPath, agent.systemPrompt);
 			args.push("--append-system-prompt", tmpPromptPath);
 		}
 		args.push(`Task: ${task}`);
@@ -319,15 +319,25 @@ export async function runAgentTask(
 				if (buffer.trim()) processLine(buffer);
 				resolve(code ?? 0);
 			});
-			proc.on("error", () => resolve(1));
+			proc.on("error", (err) => {
+				if (!result.stderr) result.stderr = err.message;
+				if (!result.errorMessage) result.errorMessage = err.message;
+				resolve(1);
+			});
 			if (opts.signal) {
 				const kill = () => {
 					wasAborted = true;
 					proc.kill("SIGTERM");
-					setTimeout(() => {
-						if (!proc.killed) proc.kill("SIGKILL");
-					}, 5000);
+					// Force-kill fallback. proc.kill("SIGKILL") is idempotent if
+					// the process already exited, and `proc.killed` is set true
+					// synchronously by the SIGTERM above — so the previous
+					// `if (!proc.killed)` guard would skip SIGKILL entirely,
+					// hanging forever on a child that ignores SIGTERM.
+					// .unref() keeps the timer from holding the event loop open
+					// after the process is gone.
+					const forceKill = setTimeout(() => proc.kill("SIGKILL"), 5000);
+					forceKill.unref();
 				};
 				if (opts.signal.aborted) kill();
 				else opts.signal.addEventListener("abort", kill, { once: true });
@@ -349,12 +359,20 @@ export async function runAgentTask(
 		// `output`: upstream providers (e.g. a Cloudflare challenge page) can
 		// surface huge HTML/JSON in errorMessage, and that garbage would
 		// otherwise flow into downstream phase interpolations.
-		if (isFailed(result) && !result.output) {
-			result.output = TRANSPORT_ERROR_PLACEHOLDER;
-			if (!result.errorMessage) {
-				result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
+		// Sanitization must run whenever the run failed, even if some output
+		// was already emitted (e.g. crash mid-stream with a partial result):
+		// an unsanitized errorMessage would still leak into PhaseState and
+		// downstream interpolation contexts. (F-013)
+		if (isFailed(result)) {
+			if (!result.output) {
+				result.output = TRANSPORT_ERROR_PLACEHOLDER;
+				if (!result.errorMessage) {
+					result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
+				}
+			}
+			if (result.errorMessage) {
+				result.errorMessage = sanitizeErrorMessage(result.errorMessage);
 			}
-			result.errorMessage = sanitizeErrorMessage(result.errorMessage);
 		}
 		return result;
 	} finally {

package/extensions/runs-view.ts CHANGED Viewed

@@ -50,6 +50,9 @@ export class RunHistoryComponent {
 	private cachedLines?: string[];
 	constructor(runs: RunState[], theme: Theme, onDone: (result?: RunHistoryResult) => void) {
+		if (!runs.length) {
+			throw new Error("RunHistoryComponent requires at least one run");
+		}
 		this.runs = runs;
 		this.theme = theme;
 		this.onDone = onDone;

package/extensions/runtime.ts CHANGED Viewed

@@ -414,11 +414,12 @@ async function executePhase(
 	if (type === "agent" || type === "gate" || type === "reduce") {
 		const { text } = interpolate(phase.task ?? "", ctx);
 		const fullTask = preRead + text;
-		const inputHash = hashInput(phase.id, phase.agent ?? "", fullTask);
+		const agentName = resolveAgent(phase.agent, deps, state);
+		const inputHash = hashInput(phase.id, agentName, fullTask);
 		const cached = cachedPhase(prior, inputHash);
 		if (cached) return cached;
-		const r = await runOne(phase.agent ?? defaultAgent(deps), fullTask, liveSink(state, phase.id, emitProgress));
+		const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
 		const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
 		if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
 		return ps;
@@ -428,7 +429,7 @@ async function executePhase(
 		const branches = (phase.branches ?? []).map((b) => {
 			const r = interpolate(b.task, ctx);
 			return {
-				agent: b.agent ?? phase.agent ?? defaultAgent(deps),
+				agent: resolveAgent(b.agent ?? phase.agent, deps, state),
 				task: preRead + r.text,
 			};
 		});
@@ -458,7 +459,7 @@ async function executePhase(
 		const tasks = arr.map((item) => {
 			const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
 			return {
-				agent: phase.agent ?? defaultAgent(deps),
+				agent: resolveAgent(phase.agent, deps, state),
 				task: preRead + interpolate(phase.task ?? "", localCtx).text,
 			};
 		});
@@ -551,14 +552,20 @@ async function executePhase(
 			baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
 		const subResult = await executeTaskflow(subState, {
 			...deps,
+			// Override deps.cwd with the flow phase's own cwd so that sub-flow
+			// phases without an explicit cwd derive their subagents from the
+			// flow's cwd (not the caller's cwd).
+			cwd: phase.cwd ?? deps.cwd,
 			runTask: subRunTask,
 			_stack: [...stack, state.flowName],
 			persist: undefined,
 			onProgress: () => {
 				if (live) {
 					const ph = Object.values(subState.phases);
+					// B-F015: `done` must include both success and failure so the
+					// renderer's `done - failed` shows the true success count.
 					live.subProgress = {
-						done: ph.filter((p) => p.status === "done").length,
+						done: ph.filter((p) => p.status === "done" || p.status === "failed").length,
 						total: subDef.phases.length,
 						running: ph.filter((p) => p.status === "running").length,
 						failed: ph.filter((p) => p.status === "failed").length,
@@ -577,8 +584,11 @@ async function executePhase(
 			output: subResult.finalOutput,
 			json: parseJson ? safeParse(subResult.finalOutput) : undefined,
 			usage: subResult.totalUsage,
+			// B-F015: include failed in `done` so the renderer's
+			// `done - failed` formula gives the success count (matches the
+			// map/parallel runner's overlapping-counter convention).
 			subProgress: {
-				done: sp.filter((p) => p.status === "done").length,
+				done: sp.filter((p) => p.status === "done" || p.status === "failed").length,
 				total: subDef.phases.length,
 				running: 0,
 				failed: sp.filter((p) => p.status === "failed").length,
@@ -632,6 +642,27 @@ function cachedPhase(prior: PhaseState | undefined, inputHash: string): PhaseSta
 	return null;
 }
+/**
+ * Resolve an agent name against available agents. Falls back to the default
+ * agent if the requested agent isn't found, logging a warning via safeEmit.
+ */
+function resolveAgent(name: string | undefined, deps: RuntimeDeps, state: RunState): string {
+	const resolved = name ?? defaultAgent(deps);
+	if (name && !deps.agents.some((a) => a.name === name)) {
+		const fallback = defaultAgent(deps);
+		// Log only once per run to avoid noise.
+		if (!(state as any).__unknownAgentWarned) {
+			(state as any).__unknownAgentWarned = new Set<string>();
+		}
+		if (!(state as any).__unknownAgentWarned.has(name)) {
+			(state as any).__unknownAgentWarned.add(name);
+			console.warn(`[taskflow] Unknown agent "${name}", falling back to "${fallback}". Use action=agents to list available agents.`);
+		}
+		return fallback;
+	}
+	return resolved;
+}
 function defaultAgent(deps: RuntimeDeps): string {
 	return deps.agents[0]?.name ?? "default";
 }
@@ -649,7 +680,10 @@ export function parseGateVerdict(output: string): { verdict: "pass" | "block"; r
 		if (typeof o.continue === "boolean") return { verdict: o.continue ? "pass" : "block", reason: asReason(o.reason) };
 		if (typeof o.pass === "boolean") return { verdict: o.pass ? "pass" : "block", reason: asReason(o.reason) };
 		if (typeof o.verdict === "string") {
-			const block = /block|fail|stop|reject|halt|\bno\b/i.test(o.verdict);
+			// Note: do NOT include standalone "no" — natural-language verdicts like
+			// "No issues found" / "no errors" would otherwise be false-positive BLOCK.
+			// Fail-open covers any ambiguous text.
+			const block = /block|fail|stop|reject|halt/i.test(o.verdict);
 			return { verdict: block ? "block" : "pass", reason: asReason(o.reason) };
 		}
 	}
@@ -666,11 +700,86 @@ function asReason(v: unknown): string | undefined {
 	return typeof v === "string" && v.trim() ? v.trim() : undefined;
 }
+/**
+ * Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
+ *
+ * A throw from a host-supplied callback must NEVER replace the runtime's
+ * outcome — neither the original crash message in `executeTaskflow`'s catch
+ * block, nor the final output of a successful run. Callbacks are observability
+ * hooks; the run survives their failure.
+ *
+ * Used at every "checkpoint" call site (phase start, phase end, terminal state).
+ * For high-frequency live updates inside a phase, see `safeProgress` below.
+ */
+function safeEmit(deps: RuntimeDeps, state: RunState): void {
+	try {
+		deps.persist?.(state);
+	} catch {
+		// user callback — must not break the run
+	}
+	try {
+		deps.onProgress?.(state);
+	} catch {
+		// user callback — must not break the run
+	}
+}
+/**
+ * Like `safeEmit` but for the high-frequency live-update channel only.
+ * Skips `persist` (which is intentionally checkpoint-only) and swallows any
+ * throw from the user-supplied `onProgress` so a misbehaving TUI sink cannot
+ * disrupt an in-flight phase.
+ */
+function safeProgress(deps: RuntimeDeps, state: RunState): void {
+	try {
+		deps.onProgress?.(state);
+	} catch {
+		// user callback — must not break the run
+	}
+}
 /**
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
  */
+function ensureImplicitGate(def: Taskflow): void {
+	// Respect explicit opt-out
+	if ((def as any).implicitGate === false) return;
+	const hasGate = def.phases.some(
+		(p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
+	);
+	if (hasGate || def.phases.length === 0) return;
+	// The last existing phase is the effective "final" phase — pin it so the
+	// injected gate doesn't become the finalOutput.
+	const lastPhase = def.phases[def.phases.length - 1];
+	if (!lastPhase.final && !def.phases.some((p) => p.final)) {
+		lastPhase.final = true;
+	}
+	const allIds = def.phases.map((p) => p.id);
+	def.phases.push({
+		id: "_implicit-gate",
+		type: "gate",
+		dependsOn: allIds,
+		agent: "reviewer",
+		task: `Review all phase outputs from this taskflow for accuracy and consistency.
+For each upstream phase, scan its output for:
+1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
+2. **Internal contradictions**: Do any phases contradict each other?
+3. **Completeness**: Is any output truncated, empty, or anomalously short?
+4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
+Output:
+- If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
+- If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
+	});
+}
 export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
 	const def: Taskflow = state.def;
+	ensureImplicitGate(def);
 	try {
 		return await runTaskflowLayers(state, deps);
 	} catch (e) {
@@ -685,8 +794,7 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
 			}
 		}
 		state.status = "failed";
-		deps.persist?.(state);
-		deps.onProgress?.(state);
+		safeEmit(deps, state);
 		const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
 		return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
 	}
@@ -697,8 +805,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 	const layers = topoLayers(def.phases);
 	state.status = "running";
-	deps.persist?.(state);
-	deps.onProgress?.(state);
+	safeEmit(deps, state);
 	let aborted = false;
 	let gateBlocked = false;
@@ -756,8 +863,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 					endedAt: Date.now(),
 					usage: emptyUsage(),
 				};
-				deps.persist?.(state);
-				deps.onProgress?.(state);
+				safeEmit(deps, state);
 				return;
 			}
@@ -768,9 +874,9 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 				status: "running",
 				startedAt,
 			};
-			deps.onProgress?.(state);
+			safeProgress(deps, state);
-			const ps = await executePhase(phase, state, deps, prior, () => deps.onProgress?.(state));
+			const ps = await executePhase(phase, state, deps, prior, () => safeProgress(deps, state));
 			// Preserve the phase start time: executePhase returns a fresh PhaseState
 			// that omits startedAt (cached/resumed results carry their own).
 			state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
@@ -793,8 +899,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 				budgetBlocked = true;
 				budgetReason = ob.reason;
 			}
-			deps.persist?.(state);
-			deps.onProgress?.(state);
+			safeEmit(deps, state);
 		});
 	}
@@ -818,8 +923,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 			: anyFailed
 				? "failed"
 				: "completed";
-	deps.persist?.(state);
-	deps.onProgress?.(state);
+	safeEmit(deps, state);
 	let finalOutput = finalState?.output ?? "(no output)";
 	if (gateBlocked) {

package/extensions/schema.ts CHANGED Viewed

@@ -147,6 +147,12 @@ export const TaskflowSchema = Type.Object(
 			}),
 		),
 		phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
+		implicitGate: Type.Optional(
+			Type.Boolean({
+				description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
+				default: true,
+			}),
+		),
 	},
 	{ additionalProperties: false },
 );
@@ -184,7 +190,11 @@ export function isShorthand(def: unknown): boolean {
 	if (typeof def !== "object" || def === null) return false;
 	const d = def as Record<string, unknown>;
 	if (Array.isArray(d.phases)) return false;
-	return Array.isArray(d.chain) || Array.isArray(d.tasks) || typeof d.task === "string";
+	return (
+		(Array.isArray(d.chain) && d.chain.length > 0) ||
+		(Array.isArray(d.tasks) && d.tasks.length > 0) ||
+		typeof d.task === "string"
+	);
 }
 function readStep(s: unknown): ShorthandStep {
@@ -355,20 +365,27 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 	const finals = (flow.phases as Phase[]).filter((p) => p?.final);
 	if (finals.length > 1) errors.push(`Only one phase may be marked 'final' (found ${finals.length})`);
-	// --- Soft warnings: {steps.X.*} references that aren't declared deps -------
+	// --- Hard errors: {steps.X.*} references that aren't declared deps ------
 	// Catches the most common authoring mistake: the task talks about
 	// `{steps.review.output}` but `dependsOn: ["review"]` is missing, so the
 	// phase runs in parallel with `review` and the model sees the literal
-	// placeholder string. The runtime can't infer the intent.
+	// placeholder string. The runtime can't infer the intent — fail fast at
+	// validation time so the mistake is caught before the run starts.
+	//
+	// Phases with `join: "any"` are exempt: by design they only need ONE of
+	// their declared deps to complete, and may reference other phases as
+	// informational context (not as true dependencies).
 	if (errors.length === 0) {
 		const idToPhase = new Map((flow.phases as Phase[]).map((p) => [p.id, p]));
 		for (const p of flow.phases as Phase[]) {
 			if (!p?.id) continue;
+			const isJoinAny = p.join === "any";
+			if (isJoinAny) continue;
 			const deps = new Set(dependenciesOf(p));
 			const refs = collectRefs(p);
 			for (const ref of refs.steps) {
 				if (ref === p.id) {
-					warnings.push(`Phase '${p.id}': references its own output via {steps.${ref}.*}; this is almost always a bug.`);
+					errors.push(`Phase '${p.id}': references its own output via {steps.${ref}.*}; this is almost always a bug.`);
 					continue;
 				}
 				if (!idToPhase.has(ref)) {
@@ -378,7 +395,7 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 					continue;
 				}
 				if (!deps.has(ref)) {
-					warnings.push(
+					errors.push(
 						`Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not in dependsOn. ` +
 							`The phase will run in parallel with '${ref}' and see the literal placeholder. ` +
 							`Add "dependsOn": ["${ref}"] (or include '${ref}' transitively).`,

package/extensions/store.ts CHANGED Viewed

@@ -8,6 +8,7 @@
 import * as crypto from "node:crypto";
 import * as fs from "node:fs";
+import * as os from "node:os";
 import * as path from "node:path";
 import { getAgentDir } from "@earendil-works/pi-coding-agent";
 import type { Taskflow } from "./schema.ts";
@@ -69,12 +70,20 @@ function userFlowsDir(): string {
 	return path.join(getAgentDir(), "taskflows");
 }
-function findProjectFlowsDir(cwd: string, create = false): string | null {
+function findProjectFlowsDirInternal(cwd: string, create = false): string | null {
 	// Prefer an existing .pi dir up the tree; else use cwd/.pi when creating.
+	// **Never treat `~/.pi/` as a project flow dir** — the home directory is
+	// the user-scope boundary, and the user's `~/.pi/` is the agent dir, not a
+	// project. We skip the home entry entirely during the walk-up, so even a
+	// deeply nested cwd under home will return null (create=false) when no
+	// project `.pi` exists on the path.
+	const home = os.homedir();
 	let dir = cwd;
 	while (true) {
-		const candidate = path.join(dir, ".pi");
-		if (fs.existsSync(candidate)) return path.join(candidate, "taskflows");
+		if (dir !== home) {
+			const candidate = path.join(dir, ".pi");
+			if (fs.existsSync(candidate)) return path.join(candidate, "taskflows");
+		}
 		const parent = path.dirname(dir);
 		if (parent === dir) break;
 		dir = parent;
@@ -94,6 +103,11 @@ function readFlowFile(filePath: string, scope: "user" | "project"): SavedFlow |
 }
 /** List all saved flows (project overrides user on name collision). */
+/** Internal-but-exported for tests: walk-up `.pi` finder that skips the home directory's own `.pi`. */
+export function findProjectFlowsDir(cwd: string, create = false): string | null {
+	return findProjectFlowsDirInternal(cwd, create);
+}
 export function listFlows(cwd: string): SavedFlow[] {
 	const map = new Map<string, SavedFlow>();
 	const dirs: Array<{ dir: string; scope: "user" | "project" }> = [{ dir: userFlowsDir(), scope: "user" }];
@@ -149,8 +163,11 @@ export function newRunId(flowName: string): string {
 export function saveRun(state: RunState): void {
 	const dir = runsDir(state.cwd);
 	fs.mkdirSync(dir, { recursive: true });
-	state.updatedAt = Date.now();
-	writeFileAtomic(path.join(dir, `${state.runId}.json`), JSON.stringify(state, null, 2));
+	// Clone before stamping updatedAt so the caller's RunState reference is not
+	// mutated as a hidden side effect (v0.0.6 audit, F-009). Shallow clone is
+	// sufficient: saveRun only serializes; it does not mutate nested objects.
+	const toSave = { ...state, updatedAt: Date.now() };
+	writeFileAtomic(path.join(dir, `${state.runId}.json`), JSON.stringify(toSave, null, 2));
 }
 export function loadRun(cwd: string, runId: string): RunState | null {
@@ -219,7 +236,14 @@ export function listRuns(cwd: string, limit = 20): RunState[] {
 			/* ignore */
 		}
 	}
-	return runs.sort((a, b) => b.updatedAt - a.updatedAt).slice(0, limit);
+	// Guard against records missing/with non-numeric `updatedAt` — a bare
+	// `JSON.parse` may yield an object without it, and `undefined - undefined`
+	// is NaN, which makes `Array.prototype.sort` produce implementation-defined
+	// order. Drop those before sorting. (v0.0.8 audit, F-010.)
+	return runs
+		.filter((r) => typeof r.updatedAt === "number" && !Number.isNaN(r.updatedAt))
+		.sort((a, b) => b.updatedAt - a.updatedAt)
+		.slice(0, limit);
 }
 /** Stable hash of a phase's resolved task + inputs, for resume caching. */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-taskflow",
-  "version": "0.0.7",
+  "version": "0.0.9",
   "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
   "keywords": [
     "pi-package",

package/skills/taskflow/SKILL.md CHANGED Viewed

@@ -172,6 +172,36 @@ Review the audit results below. If any endpoint is missing auth, end with
 {steps.audit.output}
 ```
+### Structured-verify phases (v0.0.8.1)
+A "verify" phase typically runs `npx tsc --noEmit && npm test && git diff --stat`
+and reports whether everything is green. **Don't** delegate this to a generic
+verifier subagent that summarizes the output in prose — LLMs commonly misread
+shell output (e.g., 234 tests reported as 230, 745 insertions as 599, "1 type
+error" reported as "clean"). Instead, **use a dedicated agent whose task is a
+structured shell pipeline** that echoes structured key/value lines the next
+phase can parse directly. Recommended pattern:
+```jsonc
+{
+  "id": "verify",
+  "type": "agent",
+  "agent": "verifier",
+  "dependsOn": ["apply-fixes"],
+  "task": "Run the verification pipeline and report structured results.\n\nExecute:\n```bash\ncd $REPO && npx tsc --noEmit 2>&1 | tee /tmp/tsc.log\ncd $REPO && npm test 2>&1 | tee /tmp/test.log | tail -10\ncd $REPO && git diff --shortstat HEAD | tee /tmp/diff.log\n```\n\nReport EXACTLY in this format (one key=value pair per line, no prose):\ntypecheck=PASS|FAIL\ntests_total=N\ntests_pass=N\ntests_fail=N\ninsertions=N\ndeletions=N\nfiles_changed=N\n\nIf any field is missing, you failed the task — re-run the command and re-read the output.",
+  "tools": ["read", "edit", "write", "bash"]
+}
+```
+The key insight: **LLMs are bad at summarizing shell output, good at copying
+structured data**. Asking for `key=value` pairs with explicit fields and "if
+missing, you failed" forces the agent to read each field carefully. Downstream
+phases that consume `{steps.verify.output}` can then `safeParse` it into a
+JSON object and assert against expected values.
+For audits where the upstream is LLM-generated prose (not shell output), use a
+plain `gate` phase with `VERDICT:` instead.
 ### Interpolation
 - `{args.X}` — invocation argument
@@ -188,12 +218,11 @@ Review the audit results below. If any endpoint is missing auth, end with
 3. Reference upstream results explicitly with `{steps.ID...}` and set `dependsOn`.
 4. Mark the result-bearing phase with `"final": true` (else the last phase wins).
-## Common mistakes (the runtime will warn you, but don't trip them)
+## Common mistakes (the runtime will reject these at validation time)
-The runtime validates your flow at startup and at each phase's interpolation.
-Two patterns account for ~all the broken runs in the wild — avoid them. If you
-want warnings like these to become hard failures, set `"strictInterpolation": true`
-on the flow.
+The runtime validates your flow at startup. As of v0.0.8.1, the two most
+common authoring mistakes below are **hard validation errors** (the flow
+refuses to start). Fix the flow before running it.
 ### 1. Referencing `{steps.X}` without `dependsOn: ["X"]`
@@ -209,10 +238,9 @@ on the flow.
 }
 ```
-The runtime logs a warning at run start (`Phase 'fix-issues': task references
-{steps.code-review-1.*} but 'code-review-1' is not in dependsOn`) and the phase
-itself gets a `warnings` field with a non-fatal `unresolved placeholders` line.
-The TUI shows a `⚠N` badge. **Always declare the chain:**
+Validation now rejects this with: `Phase 'fix-issues': task references
+{steps.code-review-1.*} but 'code-review-1' is not in dependsOn. ...`
+**Always declare the chain:**
 ```jsonc
 // ✅ RIGHT
@@ -233,7 +261,11 @@ The TUI shows a `⚠N` badge. **Always declare the chain:**
 Tip: write the `task` first (it tells you what each phase needs), then scan for
 `{steps.*}` references and add the matching `dependsOn`. If a phase truly does
-not depend on anything in its task, you can ignore the warning.
+not need an upstream phase's output, remove the `{steps.*}` reference from its task.
+Exception: phases with `join: "any"` are exempt from this check, since they
+deliberately wait for only one of their declared deps to complete and may
+reference others as informational context.
 ### 2. Assuming the runtime knows "this is a chain"