npm - pi-taskflow - Versions diffs - 0.0.16 → 0.0.17 - Mend

pi-taskflow 0.0.16 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +2 -12
package/README.zh-CN.md +2 -12
package/extensions/agents.ts +13 -37
package/extensions/cache.ts +5 -1
package/extensions/index.ts +68 -15
package/extensions/interpolate.ts +32 -5
package/extensions/render.ts +2 -2
package/extensions/runner.ts +38 -2
package/extensions/runs-view.ts +2 -2
package/extensions/runtime.ts +56 -9
package/extensions/schema.ts +1 -1
package/extensions/store.ts +34 -13
package/extensions/verify.ts +11 -0
package/package.json +1 -1
package/skills/taskflow/SKILL.md +1 -1
package/skills/taskflow/configuration.md +10 -11
package/DESIGN.md +0 -338

package/README.md CHANGED Viewed

@@ -522,19 +522,9 @@ Your choices are written to `~/.pi/agent/settings.json`:
 }
 ```
-Edit the values manually any time, or just re-run `/tf init`. You can also override individual agents via `subagents.agentOverrides` in the same file:
+Edit the values manually any time, or just re-run `/tf init`.
-```json
-{
-  "modelRoles": { ... },
-  "subagents": {
-    "agentOverrides": {
-      "executor": { "model": "anthropic/claude-sonnet-4-20250514" },
-      "reviewer": { "thinking": "xhigh" }
-    }
-  }
-}
-```
+To customize a specific agent's model or thinking without changing `modelRoles`, create an agent file at `~/.pi/agent/agents/<name>.md` with the desired overrides in the YAML frontmatter.
 ### Tool path (`action="init"`)

package/README.zh-CN.md CHANGED Viewed

@@ -524,19 +524,9 @@ Taskflow 自带 **18 个内置代理**——每个代理是一个 `.md` 文件
 }
 ```
-随时手动编辑这些值，或重新运行 `/tf init`。你也可以通过同级文件中的 `subagents.agentOverrides` 覆盖单个代理：
+随时手动编辑这些值，或重新运行 `/tf init`。
-```json
-{
-  "modelRoles": { ... },
-  "subagents": {
-    "agentOverrides": {
-      "executor": { "model": "anthropic/claude-sonnet-4-20250514" },
-      "reviewer": { "thinking": "xhigh" }
-    }
-  }
-}
-```
+若需自定义特定代理的模型或 thinking 而不修改 `modelRoles`，可在 `~/.pi/agent/agents/<name>.md` 创建代理文件，在 YAML frontmatter 中覆盖。
 ### 工具路径（`action="init"`）

package/extensions/agents.ts CHANGED Viewed

@@ -21,7 +21,7 @@ export interface TaskflowSettings {
 	maxRunAgeDays: number;
 }
-import { DEFAULT_KEPT_RUNS, DEFAULT_RUN_AGE_DAYS } from "./store.ts";
+import { DEFAULT_KEPT_RUNS, DEFAULT_RUN_AGE_DAYS, writeFileAtomic } from "./store.ts";
 export const DEFAULT_TASKFLOW_SETTINGS: TaskflowSettings = {
 	builtInAgents: true,
@@ -63,12 +63,6 @@ export function shouldSyncBuiltinAgentsToProject(settings: TaskflowSettings = DE
 	return settings.builtInAgents && settings.syncBuiltinAgentsToProject;
 }
-export interface AgentOverride {
-	model?: string;
-	thinking?: string;
-	tools?: string[];
-}
 export interface AgentConfig {
 	name: string;
 	description: string;
@@ -120,16 +114,18 @@ function loadAgentsFromDir(dir: string, source: "user" | "project" | "built-in")
 			if (!frontmatter.name || !frontmatter.description) continue;
 			// frontmatter is YAML-parsed: tools may be a comma-separated string ("a, b")
-			// OR a YAML sequence ([a, b]). Handle both forms.
+			// OR a YAML sequence ([a, b]). Handle both forms; reject other types to
+			// prevent garbage output from malformed YAML (e.g. boolean, number).
 			const rawTools = frontmatter.tools;
-			const tools: string[] | undefined = Array.isArray(rawTools)
-				? rawTools.map((t) => String(t).trim()).filter(Boolean)
-				: rawTools !== undefined && rawTools !== null
-					? String(rawTools)
-							.split(",")
-							.map((t) => t.trim())
-							.filter(Boolean)
-					: undefined;
+			let tools: string[] | undefined;
+			if (Array.isArray(rawTools)) {
+				tools = rawTools.map((t) => String(t).trim()).filter(Boolean);
+			} else if (typeof rawTools === "string") {
+				tools = rawTools.split(",").map((t) => t.trim()).filter(Boolean);
+			} else if (rawTools !== undefined && rawTools !== null) {
+				console.warn(`[taskflow] Agent '${String(frontmatter.name)}': 'tools' must be a string or array, got ${typeof rawTools}. Ignoring.`);
+				tools = undefined;
+			}
 			agents.push({
 				name: String(frontmatter.name),
@@ -173,7 +169,6 @@ function findNearestProjectAgentsDir(cwd: string): string | null {
 export function discoverAgents(
 	cwd: string,
 	scope: AgentScope,
-	overrides?: Record<string, AgentOverride>,
 	modelRoles?: Record<string, string>,
 	taskflowSettings: TaskflowSettings = DEFAULT_TASKFLOW_SETTINGS,
 ): AgentDiscoveryResult {
@@ -202,23 +197,6 @@ export function discoverAgents(
 		for (const a of projectAgents) agentMap.set(a.name, a);
 	}
-	if (overrides) {
-		for (const [name, override] of Object.entries(overrides)) {
-			const agent = agentMap.get(name);
-			if (agent) {
-				// Clone before mutating: agentMap owns the original AgentConfig
-				// (loaded from disk in loadAgentsFromDir). Mutating it in place
-				// would cause cross-contamination for any caller that retains a
-				// reference and invokes discoverAgents again with different overrides.
-				const mutated: AgentConfig = { ...agent };
-				if (override.model !== undefined) mutated.model = override.model;
-				if (override.thinking !== undefined) mutated.thinking = override.thinking;
-				if (override.tools !== undefined) mutated.tools = override.tools;
-				agentMap.set(name, mutated);
-			}
-		}
-	}
 	// Resolve {{role}} model references (e.g. {{fast}} → openrouter/deepseek/v4-flash)
 	// Clone before mutating, consistent with the overrides block above.
 	if (modelRoles) {
@@ -236,7 +214,6 @@ export function discoverAgents(
 }
 export interface SubagentSettings {
-	agentOverrides?: Record<string, AgentOverride>;
 	globalThinking?: string;
 	modelRoles?: Record<string, string>;
 	taskflow: TaskflowSettings;
@@ -261,7 +238,6 @@ export function readSubagentSettings(): SubagentSettings {
 		if (!fs.existsSync(settingsPath)) return { taskflow: { ...DEFAULT_TASKFLOW_SETTINGS } };
 		const raw = JSON.parse(fs.readFileSync(settingsPath, "utf-8"));
 		return {
-			agentOverrides: raw.subagents?.agentOverrides,
 			globalThinking: raw.subagents?.globalThinking ?? raw.defaultThinkingLevel,
 			modelRoles: raw.modelRoles,
 			taskflow: normalizeTaskflowSettings(raw.taskflow),
@@ -311,7 +287,7 @@ export function syncBuiltinAgentsToProject(cwd: string): void {
 		try {
 			const content = fs.readFileSync(src, "utf-8");
-			fs.writeFileSync(dst, content, "utf-8");
+			writeFileAtomic(dst, content);
 		} catch {
 			// Best-effort: a locked file must not block the sync.
 		}

package/extensions/cache.ts CHANGED Viewed

@@ -47,9 +47,13 @@ function resolveOne(entry: string, cwd: string): string {
 					cwd,
 					encoding: "utf-8",
 					stdio: ["ignore", "pipe", "ignore"],
+					timeout: 30_000,
 				}).trim();
 				return `git:${ref}=${sha}`;
-			} catch {
+			} catch (e: unknown) {
+				if ((e as NodeJS.ErrnoException).code === "ETIMEDOUT") {
+					return `git:${ref}=<timeout>`;
+				}
 				return `git:${ref}=<no-git>`;
 			}
 		}

package/extensions/index.ts CHANGED Viewed

@@ -42,6 +42,7 @@ import {
 	DEFAULT_RUN_AGE_DAYS,
 } from "./store.ts";
 import { CacheStore } from "./cache.ts";
+import { safeParse } from "./interpolate.ts";
 interface TaskflowDetails {
 	state?: RunState;
@@ -195,7 +196,7 @@ async function runFlow(
 		cleanupConfig.maxKeep = settings.taskflow.maxKeptRuns;
 		cleanupConfig.maxAgeDays = settings.taskflow.maxRunAgeDays;
 		const scope: AgentScope = def.agentScope ?? "user";
-		const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides, settings.modelRoles, settings.taskflow);
+		const { agents } = discoverAgents(ctx.cwd, scope, settings.modelRoles, settings.taskflow);
 		// Hint: if any agent still has unresolved {{role}} references, suggest configuring modelRoles
 		const unresolvedRoles = agents
@@ -416,7 +417,7 @@ export default function (pi: ExtensionAPI) {
 			if (action === "agents") {
 				const scope = params.scope ?? "both";
 				const settings2 = readSubagentSettings();
-				const { agents } = discoverAgents(ctx.cwd, scope as AgentScope, undefined, settings2.modelRoles, settings2.taskflow);
+				const { agents } = discoverAgents(ctx.cwd, scope as AgentScope, settings2.modelRoles, settings2.taskflow);
 				const text = agents.length
 					? agents
 							.map(
@@ -441,13 +442,18 @@ export default function (pi: ExtensionAPI) {
 				const { verifyTaskflow } = await import("./verify.ts");
 				// Load definition: inline define takes priority, then saved name
 				let def: Taskflow | undefined;
-				if (params.define) {
-					const d = params.define as Record<string, unknown>;
+				let resolvedDefine: unknown = params.define;
+				if (typeof resolvedDefine === "string") {
+					const parsed = safeParse(resolvedDefine);
+					if (parsed && typeof parsed === "object") resolvedDefine = parsed;
+				}
+				if (resolvedDefine) {
+					const d = resolvedDefine as Record<string, unknown>;
 					if (typeof d === "object" && d !== null && Array.isArray(d.phases)) {
 						def = d as unknown as Taskflow;
-					} else if (isShorthand(params.define)) {
-						const r = validateTaskflow(params.define);
-						if (r.ok) def = params.define as unknown as Taskflow;
+					} else if (isShorthand(resolvedDefine)) {
+						const r = validateTaskflow(resolvedDefine);
+						if (r.ok) def = resolvedDefine as unknown as Taskflow;
 					}
 				} else if (params.name) {
 					const saved = getFlow(ctx.cwd, params.name);
@@ -505,9 +511,25 @@ export default function (pi: ExtensionAPI) {
 			// resolve the definition: inline `define` / shorthand (single|parallel|chain), else saved `name`.
 			let def: Taskflow | undefined;
+			// Auto-parse string `define` — LLMs sometimes pass a JSON string
+			// instead of a parsed object. safeParse handles markdown fences too.
+			let resolvedDefine: unknown = params.define;
+			if (typeof resolvedDefine === "string") {
+				const parsed = safeParse(resolvedDefine);
+				if (parsed && typeof parsed === "object") {
+					resolvedDefine = parsed;
+				} else {
+					return errorResult(
+						action,
+						`'define' was passed as a string, not a JSON object. Pass it as a proper object, e.g.:\n` +
+							`define: {"name":"my-flow","phases":[{"id":"step1","task":"do something"}]}`,
+					);
+				}
+			}
 			// A shorthand spec can come from `define` (no phases) or top-level params.
 			const shorthandSpec: unknown =
-				params.define ??
+				resolvedDefine ??
 				(params.chain
 					? { chain: params.chain, name: params.name }
 					: params.tasks
@@ -530,11 +552,25 @@ export default function (pi: ExtensionAPI) {
 				def = candidate as Taskflow;
 			} else if (params.name) {
 				const saved = getFlow(ctx.cwd, params.name);
-				if (!saved) return errorResult(action, `Saved flow not found: ${params.name}`);
+				if (!saved) {
+					const available = listFlows(ctx.cwd);
+					const hint = available.length
+						? ` Available flows: ${available.map((f) => f.name).join(", ")}.`
+						: " No saved flows found. Use action=save to create one, or pass 'define' for an inline flow.";
+					return errorResult(action, `Saved flow '${params.name}' not found.${hint}`);
+				}
 				def = saved.def;
 			}
 			if (!def)
-				return errorResult(action, "Provide 'define' (DSL), shorthand 'task'/'tasks'/'chain', or 'name' (saved).");
+				return errorResult(
+					action,
+					`No taskflow definition provided. Use one of:\n` +
+						`- define: {"name":"...","phases":[...]} (inline DSL object)\n` +
+						`- task: "..." (shorthand single agent)\n` +
+						`- tasks: [{"task":"..."},...] (shorthand parallel)\n` +
+						`- chain: [{"task":"..."},...] (shorthand sequential)\n` +
+						`- name: "saved-flow-name" (run a previously saved flow)`,
+				);
 			// save
 			if (action === "save") {
@@ -562,7 +598,17 @@ export default function (pi: ExtensionAPI) {
 			}
 			// run
-			const args = resolveArgs(def, params.args);
+			// Auto-parse string args — LLMs sometimes pass a JSON string.
+			let resolvedArgs: Record<string, unknown> | undefined;
+			if (typeof params.args === "string") {
+				const parsed = safeParse(params.args);
+				if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
+					resolvedArgs = parsed as Record<string, unknown>;
+				}
+			} else if (params.args && typeof params.args === "object") {
+				resolvedArgs = params.args as Record<string, unknown>;
+			}
+			const args = resolveArgs(def, resolvedArgs);
 			const v = validateTaskflow(def, { args, cwd: ctx.cwd });
 			if (!v.ok) return errorResult(action, `Invalid taskflow:\n- ${v.errors.join("\n- ")}`);
 			for (const w of v.warnings) {
@@ -579,7 +625,14 @@ export default function (pi: ExtensionAPI) {
 		renderCall(args, theme) {
 			const action = args.action ?? "run";
-			let label = args.name || (args.define as { name?: string } | undefined)?.name;
+			let label = args.name;
+		if (!label) {
+			let define = args.define;
+			if (typeof define === "string") {
+				try { define = JSON.parse(define); } catch { /* not JSON */ }
+			}
+			label = (define as { name?: string } | undefined)?.name;
+		}
 			let suffix = "";
 			const phases = (args.define as Taskflow | undefined)?.phases;
 			if (phases) suffix = ` (${phases.length} phases)`;
@@ -613,7 +666,7 @@ export default function (pi: ExtensionAPI) {
 	pi.registerCommand("tf", {
 		description: "Taskflow: list | run <name> | show <name> | runs | init",
 		getArgumentCompletions: (prefix) => {
-			const subs = ["list", "run", "show", "runs", "resume", "init"];
+			const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify"];
 			const items = subs.map((s) => ({ value: s, label: s }));
 			const filtered = items.filter((i) => i.value.startsWith(prefix));
 			return filtered.length > 0 ? filtered : null;
@@ -797,13 +850,13 @@ function parseArgsString(input: string, def: Taskflow): Record<string, unknown>
 	}
 	// key=value pairs
 	const out: Record<string, unknown> = {};
-	const pairs = trimmed.match(/(\w+)=("[^"]*"|\S+)/g);
+	const pairs = trimmed.match(/(\w+)=("(?:[^"\\]|\\.)*"|\S+)/g);
 	if (pairs) {
 		for (const p of pairs) {
 			const idx = p.indexOf("=");
 			const k = p.slice(0, idx);
 			let v: string = p.slice(idx + 1);
-			if (v.startsWith('"') && v.endsWith('"')) v = v.slice(1, -1);
+			if (v.startsWith('"') && v.endsWith('"')) v = v.slice(1, -1).replace(/\\"/g, '"');
 			out[k] = v;
 		}
 		return out;

package/extensions/interpolate.ts CHANGED Viewed

@@ -66,7 +66,13 @@ function resolvePath(path: string, ctx: InterpolationContext): unknown {
 		const step = stepId ? ctx.steps[stepId] : undefined;
 		if (!step) return undefined;
 		const field = parts[2];
-		if (field === "output") return step.output;
+		if (field === "output") {
+			// Guard: {steps.X.output.trailing} — trailing segments after output are
+			// likely author errors (output is a string, not an object). Return
+			// undefined so the placeholder is left intact with a missing warning.
+			if (parts.length > 3) return undefined;
+			return step.output;
+		}
 		if (field === "json") {
 			const json = step.json ?? safeParse(step.output);
 			return dig(json, parts.slice(3));
@@ -82,6 +88,12 @@ function resolvePath(path: string, ctx: InterpolationContext): unknown {
 	return undefined;
 }
+/**
+ * Traverse an object by a sequence of property keys. Returns `undefined`
+ * when any segment is missing or the current value is not an object —
+ * never throws, so extra path segments like {steps.X.json.a.b} where the
+ * data is shallower resolve gracefully to undefined (M-8).
+ */
 function dig(obj: unknown, parts: string[]): unknown {
 	let cur: unknown = obj;
 	for (const part of parts) {
@@ -219,10 +231,25 @@ function tokenize(input: string): Tok[] {
 		}
 		// quoted string
 		if (c === '"' || c === "'") {
-			const end = input.indexOf(c, i + 1);
-			if (end === -1) throw new Error("unterminated string");
-			toks.push({ t: "str", v: input.slice(i + 1, end) });
-			i = end + 1;
+			// Handle escaped quotes. Note: ALL \X sequences are interpreted as literal X
+			// (including \n → n, \t → t). This differs from JSON/JS escaping but is
+			// correct for condition strings which only need quote escaping.
+			let j = i + 1;
+			let val = "";
+			while (j < n) {
+				if (input[j] === "\\" && j + 1 < n) {
+					val += input[j + 1];
+					j += 2;
+				} else if (input[j] === c) {
+					break;
+				} else {
+					val += input[j];
+					j++;
+				}
+			}
+			if (j >= n) throw new Error("unterminated string");
+			toks.push({ t: "str", v: val });
+			i = j + 1;
 			continue;
 		}
 		// multi/single char operators

package/extensions/render.ts CHANGED Viewed

@@ -104,7 +104,7 @@ export function summarizeRun(state: RunState): string {
 	const done = phases.filter((p) => p.status === "done").length;
 	const failed = phases.filter((p) => p.status === "failed").length;
 	const running = phases.filter((p) => p.status === "running").length;
-	const total = state.def.phases.length;
+	const total = Object.keys(state.phases).length;
 	const bits = [`${done}/${total} done`];
 	if (running) bits.push(`${running} running`);
 	if (failed) bits.push(`${failed} failed`);
@@ -254,7 +254,7 @@ function headerLine(state: RunState, theme: Theme): string {
 	const done = phases.filter((p) => p.status === "done").length;
 	const failed = phases.filter((p) => p.status === "failed").length;
 	const running = phases.filter((p) => p.status === "running").length;
-	const total = state.def.phases.length;
+	const total = Object.keys(state.phases).length;
 	const head =
 		state.status === "completed"

package/extensions/runner.ts CHANGED Viewed

@@ -25,6 +25,8 @@ export interface RunResult {
 	errorMessage?: string;
 	/** Total subagent attempts incl. retries (set by the runtime's retry wrapper). */
 	attempts?: number;
+	/** Set when the subagent was killed by the idle watchdog (not a user abort). */
+	idleTimeout?: boolean;
 }
 export interface LiveUpdate {
@@ -74,6 +76,8 @@ const TRANSIENT_ERROR_RE =
 	/rate[_\s-]?limit|too\s+many\s+requests|overloaded|\b429\b|\b503\b|\b502\b|\b504\b|service\s+unavailable|temporarily\s+unavailable|timeout|timed?\s+out|econnreset|etimedout|socket\s+hang\s*up/i;
 export function isTransientError(r: RunResult): boolean {
 	if (r.stopReason === "aborted") return false;
+	// Idle timeout is a deterministic stall — retrying won't help.
+	if (r.stopReason === "error" && r.idleTimeout) return false;
 	const hay = `${r.errorMessage ?? ""} ${r.stderr ?? ""} ${r.output ?? ""}`;
 	return TRANSIENT_ERROR_RE.test(hay);
 }
@@ -153,6 +157,8 @@ export interface EventAccumulator {
 	stopReason?: string;
 	errorMessage?: string;
 	lastActivity: string;
+	/** Set when message cap was hit — output gets a truncation notice. */
+	truncated?: boolean;
 }
 export function newAccumulator(model?: string): EventAccumulator {
@@ -175,7 +181,15 @@ export function foldEventLine(acc: EventAccumulator, line: string): LiveUpdate |
 	}
 	if (event.type !== "message_end" || !event.message) return null;
 	const msg = event.message as Message;
-	acc.messages.push(msg);
+	// Cap prevents OOM from misconfigured loops. 500 messages is generous for
+	// normal subagent tasks (50 turns × 10 messages each). Messages beyond the
+	// cap are still parsed for usage/model/stopReason extraction.
+	const MAX_MESSAGES = 500;
+	if (acc.messages.length < MAX_MESSAGES) {
+		acc.messages.push(msg);
+	} else {
+		acc.truncated = true;
+	}
 	if (msg.role !== "assistant") return null;
 	acc.usage.turns++;
 	const u = (msg as any).usage;
@@ -323,6 +337,7 @@ export async function runAgentTask(
 		let wasAborted = false;
 		let idleTimedOut = false;
+		let killedBySignal: string | undefined;
 		const exitCode = await new Promise<number>((resolve) => {
 			const invocation = getPiInvocation(args);
 			const proc = spawn(invocation.command, invocation.args, {
@@ -371,12 +386,19 @@ export async function runAgentTask(
 				buffer = lines.pop() || "";
 				for (const line of lines) processLine(line);
 			});
+			// Cap prevents OOM from verbose tool output (e.g., npm install). 64 KB is
+			// generous for error diagnosis while preventing memory exhaustion.
+			const STDERR_MAX_LEN = 64 * 1024;
 			proc.stderr.on("data", (data) => {
 				result.stderr += data.toString();
+				if (result.stderr.length >= STDERR_MAX_LEN) {
+					result.stderr = result.stderr.slice(0, STDERR_MAX_LEN) + "\n[...stderr truncated at 64KB]";
+				}
 			});
-			proc.on("close", (code) => {
+			proc.on("close", (code, signal) => {
 				clearTimers();
 				if (buffer.trim()) processLine(buffer);
+				if (code === null && signal) killedBySignal = signal;
 				resolve(code ?? 0);
 			});
 			proc.on("error", (err) => {
@@ -411,11 +433,25 @@ export async function runAgentTask(
 		result.stopReason = acc.stopReason;
 		result.errorMessage = acc.errorMessage;
 		result.output = getFinalOutput(acc.messages);
+		// M-6: surface truncation when the message cap was hit so downstream
+		// phases and the user know output was cut short.
+		if (acc.truncated) {
+			result.output += "\n\n[...output truncated after 500 messages]";
+		}
+		// Signal kill detection: process exited 0 but was killed by a signal
+		// (e.g. OOM killer, cgroup limit). Treat as failure so the runtime's
+		// retry/fail handling doesn't silently accept a truncated result.
+		if (exitCode === 0 && killedBySignal && !idleTimedOut && !wasAborted) {
+			result.exitCode = 1;
+			result.stopReason = "error";
+			result.errorMessage = `Subagent killed by signal ${killedBySignal}`;
+		}
 		if (idleTimedOut) {
 			// Distinct, actionable signal: the child was killed for being idle, not
 			// a user abort. stopReason "error" keeps it in the failed bucket so the
 			// runtime's retry/fail handling treats it as a real failure.
 			result.stopReason = "error";
+			result.idleTimeout = true;
 			result.errorMessage = `Subagent stalled: no output for ${Math.round((opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS) / 1000)}s (idle timeout) — killed`;
 		} else if (wasAborted) {
 			result.stopReason = "aborted";

package/extensions/runs-view.ts CHANGED Viewed

@@ -29,7 +29,7 @@ function statusBadge(status: RunState["status"], theme: Theme): string {
 }
 function timeAgo(ts: number): string {
-	const s = Math.floor((Date.now() - ts) / 1000);
+	const s = Math.max(0, Math.floor((Date.now() - ts) / 1000));
 	if (s < 60) return `${s}s ago`;
 	if (s < 3600) return `${Math.floor(s / 60)}m ago`;
 	if (s < 86400) return `${Math.floor(s / 3600)}h ago`;
@@ -37,7 +37,7 @@ function timeAgo(ts: number): string {
 }
 function isResumable(r: RunState): boolean {
-	return r.status === "paused" || r.status === "failed" || r.status === "blocked";
+	return r.status === "paused" || r.status === "failed";
 }
 export class RunHistoryComponent {

package/extensions/runtime.ts CHANGED Viewed

@@ -70,8 +70,17 @@ function buildInterpolationContext(
 ): InterpolationContext {
 	const steps: Record<string, { output: string; json?: unknown }> = {};
 	for (const [id, ps] of Object.entries(state.phases)) {
-		if (ps.status === "done" && ps.output !== undefined) {
-			steps[id] = { output: ps.output, json: ps.json };
+		// Include both done AND failed phases so downstream phases can see
+		// error info. Skipped phases (upstream failure cascade) are excluded.
+		if (ps.status === "done" || ps.status === "failed") {
+			if (ps.output !== undefined) {
+				steps[id] = { output: ps.output, json: ps.json };
+			} else if (ps.status === "failed") {
+				// M-3: Failed phases without output get a placeholder so
+				// downstream references like {steps.X.output} resolve to a
+				// sensible value instead of leaving the raw placeholder intact.
+				steps[id] = { output: "[previous phase failed]", json: undefined };
+			}
 		}
 	}
 	return { args: state.args, steps, previousOutput, locals };
@@ -80,10 +89,16 @@ function buildInterpolationContext(
 function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
 	const failed = isFailed(r);
 	const attempts = attemptsOf(r);
+	// For failed phases, embed the error info in the output so downstream
+	// phases (and the user) can see what went wrong. The raw r.output is
+	// often a useless placeholder like "(upstream error: subagent failed)".
+	const output = failed
+		? r.errorMessage || r.stderr || r.output
+		: r.output;
 	return {
 		id,
 		status: failed ? "failed" : "done",
-		output: r.output,
+		output,
 		json: parseJson && !failed ? safeParse(r.output) : undefined,
 		usage: r.usage,
 		model: r.model,
@@ -156,8 +171,13 @@ function mergePhaseState(
 	// which model produced the merged output.
 	const model = ran.find((r) => r.model !== undefined)?.model;
 	// Combine outputs as a labelled list; also expose a JSON array of outputs.
+	// For failed items, use the error message instead of the useless placeholder.
 	const combinedText = ran
-		.map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
+		.map((r, i) => {
+			const label = `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}`;
+			const content = isFailed(r) ? (r.errorMessage || r.stderr || r.output) : r.output;
+			return `${label}\n\n${content}`;
+		})
 		.join("\n\n---\n\n");
 	// Only successful runs feed the parsed JSON array (no error/skip strings).
 	const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
@@ -373,7 +393,14 @@ async function executePhase(
 			// Backoff: prefer the explicit policy's curve when the phase defines one
 			// (covers transient retries too, and keeps tests fast with backoffMs:0),
 			// otherwise use the transient defaults.
-			const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
+			const baseMs = retry?.backoffMs != null ? retry.backoffMs : DEFAULT_TRANSIENT_BACKOFF_MS;
+			// Factor asymmetry is intentional:
+			// - Explicit retry: backoffMs * (factor ?? 1) ^ attempt — user's
+			//   curve, defaults to flat (factor=1 → constant backoff).
+			// - Transient fallback: backoffMs * 2 ^ attempt — exponential.
+			// This lets users opt into flat retry with retry: {max:3} without
+			// specifying factor, while transient errors get proper exponential
+			// backoff.
 			const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
 			const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
 			if (wait > 0) await delay(wait, deps.signal);
@@ -742,7 +769,7 @@ async function executePhase(
 		for (let i = 1; i <= maxIters; i++) {
 			if (deps.signal?.aborted) {
-				stop = "failed";
+				stop = "aborted";
 				break;
 			}
 			iterations = i;
@@ -788,14 +815,14 @@ async function executePhase(
 		}
 		const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
-		if (failedResult) {
+		if (failedResult || stop === "failed" || stop === "aborted") {
 			return {
 				id: phase.id,
 				status: "failed",
 				output: lastOutput || undefined,
 				usage: aggUsage,
-				error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
-				loop: { iterations, stop: "failed" },
+				error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
+				loop: { iterations, stop },
 				warnings: loopWarnings.length ? loopWarnings : undefined,
 				inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
 				endedAt: Date.now(),
@@ -868,6 +895,22 @@ async function executePhase(
 			};
 		}
+		// Guard: skip the judge if the run is over budget or aborted.
+		if (deps.signal?.aborted || overBudget(state).over) {
+			return {
+				id: phase.id,
+				status: "done",
+				output: ok[0].output,
+				json: parseJson ? safeParse(ok[0].output) : undefined,
+				usage: variantUsage,
+				model: ok[0].model,
+				warnings: ["judge skipped: run aborted or budget exceeded"],
+				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
+				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				endedAt: Date.now(),
+			};
+		}
 		// Build the judge prompt: label every variant output, then the rubric.
 		const labelled = ran
 			.map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
@@ -1288,6 +1331,10 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 				if (!budgetReason) budgetReason = "fan-out truncated by budget";
 			}
 			// Budget ceiling: once exceeded, remaining phases are skipped.
+			// For concurrent same-layer phases, the check runs after each phase
+			// completes, so at most (concurrency - 1) extra phases may run before
+			// the budget is detected as exceeded. This bounded overshoot is
+			// acceptable: budgetBlocked prevents cascading into subsequent layers.
 			const ob = overBudget(state);
 			if (ob.over && !budgetBlocked) {
 				budgetBlocked = true;

package/extensions/schema.ts CHANGED Viewed

@@ -235,7 +235,7 @@ const ArgSpecSchema = Type.Object(
 export const TaskflowSchema = Type.Object(
 	{
-		name: Type.String({ description: "Workflow name (becomes /tf:<name> command when saved)" }),
+		name: Type.String({ minLength: 1, description: "Workflow name (becomes /tf:<name> command when saved)" }),
 		description: Type.Optional(Type.String()),
 		version: Type.Optional(Type.Number({ default: 1 })),
 		args: Type.Optional(Type.Record(Type.String(), ArgSpecSchema, { description: "Declared invocation arguments" })),

package/extensions/store.ts CHANGED Viewed

@@ -59,7 +59,7 @@ export interface PhaseState {
 	/** Human-in-the-loop outcome (approval phases only). */
 	approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
 	/** Loop iteration accounting (loop phases only). */
-	loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" };
+	loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
 	/** Tournament outcome (tournament phases only). */
 	tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
 	/** Non-fatal diagnostic warnings accumulated during this phase (e.g.
@@ -128,6 +128,9 @@ export const DEFAULT_RUN_AGE_DAYS = DEFAULT_MAX_AGE_DAYS;
 /** Last cleanup timestamp — module-level so it persists across calls. */
 let lastCleanupAt = 0;
+/** Shared buffer for Atomics.wait in acquireLock busy-wait (Finding 6). */
+const LOCK_WAIT_BUF = new Int32Array(new SharedArrayBuffer(4));
 // ---------------------------------------------------------------------------
 // Internal helpers — path construction & sanitisation
 // ---------------------------------------------------------------------------
@@ -142,7 +145,7 @@ let lastCleanupAt = 0;
  * bare-dot / leading-dot components after the character substitution so the
  * write path can never escape runs/ (risk-reviewer v0.0.9 audit, H1).
  */
-function safeFlowDirName(flowName: string): string {
+export function safeFlowDirName(flowName: string): string {
 	let safe = flowName.replace(/[^\w.-]+/g, "_");
 	// Collapse leading dots: blocks ".", "..", and hidden-dir names like ".git".
 	safe = safe.replace(/^\.+/, "_");
@@ -245,7 +248,7 @@ function acquireLock(lockPath: string, timeoutMs: number = LOCK_TIMEOUT_MS): voi
 				throw new Error(`Lock timeout after ${timeoutMs}ms waiting for ${path.basename(lockPath)}`);
 			}
 			// Busy-wait with Atomics.wait (CPU-efficient sleep).
-			Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, LOCK_POLL_MS);
+			Atomics.wait(LOCK_WAIT_BUF, 0, 0, LOCK_POLL_MS);
 		}
 	}
 }
@@ -392,11 +395,18 @@ function rebuildIndex(runsRoot: string): RunIndexEntry[] {
 		} catch { /* skip corrupt */ }
 	}
-	const result = Array.from(entries.values());
-	// Persist the rebuilt index under the index lock so it does not race a
-	// concurrent updateIndexEntry / cleanup write (M1).
-	withLock(indexLockPath(runsRoot), () => writeIndex(runsRoot, result));
-	return result;
+	const scanned = Array.from(entries.values());
+	// Persist the rebuilt index under the index lock. Re-read the current
+	// index inside the lock and merge by runId so concurrent writes are not
+	// clobbered — scanned entries win on conflict (Finding 5).
+	withLock(indexLockPath(runsRoot), () => {
+		const currentIndex = readIndex(runsRoot);
+		const merged = new Map<string, RunIndexEntry>();
+		for (const e of currentIndex) merged.set(e.runId, e);
+		for (const e of scanned) merged.set(e.runId, e); // scanned wins
+		writeIndex(runsRoot, Array.from(merged.values()));
+	});
+	return scanned;
 }
 // ---------------------------------------------------------------------------
@@ -422,7 +432,8 @@ function cleanupTerminalRuns(
 	maxKeep: number = DEFAULT_MAX_KEPT_TERMINAL,
 	maxAgeDays: number = DEFAULT_MAX_AGE_DAYS,
 ): void {
-	const now = Date.now();
+	const cleanupStarted = Date.now();
+	const now = cleanupStarted;
 	if (now - lastCleanupAt < CLEANUP_INTERVAL_MS) return;
 	lastCleanupAt = now;
@@ -473,6 +484,8 @@ function cleanupTerminalRuns(
 	// Delete run files + lock files (outside the index lock).
 	for (const e of toRemove) {
 		const filePath = path.join(runsRoot, e.relPath);
+		// Race guard: skip files modified after cleanup started (Finding 2).
+		try { if (fs.statSync(filePath).mtimeMs > cleanupStarted) continue; } catch { continue; }
 		try { fs.unlinkSync(filePath); } catch { /* already gone */ }
 		// Also remove any orphaned lock file.
 		try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
@@ -566,16 +579,19 @@ export function saveFlow(
 	scope: "user" | "project" = "project",
 ): { filePath: string } {
 	const dir = scope === "user" ? userFlowsDir() : (findProjectFlowsDir(cwd, true) ?? path.join(cwd, ".pi", "taskflows"));
+	if (!def.name || def.name.trim().length === 0) throw new Error("Flow name must not be empty");
 	fs.mkdirSync(dir, { recursive: true });
-	const safe = def.name.replace(/[^\w.-]+/g, "_");
+	const safe = safeFlowDirName(def.name);
 	const filePath = path.join(dir, `${safe}.json`);
-	writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`);
+	const fileLockPath = filePath + ".lock";
+	withLock(fileLockPath, () => { writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`); });
-	// One-shot: let the user know we're creating a .pi/ directory on first save.
+	// One-shot: let the user know about .pi/ directory on first save (Finding 8).
 	if (!_piCreationHinted) {
 		_piCreationHinted = true;
+		const piExisted = fs.existsSync(path.join(dir, "..", ".."));
 		console.warn(
-			`[taskflow] Created .pi/taskflows/ for project-scoped flow storage. ` +
+			`[taskflow] ${piExisted ? "Using" : "Created"} .pi/taskflows/ for project-scoped flow storage. ` +
 			`Add .pi/ to .gitignore if desired.`,
 		);
 	}
@@ -587,6 +603,8 @@ export function saveFlow(
 // --- Run state ---
 function runsDir(cwd: string): string {
+	// Safe non-null assertion: create=true guarantees a non-null return because
+	// findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
 	const projDir = findProjectFlowsDir(cwd, true)!;
 	return path.join(projDir, "runs");
 }
@@ -614,6 +632,9 @@ export function newRunId(flowName: string): string {
  * caller's reference.
  */
 export function saveRun(state: RunState, cleanup?: { maxKeep?: number; maxAgeDays?: number }): void {
+	// Reject unsafe runIds before any filesystem access (Finding 1).
+	if (!validateRunId(state.runId)) return;
 	const root = runsDir(state.cwd);
 	const flowDir = flowRunDir(root, state.flowName);
 	fs.mkdirSync(flowDir, { recursive: true });

package/extensions/verify.ts CHANGED Viewed

@@ -253,6 +253,7 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
 		}
 	}
+	const ESTIMATED_COST_PER_PHASE = 0.001; // $0.001 minimum per subagent call
 	if (budget.maxTokens !== undefined && budget.maxTokens > 0 && minTokens > budget.maxTokens) {
 		issues.push({
 			message:
@@ -263,6 +264,16 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
 			category: "budget-overflow",
 		});
 	}
+	if (budget.maxUSD !== undefined && budget.maxUSD > 0 && minTokens * ESTIMATED_COST_PER_PHASE > budget.maxUSD) {
+		issues.push({
+			message:
+				`Budget cap ($${budget.maxUSD}) is below the estimated minimum of ~$${(minTokens * ESTIMATED_COST_PER_PHASE).toFixed(3)} ` +
+				`for ${flow.phases.length} phase(s). The flow will likely be truncated before completion. ` +
+				`Increase maxUSD or reduce the number of phases.`,
+			severity: "warning",
+			category: "budget-overflow",
+		});
+	}
 	return issues;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-taskflow",
-  "version": "0.0.16",
+  "version": "0.0.17",
   "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
   "keywords": [
     "pi-package",

package/skills/taskflow/SKILL.md CHANGED Viewed

@@ -310,7 +310,7 @@ Quick reference:
 - **Flow:** `name`, `description`, `concurrency` (default 8), `budget` (`maxUSD`/`maxTokens`), `agentScope` (user|project|both), `args`, `strictInterpolation`.
 - **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`with` (flow), `final`.
-- **Precedence (model/thinking/tools):** phase value → `settings.subagents.agentOverrides[agent]` → agent frontmatter → global/default.
+- **Precedence (model/thinking/tools):** phase value → agent frontmatter (resolved via `modelRoles`) → global/default.
 - **Concurrency:** same-layer phases use `flow.concurrency`; a `map`/`parallel` phase uses `phase.concurrency ?? flow.concurrency ?? 8`.
 ## Actions

package/skills/taskflow/configuration.md CHANGED Viewed

@@ -11,7 +11,7 @@ Configuration lives in **five layers**, from most local to most global:
 | Phase | a phase object in the DSL | per-step model/thinking/tools/cwd/output/concurrency |
 | Flow | the top-level DSL object | name, args, default concurrency, agent scope |
 | Agent | `~/.pi/agent/agents/*.md`, `.pi/agents/*.md` frontmatter | per-agent default model/thinking/tools + system prompt |
-| Settings | `~/.pi/agent/settings.json` | `subagents.agentOverrides`, global thinking |
+| Settings | `~/.pi/agent/settings.json` | `modelRoles`, global thinking |
 | Environment | shell env | `PI_TASKFLOW_PI_BIN` |
 ---
@@ -156,9 +156,9 @@ For any phase, the effective value is resolved in this **precedence order**
 | Setting | Precedence (high → low) |
 |---------|-------------------------|
-| **model** | `phase.model` → `settings.agentOverrides[agent].model` → agent frontmatter `model` → pi default |
-| **thinking** | `phase.thinking` → `settings.agentOverrides[agent].thinking` → agent frontmatter `thinking` → `settings` global thinking → pi default |
-| **tools** | `phase.tools` → `settings.agentOverrides[agent].tools` → agent frontmatter `tools` → all tools |
+| **model** | `phase.model` → agent frontmatter `model` (resolved via `modelRoles`) → pi default |
+| **thinking** | `phase.thinking` → agent frontmatter `thinking` → `settings` global thinking → pi default |
+| **tools** | `phase.tools` → agent frontmatter `tools` → all tools |
 Notes:
 - `tools` is a **whitelist** passed as `--tools a,b,c`. Omit it to allow all.
@@ -192,19 +192,18 @@ Taskflow shares the subagent settings file at `~/.pi/agent/settings.json`:
 ```jsonc
 {
+  "modelRoles": {
+    "fast": "openrouter/deepseek/deepseek-v4-flash",
+    "strong": "openrouter/xiaomi/mimo-v2.5-pro"
+  },
   "subagents": {
-    "globalThinking": "medium",          // fallback thinking for all subagents
-    "agentOverrides": {
-      "analyst": { "model": "claude-sonnet-4-5", "thinking": "high" },
-      "scout":   { "tools": ["read", "bash", "grep"] }
-    }
+    "globalThinking": "medium"              // fallback thinking for all subagents
   },
   "defaultThinkingLevel": "low"          // used if subagents.globalThinking is absent
 }
 ```
-- `subagents.agentOverrides` — per-agent overrides applied at discovery; they beat
-  agent frontmatter but lose to a phase-level value (see §5).
+- `modelRoles` — maps `{{role}}` references in agent frontmatter to actual model identifiers.
 - `subagents.globalThinking` (or top-level `defaultThinkingLevel`) — global
   thinking fallback.

package/DESIGN.md DELETED Viewed

@@ -1,338 +0,0 @@
-# pi-taskflow — 设计与可行性方案
-> 轻量工作流编排框架 for [pi coding agent](https://pi.dev)
-> 灵感来自 Claude Code Dynamic Workflows（2026-05-28 发布），适配 pi extension 生态。
----
-## 0. 一句话定位
-**让 LLM（或用户）用声明式 DSL 描述一个多阶段工作流，由确定性 runtime 编排 subagent 执行，中间结果不污染主 context，最终只回收结论；工作流可保存为命令、可复用、可恢复。**
----
-## 1. 市场调研结论
-### 1.1 命名
-| 名字 | 状态 | 说明 |
-|------|------|------|
-| `pi-workflow` | ❌ 已占 | VSCode GUI 扩展（聊天面板/侧栏），**非编排框架**，不冲突 |
-| **`pi-taskflow`** | ✅ 可用 | 本项目 |
-### 1.2 竞品分析（pi 生态无同类）
-| 包 | 模式 | 与 pi-taskflow 差异 |
-|----|------|------|
-| `pi-pipeline` | SPEC→PLAN→TASKS→VERIFY 固定流水线 | 固定流程，非动态可定义 DSL |
-| `pi-agent-flow` | fork subagent 并行调用器（scout/audit…） | 一次性并行调用，无 DAG / 无保存 / 无恢复 |
-| `pi-crew` | 重型多 agent 编排 + worktree + 异步 | 太重，用户已弃用 |
-| `pi-loop` | planner-worker-judge 固定循环 | 固定架构 |
-| `pi-subagents`（官方） | single/parallel/chain 即时调用 | 无持久化工作流定义、无 fan-out scale、无恢复 |
-**结论：声明式、可保存、可恢复、支持动态 fan-out 的轻量编排框架在 pi 生态是空白。**
-### 1.3 Claude Code Dynamic Workflows 借鉴要点
-| 特性 | Claude Code | pi-taskflow 对应 |
-|------|-------------|------------------|
-| 计划进代码 | Claude 写 JS 脚本 | LLM 产出 **声明式 JSON DSL**（更轻、可审、更安全） |
-| 中间结果隔离 | 脚本变量 | runtime 内存 Map，不进 context |
-| 规模 | 16 并发 / 1000 agent | 可配置并发上限 + `map` 动态 fan-out |
-| 可复用 | 保存为 `/command` | 保存到 `.pi/taskflows/`，注册为 `/tf:<name>` |
-| 可恢复 | 同 session 缓存 | run 状态落盘，**跨 session 可恢复**（超越 CC） |
-| 质量模式 | 对抗式 review | `gate` / `review` 阶段类型 |
----
-## 2. 深度可行性验证（逐项对照 pi 真实 API）
-> 全部基于阅读 `@earendil-works/pi-coding-agent` 的 extensions.md / packages.md / json.md / skills.md / prompt-templates.md / development.md + 现有 `~/.pi/agent/extensions/subagent/` 源码。
-### ✅ V1. 生成隔离上下文的 subagent，并拿到结构化输出
-- **机制**：`spawn("pi", ["--mode","json","-p","--no-session", ...])`，逐行解析 JSON 事件（`message_end` / `tool_result_end`）。
-- **证据**：现有 subagent extension 的 `runSingleAgent()` 已完整实现，含 usage 统计、stopReason、错误处理、abort 信号。
-- **结论**：**直接复用**，零风险。
-### ✅ V2. 并发控制（matching CC 的 scale）
-- **机制**：`mapWithConcurrencyLimit(items, concurrency, fn)`。
-- **证据**：subagent extension 已有该函数（worker pool 实现）。
-- **结论**：复用 + 提高默认上限（CC=16），新增 `map` 阶段做动态 fan-out。
-### ✅ V3. 中间结果不进 context window
-- **机制**：phase 结果存 runtime 内存 `Map<phaseName, PhaseResult>`；只有最终 phase 的 output 写进 tool `content`；完整轨迹放 `details`（默认不送 LLM，仅 TUI 渲染）。
-- **证据**：tool result 的 `content` vs `details` 分离（json.md / 现有 subagent）。
-- **结论**：可行，这是相对"裸 subagent 串联"的核心优势。
-### ⚠️ V4. 后台执行（session 保持响应）—— 已知约束 + 取舍
-- **pi 现实**：工具调用在一个 agent turn 内是**同步阻塞**的；没有 CC 那种独立 workflow runtime 进程。
-- **可用手段**：
-  - 工具 `onUpdate(partial)` 回调可**实时流式**推进度（subagent parallel 模式已验证）。
-  - `ctx.ui.setStatus()` / `ctx.ui.setWidget()` footer/widget 进度。
-- **取舍**：
-  - **v1（采用）**：工作流作为**单次长工具调用**执行，期间实时流式进度。session 在该 turn 内"忙"，但有完整 phase 进度可视化 —— 与 subagent 现有体验一致，符合"轻量"。
-  - **v2（路线图）**：detached 子进程 + 文件状态轮询 + `/tf status` 命令实现**真后台**。复杂度高，非首版。
-- **结论**：v1 可行，体验对标 subagent；真后台留作演进。诚实记录此约束。
-### ✅ V5. 保存工作流 → 可复用命令
-- **三条可用路径**（均已读文档确认）：
-  1. `pi.registerCommand()` —— 文档明确支持**运行时注册**（与 registerTool 同源刷新）。
-  2. `resources_discover` 事件 —— 动态贡献 prompt/skill 路径（dynamic-resources 示例验证）。
-  3. prompt templates（`.pi/prompts/*.md`）—— `/name` 展开为文本。
-- **采用方案**：
-  - 工作流定义存 `.pi/taskflows/<name>.json`（项目级）/ `~/.pi/agent/taskflows/<name>.json`（用户级）。
-  - `session_start` 时扫描目录，为每个工作流 `registerCommand("tf:<name>")`。
-  - 始终提供通用 `taskflow` 工具（LLM 调用）+ `/tf run <name> [args]` 命令（用户调用）。
-  - 保存新工作流后 `registerCommand` 立即生效（同 session 可用），无需 reload。
-- **结论**：可行，比 prompt-template 方案更强（命令直接驱动 runtime）。
-### ✅ V6. 状态持久化 / 恢复
-- **机制**：
-  - `pi.appendEntry(customType, data)` —— 会话内持久化（survive reload）。
-  - run 状态额外落盘 `.pi/taskflows/runs/<runId>.json` —— **跨 session 恢复**。
-  - 恢复逻辑：按 `phaseName + inputHash` 缓存结果；重跑跳过已完成 phase（与 CC "cached results" 一致）。
-- **证据**：todo.ts 示例（从 session entries 重建状态）；appendEntry API（extensions.md）。
-- **结论**：可行，且跨 session 恢复**超越 CC**（CC 仅同 session）。
-### ✅ V7. 进度可视化（TUI）
-- **机制**：复用 subagent 的 `renderCall` / `renderResult`；新增 phase 进度条 / DAG 状态。`ctx.ui.custom()` 做全屏 run 视图（todo.ts 模式）。
-- **结论**：可行，有现成范式。
-### ✅ V8. 打包发布
-- **机制**：`package.json` + `pi` manifest + `pi-package` keyword；pi 核心走 `peerDependencies`；`extensions/` 约定目录。`pi install npm:pi-taskflow`。
-- **证据**：packages.md。
-- **结论**：可行。
-### ✅ V9. Agent 复用
-- **机制**：复用 `discoverAgents(cwd, scope, overrides)`，从 `~/.pi/agent/agents/*.md` + `.pi/agents/*.md` 加载；工作流按 agent 名引用；支持 settings.json 的 `subagents.agentOverrides`。
-- **结论**：与现有 subagent 体系无缝衔接。
-### 可行性总评
-| 项 | 结论 |
-|----|------|
-| 核心编排（spawn/并发/隔离） | ✅ 复用现成代码，零风险 |
-| 保存/命令/恢复 | ✅ API 齐全 |
-| 真·后台执行 | ⚠️ v1 用流式长调用替代，v2 演进 |
-| TUI/打包/agent | ✅ 有范式 |
-**整体：高度可行。唯一妥协是"真后台"留 v2，v1 用流式长工具调用，体验对标现有 subagent。**
----
-## 3. 架构设计
-### 3.1 包结构
-```
-pi-taskflow/
-├── package.json              # pi manifest + peerDeps + pi-package keyword
-├── tsconfig.json
-├── README.md
-├── DESIGN.md                 # 本文件
-├── extensions/
-│   ├── index.ts              # 入口：注册 tool + commands + 事件
-│   ├── runtime.ts            # 编排引擎（DAG 解析 + 调度 + 恢复）
-│   ├── runner.ts             # subagent spawn（复用/移植 runSingleAgent）
-│   ├── agents.ts             # agent discovery（移植自 subagent/agents.ts）
-│   ├── schema.ts             # Taskflow DSL typebox schema + 校验
-│   ├── store.ts              # 工作流定义/run 状态读写（.pi/taskflows/）
-│   ├── interpolate.ts        # 模板插值 {steps.x.output} / {args.y}
-│   └── render.ts             # TUI renderCall/renderResult + 进度视图
-├── skills/
-│   └── taskflow/
-│       └── SKILL.md          # 教 LLM 何时/如何写 taskflow 定义
-└── examples/
-    ├── audit-endpoints.json
-    ├── deep-research.json
-    └── migrate-files.json
-```
-### 3.2 DSL（声明式工作流定义）
-```jsonc
-{
-  "name": "audit-endpoints",
-  "description": "审计 src/routes/ 下所有 API 端点的认证检查",
-  "version": 1,
-  "args": {                          // 调用时传入，{args.dir}
-    "dir": { "default": "src/routes" }
-  },
-  "concurrency": 8,                   // 默认并发上限
-  "phases": [
-    {
-      "id": "discover",
-      "type": "agent",               // 单 agent
-      "agent": "analyst",
-      "task": "列出 {args.dir} 下所有 API 端点，输出 JSON 数组 [{file, route}]",
-      "output": "json"               // 解析为结构化数据供 map 用
-    },
-    {
-      "id": "audit",
-      "type": "map",                 // ★ 动态 fan-out（scale 核心）
-      "over": "{steps.discover.output}",   // 对数组每项起一个 agent
-      "as": "item",
-      "agent": "analyst",
-      "task": "审计端点 {item.route}（文件 {item.file}）的认证检查，列出风险",
-      "dependsOn": ["discover"]
-    },
-    {
-      "id": "review",
-      "type": "gate",                // ★ 对抗式质量门
-      "agent": "reviewer",
-      "task": "复核以下审计结果，剔除误报，标注置信度：\n{steps.audit.output}",
-      "dependsOn": ["audit"]
-    },
-    {
-      "id": "report",
-      "type": "agent",
-      "agent": "planner",
-      "task": "汇总成最终报告：\n{steps.review.output}",
-      "dependsOn": ["review"],
-      "final": true                  // 该 phase 输出回收到主 session
-    }
-  ]
-}
-```
-### 3.3 Phase 类型
-| type | 语义 | 并发 |
-|------|------|------|
-| `agent` | 单 subagent 调用 | 1 |
-| `parallel` | 静态多任务并行（固定 task 列表） | ≤concurrency |
-| `map` | 对上游数组**动态 fan-out**，每项一个 agent | ≤concurrency |
-| `gate` | 质量门 / 对抗 review（可决定是否继续） | 1+ |
-| `reduce` | 把多结果聚合为一（synthesize） | 1 |
-| `approval` | **人在环**：暂停等待 approve / reject / edit | 1 |
-| `flow` | 把一个**已保存的 taskflow** 当作单个 phase 运行（组合复用） | 子流程并发 |
-### 3.3b 控制流 / 可靠性字段（任意 phase）
-| 字段 | 语义 |
-|------|------|
-| `when` | 条件守卫：表达式为假则 skip 该 phase。支持 `{refs}`、`== != < > <= >=`、`&& \|\| !`、括号、字符串/数字字面量。解析失败 fail-open（仍运行） |
-| `join` | 依赖 join：`all`（默认，等全部 dep）/ `any`（OR-join，任一 dep 完成即运行） |
-| `retry` | `{max, backoffMs, factor}`：失败重试，延迟 = `backoffMs * factor^attempt` |
-| `use` / `with` | `flow` 子流程的名字与入参（入参字符串值会插值） |
-顶层 `budget: {maxUSD, maxTokens}`：累计成本/token 超限即停（剩余 phase skip，运行态 `blocked`）。
-### 3.4 模板插值
-| 占位符 | 含义 |
-|--------|------|
-| `{args.X}` | 调用参数 |
-| `{steps.ID.output}` | 某 phase 的最终输出（字符串） |
-| `{steps.ID.json}` | 某 phase 输出解析为 JSON |
-| `{item}` / `{item.field}` | map 阶段当前项 |
-| `{previous.output}` | 上一 phase 输出（链式简写） |
-### 3.5 执行引擎（runtime.ts）
-```
-1. 校验 DSL（schema.ts）
-2. 拓扑排序 phases（dependsOn 建 DAG，检测环）
-3. 按层调度：
-   - 同层无依赖 phase 并行
-   - map 阶段展开为 N 子任务，受 concurrency 限流
-4. 每个 phase：
-   - 插值 task
-   - 命中缓存（phaseName+inputHash 在 run 状态里）→ 跳过
-   - 否则 spawn subagent（runner.ts），流式 onUpdate
-   - 存结果到内存 Map + 落盘 run 状态
-5. gate 阶段可返回 {continue:false} 中止
-6. final phase（或最后一个）输出 → tool content 回主 session
-7. 全程 details 累积完整轨迹供 TUI
-```
-### 3.6 对外接口
-**(a) LLM 工具：`taskflow`**
-```jsonc
-// 内联定义直接跑（LLM 动态生成工作流 —— 对标 CC "Claude 写脚本"）
-{ "define": { /* 完整 DSL */ }, "args": { "dir": "src/api" } }
-// 跑已保存的工作流
-{ "run": "audit-endpoints", "args": { "dir": "src/api" } }
-// 保存定义为可复用命令
-{ "save": "audit-endpoints", "define": { /* DSL */ } }
-// 从中断处恢复
-{ "resume": "<runId>" }
-```
-**(b) 用户命令**
-| 命令 | 作用 |
-|------|------|
-| `/tf list` | 列出已保存工作流 + 最近 run |
-| `/tf run <name> [args]` | 运行 |
-| `/tf:<name> [args]` | 每个保存的工作流自动注册的快捷命令 |
-| `/tf resume <runId>` | 恢复中断的 run |
-| `/tf show <name>` | 查看定义 |
-| `/tf runs` | 全屏 run 历史/状态视图（ctx.ui.custom） |
-**(c) 编程接口（供其他 extension）**
-```ts
-export async function runTaskflow(def, args, ctx): Promise<TaskflowResult>
-```
-### 3.7 存储布局
-```
-.pi/taskflows/                       # 项目级定义（可入库共享）
-  audit-endpoints.json
-~/.pi/agent/taskflows/               # 用户级定义
-  deep-research.json
-.pi/taskflows/runs/                  # run 状态（恢复用，gitignore）
-  <runId>.json                       # {def, args, phases:{id:{status,output,usage,hash}}}
-```
----
-## 4. 与现有 subagent 的关系
-- **不替代，是上层编排**。subagent = 即时调用；taskflow = 可定义/保存/恢复的编排。
-- 复用其 spawn / 并发 / usage / TUI 代码（移植进 `runner.ts`，避免硬依赖一个非 npm 的本地扩展）。
-- 共享 agent 体系（`~/.pi/agent/agents/*.md` + settings `subagents.agentOverrides`）。
----
-## 5. 路线图
-| 版本 | 范围 | 状态 |
-|------|------|------|
-| **v0.1** | DSL + schema + runtime（agent/parallel/map/reduce）+ `taskflow` 工具 + `/tf run` + 内存隔离 + 流式进度 | ✅ 已发布 (npm 0.0.1) |
-| **v0.2** | 保存/动态命令注册 + 跨 session 恢复 + `gate` 真门控 + run 历史交互 TUI | ✅ 已完成 (npm 0.0.3) |
-| **v0.3** | examples + SKILL.md（教 LLM 写定义）+ YAML 支持 + 发布 npm | 🚧 examples/SKILL/npm 已做；YAML 待办 |
-| **v0.6** | 控制流 & 可靠性：`when` 条件分支 + `join:any` OR-join + 声明式 `retry` + `approval` 人在环 + `flow` 子流程组合 + `budget` 成本上限 | ✅ 已完成 |
-| **v0.7+** | 真·后台执行（detached + 轮询）+ 事件/cron 触发 + 成本**预估** + mermaid DAG 导出 + 内置 `deep-research` 工作流 | ⏳ 待办 |
----
-## 6. 风险与缓解
-| 风险 | 缓解 |
-|------|------|
-| 真后台执行 v1 缺失 | 流式长调用 + 明确文档；v0.7+ 补 |
-| map 依赖上游输出结构化 JSON | `output:"json"` + 容错解析 + schema 提示 agent |
-| spawn pi 路径解析（bun/node/standalone） | 移植 subagent 的 `getPiInvocation()`（已处理三种运行时） |
-| 并发过高耗 token/限流 | concurrency 上限 + 成本预估（v0.7+） |
-| 运行时命令注册兼容性 | session_start 扫描注册兜底；保存即注册为增强 |
-| DSL 过度复杂 | 保持声明式、phase 类型保持精简（见 3.3，共 7 种）；JS 逃生舱不做（保持"轻量"） |
----
-## 7. 下一步
-1. 创建 `package.json` + `tsconfig.json` + 骨架目录
-2. 实现 `schema.ts`（DSL 校验）+ `interpolate.ts`
-3. 移植 `runner.ts` / `agents.ts`（自 subagent）
-4. 实现 `runtime.ts`（DAG 调度 + map fan-out）
-5. `index.ts` 接线 tool + `/tf` 命令
-6. 本地 `pi -e ./extensions/index.ts` 联调
-7. examples + SKILL.md + README
-8. 发布 `npm publish` → `pi install npm:pi-taskflow`
-</content>
-</invoke>