npm - pi-taskflow - Versions diffs - 0.0.15 → 0.0.17 - Mend

pi-taskflow 0.0.15 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/CHANGELOG.md +135 -0
package/README.md +16 -15
package/README.zh-CN.md +635 -0
package/extensions/agents.ts +79 -41
package/extensions/cache.ts +5 -1
package/extensions/index.ts +117 -34
package/extensions/init.ts +80 -1
package/extensions/interpolate.ts +32 -5
package/extensions/render.ts +2 -2
package/extensions/runner.ts +38 -2
package/extensions/runs-view.ts +2 -2
package/extensions/runtime.ts +56 -9
package/extensions/schema.ts +1 -1
package/extensions/store.ts +61 -13
package/extensions/verify.ts +11 -0
package/package.json +6 -3
package/skills/taskflow/SKILL.md +1 -1
package/skills/taskflow/configuration.md +10 -11
package/DESIGN.md +0 -338

package/extensions/interpolate.ts CHANGED

@@ -66,7 +66,13 @@ function resolvePath(path: string, ctx: InterpolationContext): unknown {
 		const step = stepId ? ctx.steps[stepId] : undefined;
 		if (!step) return undefined;
 		const field = parts[2];
-		if (field === "output") return step.output;
+		if (field === "output") {
+			// Guard: {steps.X.output.trailing} — trailing segments after output are
+			// likely author errors (output is a string, not an object). Return
+			// undefined so the placeholder is left intact with a missing warning.
+			if (parts.length > 3) return undefined;
+			return step.output;
+		}
 		if (field === "json") {
 			const json = step.json ?? safeParse(step.output);
 			return dig(json, parts.slice(3));
@@ -82,6 +88,12 @@ function resolvePath(path: string, ctx: InterpolationContext): unknown {
 	return undefined;
 }
+/**
+ * Traverse an object by a sequence of property keys. Returns `undefined`
+ * when any segment is missing or the current value is not an object —
+ * never throws, so extra path segments like {steps.X.json.a.b} where the
+ * data is shallower resolve gracefully to undefined (M-8).
+ */
 function dig(obj: unknown, parts: string[]): unknown {
 	let cur: unknown = obj;
 	for (const part of parts) {
@@ -219,10 +231,25 @@ function tokenize(input: string): Tok[] {
 		}
 		// quoted string
 		if (c === '"' || c === "'") {
-			const end = input.indexOf(c, i + 1);
-			if (end === -1) throw new Error("unterminated string");
-			toks.push({ t: "str", v: input.slice(i + 1, end) });
-			i = end + 1;
+			// Handle escaped quotes. Note: ALL \X sequences are interpreted as literal X
+			// (including \n → n, \t → t). This differs from JSON/JS escaping but is
+			// correct for condition strings which only need quote escaping.
+			let j = i + 1;
+			let val = "";
+			while (j < n) {
+				if (input[j] === "\\" && j + 1 < n) {
+					val += input[j + 1];
+					j += 2;
+				} else if (input[j] === c) {
+					break;
+				} else {
+					val += input[j];
+					j++;
+				}
+			}
+			if (j >= n) throw new Error("unterminated string");
+			toks.push({ t: "str", v: val });
+			i = j + 1;
 			continue;
 		}
 		// multi/single char operators

package/extensions/render.ts CHANGED

@@ -104,7 +104,7 @@ export function summarizeRun(state: RunState): string {
 	const done = phases.filter((p) => p.status === "done").length;
 	const failed = phases.filter((p) => p.status === "failed").length;
 	const running = phases.filter((p) => p.status === "running").length;
-	const total = state.def.phases.length;
+	const total = Object.keys(state.phases).length;
 	const bits = [`${done}/${total} done`];
 	if (running) bits.push(`${running} running`);
 	if (failed) bits.push(`${failed} failed`);
@@ -254,7 +254,7 @@ function headerLine(state: RunState, theme: Theme): string {
 	const done = phases.filter((p) => p.status === "done").length;
 	const failed = phases.filter((p) => p.status === "failed").length;
 	const running = phases.filter((p) => p.status === "running").length;
-	const total = state.def.phases.length;
+	const total = Object.keys(state.phases).length;
 	const head =
 		state.status === "completed"

package/extensions/runner.ts CHANGED

@@ -25,6 +25,8 @@ export interface RunResult {
 	errorMessage?: string;
 	/** Total subagent attempts incl. retries (set by the runtime's retry wrapper). */
 	attempts?: number;
+	/** Set when the subagent was killed by the idle watchdog (not a user abort). */
+	idleTimeout?: boolean;
 }
 export interface LiveUpdate {
@@ -74,6 +76,8 @@ const TRANSIENT_ERROR_RE =
 	/rate[_\s-]?limit|too\s+many\s+requests|overloaded|\b429\b|\b503\b|\b502\b|\b504\b|service\s+unavailable|temporarily\s+unavailable|timeout|timed?\s+out|econnreset|etimedout|socket\s+hang\s*up/i;
 export function isTransientError(r: RunResult): boolean {
 	if (r.stopReason === "aborted") return false;
+	// Idle timeout is a deterministic stall — retrying won't help.
+	if (r.stopReason === "error" && r.idleTimeout) return false;
 	const hay = `${r.errorMessage ?? ""} ${r.stderr ?? ""} ${r.output ?? ""}`;
 	return TRANSIENT_ERROR_RE.test(hay);
 }
@@ -153,6 +157,8 @@ export interface EventAccumulator {
 	stopReason?: string;
 	errorMessage?: string;
 	lastActivity: string;
+	/** Set when message cap was hit — output gets a truncation notice. */
+	truncated?: boolean;
 }
 export function newAccumulator(model?: string): EventAccumulator {
@@ -175,7 +181,15 @@ export function foldEventLine(acc: EventAccumulator, line: string): LiveUpdate |
 	}
 	if (event.type !== "message_end" || !event.message) return null;
 	const msg = event.message as Message;
-	acc.messages.push(msg);
+	// Cap prevents OOM from misconfigured loops. 500 messages is generous for
+	// normal subagent tasks (50 turns × 10 messages each). Messages beyond the
+	// cap are still parsed for usage/model/stopReason extraction.
+	const MAX_MESSAGES = 500;
+	if (acc.messages.length < MAX_MESSAGES) {
+		acc.messages.push(msg);
+	} else {
+		acc.truncated = true;
+	}
 	if (msg.role !== "assistant") return null;
 	acc.usage.turns++;
 	const u = (msg as any).usage;
@@ -323,6 +337,7 @@ export async function runAgentTask(
 		let wasAborted = false;
 		let idleTimedOut = false;
+		let killedBySignal: string | undefined;
 		const exitCode = await new Promise<number>((resolve) => {
 			const invocation = getPiInvocation(args);
 			const proc = spawn(invocation.command, invocation.args, {
@@ -371,12 +386,19 @@ export async function runAgentTask(
 				buffer = lines.pop() || "";
 				for (const line of lines) processLine(line);
 			});
+			// Cap prevents OOM from verbose tool output (e.g., npm install). 64 KB is
+			// generous for error diagnosis while preventing memory exhaustion.
+			const STDERR_MAX_LEN = 64 * 1024;
 			proc.stderr.on("data", (data) => {
 				result.stderr += data.toString();
+				if (result.stderr.length >= STDERR_MAX_LEN) {
+					result.stderr = result.stderr.slice(0, STDERR_MAX_LEN) + "\n[...stderr truncated at 64KB]";
+				}
 			});
-			proc.on("close", (code) => {
+			proc.on("close", (code, signal) => {
 				clearTimers();
 				if (buffer.trim()) processLine(buffer);
+				if (code === null && signal) killedBySignal = signal;
 				resolve(code ?? 0);
 			});
 			proc.on("error", (err) => {
@@ -411,11 +433,25 @@ export async function runAgentTask(
 		result.stopReason = acc.stopReason;
 		result.errorMessage = acc.errorMessage;
 		result.output = getFinalOutput(acc.messages);
+		// M-6: surface truncation when the message cap was hit so downstream
+		// phases and the user know output was cut short.
+		if (acc.truncated) {
+			result.output += "\n\n[...output truncated after 500 messages]";
+		}
+		// Signal kill detection: process exited 0 but was killed by a signal
+		// (e.g. OOM killer, cgroup limit). Treat as failure so the runtime's
+		// retry/fail handling doesn't silently accept a truncated result.
+		if (exitCode === 0 && killedBySignal && !idleTimedOut && !wasAborted) {
+			result.exitCode = 1;
+			result.stopReason = "error";
+			result.errorMessage = `Subagent killed by signal ${killedBySignal}`;
+		}
 		if (idleTimedOut) {
 			// Distinct, actionable signal: the child was killed for being idle, not
 			// a user abort. stopReason "error" keeps it in the failed bucket so the
 			// runtime's retry/fail handling treats it as a real failure.
 			result.stopReason = "error";
+			result.idleTimeout = true;
 			result.errorMessage = `Subagent stalled: no output for ${Math.round((opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS) / 1000)}s (idle timeout) — killed`;
 		} else if (wasAborted) {
 			result.stopReason = "aborted";

package/extensions/runs-view.ts CHANGED

@@ -29,7 +29,7 @@ function statusBadge(status: RunState["status"], theme: Theme): string {
 }
 function timeAgo(ts: number): string {
-	const s = Math.floor((Date.now() - ts) / 1000);
+	const s = Math.max(0, Math.floor((Date.now() - ts) / 1000));
 	if (s < 60) return `${s}s ago`;
 	if (s < 3600) return `${Math.floor(s / 60)}m ago`;
 	if (s < 86400) return `${Math.floor(s / 3600)}h ago`;
@@ -37,7 +37,7 @@ function timeAgo(ts: number): string {
 }
 function isResumable(r: RunState): boolean {
-	return r.status === "paused" || r.status === "failed" || r.status === "blocked";
+	return r.status === "paused" || r.status === "failed";
 }
 export class RunHistoryComponent {

package/extensions/runtime.ts CHANGED

@@ -70,8 +70,17 @@ function buildInterpolationContext(
 ): InterpolationContext {
 	const steps: Record<string, { output: string; json?: unknown }> = {};
 	for (const [id, ps] of Object.entries(state.phases)) {
-		if (ps.status === "done" && ps.output !== undefined) {
-			steps[id] = { output: ps.output, json: ps.json };
+		// Include both done AND failed phases so downstream phases can see
+		// error info. Skipped phases (upstream failure cascade) are excluded.
+		if (ps.status === "done" || ps.status === "failed") {
+			if (ps.output !== undefined) {
+				steps[id] = { output: ps.output, json: ps.json };
+			} else if (ps.status === "failed") {
+				// M-3: Failed phases without output get a placeholder so
+				// downstream references like {steps.X.output} resolve to a
+				// sensible value instead of leaving the raw placeholder intact.
+				steps[id] = { output: "[previous phase failed]", json: undefined };
+			}
 		}
 	}
 	return { args: state.args, steps, previousOutput, locals };
@@ -80,10 +89,16 @@ function buildInterpolationContext(
 function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
 	const failed = isFailed(r);
 	const attempts = attemptsOf(r);
+	// For failed phases, embed the error info in the output so downstream
+	// phases (and the user) can see what went wrong. The raw r.output is
+	// often a useless placeholder like "(upstream error: subagent failed)".
+	const output = failed
+		? r.errorMessage || r.stderr || r.output
+		: r.output;
 	return {
 		id,
 		status: failed ? "failed" : "done",
-		output: r.output,
+		output,
 		json: parseJson && !failed ? safeParse(r.output) : undefined,
 		usage: r.usage,
 		model: r.model,
@@ -156,8 +171,13 @@ function mergePhaseState(
 	// which model produced the merged output.
 	const model = ran.find((r) => r.model !== undefined)?.model;
 	// Combine outputs as a labelled list; also expose a JSON array of outputs.
+	// For failed items, use the error message instead of the useless placeholder.
 	const combinedText = ran
-		.map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
+		.map((r, i) => {
+			const label = `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}`;
+			const content = isFailed(r) ? (r.errorMessage || r.stderr || r.output) : r.output;
+			return `${label}\n\n${content}`;
+		})
 		.join("\n\n---\n\n");
 	// Only successful runs feed the parsed JSON array (no error/skip strings).
 	const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
@@ -373,7 +393,14 @@ async function executePhase(
 			// Backoff: prefer the explicit policy's curve when the phase defines one
 			// (covers transient retries too, and keeps tests fast with backoffMs:0),
 			// otherwise use the transient defaults.
-			const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
+			const baseMs = retry?.backoffMs != null ? retry.backoffMs : DEFAULT_TRANSIENT_BACKOFF_MS;
+			// Factor asymmetry is intentional:
+			// - Explicit retry: backoffMs * (factor ?? 1) ^ attempt — user's
+			//   curve, defaults to flat (factor=1 → constant backoff).
+			// - Transient fallback: backoffMs * 2 ^ attempt — exponential.
+			// This lets users opt into flat retry with retry: {max:3} without
+			// specifying factor, while transient errors get proper exponential
+			// backoff.
 			const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
 			const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
 			if (wait > 0) await delay(wait, deps.signal);
@@ -742,7 +769,7 @@ async function executePhase(
 		for (let i = 1; i <= maxIters; i++) {
 			if (deps.signal?.aborted) {
-				stop = "failed";
+				stop = "aborted";
 				break;
 			}
 			iterations = i;
@@ -788,14 +815,14 @@ async function executePhase(
 		}
 		const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
-		if (failedResult) {
+		if (failedResult || stop === "failed" || stop === "aborted") {
 			return {
 				id: phase.id,
 				status: "failed",
 				output: lastOutput || undefined,
 				usage: aggUsage,
-				error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
-				loop: { iterations, stop: "failed" },
+				error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
+				loop: { iterations, stop },
 				warnings: loopWarnings.length ? loopWarnings : undefined,
 				inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
 				endedAt: Date.now(),
@@ -868,6 +895,22 @@ async function executePhase(
 			};
 		}
+		// Guard: skip the judge if the run is over budget or aborted.
+		if (deps.signal?.aborted || overBudget(state).over) {
+			return {
+				id: phase.id,
+				status: "done",
+				output: ok[0].output,
+				json: parseJson ? safeParse(ok[0].output) : undefined,
+				usage: variantUsage,
+				model: ok[0].model,
+				warnings: ["judge skipped: run aborted or budget exceeded"],
+				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
+				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				endedAt: Date.now(),
+			};
+		}
 		// Build the judge prompt: label every variant output, then the rubric.
 		const labelled = ran
 			.map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
@@ -1288,6 +1331,10 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 				if (!budgetReason) budgetReason = "fan-out truncated by budget";
 			}
 			// Budget ceiling: once exceeded, remaining phases are skipped.
+			// For concurrent same-layer phases, the check runs after each phase
+			// completes, so at most (concurrency - 1) extra phases may run before
+			// the budget is detected as exceeded. This bounded overshoot is
+			// acceptable: budgetBlocked prevents cascading into subsequent layers.
 			const ob = overBudget(state);
 			if (ob.over && !budgetBlocked) {
 				budgetBlocked = true;

package/extensions/schema.ts CHANGED

@@ -235,7 +235,7 @@ const ArgSpecSchema = Type.Object(
 export const TaskflowSchema = Type.Object(
 	{
-		name: Type.String({ description: "Workflow name (becomes /tf:<name> command when saved)" }),
+		name: Type.String({ minLength: 1, description: "Workflow name (becomes /tf:<name> command when saved)" }),
 		description: Type.Optional(Type.String()),
 		version: Type.Optional(Type.Number({ default: 1 })),
 		args: Type.Optional(Type.Record(Type.String(), ArgSpecSchema, { description: "Declared invocation arguments" })),

package/extensions/store.ts CHANGED

@@ -59,7 +59,7 @@ export interface PhaseState {
 	/** Human-in-the-loop outcome (approval phases only). */
 	approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
 	/** Loop iteration accounting (loop phases only). */
-	loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" };
+	loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
 	/** Tournament outcome (tournament phases only). */
 	tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
 	/** Non-fatal diagnostic warnings accumulated during this phase (e.g.
@@ -121,9 +121,16 @@ const DEFAULT_MAX_KEPT_TERMINAL = 100;
 /** Remove terminal runs older than this (days). */
 const DEFAULT_MAX_AGE_DAYS = 30;
+// Re-exported for use in TaskflowSettings defaults (agents.ts).
+export const DEFAULT_KEPT_RUNS = DEFAULT_MAX_KEPT_TERMINAL;
+export const DEFAULT_RUN_AGE_DAYS = DEFAULT_MAX_AGE_DAYS;
 /** Last cleanup timestamp — module-level so it persists across calls. */
 let lastCleanupAt = 0;
+/** Shared buffer for Atomics.wait in acquireLock busy-wait (Finding 6). */
+const LOCK_WAIT_BUF = new Int32Array(new SharedArrayBuffer(4));
 // ---------------------------------------------------------------------------
 // Internal helpers — path construction & sanitisation
 // ---------------------------------------------------------------------------
@@ -138,7 +145,7 @@ let lastCleanupAt = 0;
  * bare-dot / leading-dot components after the character substitution so the
  * write path can never escape runs/ (risk-reviewer v0.0.9 audit, H1).
  */
-function safeFlowDirName(flowName: string): string {
+export function safeFlowDirName(flowName: string): string {
 	let safe = flowName.replace(/[^\w.-]+/g, "_");
 	// Collapse leading dots: blocks ".", "..", and hidden-dir names like ".git".
 	safe = safe.replace(/^\.+/, "_");
@@ -241,7 +248,7 @@ function acquireLock(lockPath: string, timeoutMs: number = LOCK_TIMEOUT_MS): voi
 				throw new Error(`Lock timeout after ${timeoutMs}ms waiting for ${path.basename(lockPath)}`);
 			}
 			// Busy-wait with Atomics.wait (CPU-efficient sleep).
-			Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, LOCK_POLL_MS);
+			Atomics.wait(LOCK_WAIT_BUF, 0, 0, LOCK_POLL_MS);
 		}
 	}
 }
@@ -388,11 +395,18 @@ function rebuildIndex(runsRoot: string): RunIndexEntry[] {
 		} catch { /* skip corrupt */ }
 	}
-	const result = Array.from(entries.values());
-	// Persist the rebuilt index under the index lock so it does not race a
-	// concurrent updateIndexEntry / cleanup write (M1).
-	withLock(indexLockPath(runsRoot), () => writeIndex(runsRoot, result));
-	return result;
+	const scanned = Array.from(entries.values());
+	// Persist the rebuilt index under the index lock. Re-read the current
+	// index inside the lock and merge by runId so concurrent writes are not
+	// clobbered — scanned entries win on conflict (Finding 5).
+	withLock(indexLockPath(runsRoot), () => {
+		const currentIndex = readIndex(runsRoot);
+		const merged = new Map<string, RunIndexEntry>();
+		for (const e of currentIndex) merged.set(e.runId, e);
+		for (const e of scanned) merged.set(e.runId, e); // scanned wins
+		writeIndex(runsRoot, Array.from(merged.values()));
+	});
+	return scanned;
 }
 // ---------------------------------------------------------------------------
@@ -418,7 +432,8 @@ function cleanupTerminalRuns(
 	maxKeep: number = DEFAULT_MAX_KEPT_TERMINAL,
 	maxAgeDays: number = DEFAULT_MAX_AGE_DAYS,
 ): void {
-	const now = Date.now();
+	const cleanupStarted = Date.now();
+	const now = cleanupStarted;
 	if (now - lastCleanupAt < CLEANUP_INTERVAL_MS) return;
 	lastCleanupAt = now;
@@ -460,9 +475,17 @@ function cleanupTerminalRuns(
 	if (toRemove.length === 0) return;
+	console.warn(
+		`[taskflow] Cleaning up ${toRemove.length} old run(s) ` +
+		`(max ${maxKeep} runs, ${maxAgeDays} day age limit). ` +
+		`Configure 'taskflow.maxKeptRuns' / 'taskflow.maxRunAgeDays' in settings.json (0 = keep all).`,
+	);
 	// Delete run files + lock files (outside the index lock).
 	for (const e of toRemove) {
 		const filePath = path.join(runsRoot, e.relPath);
+		// Race guard: skip files modified after cleanup started (Finding 2).
+		try { if (fs.statSync(filePath).mtimeMs > cleanupStarted) continue; } catch { continue; }
 		try { fs.unlinkSync(filePath); } catch { /* already gone */ }
 		// Also remove any orphaned lock file.
 		try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
@@ -548,22 +571,40 @@ export function getFlow(cwd: string, name: string): SavedFlow | null {
 	return listFlows(cwd).find((f) => f.name === name) ?? null;
 }
+let _piCreationHinted = false;
 export function saveFlow(
 	cwd: string,
 	def: Taskflow,
 	scope: "user" | "project" = "project",
 ): { filePath: string } {
 	const dir = scope === "user" ? userFlowsDir() : (findProjectFlowsDir(cwd, true) ?? path.join(cwd, ".pi", "taskflows"));
+	if (!def.name || def.name.trim().length === 0) throw new Error("Flow name must not be empty");
 	fs.mkdirSync(dir, { recursive: true });
-	const safe = def.name.replace(/[^\w.-]+/g, "_");
+	const safe = safeFlowDirName(def.name);
 	const filePath = path.join(dir, `${safe}.json`);
-	writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`);
+	const fileLockPath = filePath + ".lock";
+	withLock(fileLockPath, () => { writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`); });
+	// One-shot: let the user know about .pi/ directory on first save (Finding 8).
+	if (!_piCreationHinted) {
+		_piCreationHinted = true;
+		const piExisted = fs.existsSync(path.join(dir, "..", ".."));
+		console.warn(
+			`[taskflow] ${piExisted ? "Using" : "Created"} .pi/taskflows/ for project-scoped flow storage. ` +
+			`Add .pi/ to .gitignore if desired.`,
+		);
+	}
 	return { filePath };
 }
 // --- Run state ---
 function runsDir(cwd: string): string {
+	// Safe non-null assertion: create=true guarantees a non-null return because
+	// findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
 	const projDir = findProjectFlowsDir(cwd, true)!;
 	return path.join(projDir, "runs");
 }
@@ -590,7 +631,10 @@ export function newRunId(flowName: string): string {
  * F-009: shallow-clones state before stamping updatedAt to avoid mutating the
  * caller's reference.
  */
-export function saveRun(state: RunState): void {
+export function saveRun(state: RunState, cleanup?: { maxKeep?: number; maxAgeDays?: number }): void {
+	// Reject unsafe runIds before any filesystem access (Finding 1).
+	if (!validateRunId(state.runId)) return;
 	const root = runsDir(state.cwd);
 	const flowDir = flowRunDir(root, state.flowName);
 	fs.mkdirSync(flowDir, { recursive: true });
@@ -608,7 +652,11 @@ export function saveRun(state: RunState): void {
 	});
 	// Opportunistic cleanup — throttled to once per CLEANUP_INTERVAL_MS.
-	cleanupTerminalRuns(root);
+	const maxKeep = cleanup?.maxKeep ?? DEFAULT_MAX_KEPT_TERMINAL;
+	const maxAgeDays = cleanup?.maxAgeDays ?? DEFAULT_MAX_AGE_DAYS;
+	if (maxKeep > 0 || maxAgeDays > 0) {
+		cleanupTerminalRuns(root, maxKeep, maxAgeDays);
+	}
 }
 /**

package/extensions/verify.ts CHANGED

@@ -253,6 +253,7 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
 		}
 	}
+	const ESTIMATED_COST_PER_PHASE = 0.001; // $0.001 minimum per subagent call
 	if (budget.maxTokens !== undefined && budget.maxTokens > 0 && minTokens > budget.maxTokens) {
 		issues.push({
 			message:
@@ -263,6 +264,16 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
 			category: "budget-overflow",
 		});
 	}
+	if (budget.maxUSD !== undefined && budget.maxUSD > 0 && minTokens * ESTIMATED_COST_PER_PHASE > budget.maxUSD) {
+		issues.push({
+			message:
+				`Budget cap ($${budget.maxUSD}) is below the estimated minimum of ~$${(minTokens * ESTIMATED_COST_PER_PHASE).toFixed(3)} ` +
+				`for ${flow.phases.length} phase(s). The flow will likely be truncated before completion. ` +
+				`Increase maxUSD or reduce the number of phases.`,
+			severity: "warning",
+			category: "budget-overflow",
+		});
+	}
 	return issues;
 }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "pi-taskflow",
-  "version": "0.0.15",
+  "version": "0.0.17",
   "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
   "keywords": [
     "pi-package",
@@ -31,12 +31,14 @@
     "skills",
     "examples",
     "README.md",
+    "README.zh-CN.md",
+    "CHANGELOG.md",
     "DESIGN.md",
     "LICENSE"
   ],
   "scripts": {
     "typecheck": "tsc --noEmit",
-    "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts",
+    "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts",
     "test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
     "test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
   },
@@ -46,7 +48,8 @@
     ],
     "skills": [
       "./skills"
-    ]
+    ],
+    "image": "https://raw.githubusercontent.com/heggria/pi-taskflow/main/assets/social-preview.png"
   },
   "peerDependencies": {
     "@earendil-works/pi-agent-core": "*",

package/skills/taskflow/SKILL.md CHANGED

@@ -310,7 +310,7 @@ Quick reference:
 - **Flow:** `name`, `description`, `concurrency` (default 8), `budget` (`maxUSD`/`maxTokens`), `agentScope` (user|project|both), `args`, `strictInterpolation`.
 - **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`with` (flow), `final`.
-- **Precedence (model/thinking/tools):** phase value → `settings.subagents.agentOverrides[agent]` → agent frontmatter → global/default.
+- **Precedence (model/thinking/tools):** phase value → agent frontmatter (resolved via `modelRoles`) → global/default.
 - **Concurrency:** same-layer phases use `flow.concurrency`; a `map`/`parallel` phase uses `phase.concurrency ?? flow.concurrency ?? 8`.
 ## Actions

package/skills/taskflow/configuration.md CHANGED

@@ -11,7 +11,7 @@ Configuration lives in **five layers**, from most local to most global:
 | Phase | a phase object in the DSL | per-step model/thinking/tools/cwd/output/concurrency |
 | Flow | the top-level DSL object | name, args, default concurrency, agent scope |
 | Agent | `~/.pi/agent/agents/*.md`, `.pi/agents/*.md` frontmatter | per-agent default model/thinking/tools + system prompt |
-| Settings | `~/.pi/agent/settings.json` | `subagents.agentOverrides`, global thinking |
+| Settings | `~/.pi/agent/settings.json` | `modelRoles`, global thinking |
 | Environment | shell env | `PI_TASKFLOW_PI_BIN` |
 ---
@@ -156,9 +156,9 @@ For any phase, the effective value is resolved in this **precedence order**
 | Setting | Precedence (high → low) |
 |---------|-------------------------|
-| **model** | `phase.model` → `settings.agentOverrides[agent].model` → agent frontmatter `model` → pi default |
-| **thinking** | `phase.thinking` → `settings.agentOverrides[agent].thinking` → agent frontmatter `thinking` → `settings` global thinking → pi default |
-| **tools** | `phase.tools` → `settings.agentOverrides[agent].tools` → agent frontmatter `tools` → all tools |
+| **model** | `phase.model` → agent frontmatter `model` (resolved via `modelRoles`) → pi default |
+| **thinking** | `phase.thinking` → agent frontmatter `thinking` → `settings` global thinking → pi default |
+| **tools** | `phase.tools` → agent frontmatter `tools` → all tools |
 Notes:
 - `tools` is a **whitelist** passed as `--tools a,b,c`. Omit it to allow all.
@@ -192,19 +192,18 @@ Taskflow shares the subagent settings file at `~/.pi/agent/settings.json`:
 ```jsonc
 {
+  "modelRoles": {
+    "fast": "openrouter/deepseek/deepseek-v4-flash",
+    "strong": "openrouter/xiaomi/mimo-v2.5-pro"
+  },
   "subagents": {
-    "globalThinking": "medium",          // fallback thinking for all subagents
-    "agentOverrides": {
-      "analyst": { "model": "claude-sonnet-4-5", "thinking": "high" },
-      "scout":   { "tools": ["read", "bash", "grep"] }
-    }
+    "globalThinking": "medium"              // fallback thinking for all subagents
   },
   "defaultThinkingLevel": "low"          // used if subagents.globalThinking is absent
 }
 ```
-- `subagents.agentOverrides` — per-agent overrides applied at discovery; they beat
-  agent frontmatter but lose to a phase-level value (see §5).
+- `modelRoles` — maps `{{role}}` references in agent frontmatter to actual model identifiers.
 - `subagents.globalThinking` (or top-level `defaultThinkingLevel`) — global
   thinking fallback.