npm - pi-taskflow - Versions diffs - 0.0.19 → 0.0.21 - Mend

pi-taskflow 0.0.19 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/CHANGELOG.md +23 -0
package/README.md +8 -8
package/extensions/agents.ts +8 -1
package/extensions/approval-view.ts +264 -0
package/extensions/cache.ts +1 -0
package/extensions/detached-runner.ts +79 -0
package/extensions/index.ts +102 -11
package/extensions/interpolate.ts +1 -1
package/extensions/runner.ts +19 -4
package/extensions/runtime.ts +35 -17
package/extensions/schema.ts +82 -17
package/extensions/store.ts +40 -6
package/package.json +2 -2
package/skills/taskflow/SKILL.md +35 -9
package/skills/taskflow/configuration.md +3 -3

package/extensions/runner.ts CHANGED Viewed

@@ -13,6 +13,15 @@ import { withFileMutationQueue } from "@earendil-works/pi-coding-agent";
 import type { AgentConfig } from "./agents.ts";
 import { emptyUsage, type UsageStats } from "./usage.ts";
+const activeChildren = new Set<number>();
+const killAll = () => {
+	for (const pid of activeChildren) {
+		try { process.kill(pid, "SIGKILL"); } catch { /* already dead */ }
+	}
+};
+process.on("exit", killAll);
+process.on("SIGTERM", () => { killAll(); process.exit(143); });
 export interface RunResult {
 	agent: string;
 	task: string;
@@ -60,7 +69,7 @@ export interface RunOptions {
  * 5 minutes is generous enough for slow reasoning/long tool calls while still
  * bounding a true hang.
  */
-export const DEFAULT_IDLE_TIMEOUT_MS = 5 * 60_000;
+const DEFAULT_IDLE_TIMEOUT_MS = 5 * 60_000;
 export function isFailed(r: RunResult): boolean {
 	return r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted";
@@ -345,6 +354,7 @@ export async function runAgentTask(
 				shell: false,
 				stdio: ["ignore", "pipe", "pipe"],
 			});
+			if (proc.pid) activeChildren.add(proc.pid);
 			let buffer = "";
 			// Idle watchdog: a subagent that goes silent on stdout for too long is
@@ -389,13 +399,18 @@ export async function runAgentTask(
 			// Cap prevents OOM from verbose tool output (e.g., npm install). 64 KB is
 			// generous for error diagnosis while preventing memory exhaustion.
 			const STDERR_MAX_LEN = 64 * 1024;
+			let stderrCapped = false;
 			proc.stderr.on("data", (data) => {
-				result.stderr += data.toString();
-				if (result.stderr.length >= STDERR_MAX_LEN) {
-					result.stderr = result.stderr.slice(0, STDERR_MAX_LEN) + "\n[...stderr truncated at 64KB]";
+				if (!stderrCapped) {
+					result.stderr += data.toString();
+					if (result.stderr.length >= STDERR_MAX_LEN) {
+						result.stderr = result.stderr.slice(0, STDERR_MAX_LEN) + "\n[...stderr truncated at 64KB]";
+						stderrCapped = true;
+					}
 				}
 			});
 			proc.on("close", (code, signal) => {
+				if (proc.pid) activeChildren.delete(proc.pid);
 				clearTimers();
 				if (buffer.trim()) processLine(buffer);
 				if (code === null && signal) killedBySignal = signal;

package/extensions/runtime.ts CHANGED Viewed

@@ -47,7 +47,7 @@ export interface RuntimeDeps {
 	onProgress?: (state: RunState) => void;
 	/** Injectable task runner (defaults to spawning a real subagent). Enables testing. */
 	runTask?: typeof runAgentTask;
-	/** Resolve an `approval` phase. Omit for non-interactive runs (auto-approve). */
+	/** Resolve an `approval` phase. Omit for non-interactive runs (auto-reject). */
 	requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
 	/** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
 	loadFlow?: (name: string) => Taskflow | undefined;
@@ -392,6 +392,7 @@ async function executePhase(
 		runId: state.runId,
 		thinking: phase.thinking,
 		tools: phase.tools,
+		preRead,
 	};
 	const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
@@ -700,13 +701,16 @@ async function executePhase(
 		const cached = cachedPhase(cc, inputHash);
 		if (cached) return cached;
-		// Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
+		// Non-interactive (headless/CI/detached): auto-REJECT, fail-open, but record it.
+		// Approval gates are safety boundaries — bypassing them silently in CI would
+		// let unreviewed work ship. Detached/CI runs must not bypass approval gates.
 		if (!deps.requestApproval) {
 			return {
 				id: phase.id,
 				status: "done",
-				output: "(auto-approved: no interactive approver available)",
-				approval: { decision: "approve", auto: true },
+				output: "(auto-rejected: no interactive approver available)",
+				approval: { decision: "reject", auto: true },
+				gate: { verdict: "block", reason: "(auto-rejected: no interactive approver available)" },
 				usage: emptyUsage(),
 				inputHash,
 				endedAt: Date.now(),
@@ -1025,6 +1029,7 @@ async function executePhase(
 		// Using indexOf on the stable `ran` array is reference-based and correct even
 		// when two variants produce byte-identical output.
 		const ranIdx = (r: RunResult) => ran.indexOf(r) + 1;
+		const budgetSkipCount = results.filter((r) => r.stopReason === "budget-skipped").length;
 		// All competitors failed → the tournament fails (nothing to judge).
 		if (ok.length === 0) {
@@ -1033,6 +1038,7 @@ async function executePhase(
 				status: "failed",
 				usage: variantUsage,
 				error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
+				budgetTruncated: budgetSkipCount > 0 || undefined,
 				tournament: { variants: competitors.length, winner: 0, mode },
 				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
 				endedAt: Date.now(),
@@ -1047,6 +1053,7 @@ async function executePhase(
 				json: parseJson ? safeParse(ok[0].output) : undefined,
 				usage: variantUsage,
 				model: ok[0].model,
+				budgetTruncated: budgetSkipCount > 0 || undefined,
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
 				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
 				endedAt: Date.now(),
@@ -1062,6 +1069,7 @@ async function executePhase(
 				json: parseJson ? safeParse(ok[0].output) : undefined,
 				usage: variantUsage,
 				model: ok[0].model,
+				budgetTruncated: budgetSkipCount > 0 || undefined,
 				warnings: ["judge skipped: run aborted or budget exceeded"],
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
 				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
@@ -1095,6 +1103,7 @@ async function executePhase(
 				json: parseJson ? safeParse(ok[0].output) : undefined,
 				usage: judgeUsage,
 				model: ok[0].model,
+				budgetTruncated: budgetSkipCount > 0 || undefined,
 				warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
 				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
@@ -1117,6 +1126,7 @@ async function executePhase(
 			json: parseJson ? safeParse(output) : undefined,
 			usage: judgeUsage,
 			model: mode === "aggregate" ? judgeRes.model : chosen.model,
+			budgetTruncated: budgetSkipCount > 0 || undefined,
 			warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
 			tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
 			inputHash: hashInput(phase.id, "tournament", String(competitors.length), mode),
@@ -1179,15 +1189,26 @@ interface PhaseCacheCtx {
 	 *  silently serve a stale cross-run hit). */
 	thinking?: string;
 	tools?: string[];
+	/** Resolved `context` pre-read content. Explicitly part of the cache identity
+	 *  so a context-file change always invalidates the phase — independent of
+	 *  whether a given branch happens to fold preRead into its task string
+	 *  (previously this was only incidentally true via `fullTask`). */
+	preRead?: string;
 }
 /** Fold the phase fingerprint into the base hash parts to form the final cache key. */
 function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
 	// Fold the full cache identity into the hash: flow name (prevents collisions
 	// across different flows that share a phase.id + task + model), the per-phase
-	// thinking/tools config (changing either changes the subagent's output), and
-	// the resolved world-state fingerprint.
-	const parts = [`flow:${cc.flowName}`, ...baseParts, `think:${cc.thinking ?? ""}`, `tools:${JSON.stringify(cc.tools ?? [])}`];
+	// thinking/tools config (changing either changes the subagent's output), the
+	// resolved context pre-read content, and the world-state fingerprint.
+	const parts = [
+		`flow:${cc.flowName}`,
+		...baseParts,
+		`think:${cc.thinking ?? ""}`,
+		`tools:${JSON.stringify(cc.tools ?? [])}`,
+		`ctx:${cc.preRead ?? ""}`,
+	];
 	return cc.fingerprint ? hashInput(...parts, cc.fingerprint) : hashInput(...parts);
 }
@@ -1398,12 +1419,10 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 	let gateBlocked = false;
 	let gateReason = "";
 	let gateOutput = "";
-	// `budgetBlocked` gates the skipping of remaining phases once the cap is hit.
-	// `budgetSkipped` records that a phase was *actually* skipped/truncated for
-	// budget — only then is the run terminal-status "blocked" (a cap crossed by the
-	// very last phase, with nothing left to skip, must NOT mark a good run failed).
+	// `budgetBlocked` gates the skipping of remaining phases once the cap is hit
+	// and also drives the terminal "blocked" status — a maxUSD ceiling must never
+	// silently do nothing.
 	let budgetBlocked = false;
-	let budgetSkipped = false;
 	let budgetReason = "";
 	const byId = new Map(def.phases.map((p) => [p.id, p]));
@@ -1442,7 +1461,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 			}
 			if (skipReason) {
-				if (skipReason.startsWith("Budget exceeded")) budgetSkipped = true;
+				if (skipReason.startsWith("Budget exceeded")) budgetBlocked = true;
 				state.phases[phase.id] = {
 					id: phase.id,
 					status: "skipped",
@@ -1485,7 +1504,6 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 			// A fan-out cut short by the cap is itself a budget skip.
 			if (ps.budgetTruncated) {
 				budgetBlocked = true;
-				budgetSkipped = true;
 				if (!budgetReason) budgetReason = "fan-out truncated by budget";
 			}
 			// Budget ceiling: once exceeded, remaining phases are skipped.
@@ -1494,7 +1512,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 			// the budget is detected as exceeded. This bounded overshoot is
 			// acceptable: budgetBlocked prevents cascading into subsequent layers.
 			const ob = overBudget(state);
-			if (ob.over && !budgetBlocked) {
+			if (ob.over) {
 				budgetBlocked = true;
 				budgetReason = ob.reason;
 			}
@@ -1517,7 +1535,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 	state.status = aborted
 		? "paused"
-		: gateBlocked || budgetSkipped
+		: gateBlocked || budgetBlocked
 			? "blocked"
 			: anyFailed
 				? "failed"
@@ -1527,7 +1545,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 	let finalOutput = finalState?.output ?? "(no output)";
 	if (gateBlocked) {
 		finalOutput = `Gate blocked the workflow.${gateReason ? `\nReason: ${gateReason}` : ""}${gateOutput ? `\n\n${gateOutput}` : ""}`;
-	} else if (budgetSkipped) {
+	} else if (budgetBlocked) {
 		finalOutput = `Budget exceeded — run halted.${budgetReason ? `\nReason: ${budgetReason}` : ""}${finalState?.output ? `\n\n${finalState.output}` : ""}`;
 	}

package/extensions/schema.ts CHANGED Viewed

@@ -13,8 +13,8 @@ import { Type, type Static } from "typebox";
 // Phase types
 // ---------------------------------------------------------------------------
-export const PHASE_TYPES = ["agent", "parallel", "map", "gate", "reduce", "approval", "flow", "loop", "tournament"] as const;
-export type PhaseType = (typeof PHASE_TYPES)[number];
+const PHASE_TYPES = ["agent", "parallel", "map", "gate", "reduce", "approval", "flow", "loop", "tournament"] as const;
+type PhaseType = (typeof PHASE_TYPES)[number];
 /** Loop iteration bounds. Authors may lower the max; the hard cap is a runaway guard. */
 export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
@@ -36,17 +36,18 @@ export const MAX_DYNAMIC_CONCURRENCY = 16;
 /** Tournament competitor bounds. */
 export const TOURNAMENT_DEFAULT_VARIANTS = 3;
 export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
-export const TOURNAMENT_MODES = ["best", "aggregate"] as const;
+const TOURNAMENT_MODES = ["best", "aggregate"] as const;
+/** @internal */
 export type TournamentMode = (typeof TOURNAMENT_MODES)[number];
-export const OUTPUT_FORMATS = ["text", "json"] as const;
-export const JOIN_MODES = ["all", "any"] as const;
-export const CACHE_SCOPES = ["run-only", "cross-run", "off"] as const;
+const OUTPUT_FORMATS = ["text", "json"] as const;
+const JOIN_MODES = ["all", "any"] as const;
+const CACHE_SCOPES = ["run-only", "cross-run", "off"] as const;
 export type CacheScope = (typeof CACHE_SCOPES)[number];
 /** Allowed fingerprint entry prefixes. `glob!:` = content-hash variant of `glob:`. */
-export const CACHE_FINGERPRINT_PREFIXES = ["git:", "glob:", "glob!:", "file:", "env:"] as const;
+const CACHE_FINGERPRINT_PREFIXES = ["git:", "glob:", "glob!:", "file:", "env:"] as const;
 /** Phase types that must NOT be cached across runs (a fresh result is required each run). */
-export const CACHE_CROSS_RUN_BLOCKED_TYPES = ["gate", "approval", "loop", "tournament"] as const;
+const CACHE_CROSS_RUN_BLOCKED_TYPES = ["gate", "approval", "loop", "tournament"] as const;
 const ParallelTaskSchema = Type.Object(
 	{
@@ -282,7 +283,7 @@ export type ArgSpec = Static<typeof ArgSpecSchema>;
 export type RetryPolicy = Static<typeof RetrySchema>;
 export type Budget = Static<typeof BudgetSchema>;
 export type CachePolicy = Static<typeof CacheSchema>;
-export type JoinMode = (typeof JOIN_MODES)[number];
+type JoinMode = (typeof JOIN_MODES)[number];
 // ---------------------------------------------------------------------------
 // Shorthand (non-DAG) specs — subagent-style ergonomics
@@ -302,6 +303,10 @@ export type JoinMode = (typeof JOIN_MODES)[number];
 export interface ShorthandStep {
 	agent?: string;
 	task: string;
+	/** Files to pre-read and inject before the task (pass-through to Phase.context). */
+	context?: string[];
+	/** Max characters per context file (pass-through to Phase.contextLimit). */
+	contextLimit?: number;
 }
 /** True when `def` is a shorthand spec (no `phases`, but a task/tasks/chain field). */
@@ -316,11 +321,22 @@ export function isShorthand(def: unknown): boolean {
 	);
 }
+/** Coerce an unknown value into a non-empty list of non-empty strings (or undefined). */
+function readContextList(v: unknown): string[] | undefined {
+	if (!Array.isArray(v)) return undefined;
+	const list = v.filter((x): x is string => typeof x === "string" && x.trim().length > 0);
+	return list.length ? list : undefined;
+}
 function readStep(s: unknown): ShorthandStep {
 	if (typeof s === "string") return { task: s };
 	if (s && typeof s === "object") {
 		const o = s as Record<string, unknown>;
-		return { agent: typeof o.agent === "string" ? o.agent : undefined, task: String(o.task ?? "") };
+		const step: ShorthandStep = { agent: typeof o.agent === "string" ? o.agent : undefined, task: String(o.task ?? "") };
+		const ctx = readContextList(o.context);
+		if (ctx) step.context = ctx;
+		if (typeof o.contextLimit === "number") step.contextLimit = o.contextLimit;
+		return step;
 	}
 	return { task: "" };
 }
@@ -345,10 +361,19 @@ export function desugar(def: unknown): Taskflow {
 	// chain → sequential agent phases
 	if (Array.isArray(d.chain) && d.chain.length > 0) {
+		// Spec-level context in chain mode would be a flow-level default (every
+		// step), which is deliberately NOT supported — declare it per step instead.
+		if (d.context !== undefined || d.contextLimit !== undefined) {
+			console.warn(
+				"[taskflow] Shorthand chain ignores top-level 'context'/'contextLimit' — put them on individual steps instead.",
+			);
+		}
 		const steps = d.chain.map(readStep);
 		const phases: Phase[] = steps.map((s, i) => {
 			const phase: Phase = { id: `step${i + 1}`, type: "agent", task: s.task };
 			if (s.agent) phase.agent = s.agent;
+			if (s.context) phase.context = s.context;
+			if (s.contextLimit !== undefined) phase.contextLimit = s.contextLimit;
 			if (i > 0) phase.dependsOn = [`step${i}`];
 			if (i === steps.length - 1) phase.final = true;
 			return phase;
@@ -356,16 +381,30 @@ export function desugar(def: unknown): Taskflow {
 		return { name: nameOf("chain"), ...meta, phases };
 	}
-	// tasks → one parallel phase (fan-out + merge), no extra aggregation agent
+	// tasks → one parallel phase (fan-out + merge), no extra aggregation agent.
+	// Context is SHARED across all branches (the runtime pre-reads per phase, not
+	// per branch): spec-level context plus the union of step-level contexts.
 	if (Array.isArray(d.tasks) && d.tasks.length > 0) {
-		const branches: ParallelTask[] = d.tasks.map(readStep).map((s) => (s.agent ? { task: s.task, agent: s.agent } : { task: s.task }));
-		return { name: nameOf("parallel"), ...meta, phases: [{ id: "parallel", type: "parallel", branches, final: true }] };
+		const steps = d.tasks.map(readStep);
+		const branches: ParallelTask[] = steps.map((s) => (s.agent ? { task: s.task, agent: s.agent } : { task: s.task }));
+		const phase: Phase = { id: "parallel", type: "parallel", branches, final: true };
+		const shared = [...(readContextList(d.context) ?? []), ...steps.flatMap((s) => s.context ?? [])];
+		if (shared.length) phase.context = Array.from(new Set(shared));
+		const limits = [
+			typeof d.contextLimit === "number" ? d.contextLimit : undefined,
+			...steps.map((s) => s.contextLimit),
+		].filter((n): n is number => typeof n === "number");
+		if (limits.length) phase.contextLimit = Math.max(...limits);
+		return { name: nameOf("parallel"), ...meta, phases: [phase] };
 	}
-	// single task → one agent phase
+	// single task → one agent phase (the spec itself is the step)
 	if (typeof d.task === "string") {
 		const phase: Phase = { id: "main", type: "agent", task: d.task, final: true };
 		if (typeof d.agent === "string") phase.agent = d.agent;
+		const ctx = readContextList(d.context);
+		if (ctx) phase.context = ctx;
+		if (typeof d.contextLimit === "number") phase.contextLimit = d.contextLimit;
 		return { name: nameOf("task"), ...meta, phases: [phase] };
 	}
@@ -376,6 +415,7 @@ export function desugar(def: unknown): Taskflow {
 // Validation (beyond schema: DAG integrity, phase-type requirements)
 // ---------------------------------------------------------------------------
+/** @internal */
 export interface ValidationResult {
 	ok: boolean;
 	errors: string[];
@@ -618,16 +658,41 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 	// placeholder string. The runtime can't infer the intent — fail fast at
 	// validation time so the mistake is caught before the run starts.
 	//
+	// The check uses TRANSITIVE ancestors: if phase B depends on A, and C depends
+	// on B, then C may reference {steps.A.*} transitively. Only truly unreachable
+	// refs are errors.
+	//
 	// Phases with `join: "any"` are exempt: by design they only need ONE of
 	// their declared deps to complete, and may reference other phases as
 	// informational context (not as true dependencies).
 	if (errors.length === 0) {
 		const idToPhase = new Map((flow.phases as Phase[]).map((p) => [p.id, p]));
+		// Memoized transitive ancestors of a phase, found via BFS over dependsOn and from.
+		const transitiveCache = new Map<string, Set<string>>();
+		const transitiveAncestors = (phaseId: string): Set<string> => {
+			const cached = transitiveCache.get(phaseId);
+			if (cached) return cached;
+			const result = new Set<string>();
+			const queue = [...(idToPhase.get(phaseId)?.dependsOn ?? []), ...(idToPhase.get(phaseId)?.from ?? [])];
+			while (queue.length) {
+				const id = queue.shift()!;
+				if (result.has(id)) continue;
+				result.add(id);
+				const dep = idToPhase.get(id);
+				if (dep) {
+					for (const d of [...(dep.dependsOn ?? []), ...(dep.from ?? [])]) {
+						if (!result.has(d)) queue.push(d);
+					}
+				}
+			}
+			transitiveCache.set(phaseId, result);
+			return result;
+		};
 		for (const p of flow.phases as Phase[]) {
 			if (!p?.id) continue;
 			const isJoinAny = p.join === "any";
 			if (isJoinAny) continue;
-			const deps = new Set(dependenciesOf(p));
+			const transitive = transitiveAncestors(p.id);
 			const refs = collectRefs(p);
 			for (const ref of refs.steps) {
 				if (ref === p.id) {
@@ -640,9 +705,9 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 					// double-warn — the dependsOn loop above already flags it.
 					continue;
 				}
-				if (!deps.has(ref)) {
+				if (!transitive.has(ref)) {
 					errors.push(
-						`Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not in dependsOn. ` +
+						`Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not reachable via dependsOn. ` +
 							`The phase will run in parallel with '${ref}' and see the literal placeholder. ` +
 							`Add "dependsOn": ["${ref}"] (or include '${ref}' transitively).`,
 					);

package/extensions/store.ts CHANGED Viewed

@@ -29,6 +29,7 @@ export interface SavedFlow {
 	def: Taskflow;
 }
+/** @internal */
 export type PhaseStatus = "pending" | "running" | "done" | "failed" | "skipped";
 export interface PhaseState {
@@ -84,6 +85,10 @@ export interface RunState {
 	createdAt: number;
 	updatedAt: number;
 	cwd: string;
+	/** OS PID of a detached runner process (set only for background runs). */
+	pid?: number;
+	/** True for runs spawned via `detach: true` (background execution). */
+	detached?: boolean;
 }
 // ---------------------------------------------------------------------------
@@ -458,10 +463,21 @@ function cleanupTerminalRuns(
 		}
 		// Sort terminal by updatedAt desc (newest first).
-		terminal.sort((a, b) => b.updatedAt - a.updatedAt);
+		// Filter out entries with corrupt updatedAt (non-numeric/NaN) BEFORE sorting
+		// to prevent NaN from corrupting sort order. Corrupt entries cannot be
+		// reliably aged, so they are always moved to toRemove.
+		const cleanTerminal: RunIndexEntry[] = [];
+		for (const e of terminal) {
+			if (typeof e.updatedAt === "number" && !Number.isNaN(e.updatedAt)) {
+				cleanTerminal.push(e);
+			} else {
+				toRemove.push(e);
+			}
+		}
+		cleanTerminal.sort((a, b) => b.updatedAt - a.updatedAt);
-		for (let i = 0; i < terminal.length; i++) {
-			const e = terminal[i]!;
+		for (let i = 0; i < cleanTerminal.length; i++) {
+			const e = cleanTerminal[i]!;
 			const expiredByAge = now - e.updatedAt > maxAgeMs;
 			const excessByCount = i >= maxKeep;
 			if (expiredByAge || excessByCount) {
@@ -473,7 +489,7 @@ function cleanupTerminalRuns(
 		// Commit the pruned index while holding the lock so a concurrent
 		// updateIndexEntry cannot interleave and lose entries.
-		const remaining = terminal.filter((e) => !toRemove.includes(e));
+		const remaining = cleanTerminal.filter((e) => !toRemove.includes(e));
 		writeIndex(runsRoot, [...active, ...remaining]);
 	});
@@ -783,8 +799,12 @@ export function listRuns(cwd: string, limit = 20): RunState[] {
 	}
 	// Sort by updatedAt desc, slice to limit.
-	entries.sort((a, b) => b.updatedAt - a.updatedAt);
-	const sliced = entries.slice(0, limit);
+	// Filter out entries with non-numeric/NaN updatedAt BEFORE sorting to
+	// prevent NaN from corrupting V8's sort order (which can displace valid
+	// entries when a limit is applied).
+	const valid = entries.filter((e) => typeof e.updatedAt === "number" && !Number.isNaN(e.updatedAt));
+	valid.sort((a, b) => b.updatedAt - a.updatedAt);
+	const sliced = valid.slice(0, limit);
 	// Read full RunState for each entry.
 	const runs: RunState[] = [];
@@ -804,6 +824,20 @@ export function hashInput(...parts: string[]): string {
 	return crypto.createHash("sha256").update(parts.join("\u0000")).digest("hex").slice(0, 16);
 }
+/**
+ * Check whether a process with the given PID is still alive.
+ * Uses signal 0 (no signal sent) — succeeds if the process exists and we have
+ * permission to signal it, throws ESRCH if it doesn't exist.
+ */
+export function isProcessAlive(pid: number): boolean {
+	try {
+		process.kill(pid, 0);
+		return true;
+	} catch {
+		return false;
+	}
+}
 /**
  * Write a file atomically: write to a unique temp file in the same directory,
  * then rename over the target (rename is atomic on the same filesystem). Prevents

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-taskflow",
-  "version": "0.0.19",
+  "version": "0.0.21",
   "description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
   "keywords": [
     "pi-package",
@@ -37,7 +37,7 @@
   ],
   "scripts": {
     "typecheck": "tsc --noEmit",
-    "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts",
+    "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/approval-view.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts test/detached.test.ts",
     "test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
     "test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
   },

package/skills/taskflow/SKILL.md CHANGED Viewed

@@ -43,10 +43,25 @@ proper flow, so you still get progress, persistence, resume, and `save`.
 ```
 - `agent` is optional (defaults to the first available agent).
+- `context` (optional; per step, or top-level in single and `tasks` mode): file
+  paths to pre-read and inject before the task — same as the full-DSL
+  `Phase.context` (per-file `contextLimit`, default 8000 chars). In **parallel
+  `tasks` mode** all branches SHARE the top-level `context` plus the union of
+  step contexts (the runtime pre-reads per phase, not per branch). In **chain
+  mode** declare `context` on individual steps; a top-level `context` is ignored (with a warning).
 - Add `name` to label the run (and to `save` it as a `/tf:<name>` command).
 - Precedence if several are given: `chain` > `tasks` > `task`.
 - You can pass these as top-level tool params **or** inside `define`.
+```jsonc
+// context pre-read in shorthand — the file content is injected before the task
+{ "chain": [
+  { "task": "Map the public API of src/lib", "agent": "scout" },
+  { "task": "Write docs for:\n{previous.output}", "agent": "doc-writer",
+    "context": ["AGENTS.md", "docs/style-guide.md"] }
+] }
+```
 ## How to author a taskflow
 Call the `taskflow` tool. To run a brand-new flow you write inline, pass
@@ -128,7 +143,8 @@ deciding. The (interpolated) `task` is the prompt shown.
 - **Reject** → halt the flow (same mechanism as a blocking gate).
 - **Edit** → the typed note becomes this phase's `output`, so you can inject
   guidance mid-run: reference it downstream with `{steps.<id>.output}`.
-- **Non-interactive** runs (headless/CI/print mode) **auto-approve** and record it.
+- **Non-interactive** runs (headless/CI/print mode) **auto-reject** and record it — approval gates are safety boundaries that must never be silently bypassed.
+- **Background (detached)** runs **auto-reject** (no interactive approver) — downstream sees the rejection; the flow continues (fail-open).
 ```jsonc
 { "id": "checkpoint", "type": "approval", "dependsOn": ["plan"],
@@ -169,9 +185,10 @@ Use hyphens in ids, never underscores. Sub-flow phases reference each other in
 their **own** `{steps.x.output}` namespace (no parent-id prefixing needed).
 **Fail-open & limits:** if the `def` doesn't parse, has the wrong shape, or fails
-validation, the phase fails *open* — it's marked failed with a `defError`, the
-upstream output is preserved, and the run continues (use `optional: true` on the
-flow phase so a bad plan never aborts the run). An **empty** `phases` array is a
+validation, the phase completes with `status: "done"` and carries a `defError`
+diagnostic field; downstream phases receive empty output. Authors who want a
+hard failure can add a gate that checks for `defError`. The run continues
+(add `optional: true` on the flow phase so a bad plan never aborts the run). An **empty** `phases` array is a
 valid no-op (the planner decided there's nothing to do). Inline nesting is capped
 at `MAX_DYNAMIC_NESTING` (5) to bound runaway self-spawning.
@@ -216,7 +233,7 @@ A `tournament` phase runs `variants` competing attempts in parallel, then a
 (`mode: "aggregate"`). Use it when one shot is unreliable and you want the best
 of several drafts, or a synthesis of diverse approaches.
-- `variants` — the competing attempts: a number (run the same `task` N times) or an array of `{task, agent?}` for genuinely different approaches.
+- `variants` — a number specifying how many competing variants to spawn from `task` (default 3, max 20). For genuinely different approaches, use the `branches` field instead — an explicit array of `{task, agent?}` definitions.
 - `mode` — `"best"` (judge picks one winner, default) or `"aggregate"` (judge merges all into one output).
 - `judge` — the judge's rubric/instructions (how to choose or merge).
 - `judgeAgent` — *(optional)* the agent that runs the judge step; defaults to the phase `agent`.
@@ -434,19 +451,28 @@ Quick reference:
 ## Actions
-- `action: "run"` — run an inline `define` (a one-off DAG) **or** a saved `name` (with optional `args`). Use `define` for an ad-hoc flow; use `name` to invoke something previously saved.
+- `action: "run"` — run an inline `define` (a one-off DAG) **or** a saved `name` (with optional `args`). Use `define` for an ad-hoc flow; use `name` to invoke something previously saved. Add `detach: true` to run in the background (returns immediately with the `runId`; poll the store for status).
 - `action: "save"` — persist `define` (scope `project` — default, committed/shared — or `user`); it becomes `/tf:<name>`. On a name collision, project overrides user.
 - `action: "resume"` — continue a paused/failed run by `runId`.
 - `action: "list"` — list saved flows. `action: "verify"` — static-check a `define` (zero tokens). `action: "agents"` — list available agents.
+## Background (detached) runs
+Add `detach: true` to `action: "run"` to spawn the flow in a detached child process. The tool returns immediately with the `runId`; the flow continues running even if the host session exits. Status is polled via the store (`/tf runs` or `action: "resume"`).
+- **Approval phases auto-reject** in detached mode (no interactive approver). Downstream phases see the rejection; the flow continues (fail-open).
+- **Crash resilience:** if the detached process crashes, the store persists `status: "failed"`; resume with `action: "resume"`.
+- **Same flow, both modes:** a flow can run foreground or background — `detach` is a dispatch-time decision, not a flow property.
 ## Operating a run (lifecycle, resume, inspection)
-A run moves through: **running →** `completed` (a `final` phase produced output) **/** `blocked` (a gate emitted BLOCK, an `approval` was rejected, or the `budget` cap was hit) **/** `failed` (a non-`optional` phase errored) **/** `paused` (the run was aborted). `failed` and `paused` runs are resumable; `blocked` is terminal (fix the gate/budget and re-run).
+A run moves through: **running →** `completed` (a `final` phase produced output) **/** `blocked` (a gate emitted BLOCK, an `approval` was rejected, or the `budget` cap was hit) **/** `failed` (a non-`optional` phase errored) **/** `paused` (the run was aborted). `failed` and `paused` runs are resumable.
-- **Resume is cache-aware.** `action: "resume"` re-runs only what didn't finish: every phase already `done` is reused from its recorded output (within-run cache), so resuming after a crash or a `blocked`/`failed` stop never repeats completed work. A phase that was mid-flight is re-executed cleanly (stale `error`/`endedAt` are cleared first).
+- **`blocked` runs:** a blocked status halts the current run — the flow status is set to `blocked` and remaining phases are skipped. Re-running the flow resumes from the last completed state: `done` phases with matching input hashes are skipped; blocked/failed/skipped phases are re-attempted. Fix the gate condition or budget before re-running.
+- **Resume is cache-aware.** `action: "resume"` re-runs only what didn't finish: every phase already `done` is reused from its recorded output (within-run cache), so resuming after a crash or a `failed`/`blocked` stop never repeats completed work. A phase that was mid-flight is re-executed cleanly (stale `error`/`endedAt` are cleared first).
 - **When to resume vs. re-run.** Resume when the inputs are unchanged and you just want to continue/retry the tail (fixed a gate, raised the budget, approved a checkpoint). Re-run from scratch when the task or upstream inputs changed — resume would reuse now-stale outputs. (For reuse *across* runs, opt a phase into `cache: {scope:"cross-run"}` — see configuration.md.)
 - **Budget mid-run.** When the run-wide `budget` is exceeded, remaining phases are skipped and an in-flight `map`/`parallel` stops spawning new items; the run ends `blocked` with the partial outputs preserved.
-- **Inspect runs.** `/tf runs` lists recent runs with status; `/tf show <name>` prints a saved flow's definition. Run state lives at `<project .pi>/taskflows/runs/<runId>.json` (gitignored).
+- **Inspect runs.** `/tf runs` lists recent runs with status; `/tf show <name>` prints a saved flow's definition. Run state lives at `<project .pi>/taskflows/runs/<flowName>/<runId>.json` (gitignored).
 ## User commands

package/skills/taskflow/configuration.md CHANGED Viewed

@@ -286,7 +286,7 @@ for the design.
 ### `ttl` (cross-run only)
 Max age before a cross-run hit is treated as a miss: e.g. `"30m"`, `"6h"`, `"7d"`.
-Omit for no time bound. A hit older than the TTL re-executes the phase.
+Omit for no per-entry time bound. A hit older than the TTL re-executes the phase. Independently of any per-entry TTL (including none), cross-run cache entries are hard-evicted after 90 days; this ceiling is not configurable.
 ### `fingerprint` (cross-run only)
@@ -298,7 +298,7 @@ Each entry is one of:
 | Entry | Becomes a miss when… | Resolves to |
 |-------|----------------------|-------------|
 | `git:HEAD` / `git:<ref>` | the commit moves | the resolved SHA (30s timeout → `<timeout>`; no git → `<no-git>`) |
-| `glob:<pattern>` | the **set of matching paths** changes | sorted path list (mtime-free) |
+| `glob:<pattern>` | the **set of matching paths**, or any matching file's size or mtime, changes | sorted path list with each file's size + mtime (to key on file contents independent of mtime, use `glob!:` instead) |
 | `glob!:<pattern>` | the **contents** of matching files change | content hashes (capped at 5000 matches) |
 | `file:<path>` | that file's content changes | sha256 of the file (>10 MB or missing → `<skip>`/`<missing>`) |
 | `env:<NAME>` | the env var changes | the env value |
@@ -333,7 +333,7 @@ Each entry is one of:
 |------|------|---------|
 | User-scoped flow | `~/.pi/agent/taskflows/<name>.json` | personal |
 | Project-scoped flow | `<nearest .pi>/taskflows/<name>.json` | ✅ commit to share |
-| Run state (resume) | `<project .pi>/taskflows/runs/<runId>.json` | ❌ gitignore |
+| Run state (resume) | `<project .pi>/taskflows/runs/<flowName>/<runId>.json` | ❌ gitignore |
 - `action: "save"` takes `scope: "project"` (default) or `"user"`.
 - Saved flows auto-register as `/tf:<name>` (immediately for the current session,