npm - pi-taskflow - Versions diffs - 0.0.17 → 0.0.19 - Mend

pi-taskflow 0.0.17 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CHANGELOG.md +48 -0
package/README.md +57 -24
package/README.zh-CN.md +50 -17
package/examples/dynamic-plan-execute.json +34 -0
package/examples/iterative-replan.json +30 -0
package/extensions/index.ts +2 -2
package/extensions/runtime.ts +169 -11
package/extensions/schema.ts +56 -1
package/extensions/store.ts +5 -1
package/package.json +3 -4
package/skills/taskflow/SKILL.md +146 -18
package/skills/taskflow/configuration.md +115 -5

package/extensions/runtime.ts CHANGED Viewed

@@ -16,7 +16,8 @@ import type { AgentConfig } from "./agents.ts";
 import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
 import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
 import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
-import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
+import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, MAX_DYNAMIC_MAP_ITEMS, MAX_DYNAMIC_NESTING, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode, validateTaskflow } from "./schema.ts";
+import { verifyTaskflow } from "./verify.ts";
 import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
 import { CacheStore, resolveFingerprint } from "./cache.ts";
@@ -142,6 +143,63 @@ function failPhase(id: string, error: string): PhaseState {
 	return { id, status: "failed", error, inputHash: hashInput(id, error), endedAt: Date.now(), usage: emptyUsage() };
 }
+/**
+ * Normalize an inline `flow.def` payload into a full Taskflow shape.
+ * Accepts: a full Taskflow ({name?,phases:[...]}), a bare phases array, or
+ * {phases:[...]}. Returns undefined if the shape is unrecognized. A recognized
+ * shape with ZERO phases is returned as-is (caller treats it as a no-op) so the
+ * empty-plan case is distinguishable from a malformed one.
+ *
+ * A missing, non-string, or empty `name` is replaced with `${phaseId}-inline`.
+ * Other top-level fields of an object payload are carried over as long as they
+ * survive JSON serialization.
+ *
+ * The payload is deep-cloned (JSON round-trip) so the runtime never shares
+ * references with (or mutates) the upstream phase's parsed JSON. The round-trip
+ * keeps only own, enumerable, JSON-serializable data. Note that it does NOT
+ * remove an own `__proto__` key: JSON.stringify emits it and JSON.parse
+ * re-creates it as a plain own property. Own `__proto__` / `constructor` /
+ * `prototype` keys are therefore deleted explicitly afterwards — at the TOP
+ * LEVEL of the clone only; nested objects (e.g. individual phases) are not
+ * scrubbed here.
+ *
+ * @param parsed  The already-parsed `def` value (array, object, or anything else).
+ * @param phaseId Id of the enclosing flow phase; used to derive a fallback name.
+ * @returns The normalized, cloned Taskflow, or undefined when the shape is
+ *          unrecognized or the payload cannot be JSON round-tripped.
+ */
+function normalizeInlineDef(parsed: unknown, phaseId: string): Taskflow | undefined {
+	let shaped: Taskflow | undefined;
+	if (Array.isArray(parsed)) {
+		shaped = { name: `${phaseId}-inline`, phases: parsed as Taskflow["phases"] };
+	} else if (parsed && typeof parsed === "object") {
+		const o = parsed as Record<string, unknown>;
+		if (Array.isArray(o.phases)) {
+			const name = typeof o.name === "string" && o.name.length > 0 ? (o.name as string) : `${phaseId}-inline`;
+			shaped = { ...(o as object), name, phases: o.phases as Taskflow["phases"] } as Taskflow;
+		}
+	}
+	if (!shaped) return undefined;
+	// Deep clone via JSON round-trip: severs shared references with upstream output.
+	// The round-trip itself keeps an own "__proto__" key (JSON.parse defines it as
+	// a plain own property), so it is deleted explicitly below together with own
+	// `constructor`/`prototype` keys. Only the top level of the clone is scrubbed.
+	try {
+		const clone = JSON.parse(JSON.stringify(shaped)) as Record<string, unknown>;
+		for (const k of ["__proto__", "constructor", "prototype"]) {
+			if (Object.prototype.hasOwnProperty.call(clone, k)) delete clone[k];
+		}
+		return clone as unknown as Taskflow;
+	} catch {
+		return undefined;
+	}
+}
+/**
+ * Clamp a runtime-generated sub-flow's budget so it can only ever be TIGHTER
+ * than the parent's, never looser. A generated def cannot raise the spend cap by
+ * declaring its own large budget. Each dimension becomes min(child, parent).
+ *
+ * A dimension missing on one side is treated as "no cap" (Infinity) for the
+ * comparison, so the other side's value wins; a dimension with no finite cap on
+ * either side is left unset. When the parent has no budget at all, `sub` is
+ * returned unchanged.
+ *
+ * NOTE(review): only `maxUSD` and `maxTokens` are carried into the clamped
+ * budget — confirm `Budget` has no other fields that should survive.
+ *
+ * @param sub          The generated sub-flow definition (not mutated).
+ * @param parentBudget The parent flow's budget, if any.
+ * @returns `sub` itself when there is no parent budget; otherwise a shallow copy
+ *          whose `budget` holds the clamped caps, or `undefined` when no finite
+ *          cap remains on any axis.
+ */
+function clampSubFlowBudget(sub: Taskflow, parentBudget: Budget | undefined): Taskflow {
+	if (!parentBudget) return sub;
+	const child = sub.budget;
+	const clamped: Budget = {
+		maxUSD: Math.min(child?.maxUSD ?? Infinity, parentBudget.maxUSD ?? Infinity),
+		maxTokens: Math.min(child?.maxTokens ?? Infinity, parentBudget.maxTokens ?? Infinity),
+	};
+	// Drop non-finite dimensions (Infinity means neither side capped that axis).
+	const budget: Budget = {};
+	if (Number.isFinite(clamped.maxUSD)) budget.maxUSD = clamped.maxUSD;
+	if (Number.isFinite(clamped.maxTokens)) budget.maxTokens = clamped.maxTokens;
+	return { ...sub, budget: budget.maxUSD === undefined && budget.maxTokens === undefined ? undefined : budget };
+}
 /** Aggregate run cost/tokens so far and test against the budget. */
 function overBudget(state: RunState): { over: boolean; reason: string } {
 	const budget: Budget | undefined = state.def.budget;
@@ -592,7 +650,15 @@ async function executePhase(
 	if (type === "map") {
 		const overResolved = interpolate(phase.over ?? "", ctx).text;
 		// `over` may itself be a placeholder that resolved to a JSON string.
-		const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
+		let arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
+		// Breadth cap for untrusted dynamic sub-flows: a `def:` frame in the stack
+		// means we are inside a runtime-generated flow. Truncate giant fan-outs to
+		// bound subprocess blast radius (fail-open: keep the first N rather than abort).
+		let mapTruncated = false;
+		if (arr && (deps._stack ?? []).some((s) => s.startsWith("def:")) && arr.length > MAX_DYNAMIC_MAP_ITEMS) {
+			arr = arr.slice(0, MAX_DYNAMIC_MAP_ITEMS);
+			mapTruncated = true;
+		}
 		if (!arr) {
 			return {
 				id: phase.id,
@@ -617,6 +683,12 @@ async function executePhase(
 		const results = await runFanout(tasks);
 		const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
+		if (mapTruncated) {
+			ps.warnings = [...(ps.warnings ?? []), `map fan-out truncated to MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) inside a dynamic sub-flow`];
+			// NB: do NOT set ps.budgetTruncated — that field drives the run-level
+			// budget-blocked path and would mislabel the run as "budget exceeded".
+			// This is a safety fan-out cap, not a cost overrun; a warning is enough.
+		}
 		recordCache(cc, ps);
 		return ps;
 	}
@@ -660,14 +732,96 @@ async function executePhase(
 	if (type === "flow") {
 		const ctx = buildInterpolationContext(state, previousOutput);
-		const name = phase.use;
-		if (!name) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use'`);
-		if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
-		const subDef = deps.loadFlow(name);
-		if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${name}'`);
+		const hasDef = (phase as { def?: unknown }).def !== undefined;
 		const stack = deps._stack ?? [];
-		if (name === state.flowName || stack.includes(name)) {
-			return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, name].join(" -> ")}`);
+		let subDef: Taskflow | undefined;
+		let name: string;
+		let recursionKey: string; // identity used for cache key + recursion guard
+		if (hasDef) {
+			// --- Inline `def`: resolve at runtime, validate, fail-OPEN on any error. ---
+			// Fail-open contract: a bad def NEVER aborts the run. The phase resolves
+			// as `done` with empty output and a `defError` diagnostic, and the
+			// upstream output is preserved for downstream phases. (Authors who want
+			// a bad plan to be a hard failure can add their own gate downstream.)
+			const defFailOpen = (diag: string): PhaseState => ({
+				id: phase.id,
+				status: "done",
+				output: "",
+				json: parseJson ? safeParse("") : undefined,
+				usage: emptyUsage(),
+				inputHash: hashInput(phase.id, `flow-def-error:${diag}`),
+				endedAt: Date.now(),
+				defError: diag,
+			});
+			// Nesting guard: each `flow{def}` adds a frame to _stack; cap inline depth.
+			const inlineDepth = stack.filter((s) => s.startsWith("def:")).length;
+			if (inlineDepth >= MAX_DYNAMIC_NESTING) {
+				return defFailOpen(`inline sub-flow nesting exceeded MAX_DYNAMIC_NESTING (${MAX_DYNAMIC_NESTING}): depth ${inlineDepth}`);
+			}
+			const rawDef = (phase as { def?: unknown }).def;
+			// String defs are interpolated then JSON-parsed; objects are used directly.
+			let parsed: unknown;
+			if (typeof rawDef === "string") {
+				const resolved = interpolate(rawDef, ctx).text;
+				parsed = safeParse(resolved);
+				if (parsed === undefined) {
+					return defFailOpen("inline def string did not parse as JSON");
+				}
+			} else {
+				parsed = rawDef;
+			}
+			// Accept a full Taskflow, a bare phases array, or {phases:[...]}; wrap the latter two.
+			const wrapped = normalizeInlineDef(parsed, phase.id);
+			if (!wrapped) {
+				return defFailOpen("inline def is not a Taskflow, phases array, or {phases:[...]}");
+			}
+			// Empty plan is a valid no-op (a planner deciding there is nothing to do):
+			// succeed with empty output instead of failing validation on zero phases.
+			if (wrapped.phases.length === 0) {
+				return {
+					id: phase.id,
+					status: "done",
+					output: "",
+					json: parseJson ? safeParse("") : undefined,
+					usage: emptyUsage(),
+					inputHash: hashInput(phase.id, "flow-def-empty"),
+					endedAt: Date.now(),
+				};
+			}
+			// Validate with `dynamic` hardening (breadth caps + cwd containment) since
+			// this content is LLM-authored / untrusted. cwd anchors containment checks.
+			const dynCwd = phase.cwd ?? deps.cwd;
+			const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
+			if (!v.ok) {
+				return defFailOpen(`inline def failed validation: ${v.errors.join("; ")}`);
+			}
+			// Static verification (dead-ends, unreachable, gate-exhaustion, budget,
+			// concurrency). Only error-severity issues block; warnings are advisory.
+			const ver = verifyTaskflow({ name: wrapped.name, phases: wrapped.phases as Phase[], budget: wrapped.budget, concurrency: wrapped.concurrency });
+			if (!ver.ok) {
+				const errs = ver.issues.filter((i) => i.severity === "error").map((i) => i.message);
+				return defFailOpen(`inline def failed verification: ${errs.join("; ")}`);
+			}
+			// Budget containment: a generated def may not raise the parent's cap. Clamp
+			// each dimension to min(child, parent) so it can only ever be tighter.
+			subDef = clampSubFlowBudget(wrapped, state.def.budget);
+			name = subDef.name;
+			recursionKey = `def:${name}`;
+		} else {
+			// --- Saved flow via `use` (unchanged behavior). ---
+			const useName = phase.use;
+			if (!useName) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use' or 'def'`);
+			if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
+			subDef = deps.loadFlow(useName);
+			if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${useName}'`);
+			name = useName;
+			recursionKey = useName;
+		}
+		if (recursionKey === state.flowName || stack.includes(recursionKey)) {
+			return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, recursionKey].join(" -> ")}`);
 		}
 		// Resolve sub-flow args (interpolate string values), then apply declared defaults.
 		const provided: Record<string, unknown> = {};
@@ -675,7 +829,11 @@ async function executePhase(
 			provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
 		}
 		const subArgs = resolveArgs(subDef, provided);
-		const inputHash = cacheKey(cc, [phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs)]);
+		// For inline defs the cache identity must include the resolved def content so
+		// that a different generated plan yields a different key (and an identical plan
+		// hits cache). For saved flows the name is the identity (historical behavior).
+		const flowIdentity = hasDef ? `def:${JSON.stringify(subDef)}` : `flow:${name}`;
+		const inputHash = cacheKey(cc, [phase.id, flowIdentity, preRead, JSON.stringify(subArgs)]);
 		const cached = cachedPhase(cc, inputHash);
 		if (cached) return cached;
@@ -707,7 +865,7 @@ async function executePhase(
 			// flow's cwd (not the caller's cwd).
 			cwd: phase.cwd ?? deps.cwd,
 			runTask: subRunTask,
-			_stack: [...stack, state.flowName],
+			_stack: hasDef ? [...stack, state.flowName, recursionKey] : [...stack, state.flowName],
 			persist: undefined,
 			onProgress: () => {
 				if (live) {

package/extensions/schema.ts CHANGED Viewed

@@ -20,6 +20,19 @@ export type PhaseType = (typeof PHASE_TYPES)[number];
 export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
 export const LOOP_HARD_MAX_ITERATIONS = 100;
+/** Max depth of runtime `flow { def }` sub-flow nesting (runaway guard for
+ *  LLM-generated sub-flows that themselves spawn more sub-flows). The existing
+ *  `_stack` recursion check guards saved-flow cycles; this bounds inline depth. */
+export const MAX_DYNAMIC_NESTING = 5;
+/** Breadth caps applied ONLY to runtime-generated (`flow { def }`) sub-flows,
+ *  whose content is LLM-authored and therefore untrusted. Authored/saved flows
+ *  are not subject to these (a human reviewed them). They bound DoS blast radius
+ *  from a model emitting a graph with thousands of phases / a giant fan-out. */
+export const MAX_DYNAMIC_PHASES = 100;
+export const MAX_DYNAMIC_MAP_ITEMS = 200;
+export const MAX_DYNAMIC_CONCURRENCY = 16;
 /** Tournament competitor bounds. */
 export const TOURNAMENT_DEFAULT_VARIANTS = 3;
 export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
@@ -119,6 +132,12 @@ const PhaseSchema = Type.Object(
 		// sub-workflow (flow)
 		use: Type.Optional(Type.String({ description: "[flow] Name of a saved taskflow to run as this phase" })),
+		def: Type.Optional(
+			Type.Unknown({
+				description:
+					"[flow] Inline sub-flow definition, resolved at runtime. Mutually exclusive with 'use'. A string is interpolated (e.g. '{steps.plan.json}') then JSON-parsed; an object is used directly. The result must be a Taskflow ({name,phases}) or a bare phases array / {phases:[...]} (auto-wrapped). Validated + verified before execution; on any failure the phase fails-open (defError) without aborting the run.",
+			}),
+		),
 		with: Type.Optional(
 			Type.Record(Type.String(), Type.Unknown(), {
 				description: "[flow] Args passed to the sub-flow (string values support interpolation)",
@@ -388,6 +407,10 @@ export interface ValidationOptions {
 	cwd?: string;
 	/** Override the flow's own `strictInterpolation` flag for this validation call. */
 	strict?: boolean;
+	/** When true, this flow is a runtime-generated (`flow { def }`) sub-flow whose
+	 *  content is LLM-authored / untrusted. Enables hardening checks: breadth caps
+	 *  (phase count, map items, concurrency) and cwd containment under `cwd`. */
+	dynamic?: boolean;
 }
 export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): ValidationResult {
@@ -406,6 +429,32 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 		return { ok: false, errors, warnings };
 	}
+	// Hardening for runtime-generated (untrusted) sub-flows: bound breadth and
+	// contain filesystem access. These do NOT apply to authored/saved flows.
+	if (opts.dynamic) {
+		if (flow.phases.length > MAX_DYNAMIC_PHASES) {
+			errors.push(`Dynamic sub-flow has too many phases (${flow.phases.length}, max ${MAX_DYNAMIC_PHASES})`);
+		}
+		if (typeof flow.concurrency === "number" && flow.concurrency > MAX_DYNAMIC_CONCURRENCY) {
+			errors.push(`Dynamic sub-flow concurrency too high (${flow.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
+		}
+		const root = opts.cwd ? path.resolve(opts.cwd) : undefined;
+		for (const p of flow.phases) {
+			if (!p || typeof p !== "object") continue;
+			// Per-phase concurrency override is also capped.
+			if (typeof p.concurrency === "number" && p.concurrency > MAX_DYNAMIC_CONCURRENCY) {
+				errors.push(`Dynamic sub-flow phase '${p.id}': concurrency too high (${p.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
+			}
+			// cwd containment: a generated phase may not escape the run's cwd.
+			if (typeof p.cwd === "string" && root) {
+				const resolved = path.resolve(root, p.cwd);
+				if (resolved !== root && !resolved.startsWith(root + path.sep)) {
+					errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
+				}
+			}
+		}
+	}
 	const ids = new Set<string>();
 	for (const p of flow.phases) {
 		if (!p || typeof p !== "object") {
@@ -439,7 +488,13 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 			if (!p.task) errors.push(`Phase '${p.id}' (reduce) requires 'task'`);
 		}
 		if (type === "flow") {
-			if (!p.use) errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name)`);
+			const hasUse = typeof p.use === "string" && p.use.length > 0;
+			const hasDef = (p as { def?: unknown }).def !== undefined;
+			if (!hasUse && !hasDef) {
+				errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name) or 'def' (an inline definition)`);
+			} else if (hasUse && hasDef) {
+				errors.push(`Phase '${p.id}' (flow): 'use' and 'def' are mutually exclusive — provide exactly one`);
+			}
 		}
 		if (type === "loop") {
 			if (!p.task) errors.push(`Phase '${p.id}' (loop) requires 'task' (the iteration body)`);

package/extensions/store.ts CHANGED Viewed

@@ -54,7 +54,8 @@ export interface PhaseState {
 	gate?: { verdict: "pass" | "block"; reason?: string };
 	/** Total subagent attempts incl. retries (when > calls, a retry happened). */
 	attempts?: number;
-	/** True when a map/parallel fan-out was cut short by the budget cap. */
+	/** True when a map/parallel fan-out was cut short by the budget cap. (The dynamic
+	 *  sub-flow fan-out limit, MAX_DYNAMIC_MAP_ITEMS, is reported via `warnings` instead.) */
 	budgetTruncated?: boolean;
 	/** Human-in-the-loop outcome (approval phases only). */
 	approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
@@ -62,6 +63,9 @@ export interface PhaseState {
 	loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
 	/** Tournament outcome (tournament phases only). */
 	tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
+	/** Set when a `flow { def }` inline sub-flow definition could not be resolved,
+	 *  parsed, validated, or verified. The phase fails-open: this records why. */
+	defError?: string;
 	/** Non-fatal diagnostic warnings accumulated during this phase (e.g.
 	 *  unresolved interpolation placeholders, suspicious templates). */
 	warnings?: string[];

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "pi-taskflow",
-  "version": "0.0.17",
-  "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
+  "version": "0.0.19",
+  "description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
   "keywords": [
     "pi-package",
     "pi",
@@ -33,12 +33,11 @@
     "README.md",
     "README.zh-CN.md",
     "CHANGELOG.md",
-    "DESIGN.md",
     "LICENSE"
   ],
   "scripts": {
     "typecheck": "tsc --noEmit",
-    "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts",
+    "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts",
     "test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
     "test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
   },

package/skills/taskflow/SKILL.md CHANGED Viewed

@@ -79,15 +79,17 @@ Call the `taskflow` tool. To run a brand-new flow you write inline, pass
 ### Phase types
-| type | meaning |
-|------|---------|
-| `agent` | one subagent runs `task` |
-| `parallel` | run `branches[]` concurrently |
-| `map` | fan out over `over` (an array) — one subagent per item, `{item}` bound |
-| `gate` | quality/review step that can **halt the flow** (see below) |
-| `reduce` | aggregate `from[]` phases into one output |
-| `approval` | **human-in-the-loop** pause: ask a person to approve / reject / edit before continuing |
-| `flow` | run a **saved sub-flow** (by `use`) as a single phase — composition/reuse |
+| type | meaning | details |
+|------|---------|---------|
+| `agent` | one subagent runs `task` | DSL shape |
+| `parallel` | run `branches[]` concurrently | Conditional routing |
+| `map` | fan out over `over` (an array) — one subagent per item, `{item}` bound | DSL shape |
+| `gate` | quality/review step that can **halt the flow** | Gate phases |
+| `reduce` | aggregate `from[]` phases into one output | DSL shape |
+| `approval` | **human-in-the-loop** pause: ask a person to approve / reject / edit before continuing | Approval phases |
+| `flow` | run a **sub-flow** as one phase — **saved** (`use`) or **runtime-generated** (`def`) | Sub-flows |
+| `loop` | repeat a body until a condition / convergence / `maxIterations` | Loop phases |
+| `tournament` | run N competing `variants`, a `judge` picks the best or aggregates | Tournament phases |
 ### Control-flow fields (any phase)
@@ -100,7 +102,9 @@ Call the `taskflow` tool. To run a brand-new flow you write inline, pass
 ### Conditional routing (when + gate/branches)
 Pair `when` with an upstream phase that emits a decision to build real if/else
-routing. Use `join: "any"` on the merge phase so it runs whichever branch fired:
+routing. Use `join: "any"` on the merge phase so it runs whichever branch fired. For
+static (non-conditional) concurrency, a `parallel` phase runs fixed `branches[]`
+instead — `{ "type": "parallel", "branches": [{"task":"..."}, {"task":"...","agent":"reviewer"}] }`.
 ```jsonc
 { "id": "triage", "type": "agent", "agent": "analyst", "output": "json",
@@ -133,15 +137,105 @@ deciding. The (interpolated) `task` is the prompt shown.
 ### Sub-flows (composition)
-A `flow` phase runs another **saved** taskflow by name and bubbles up its final
-output. Pass args via `with` (string values interpolate). Recursion is detected
-and rejected.
+A `flow` phase runs another taskflow as a single phase and bubbles up its final
+output. Two sources, **mutually exclusive**:
+**Saved** (`use`) — run a previously saved flow by name. Pass args via `with`
+(string values interpolate). Recursion is detected and rejected.
 ```jsonc
 { "id": "research", "type": "flow", "use": "deep-research",
   "with": { "topic": "{item}" }, "dependsOn": ["plan"] }
 ```
+**Runtime-generated** (`def`) — resolve a sub-flow *at runtime*, usually from an
+upstream phase's JSON output. The runtime interpolates + JSON-parses the `def`,
+**validates it** (cycles / dangling refs / duplicate ids), then runs it as a
+nested sub-flow. This is how a planner decides *at runtime* what work to spawn —
+the declarative answer to a code-mode `for`/`if` loop, with each generated plan
+checked before it spends a token.
+```jsonc
+// 1) A planner emits a plan as JSON. 2) flow{def} runs it.
+{ "id": "plan", "type": "agent", "agent": "planner", "output": "json",
+  "task": "Scan the repo. Output ONLY JSON {\"name\":\"audit\",\"phases\":[...]} — one audit phase per file." },
+{ "id": "run", "type": "flow", "def": "{steps.plan.json}", "dependsOn": ["plan"], "final": true }
+```
+**LLM output contract for `def`:** the upstream phase must output a *full*
+Taskflow `{"name":"...","phases":[...]}`, a bare `phases` array, or
+`{"phases":[...]}` — pure JSON (a ```json fence is tolerated and stripped).
+Use hyphens in ids, never underscores. Sub-flow phases reference each other in
+their **own** `{steps.x.output}` namespace (no parent-id prefixing needed).
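+**Fail-open & limits:** if the `def` doesn't parse, has the wrong shape, or fails
+validation/verification, the phase fails *open* — it resolves as `done` with empty
+output and a `defError` diagnostic, the upstream output is preserved, and the run
+continues. A bad plan never aborts the run, even without `optional: true` (add
+your own gate downstream if a bad plan should be a hard failure). An **empty**
+`phases` array is a valid no-op (the planner decided there's nothing to do).
+Inline nesting is capped at `MAX_DYNAMIC_NESTING` (5) to bound runaway
+self-spawning. Generated flows are also limited to 100 phases and a
+`concurrency` of 16 (exceeding either fails validation), a `map` fan-out inside
+them is truncated to the first 200 items with a warning, and any `budget` they
+declare is clamped so it can never exceed the parent's.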
+**Iterative replanning** — pair `flow{def}` (or a JSON-emitting body) with `loop`
+so round N's plan depends on round N-1's **result** (not a one-shot fan-out):
+the declarative equivalent of `for (...) { read result; decide next }`. See
+`examples/dynamic-plan-execute.json` and `examples/iterative-replan.json`.
+### Loop phases (iterate until done)
+A `loop` phase runs its body repeatedly, exposing each iteration's output as
+`{steps.<thisId>.output}` / `.json` so the next round can react to the last. It
+stops on the first of: `until` truthy, **convergence** (output stops changing),
+or `maxIterations` (hard cap). This is the declarative "keep going until good
+enough" — the runtime always terminates (the cap is mandatory).
+- `until` — stop condition, same operators as `when` (a parse error stops the loop, fail-safe).
+- `maxIterations` — hard iteration cap (required to bound the loop).
+- `convergence` — `true` to stop early when an iteration's output equals the previous one.
+```jsonc
+{
+  "id": "refine",
+  "type": "loop",
+  "agent": "executor",
+  "maxIterations": 5,
+  "until": "{steps.refine.json.done} == true",
+  "convergence": true,
+  "task": "Improve the draft. When nothing else needs fixing, output JSON {\"done\":true,\"draft\":\"...\"}; otherwise {\"done\":false,\"draft\":\"...\"}.",
+  "output": "json",
+  "final": true
+}
+```
+For data-dependent **replanning** each round, pair a `loop` body that emits a
+plan with `flow{def}` (see Sub-flows above). See `examples/iterative-replan.json`.
+### Tournament phases (N variants, judge picks best)
+A `tournament` phase runs `variants` competing attempts in parallel, then a
+**judge** sub-phase selects the winner (`mode: "best"`) or merges them
+(`mode: "aggregate"`). Use it when one shot is unreliable and you want the best
+of several drafts, or a synthesis of diverse approaches.
+- `variants` — the competing attempts: a number (run the same `task` N times) or an array of `{task, agent?}` for genuinely different approaches.
+- `mode` — `"best"` (judge picks one winner, default) or `"aggregate"` (judge merges all into one output).
+- `judge` — the judge's rubric/instructions (how to choose or merge).
+- `judgeAgent` — *(optional)* the agent that runs the judge step; defaults to the phase `agent`.
+- Fail-open: if the judge's pick is unparseable, variant 1 is returned (work is never lost).
+```jsonc
+{
+  "id": "headline",
+  "type": "tournament",
+  "agent": "executor",
+  "variants": 3,
+  "mode": "best",
+  "judge": "Pick the clearest, most accurate headline. End with: WINNER: <n>.",
+  "task": "Write one headline for the article below.\n\n{steps.draft.output}",
+  "dependsOn": ["draft"],
+  "final": true
+}
+```
 ### Budget (cost / token caps)
 Add a run-wide ceiling at the top level. When accumulated cost/tokens exceed it,
@@ -172,6 +266,30 @@ Review the audit results below. If any endpoint is missing auth, end with
 {steps.audit.output}
 ```
+**Zero-token machine checks (`eval`).** Before spending a token on the LLM gate,
+list machine-checkable assertions in `eval`. If **all** pass, the gate
+auto-passes with **no LLM call**; if any fails, it falls through to the LLM
+`task` (the qualitative residue). Each entry supports the `when` operators plus
+`X contains Y` (substring). A parse error fails **open** (consistent with the
+gate invariant).
+```jsonc
+{ "id": "quality", "type": "gate", "dependsOn": ["build","test"],
+  "eval": ["{steps.build.output} contains BUILD SUCCESS", "{steps.test.json.failures} == 0"],
+  "task": "Review the diff for subtle logic errors a linter can't catch. VERDICT: PASS or BLOCK." }
+```
+**Self-healing (`onBlock: "retry"`).** By default a blocking gate halts the run
+(`onBlock: "halt"`). With `onBlock: "retry"` the gate instead **re-runs its
+upstream `dependsOn` phases and re-evaluates**, up to `retry.max` rounds (or
+until PASS / budget / abort) — a generate→critique→regenerate rework loop.
+```jsonc
+{ "id": "spec-gate", "type": "gate", "onBlock": "retry", "retry": { "max": 3 },
+  "dependsOn": ["implement"],
+  "task": "Does the implementation satisfy ALL acceptance criteria? VERDICT: PASS or BLOCK with reasons." }
+```
 ### Structured-verify phases (v0.0.8.1)
 A "verify" phase typically runs `npx tsc --noEmit && npm test && git diff --stat`
@@ -309,16 +427,26 @@ variables, and storage paths — read `configuration.md` (next to this file).
 Quick reference:
 - **Flow:** `name`, `description`, `concurrency` (default 8), `budget` (`maxUSD`/`maxTokens`), `agentScope` (user|project|both), `args`, `strictInterpolation`.
-- **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`with` (flow), `final`.
+- **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`def`/`with` (flow), `optional` (fail-soft — a failed/blocked phase won't abort the run), `final`.
+- **Cross-run caching:** add `cache: { "scope": "cross-run" }` to a phase to memoize its output across runs (same input → instant reuse, zero tokens). See `configuration.md` for `ttl`, `fingerprint` (git/glob/file/env invalidation), and scope options.
 - **Precedence (model/thinking/tools):** phase value → agent frontmatter (resolved via `modelRoles`) → global/default.
 - **Concurrency:** same-layer phases use `flow.concurrency`; a `map`/`parallel` phase uses `phase.concurrency ?? flow.concurrency ?? 8`.
 ## Actions
-- `action: "run"` — run inline `define` or a saved `name` (with optional `args`).
-- `action: "save"` — persist `define` (scope `project` or `user`); becomes `/tf:<name>`.
-- `action: "resume"` — continue a paused/failed run by `runId` (completed phases are cached).
-- `action: "list"` — list saved flows.
+- `action: "run"` — run an inline `define` (a one-off DAG) **or** a saved `name` (with optional `args`). Use `define` for an ad-hoc flow; use `name` to invoke something previously saved.
+- `action: "save"` — persist `define` (scope `project` — default, committed/shared — or `user`); it becomes `/tf:<name>`. On a name collision, project overrides user.
+- `action: "resume"` — continue a paused/failed run by `runId`.
+- `action: "list"` — list saved flows. `action: "verify"` — static-check a `define` (zero tokens). `action: "agents"` — list available agents.
+## Operating a run (lifecycle, resume, inspection)
+A run moves through: **running →** `completed` (a `final` phase produced output) **/** `blocked` (a gate emitted BLOCK, an `approval` was rejected, or the `budget` cap was hit) **/** `failed` (a non-`optional` phase errored) **/** `paused` (the run was aborted). `failed` and `paused` runs are resumable; `blocked` is terminal (fix the gate/budget and re-run).
+- **Resume is cache-aware.** `action: "resume"` re-runs only what didn't finish: every phase already `done` is reused from its recorded output (within-run cache), so resuming after a crash or a `blocked`/`failed` stop never repeats completed work. A phase that was mid-flight is re-executed cleanly (stale `error`/`endedAt` are cleared first).
+- **When to resume vs. re-run.** Resume when the inputs are unchanged and you just want to continue/retry the tail (fixed a gate, raised the budget, approved a checkpoint). Re-run from scratch when the task or upstream inputs changed — resume would reuse now-stale outputs. (For reuse *across* runs, opt a phase into `cache: {scope:"cross-run"}` — see configuration.md.)
+- **Budget mid-run.** When the run-wide `budget` is exceeded, remaining phases are skipped and an in-flight `map`/`parallel` stops spawning new items; the run ends `blocked` with the partial outputs preserved.
+- **Inspect runs.** `/tf runs` lists recent runs with status; `/tf show <name>` prints a saved flow's definition. Run state lives at `<project .pi>/taskflows/runs/<runId>.json` (gitignored).
 ## User commands