npm - pi-taskflow - Versions diffs - 0.0.16 → 0.0.18 - Mend

pi-taskflow 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/CHANGELOG.md +36 -0
package/README.md +59 -36
package/README.zh-CN.md +52 -29
package/examples/dynamic-plan-execute.json +34 -0
package/examples/iterative-replan.json +30 -0
package/extensions/agents.ts +13 -37
package/extensions/cache.ts +5 -1
package/extensions/index.ts +70 -17
package/extensions/interpolate.ts +32 -5
package/extensions/render.ts +2 -2
package/extensions/runner.ts +38 -2
package/extensions/runs-view.ts +2 -2
package/extensions/runtime.ts +225 -20
package/extensions/schema.ts +57 -2
package/extensions/store.ts +39 -14
package/extensions/verify.ts +11 -0
package/package.json +3 -4
package/skills/taskflow/SKILL.md +39 -5
package/skills/taskflow/configuration.md +10 -11
package/DESIGN.md +0 -338

package/extensions/runtime.ts CHANGED Viewed

@@ -16,7 +16,8 @@ import type { AgentConfig } from "./agents.ts";
 import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
 import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
 import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
-import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
+import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, MAX_DYNAMIC_MAP_ITEMS, MAX_DYNAMIC_NESTING, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode, validateTaskflow } from "./schema.ts";
+import { verifyTaskflow } from "./verify.ts";
 import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
 import { CacheStore, resolveFingerprint } from "./cache.ts";
@@ -70,8 +71,17 @@ function buildInterpolationContext(
 ): InterpolationContext {
 	const steps: Record<string, { output: string; json?: unknown }> = {};
 	for (const [id, ps] of Object.entries(state.phases)) {
-		if (ps.status === "done" && ps.output !== undefined) {
-			steps[id] = { output: ps.output, json: ps.json };
+		// Include both done AND failed phases so downstream phases can see
+		// error info. Skipped phases (upstream failure cascade) are excluded.
+		if (ps.status === "done" || ps.status === "failed") {
+			if (ps.output !== undefined) {
+				steps[id] = { output: ps.output, json: ps.json };
+			} else if (ps.status === "failed") {
+				// M-3: Failed phases without output get a placeholder so
+				// downstream references like {steps.X.output} resolve to a
+				// sensible value instead of leaving the raw placeholder intact.
+				steps[id] = { output: "[previous phase failed]", json: undefined };
+			}
 		}
 	}
 	return { args: state.args, steps, previousOutput, locals };
@@ -80,10 +90,16 @@ function buildInterpolationContext(
 function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
 	const failed = isFailed(r);
 	const attempts = attemptsOf(r);
+	// For failed phases, embed the error info in the output so downstream
+	// phases (and the user) can see what went wrong. The raw r.output is
+	// often a useless placeholder like "(upstream error: subagent failed)".
+	const output = failed
+		? r.errorMessage || r.stderr || r.output
+		: r.output;
 	return {
 		id,
 		status: failed ? "failed" : "done",
-		output: r.output,
+		output,
 		json: parseJson && !failed ? safeParse(r.output) : undefined,
 		usage: r.usage,
 		model: r.model,
@@ -127,6 +143,63 @@ function failPhase(id: string, error: string): PhaseState {
 	return { id, status: "failed", error, inputHash: hashInput(id, error), endedAt: Date.now(), usage: emptyUsage() };
 }
+/**
+ * Normalize an inline `flow.def` payload into a full Taskflow shape.
+ *
+ * Accepted shapes:
+ *  - a bare phases array: wrapped as { name: "<phaseId>-inline", phases }.
+ *  - an object whose `phases` is an array: its own keys are kept, and `name`
+ *    falls back to "<phaseId>-inline" when it is missing, empty, or not a string.
+ *
+ * Returns undefined when the shape is unrecognized, or when the JSON round-trip
+ * used for cloning throws. A recognized shape with ZERO phases is still returned
+ * (the caller treats it as a no-op), so the empty-plan case stays distinguishable
+ * from a malformed one.
+ *
+ * The result is a deep clone, so the runtime never shares references with (or
+ * mutates) the upstream phase's parsed JSON.
+ *
+ * NOTE: the JSON round-trip on its own does NOT remove an own "__proto__" key:
+ * JSON.stringify serializes own enumerable properties and JSON.parse recreates
+ * them as own data properties. The explicit delete loop is what strips
+ * "__proto__" / "constructor" / "prototype", and it only inspects the TOP LEVEL
+ * of the clone; nested objects (e.g. individual phases) are not scrubbed.
+ *
+ * @param parsed  Candidate definition (already JSON-parsed, or supplied as an object).
+ * @param phaseId Id of the enclosing flow phase; used to synthesize a default name.
+ * @returns A detached Taskflow-shaped object, or undefined as described above.
+ */
+function normalizeInlineDef(parsed: unknown, phaseId: string): Taskflow | undefined {
+	let shaped: Taskflow | undefined;
+	if (Array.isArray(parsed)) {
+		shaped = { name: `${phaseId}-inline`, phases: parsed as Taskflow["phases"] };
+	} else if (parsed && typeof parsed === "object") {
+		const o = parsed as Record<string, unknown>;
+		if (Array.isArray(o.phases)) {
+			const name = typeof o.name === "string" && o.name.length > 0 ? (o.name as string) : `${phaseId}-inline`;
+			shaped = { ...(o as object), name, phases: o.phases as Taskflow["phases"] } as Taskflow;
+		}
+	}
+	if (!shaped) return undefined;
+	// Deep clone via JSON round-trip: severs shared references with upstream output.
+	// The round-trip does NOT drop an own "__proto__" key (stringify emits it and
+	// parse re-creates it as an own property), so the loop below deletes it, along
+	// with inert `constructor`/`prototype` own-keys, from the clone's top level.
+	try {
+		const clone = JSON.parse(JSON.stringify(shaped)) as Record<string, unknown>;
+		for (const k of ["__proto__", "constructor", "prototype"]) {
+			if (Object.prototype.hasOwnProperty.call(clone, k)) delete clone[k];
+		}
+		return clone as unknown as Taskflow;
+	} catch {
+		return undefined;
+	}
+}
+/**
+ * Clamp a runtime-generated sub-flow's budget so it can only ever be TIGHTER
+ * than the parent's, never looser. A generated def cannot raise the spend cap by
+ * declaring its own large budget. Each dimension becomes min(child, parent),
+ * where a missing value on either side counts as Infinity (no cap on that side).
+ *
+ * - No parent budget: `sub` is returned unchanged, so its own budget (if any) stands.
+ * - Otherwise: returns a shallow copy of `sub` (the input is not mutated) whose
+ *   `budget` holds only the finite `maxUSD` / `maxTokens` results, or is
+ *   undefined when neither is finite. Only these two fields are carried over
+ *   into the new budget object.
+ *
+ * NOTE(review): Math.min yields NaN when either operand is NaN, and the
+ * Number.isFinite filter then drops that dimension entirely (i.e. no cap).
+ * This presumably relies on budget values being validated as real numbers
+ * before this call; confirm against the caller.
+ *
+ * @param sub          Sub-flow definition whose budget is to be clamped.
+ * @param parentBudget Budget of the enclosing run, if it declares one.
+ * @returns `sub` itself, or a copy of it with the clamped budget.
+ */
+function clampSubFlowBudget(sub: Taskflow, parentBudget: Budget | undefined): Taskflow {
+	if (!parentBudget) return sub;
+	const child = sub.budget;
+	const clamped: Budget = {
+		maxUSD: Math.min(child?.maxUSD ?? Infinity, parentBudget.maxUSD ?? Infinity),
+		maxTokens: Math.min(child?.maxTokens ?? Infinity, parentBudget.maxTokens ?? Infinity),
+	};
+	// Keep only finite dimensions: Infinity means neither side capped that axis.
+	const budget: Budget = {};
+	if (Number.isFinite(clamped.maxUSD)) budget.maxUSD = clamped.maxUSD;
+	if (Number.isFinite(clamped.maxTokens)) budget.maxTokens = clamped.maxTokens;
+	return { ...sub, budget: budget.maxUSD === undefined && budget.maxTokens === undefined ? undefined : budget };
+}
 /** Aggregate run cost/tokens so far and test against the budget. */
 function overBudget(state: RunState): { over: boolean; reason: string } {
 	const budget: Budget | undefined = state.def.budget;
@@ -156,8 +229,13 @@ function mergePhaseState(
 	// which model produced the merged output.
 	const model = ran.find((r) => r.model !== undefined)?.model;
 	// Combine outputs as a labelled list; also expose a JSON array of outputs.
+	// For failed items, use the error message instead of the useless placeholder.
 	const combinedText = ran
-		.map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
+		.map((r, i) => {
+			const label = `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}`;
+			const content = isFailed(r) ? (r.errorMessage || r.stderr || r.output) : r.output;
+			return `${label}\n\n${content}`;
+		})
 		.join("\n\n---\n\n");
 	// Only successful runs feed the parsed JSON array (no error/skip strings).
 	const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
@@ -373,7 +451,14 @@ async function executePhase(
 			// Backoff: prefer the explicit policy's curve when the phase defines one
 			// (covers transient retries too, and keeps tests fast with backoffMs:0),
 			// otherwise use the transient defaults.
-			const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
+			const baseMs = retry?.backoffMs != null ? retry.backoffMs : DEFAULT_TRANSIENT_BACKOFF_MS;
+			// Factor asymmetry is intentional:
+			// - Explicit retry: backoffMs * (factor ?? 1) ^ attempt — user's
+			//   curve, defaults to flat (factor=1 → constant backoff).
+			// - Transient fallback: backoffMs * 2 ^ attempt — exponential.
+			// This lets users opt into flat retry with retry: {max:3} without
+			// specifying factor, while transient errors get proper exponential
+			// backoff.
 			const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
 			const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
 			if (wait > 0) await delay(wait, deps.signal);
@@ -565,7 +650,15 @@ async function executePhase(
 	if (type === "map") {
 		const overResolved = interpolate(phase.over ?? "", ctx).text;
 		// `over` may itself be a placeholder that resolved to a JSON string.
-		const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
+		let arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
+		// Breadth cap for untrusted dynamic sub-flows: a `def:` frame in the stack
+		// means we are inside a runtime-generated flow. Truncate giant fan-outs to
+		// bound subprocess blast radius (fail-open: keep the first N rather than abort).
+		let mapTruncated = false;
+		if (arr && (deps._stack ?? []).some((s) => s.startsWith("def:")) && arr.length > MAX_DYNAMIC_MAP_ITEMS) {
+			arr = arr.slice(0, MAX_DYNAMIC_MAP_ITEMS);
+			mapTruncated = true;
+		}
 		if (!arr) {
 			return {
 				id: phase.id,
@@ -590,6 +683,12 @@ async function executePhase(
 		const results = await runFanout(tasks);
 		const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
+		if (mapTruncated) {
+			ps.warnings = [...(ps.warnings ?? []), `map fan-out truncated to MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) inside a dynamic sub-flow`];
+			// NB: do NOT set ps.budgetTruncated — that field drives the run-level
+			// budget-blocked path and would mislabel the run as "budget exceeded".
+			// This is a safety fan-out cap, not a cost overrun; a warning is enough.
+		}
 		recordCache(cc, ps);
 		return ps;
 	}
@@ -633,14 +732,96 @@ async function executePhase(
 	if (type === "flow") {
 		const ctx = buildInterpolationContext(state, previousOutput);
-		const name = phase.use;
-		if (!name) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use'`);
-		if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
-		const subDef = deps.loadFlow(name);
-		if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${name}'`);
+		const hasDef = (phase as { def?: unknown }).def !== undefined;
 		const stack = deps._stack ?? [];
-		if (name === state.flowName || stack.includes(name)) {
-			return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, name].join(" -> ")}`);
+		let subDef: Taskflow | undefined;
+		let name: string;
+		let recursionKey: string; // identity used for cache key + recursion guard
+		if (hasDef) {
+			// --- Inline `def`: resolve at runtime, validate, fail-OPEN on any error. ---
+			// Fail-open contract: a bad def NEVER aborts the run. The phase resolves
+			// as `done` with empty output and a `defError` diagnostic, and the
+			// upstream output is preserved for downstream phases. (Authors who want
+			// a bad plan to be a hard failure can add their own gate downstream.)
+			const defFailOpen = (diag: string): PhaseState => ({
+				id: phase.id,
+				status: "done",
+				output: "",
+				json: parseJson ? safeParse("") : undefined,
+				usage: emptyUsage(),
+				inputHash: hashInput(phase.id, `flow-def-error:${diag}`),
+				endedAt: Date.now(),
+				defError: diag,
+			});
+			// Nesting guard: each `flow{def}` adds a frame to _stack; cap inline depth.
+			const inlineDepth = stack.filter((s) => s.startsWith("def:")).length;
+			if (inlineDepth >= MAX_DYNAMIC_NESTING) {
+				return defFailOpen(`inline sub-flow nesting exceeded MAX_DYNAMIC_NESTING (${MAX_DYNAMIC_NESTING}): depth ${inlineDepth}`);
+			}
+			const rawDef = (phase as { def?: unknown }).def;
+			// String defs are interpolated then JSON-parsed; objects are used directly.
+			let parsed: unknown;
+			if (typeof rawDef === "string") {
+				const resolved = interpolate(rawDef, ctx).text;
+				parsed = safeParse(resolved);
+				if (parsed === undefined) {
+					return defFailOpen("inline def string did not parse as JSON");
+				}
+			} else {
+				parsed = rawDef;
+			}
+			// Accept a full Taskflow, a bare phases array, or {phases:[...]}; wrap the latter two.
+			const wrapped = normalizeInlineDef(parsed, phase.id);
+			if (!wrapped) {
+				return defFailOpen("inline def is not a Taskflow, phases array, or {phases:[...]}");
+			}
+			// Empty plan is a valid no-op (a planner deciding there is nothing to do):
+			// succeed with empty output instead of failing validation on zero phases.
+			if (wrapped.phases.length === 0) {
+				return {
+					id: phase.id,
+					status: "done",
+					output: "",
+					json: parseJson ? safeParse("") : undefined,
+					usage: emptyUsage(),
+					inputHash: hashInput(phase.id, "flow-def-empty"),
+					endedAt: Date.now(),
+				};
+			}
+			// Validate with `dynamic` hardening (breadth caps + cwd containment) since
+			// this content is LLM-authored / untrusted. cwd anchors containment checks.
+			const dynCwd = phase.cwd ?? deps.cwd;
+			const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
+			if (!v.ok) {
+				return defFailOpen(`inline def failed validation: ${v.errors.join("; ")}`);
+			}
+			// Static verification (dead-ends, unreachable, gate-exhaustion, budget,
+			// concurrency). Only error-severity issues block; warnings are advisory.
+			const ver = verifyTaskflow({ name: wrapped.name, phases: wrapped.phases as Phase[], budget: wrapped.budget, concurrency: wrapped.concurrency });
+			if (!ver.ok) {
+				const errs = ver.issues.filter((i) => i.severity === "error").map((i) => i.message);
+				return defFailOpen(`inline def failed verification: ${errs.join("; ")}`);
+			}
+			// Budget containment: a generated def may not raise the parent's cap. Clamp
+			// each dimension to min(child, parent) so it can only ever be tighter.
+			subDef = clampSubFlowBudget(wrapped, state.def.budget);
+			name = subDef.name;
+			recursionKey = `def:${name}`;
+		} else {
+			// --- Saved flow via `use` (unchanged behavior). ---
+			const useName = phase.use;
+			if (!useName) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use' or 'def'`);
+			if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
+			subDef = deps.loadFlow(useName);
+			if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${useName}'`);
+			name = useName;
+			recursionKey = useName;
+		}
+		if (recursionKey === state.flowName || stack.includes(recursionKey)) {
+			return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, recursionKey].join(" -> ")}`);
 		}
 		// Resolve sub-flow args (interpolate string values), then apply declared defaults.
 		const provided: Record<string, unknown> = {};
@@ -648,7 +829,11 @@ async function executePhase(
 			provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
 		}
 		const subArgs = resolveArgs(subDef, provided);
-		const inputHash = cacheKey(cc, [phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs)]);
+		// For inline defs the cache identity must include the resolved def content so
+		// that a different generated plan yields a different key (and an identical plan
+		// hits cache). For saved flows the name is the identity (historical behavior).
+		const flowIdentity = hasDef ? `def:${JSON.stringify(subDef)}` : `flow:${name}`;
+		const inputHash = cacheKey(cc, [phase.id, flowIdentity, preRead, JSON.stringify(subArgs)]);
 		const cached = cachedPhase(cc, inputHash);
 		if (cached) return cached;
@@ -680,7 +865,7 @@ async function executePhase(
 			// flow's cwd (not the caller's cwd).
 			cwd: phase.cwd ?? deps.cwd,
 			runTask: subRunTask,
-			_stack: [...stack, state.flowName],
+			_stack: hasDef ? [...stack, state.flowName, recursionKey] : [...stack, state.flowName],
 			persist: undefined,
 			onProgress: () => {
 				if (live) {
@@ -742,7 +927,7 @@ async function executePhase(
 		for (let i = 1; i <= maxIters; i++) {
 			if (deps.signal?.aborted) {
-				stop = "failed";
+				stop = "aborted";
 				break;
 			}
 			iterations = i;
@@ -788,14 +973,14 @@ async function executePhase(
 		}
 		const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
-		if (failedResult) {
+		if (failedResult || stop === "failed" || stop === "aborted") {
 			return {
 				id: phase.id,
 				status: "failed",
 				output: lastOutput || undefined,
 				usage: aggUsage,
-				error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
-				loop: { iterations, stop: "failed" },
+				error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
+				loop: { iterations, stop },
 				warnings: loopWarnings.length ? loopWarnings : undefined,
 				inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
 				endedAt: Date.now(),
@@ -868,6 +1053,22 @@ async function executePhase(
 			};
 		}
+		// Guard: skip the judge if the run is over budget or aborted.
+		if (deps.signal?.aborted || overBudget(state).over) {
+			return {
+				id: phase.id,
+				status: "done",
+				output: ok[0].output,
+				json: parseJson ? safeParse(ok[0].output) : undefined,
+				usage: variantUsage,
+				model: ok[0].model,
+				warnings: ["judge skipped: run aborted or budget exceeded"],
+				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
+				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				endedAt: Date.now(),
+			};
+		}
 		// Build the judge prompt: label every variant output, then the rubric.
 		const labelled = ran
 			.map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
@@ -1288,6 +1489,10 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 				if (!budgetReason) budgetReason = "fan-out truncated by budget";
 			}
 			// Budget ceiling: once exceeded, remaining phases are skipped.
+			// For concurrent same-layer phases, the check runs after each phase
+			// completes, so at most (concurrency - 1) extra phases may run before
+			// the budget is detected as exceeded. This bounded overshoot is
+			// acceptable: budgetBlocked prevents cascading into subsequent layers.
 			const ob = overBudget(state);
 			if (ob.over && !budgetBlocked) {
 				budgetBlocked = true;

package/extensions/schema.ts CHANGED Viewed

@@ -20,6 +20,19 @@ export type PhaseType = (typeof PHASE_TYPES)[number];
 export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
 export const LOOP_HARD_MAX_ITERATIONS = 100;
+/** Max depth of runtime `flow { def }` sub-flow nesting (runaway guard for
+ *  LLM-generated sub-flows that themselves spawn more sub-flows). The existing
+ *  `_stack` recursion check guards saved-flow cycles; this bounds inline depth. */
+export const MAX_DYNAMIC_NESTING = 5;
+/** Breadth caps applied ONLY to runtime-generated (`flow { def }`) sub-flows,
+ *  whose content is LLM-authored and therefore untrusted. Authored/saved flows
+ *  are not subject to these (a human reviewed them). They bound DoS blast radius
+ *  from a model emitting a graph with thousands of phases / a giant fan-out. */
+export const MAX_DYNAMIC_PHASES = 100;
+export const MAX_DYNAMIC_MAP_ITEMS = 200;
+export const MAX_DYNAMIC_CONCURRENCY = 16;
 /** Tournament competitor bounds. */
 export const TOURNAMENT_DEFAULT_VARIANTS = 3;
 export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
@@ -119,6 +132,12 @@ const PhaseSchema = Type.Object(
 		// sub-workflow (flow)
 		use: Type.Optional(Type.String({ description: "[flow] Name of a saved taskflow to run as this phase" })),
+		def: Type.Optional(
+			Type.Unknown({
+				description:
+					"[flow] Inline sub-flow definition, resolved at runtime. Mutually exclusive with 'use'. A string is interpolated (e.g. '{steps.plan.json}') then JSON-parsed; an object is used directly. The result must be a Taskflow ({name,phases}) or a bare phases array / {phases:[...]} (auto-wrapped). Validated + verified before execution; on any failure the phase fails-open (defError) without aborting the run.",
+			}),
+		),
 		with: Type.Optional(
 			Type.Record(Type.String(), Type.Unknown(), {
 				description: "[flow] Args passed to the sub-flow (string values support interpolation)",
@@ -235,7 +254,7 @@ const ArgSpecSchema = Type.Object(
 export const TaskflowSchema = Type.Object(
 	{
-		name: Type.String({ description: "Workflow name (becomes /tf:<name> command when saved)" }),
+		name: Type.String({ minLength: 1, description: "Workflow name (becomes /tf:<name> command when saved)" }),
 		description: Type.Optional(Type.String()),
 		version: Type.Optional(Type.Number({ default: 1 })),
 		args: Type.Optional(Type.Record(Type.String(), ArgSpecSchema, { description: "Declared invocation arguments" })),
@@ -388,6 +407,10 @@ export interface ValidationOptions {
 	cwd?: string;
 	/** Override the flow's own `strictInterpolation` flag for this validation call. */
 	strict?: boolean;
+	/** When true, this flow is a runtime-generated (`flow { def }`) sub-flow whose
+	 *  content is LLM-authored / untrusted. Enables hardening checks: breadth caps
+	 *  (phase count, map items, concurrency) and cwd containment under `cwd`. */
+	dynamic?: boolean;
 }
 export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): ValidationResult {
@@ -406,6 +429,32 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 		return { ok: false, errors, warnings };
 	}
+	// Hardening for runtime-generated (untrusted) sub-flows: bound breadth and
+	// contain filesystem access. These do NOT apply to authored/saved flows.
+	if (opts.dynamic) {
+		if (flow.phases.length > MAX_DYNAMIC_PHASES) {
+			errors.push(`Dynamic sub-flow has too many phases (${flow.phases.length}, max ${MAX_DYNAMIC_PHASES})`);
+		}
+		if (typeof flow.concurrency === "number" && flow.concurrency > MAX_DYNAMIC_CONCURRENCY) {
+			errors.push(`Dynamic sub-flow concurrency too high (${flow.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
+		}
+		const root = opts.cwd ? path.resolve(opts.cwd) : undefined;
+		for (const p of flow.phases) {
+			if (!p || typeof p !== "object") continue;
+			// Per-phase concurrency override is also capped.
+			if (typeof p.concurrency === "number" && p.concurrency > MAX_DYNAMIC_CONCURRENCY) {
+				errors.push(`Dynamic sub-flow phase '${p.id}': concurrency too high (${p.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
+			}
+			// cwd containment: a generated phase may not escape the run's cwd.
+			if (typeof p.cwd === "string" && root) {
+				const resolved = path.resolve(root, p.cwd);
+				if (resolved !== root && !resolved.startsWith(root + path.sep)) {
+					errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
+				}
+			}
+		}
+	}
 	const ids = new Set<string>();
 	for (const p of flow.phases) {
 		if (!p || typeof p !== "object") {
@@ -439,7 +488,13 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
 			if (!p.task) errors.push(`Phase '${p.id}' (reduce) requires 'task'`);
 		}
 		if (type === "flow") {
-			if (!p.use) errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name)`);
+			const hasUse = typeof p.use === "string" && p.use.length > 0;
+			const hasDef = (p as { def?: unknown }).def !== undefined;
+			if (!hasUse && !hasDef) {
+				errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name) or 'def' (an inline definition)`);
+			} else if (hasUse && hasDef) {
+				errors.push(`Phase '${p.id}' (flow): 'use' and 'def' are mutually exclusive — provide exactly one`);
+			}
 		}
 		if (type === "loop") {
 			if (!p.task) errors.push(`Phase '${p.id}' (loop) requires 'task' (the iteration body)`);

package/extensions/store.ts CHANGED Viewed

@@ -54,14 +54,18 @@ export interface PhaseState {
 	gate?: { verdict: "pass" | "block"; reason?: string };
 	/** Total subagent attempts incl. retries (when > calls, a retry happened). */
 	attempts?: number;
-	/** True when a map/parallel fan-out was cut short by the budget cap. */
+	/** True when a map/parallel fan-out was cut short by the budget cap. The dynamic
+	 *  sub-flow fan-out safety limit (MAX_DYNAMIC_MAP_ITEMS) does not set this flag;
+	 *  that truncation is reported through `warnings` instead. */
 	budgetTruncated?: boolean;
 	/** Human-in-the-loop outcome (approval phases only). */
 	approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
 	/** Loop iteration accounting (loop phases only). */
-	loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" };
+	loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
 	/** Tournament outcome (tournament phases only). */
 	tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
+	/** Set when a `flow { def }` inline sub-flow definition could not be resolved,
+	 *  parsed, validated, or verified. The phase fails-open: this records why. */
+	defError?: string;
 	/** Non-fatal diagnostic warnings accumulated during this phase (e.g.
 	 *  unresolved interpolation placeholders, suspicious templates). */
 	warnings?: string[];
@@ -128,6 +132,9 @@ export const DEFAULT_RUN_AGE_DAYS = DEFAULT_MAX_AGE_DAYS;
 /** Last cleanup timestamp — module-level so it persists across calls. */
 let lastCleanupAt = 0;
+/** Shared buffer for Atomics.wait in acquireLock busy-wait (Finding 6). */
+const LOCK_WAIT_BUF = new Int32Array(new SharedArrayBuffer(4));
 // ---------------------------------------------------------------------------
 // Internal helpers — path construction & sanitisation
 // ---------------------------------------------------------------------------
@@ -142,7 +149,7 @@ let lastCleanupAt = 0;
  * bare-dot / leading-dot components after the character substitution so the
  * write path can never escape runs/ (risk-reviewer v0.0.9 audit, H1).
  */
-function safeFlowDirName(flowName: string): string {
+export function safeFlowDirName(flowName: string): string {
 	let safe = flowName.replace(/[^\w.-]+/g, "_");
 	// Collapse leading dots: blocks ".", "..", and hidden-dir names like ".git".
 	safe = safe.replace(/^\.+/, "_");
@@ -245,7 +252,7 @@ function acquireLock(lockPath: string, timeoutMs: number = LOCK_TIMEOUT_MS): voi
 				throw new Error(`Lock timeout after ${timeoutMs}ms waiting for ${path.basename(lockPath)}`);
 			}
 			// Busy-wait with Atomics.wait (CPU-efficient sleep).
-			Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, LOCK_POLL_MS);
+			Atomics.wait(LOCK_WAIT_BUF, 0, 0, LOCK_POLL_MS);
 		}
 	}
 }
@@ -392,11 +399,18 @@ function rebuildIndex(runsRoot: string): RunIndexEntry[] {
 		} catch { /* skip corrupt */ }
 	}
-	const result = Array.from(entries.values());
-	// Persist the rebuilt index under the index lock so it does not race a
-	// concurrent updateIndexEntry / cleanup write (M1).
-	withLock(indexLockPath(runsRoot), () => writeIndex(runsRoot, result));
-	return result;
+	const scanned = Array.from(entries.values());
+	// Persist the rebuilt index under the index lock. Re-read the current
+	// index inside the lock and merge by runId so concurrent writes are not
+	// clobbered — scanned entries win on conflict (Finding 5).
+	withLock(indexLockPath(runsRoot), () => {
+		const currentIndex = readIndex(runsRoot);
+		const merged = new Map<string, RunIndexEntry>();
+		for (const e of currentIndex) merged.set(e.runId, e);
+		for (const e of scanned) merged.set(e.runId, e); // scanned wins
+		writeIndex(runsRoot, Array.from(merged.values()));
+	});
+	return scanned;
 }
 // ---------------------------------------------------------------------------
@@ -422,7 +436,8 @@ function cleanupTerminalRuns(
 	maxKeep: number = DEFAULT_MAX_KEPT_TERMINAL,
 	maxAgeDays: number = DEFAULT_MAX_AGE_DAYS,
 ): void {
-	const now = Date.now();
+	const cleanupStarted = Date.now();
+	const now = cleanupStarted;
 	if (now - lastCleanupAt < CLEANUP_INTERVAL_MS) return;
 	lastCleanupAt = now;
@@ -473,6 +488,8 @@ function cleanupTerminalRuns(
 	// Delete run files + lock files (outside the index lock).
 	for (const e of toRemove) {
 		const filePath = path.join(runsRoot, e.relPath);
+		// Race guard: skip files modified after cleanup started (Finding 2).
+		try { if (fs.statSync(filePath).mtimeMs > cleanupStarted) continue; } catch { continue; }
 		try { fs.unlinkSync(filePath); } catch { /* already gone */ }
 		// Also remove any orphaned lock file.
 		try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
@@ -566,16 +583,19 @@ export function saveFlow(
 	scope: "user" | "project" = "project",
 ): { filePath: string } {
 	const dir = scope === "user" ? userFlowsDir() : (findProjectFlowsDir(cwd, true) ?? path.join(cwd, ".pi", "taskflows"));
+	if (!def.name || def.name.trim().length === 0) throw new Error("Flow name must not be empty");
 	fs.mkdirSync(dir, { recursive: true });
-	const safe = def.name.replace(/[^\w.-]+/g, "_");
+	const safe = safeFlowDirName(def.name);
 	const filePath = path.join(dir, `${safe}.json`);
-	writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`);
+	const fileLockPath = filePath + ".lock";
+	withLock(fileLockPath, () => { writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`); });
-	// One-shot: let the user know we're creating a .pi/ directory on first save.
+	// One-shot: let the user know about .pi/ directory on first save (Finding 8).
 	if (!_piCreationHinted) {
 		_piCreationHinted = true;
+		const piExisted = fs.existsSync(path.join(dir, "..", ".."));
 		console.warn(
-			`[taskflow] Created .pi/taskflows/ for project-scoped flow storage. ` +
+			`[taskflow] ${piExisted ? "Using" : "Created"} .pi/taskflows/ for project-scoped flow storage. ` +
 			`Add .pi/ to .gitignore if desired.`,
 		);
 	}
@@ -587,6 +607,8 @@ export function saveFlow(
 // --- Run state ---
 function runsDir(cwd: string): string {
+	// Safe non-null assertion: create=true guarantees a non-null return because
+	// findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
 	const projDir = findProjectFlowsDir(cwd, true)!;
 	return path.join(projDir, "runs");
 }
@@ -614,6 +636,9 @@ export function newRunId(flowName: string): string {
  * caller's reference.
  */
 export function saveRun(state: RunState, cleanup?: { maxKeep?: number; maxAgeDays?: number }): void {
+	// Reject unsafe runIds before any filesystem access (Finding 1).
+	if (!validateRunId(state.runId)) return;
 	const root = runsDir(state.cwd);
 	const flowDir = flowRunDir(root, state.flowName);
 	fs.mkdirSync(flowDir, { recursive: true });

package/extensions/verify.ts CHANGED Viewed

@@ -253,6 +253,7 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
 		}
 	}
+	const ESTIMATED_COST_PER_PHASE = 0.001; // $0.001 minimum per subagent call
 	if (budget.maxTokens !== undefined && budget.maxTokens > 0 && minTokens > budget.maxTokens) {
 		issues.push({
 			message:
@@ -263,6 +264,16 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
 			category: "budget-overflow",
 		});
 	}
+	if (budget.maxUSD !== undefined && budget.maxUSD > 0 && minTokens * ESTIMATED_COST_PER_PHASE > budget.maxUSD) {
+		issues.push({
+			message:
+				`Budget cap ($${budget.maxUSD}) is below the estimated minimum of ~$${(minTokens * ESTIMATED_COST_PER_PHASE).toFixed(3)} ` +
+				`for ${flow.phases.length} phase(s). The flow will likely be truncated before completion. ` +
+				`Increase maxUSD or reduce the number of phases.`,
+			severity: "warning",
+			category: "budget-overflow",
+		});
+	}
 	return issues;
 }

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "pi-taskflow",
-  "version": "0.0.16",
-  "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
+  "version": "0.0.18",
+  "description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
   "keywords": [
     "pi-package",
     "pi",
@@ -33,12 +33,11 @@
     "README.md",
     "README.zh-CN.md",
     "CHANGELOG.md",
-    "DESIGN.md",
     "LICENSE"
   ],
   "scripts": {
     "typecheck": "tsc --noEmit",
-    "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts",
+    "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts",
     "test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
     "test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
   },