npm - pi-taskflow - Versions diffs - 0.0.24 → 0.0.26 - Mend

pi-taskflow 0.0.24 → 0.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/CHANGELOG.md +110 -0
package/extensions/cache.ts +6 -1
package/extensions/flowir/hash.ts +97 -0
package/extensions/flowir/index.ts +73 -0
package/extensions/flowir/meta.ts +126 -0
package/extensions/flowir/translate.ts +163 -0
package/extensions/index.ts +292 -5
package/extensions/interpolate.ts +17 -0
package/extensions/runtime.ts +417 -49
package/extensions/schema.ts +3 -1
package/extensions/stale.ts +193 -0
package/extensions/store.ts +25 -0
package/package.json +1 -1

package/extensions/runtime.ts CHANGED Viewed

@@ -20,6 +20,8 @@ import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_
 import { verifyTaskflow } from "./verify.ts";
 import { hashInput, newRunId, type PhaseState, type RunState, runsDir } from "./store.ts";
 import { CacheStore, resolveFingerprint } from "./cache.ts";
+import { compileTaskflowToIR } from "./flowir/index.ts";
+import { computeStaleFrontier, declaredReadMapOfDef, readMapOf } from "./stale.ts";
 import { ctxDirFor, drainPendingSpawns, initCtxDir, registerNode, setNodeStatus, type SpawnAssignment } from "./context-store.ts";
 import { allocateWorkspace, isWorkspaceKeyword, type Workspace } from "./workspace.ts";
@@ -55,6 +57,8 @@ export interface RuntimeDeps {
 	loadFlow?: (name: string) => Taskflow | undefined;
 	/** Cross-run memoization store. Omit to construct a default one for `deps.cwd`. */
 	cacheStore?: CacheStore;
+	/** Default cache scope for phases that don't specify one. */
+	cacheScopeDefault?: CacheScope;
 	/** Internal: sub-flow call stack, for recursion detection. */
 	_stack?: string[];
 	/** Internal: pre-resolved Shared Context Tree dir for this run (sub-flows inherit the parent's). */
@@ -74,6 +78,7 @@ function buildInterpolationContext(
 	state: RunState,
 	previousOutput: string | undefined,
 	locals?: Record<string, unknown>,
+	onRead?: (ref: string) => void,
 ): InterpolationContext {
 	const steps: Record<string, { output: string; json?: unknown }> = {};
 	for (const [id, ps] of Object.entries(state.phases)) {
@@ -90,7 +95,7 @@ function buildInterpolationContext(
 			}
 		}
 	}
-	return { args: state.args, steps, previousOutput, locals };
+	return { args: state.args, steps, previousOutput, locals, onRead };
 }
 function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {	const failed = isFailed(r);
@@ -115,6 +120,27 @@ function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJs
 	};
 }
+/** Convert observed read refs (e.g. "steps.scout.output") into a structured
+ *  readSet keyed by upstream phase id, tagging each with the version
+ *  (= inputHash) that was current when read. Only `steps.*` refs are upstream
+ *  phase dependencies; args/item/previous are invocation/loop values.
+ *
+ *  @param refs  Raw refs as collected by the interpolation `onRead` hook; the
+ *               same step may appear many times and is de-duplicated here.
+ *  @param state Run state whose `phases[stepId].inputHash` supplies the version.
+ *  @returns One entry per distinct step id, in first-seen order. `version` is
+ *           undefined when the phase has no recorded state or no inputHash. */
+function readRefsToReads(
+	refs: string[],
+	state: RunState,
+): Array<{ stepId: string; version?: string }> {
+	// The id class is matched greedily and deliberately NOT followed by `\b`:
+	// the class admits `-`, which is not a word character, so a trailing `\b`
+	// forced the engine to backtrack and silently truncate an id ending in `-`
+	// ("steps.a-.output" was recorded as step "a").
+	const stepRef = /^steps\.([A-Za-z0-9_-]+)/;
+	const out: Array<{ stepId: string; version?: string }> = [];
+	const seen = new Set<string>();
+	for (const ref of refs) {
+		// Non-`steps.*` refs yield no match and are skipped; the explicit
+		// undefined check replaces the former `as string` assertion.
+		const stepId = stepRef.exec(ref)?.[1];
+		if (stepId === undefined || seen.has(stepId)) continue;
+		seen.add(stepId);
+		out.push({ stepId, version: state.phases[stepId]?.inputHash });
+	}
+	return out;
+}
 /**
  * Surface unresolved interpolation placeholders (the `missing[]` from
  * `interpolate()`). Without this they are silently left intact in the task —
@@ -551,6 +577,15 @@ async function runSpawnedChildren(
  * and tears it down afterwards. All allocation is fail-open: a failed allocation
  * degrades to the base cwd so a phase never fails to run because of isolation.
  */
+/** Optional per-invocation execution flags (e.g. M5 recompute forces a
+ *  phase to re-run, bypassing the cross-run cache so the result refreshes).
+ *
+ *  Accepted as the trailing optional argument of `executePhase`, which hands
+ *  it unchanged to `executePhaseInner`; there `forceRerun` is copied onto the
+ *  phase's cache context, and `cachedPhase` returns null (a miss) when it is
+ *  set. */
+interface PhaseExecOpts {
+	/** Bypass the cache entirely (within-run prior AND cross-run store) and
+	 *  re-execute. Used by `/tf recompute` on the seeded phase so its new
+	 *  output — and only the downstream whose inputHash actually moves — refreshes. */
+	forceRerun?: boolean;
+}
 async function executePhase(
 	phase: Phase,
 	state: RunState,
@@ -558,10 +593,11 @@ async function executePhase(
 	prior: PhaseState | undefined,
 	emitProgress: () => void,
 	_retryDepth = 0,
+	opts?: PhaseExecOpts,
 ): Promise<PhaseState> {
 	// Non-keyword cwd (or none): no workspace lifecycle — run directly.
 	if (!isWorkspaceKeyword(phase.cwd)) {
-		return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth);
+		return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth, opts);
 	}
 	let ws: Workspace | undefined;
 	try {
@@ -576,7 +612,7 @@ async function executePhase(
 	}
 	const innerDeps: RuntimeDeps = ws ? { ...deps, _cwdOverride: ws.dir } : deps;
 	try {
-		const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth);
+		const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth, opts);
 		if (ws && (ws.kind !== "inherited" || ws.note)) {
 			const tag = ws.kind === "inherited" ? "workspace" : `workspace:${ws.kind}`;
 			const msg = ws.note ? `${tag} — ${ws.note}` : `${tag} at ${ws.dir}`;
@@ -599,6 +635,7 @@ async function executePhaseInner(
 	prior: PhaseState | undefined,
 	emitProgress: () => void,
 	_retryDepth = 0,
+	opts?: PhaseExecOpts,
 ): Promise<PhaseState> {
 	const type = phase.type ?? "agent";
 	const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
@@ -631,13 +668,49 @@ async function executePhaseInner(
 	// Resolve context pre-read files once, before any type branching.
 	// The content is prepended to every task so the subagent never spends
 	// turns on file exploration for files the flow author already knows.
-	const ctx = buildInterpolationContext(state, previousOutput);
+	// M3 observed-readSet: collect every upstream ref this phase resolves, so we
+	// can record what its result ACTUALLY depended on (not just its declared
+	// dependsOn). Shared by every interpolation in this phase (task / when / …).
+	const readRefs: string[] = [];
+	const onRead = (ref: string): void => {
+		readRefs.push(ref);
+	};
+	const ctx = buildInterpolationContext(state, previousOutput, undefined, onRead);
+	// M3 observed-readSet: when conditions are part of the phase's real
+	// dependencies. Evaluate them inside executePhaseInner so every upstream
+	// interpolation is captured by the shared onRead hook, not silently dropped
+	// by a separate out-of-band context.
+	if (phase.when !== undefined) {
+		if (!evaluateCondition(phase.when, ctx)) {
+			return {
+				id: phase.id,
+				status: "skipped",
+				error: `Condition not met: ${phase.when}`,
+				endedAt: Date.now(),
+				usage: emptyUsage(),
+				reads: readRefsToReads(readRefs, state),
+			};
+		}
+	}
 	const preRead = await resolvePhaseContext(phase, ctx);
 	// Resolve this phase's cache policy once. Default scope is "run-only" (the
 	// historical within-run resume behavior). Only "cross-run" phases resolve a
 	// fingerprint and consult the persistent store.
-	const cacheScope: CacheScope = (phase.cache?.scope ?? "run-only") as CacheScope;
+	let cacheScope: CacheScope = (phase.cache?.scope ?? deps.cacheScopeDefault ?? "run-only") as CacheScope;
+	// Defense in depth: gate/approval/loop/tournament must produce a fresh result
+	// each run (schema already rejects explicit cross-run, but the default-scope
+	// path must also be blocked). If flowDefHash failed, cross-run is unsafe
+	// because the key degrades to flowName-only and reopens cross-flow collisions.
+	const CROSS_RUN_BLOCKED_TYPES = new Set(["gate", "approval", "loop", "tournament"]);
+	if (cacheScope === "cross-run" && CROSS_RUN_BLOCKED_TYPES.has(type)) {
+		cacheScope = "run-only";
+	}
+	if (state.flowDefHash === "failed" && cacheScope === "cross-run") {
+		cacheScope = "run-only";
+	}
 	const cc: PhaseCacheCtx = {
 		scope: cacheScope,
 		ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
@@ -647,6 +720,8 @@ async function executePhaseInner(
 		phaseId: phase.id,
 		flowName: state.flowName,
 		runId: state.runId,
+		flowDefHash: state.flowDefHash === "failed" ? undefined : state.flowDefHash,
+		forceRerun: opts?.forceRerun,
 		thinking: phase.thinking,
 		tools: phase.tools,
 		preRead,
@@ -823,7 +898,7 @@ async function executePhaseInner(
 	if (type === "agent" || type === "gate" || type === "reduce") {
 		// Eval gate: zero-token machine checks before the LLM gate.
 		if (type === "gate" && Array.isArray(phase.eval) && phase.eval.length > 0) {
-			const evalCtx = buildInterpolationContext(state, previousOutput);
+			const evalCtx = buildInterpolationContext(state, previousOutput, undefined, onRead);
 			let allPassed = true;
 			for (const check of phase.eval) {
 				let expr = check;
@@ -848,7 +923,7 @@ async function executePhaseInner(
 			}
 			if (allPassed) {
 				// All evals passed — skip the LLM gate, return an auto-pass.
-				const inputHash = cacheKey(cc, [phase.id, "eval-skip"]);
+				const inputHash = cacheKeys(cc, [phase.id, "eval-skip"]).key;
 				const ps: PhaseState = {
 					id: phase.id,
 					status: "done",
@@ -858,6 +933,7 @@ async function executePhaseInner(
 					inputHash,
 					endedAt: Date.now(),
 				};
+				if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
 				recordCache(cc, ps);
 				return ps;
 			}
@@ -867,12 +943,14 @@ async function executePhaseInner(
 		const refWarning = warnUnresolvedRefs(phase.id, interp.missing);
 		const fullTask = preRead + text;
 		const agentName = resolveAgent(phase.agent, deps, state);
-		const inputHash = cacheKey(cc, [phase.id, agentName, phase.model ?? "", fullTask]);
-		const cached = cachedPhase(cc, inputHash);
+		const ck = cacheKeys(cc, [phase.id, agentName, phase.model ?? "", fullTask]);
+		const inputHash = ck.key;
+		const cached = cachedPhase(cc, ck);
 		if (cached) return cached;
 		const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress), nodeIdFor());
 		const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
+		if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
 		if (refWarning) ps.warnings = [...(ps.warnings ?? []), refWarning];
 		if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
@@ -919,14 +997,14 @@ async function executePhaseInner(
 					for (const depId of phase.dependsOn ?? []) {
 						const d = state.def.phases.find((p) => p.id === depId);
 						if (!d) continue;
-						const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1);
+						const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1, undefined);
 						state.phases[depId] = dPs;
 					}
 				}
 				const retryCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
 				const retryText = interpolate(phase.task ?? "", retryCtx).text;
 				const retryTask = preRead + retryText;
-				const retryIH = cacheKey(cc, [phase.id, agentName, phase.model ?? "", retryTask]);
+				const retryIH = cacheKeys(cc, [phase.id, agentName, phase.model ?? "", retryTask]).key;
 				const retryR = await runOne(agentName, retryTask, liveSink(state, phase.id, emitProgress));
 				gatePs = resultToPhaseState(phase.id, retryR, retryIH, parseJson);
 				if (gatePs.status === "done") gatePs.gate = parseGateVerdict(retryR.output);
@@ -948,12 +1026,14 @@ async function executePhaseInner(
 				task: preRead + r.text,
 			};
 		});
-		const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(branches)]);
-		const cached = cachedPhase(cc, inputHash);
+		const ck = cacheKeys(cc, [phase.id, phase.model ?? "", JSON.stringify(branches)]);
+		const inputHash = ck.key;
+		const cached = cachedPhase(cc, ck);
 		if (cached) return cached;
 		const results = await runFanout(branches);
 		const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
+		if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
 		recordCache(cc, ps);
 		return ps;
 	}
@@ -982,18 +1062,20 @@ async function executePhaseInner(
 		}
 		const loopVar = phase.as ?? "item";
 		const tasks = arr.map((item) => {
-			const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
+			const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item }, onRead);
 			return {
 				agent: resolveAgent(phase.agent, deps, state),
 				task: preRead + interpolate(phase.task ?? "", localCtx).text,
 			};
 		});
-		const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(tasks)]);
-		const cached = cachedPhase(cc, inputHash);
+		const ck = cacheKeys(cc, [phase.id, phase.model ?? "", JSON.stringify(tasks)]);
+		const inputHash = ck.key;
+		const cached = cachedPhase(cc, ck);
 		if (cached) return cached;
 		const results = await runFanout(tasks);
 		const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
+		if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
 		if (mapTruncated) {
 			ps.warnings = [...(ps.warnings ?? []), `map fan-out truncated to MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) inside a dynamic sub-flow`];
 			// NB: do NOT set ps.budgetTruncated — that field drives the run-level
@@ -1005,10 +1087,12 @@ async function executePhaseInner(
 	}
 	if (type === "approval") {
-		const ctx = buildInterpolationContext(state, previousOutput);
+		const readRefs: string[] = [];
+		const ctx = buildInterpolationContext(state, previousOutput, undefined, (ref) => readRefs.push(ref));
 		const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
-		const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
-		const cached = cachedPhase(cc, inputHash);
+		const ck = cacheKeys(cc, [phase.id, phase.model ?? "", "approval", message]);
+		const inputHash = ck.key;
+		const cached = cachedPhase(cc, ck);
 		if (cached) return cached;
 		// Non-interactive (headless/CI/detached): auto-REJECT, fail-open, but record it.
@@ -1023,6 +1107,7 @@ async function executePhaseInner(
 				gate: { verdict: "block", reason: "(auto-rejected: no interactive approver available)" },
 				usage: emptyUsage(),
 				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1035,6 +1120,7 @@ async function executePhaseInner(
 			approval: { decision: decision.decision, note },
 			usage: emptyUsage(),
 			inputHash,
+			reads: readRefsToReads(readRefs, state),
 			endedAt: Date.now(),
 		};
 		// A rejection halts the flow via the same mechanism as a blocking gate.
@@ -1045,7 +1131,8 @@ async function executePhaseInner(
 	}
 	if (type === "flow") {
-		const ctx = buildInterpolationContext(state, previousOutput);
+		const readRefs: string[] = [];
+		const ctx = buildInterpolationContext(state, previousOutput, undefined, (ref) => readRefs.push(ref));
 		const hasDef = (phase as { def?: unknown }).def !== undefined;
 		const stack = deps._stack ?? [];
@@ -1066,6 +1153,7 @@ async function executePhaseInner(
 				json: parseJson ? safeParse("") : undefined,
 				usage: emptyUsage(),
 				inputHash: hashInput(phase.id, `flow-def-error:${diag}`),
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 				defError: diag,
 			});
@@ -1101,6 +1189,7 @@ async function executePhaseInner(
 					json: parseJson ? safeParse("") : undefined,
 					usage: emptyUsage(),
 					inputHash: hashInput(phase.id, "flow-def-empty"),
+					reads: readRefsToReads(readRefs, state),
 					endedAt: Date.now(),
 				};
 			}
@@ -1147,8 +1236,9 @@ async function executePhaseInner(
 		// that a different generated plan yields a different key (and an identical plan
 		// hits cache). For saved flows the name is the identity (historical behavior).
 		const flowIdentity = hasDef ? `def:${JSON.stringify(subDef)}` : `flow:${name}`;
-		const inputHash = cacheKey(cc, [phase.id, flowIdentity, preRead, JSON.stringify(subArgs)]);
-		const cached = cachedPhase(cc, inputHash);
+		const ck = cacheKeys(cc, [phase.id, flowIdentity, preRead, JSON.stringify(subArgs)]);
+		const inputHash = ck.key;
+		const cached = cachedPhase(cc, ck);
 		if (cached) return cached;
 		const live = state.phases[phase.id];
@@ -1222,6 +1312,7 @@ async function executePhaseInner(
 			},
 			error: subResult.ok ? undefined : `sub-flow '${name}' ${subResult.state.status}`,
 			inputHash,
+			reads: readRefsToReads(readRefs, state),
 			endedAt: Date.now(),
 		};
 		recordCache(cc, flowPs);
@@ -1231,11 +1322,21 @@ async function executePhaseInner(
 	// loop-until-done: run the body repeatedly until `until` is truthy, the output
 	// converges to a fixed point, or maxIterations is hit (always terminates).
 	if (type === "loop") {
+		const readRefs: string[] = [];
 		const agentName = resolveAgent(phase.agent, deps, state);
 		const rawMax = phase.maxIterations ?? LOOP_DEFAULT_MAX_ITERATIONS;
 		const maxIters = Math.max(1, Math.min(LOOP_HARD_MAX_ITERATIONS, Math.floor(rawMax)));
 		const convergence = phase.convergence ?? true;
+		// Canonical first-iteration body for the cache key. It must fold in the
+		// interpolated task/upstream refs so that a changed upstream changes the
+		// key and recompute no longer silently reuses a stale loop (critic finding).
+		const firstBodyCtx = buildInterpolationContext(state, previousOutput, {
+			loop: { iteration: 1, lastOutput: "", maxIterations: maxIters },
+		}, (ref) => readRefs.push(ref));
+		const firstBody = preRead + interpolate(phase.task ?? "", firstBodyCtx).text;
+		const inputHash = hashInput(phase.id, "loop", phase.until ?? "", firstBody, String(maxIters));
 		const usages: UsageStats[] = [];
 		const loopWarnings: string[] = [];
 		let lastOutput = "";
@@ -1253,7 +1354,7 @@ async function executePhaseInner(
 			// The body sees its iteration number and the prior iteration's output.
 			const bodyCtx = buildInterpolationContext(state, previousOutput, {
 				loop: { iteration: i, lastOutput, maxIterations: maxIters },
-			});
+			}, (ref) => readRefs.push(ref));
 			const body = preRead + interpolate(phase.task ?? "", bodyCtx).text;
 			const r = await runOne(agentName, body, liveSink(state, phase.id, emitProgress));
 			usages.push(r.usage);
@@ -1270,7 +1371,7 @@ async function executePhaseInner(
 			// Loop locals ({loop.iteration} etc.) are available to the condition too.
 			const untilCtx = buildInterpolationContext(state, previousOutput, {
 				loop: { iteration: i, lastOutput, maxIterations: maxIters },
-			});
+			}, (ref) => readRefs.push(ref));
 			untilCtx.steps[phase.id] = { output: lastOutput, json: safeParse(lastOutput) };
 			const { value: done, error: condErr } = tryEvaluateCondition(phase.until ?? "", untilCtx);
 			// A malformed condition must not spin forever: stop and surface a warning
@@ -1301,7 +1402,8 @@ async function executePhaseInner(
 				error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
 				loop: { iterations, stop },
 				warnings: loopWarnings.length ? loopWarnings : undefined,
-				inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1313,7 +1415,8 @@ async function executePhaseInner(
 			usage: aggUsage,
 			loop: { iterations, stop },
 			warnings: loopWarnings.length ? loopWarnings : undefined,
-			inputHash: hashInput(phase.id, "loop", phase.until ?? "", String(iterations)),
+			inputHash,
+			reads: readRefsToReads(readRefs, state),
 			endedAt: Date.now(),
 		};
 	}
@@ -1336,6 +1439,20 @@ async function executePhaseInner(
 			competitors = Array.from({ length: n }, () => ({ agent: resolveAgent(phase.agent, deps, state), task: body }));
 		}
+		// The inputHash must fold in the resolved competitors (which embed the
+		// interpolated task/upstream refs) and the judge rubric, otherwise a changed
+		// upstream produces the same key and recompute silently reuses a stale
+		// tournament (critic finding: unsound for cross-run/recompute).
+		const rubric = interpolate(phase.judge ?? "", ctx).text.trim();
+		const inputHash = hashInput(
+			phase.id,
+			"tournament",
+			mode,
+			String(competitors.length),
+			JSON.stringify(competitors.map((c) => ({ agent: c.agent, task: c.task }))),
+			rubric,
+		);
 		const results = await runFanout(competitors);
 		const ran = results.filter((r) => r.stopReason !== "budget-skipped");
 		const ok = ran.filter((r) => !isFailed(r));
@@ -1355,7 +1472,8 @@ async function executePhaseInner(
 				error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
 				budgetTruncated: budgetSkipCount > 0 || undefined,
 				tournament: { variants: competitors.length, winner: 0, mode },
-				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1370,7 +1488,8 @@ async function executePhaseInner(
 				model: ok[0].model,
 				budgetTruncated: budgetSkipCount > 0 || undefined,
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
-				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1387,7 +1506,8 @@ async function executePhaseInner(
 				budgetTruncated: budgetSkipCount > 0 || undefined,
 				warnings: ["judge skipped: run aborted or budget exceeded"],
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
-				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1396,14 +1516,14 @@ async function executePhaseInner(
 		const labelled = ran
 			.map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
 			.join("\n\n---\n\n");
-		const rubric =
-			interpolate(phase.judge ?? "", ctx).text.trim() ||
+		const finalRubric =
+			rubric ||
 			"You are judging competing answers to the same task. Pick the single best variant on correctness, completeness, and clarity.";
 		const directive =
 			mode === "best"
 				? `End your reply with a line exactly: WINNER: <number> (1–${ran.length}), choosing the strongest eligible variant.`
 				: `Synthesize the strongest possible answer by combining the best parts of the eligible variants. Then end with a line: WINNER: <number> indicating which variant contributed most.`;
-		const judgeTask = `${rubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
+		const judgeTask = `${finalRubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
 		const judgeAgent = resolveAgent(phase.judgeAgent ?? phase.agent, deps, state);
 		const judgeRes = await runOne(judgeAgent, judgeTask, liveSink(state, phase.id, emitProgress));
 		const judgeUsage = aggregateUsage([variantUsage, judgeRes.usage]);
@@ -1421,7 +1541,8 @@ async function executePhaseInner(
 				budgetTruncated: budgetSkipCount > 0 || undefined,
 				warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
-				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1444,7 +1565,8 @@ async function executePhaseInner(
 			budgetTruncated: budgetSkipCount > 0 || undefined,
 			warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
 			tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
-			inputHash: hashInput(phase.id, "tournament", String(competitors.length), mode),
+			inputHash,
+			reads: readRefsToReads(readRefs, state),
 			endedAt: Date.now(),
 		};
 	}
@@ -1490,7 +1612,7 @@ function lastCompletedOutput(state: RunState, phase: Phase): string | undefined
  * scope, optional TTL, and a pre-resolved fingerprint string so each phase-type
  * branch can fold it into its inputHash and consult the cross-run store uniformly.
  */
-interface PhaseCacheCtx {
+export interface PhaseCacheCtx {
 	scope: CacheScope;
 	ttlMs?: number;
 	fingerprint: string;
@@ -1509,22 +1631,62 @@ interface PhaseCacheCtx {
 	 *  whether a given branch happens to fold preRead into its task string
 	 *  (previously this was only incidentally true via `fullTask`). */
 	preRead?: string;
+	/** Content fingerprint of the desugared flow definition — folded into the
+	 *  key so two structurally-different flows that share a name can never
+	 *  collide, and a changed flow never serves a stale cross-run hit. */
+	flowDefHash?: string | "failed";
+	/** Force this phase to re-execute, ignoring the within-run prior AND the
+	 *  cross-run store (M5 recompute seed). Downstream phases are NOT forced —
+	 *  they re-evaluate naturally: if the seed's new output changed their
+	 *  inputHash they miss and re-run, otherwise they hit (early cutoff). */
+	forceRerun?: boolean;
 }
 /** Fold the phase fingerprint into the base hash parts to form the final cache key. */
-function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
+/** A computed cache identity: the new (versioned) key plus the read-only
+ *  fallback keys used to honor entries written by older releases. The `key`
+ *  is what we WRITE under and what `PhaseState.inputHash` carries; the
+ *  `legacyKey`/`bareKey` are consulted READ-ONLY on a miss so an upgrade
+ *  never produces a miss-storm. See docs/internal/cache-migration.md.
+ *
+ *  All three values are `hashInput` results built by `cacheKeys` over the same
+ *  parts; they differ only in the flow-definition line. `cachedPhase` probes
+ *  the cross-run store in the order `key`, `bareKey`, `legacyKey`. */
+export interface CacheKeys {
+	/** Current key: folds `v2:flowdef:<hash>` (the overstory content fingerprint). */
+	key: string;
+	/** Pre-flowDefHash-era key: the flowdef line OMITTED entirely. Read-only. */
+	legacyKey: string;
+	/** Bare (unversioned) `flowdef:` key — written by pre-H1 code that folded
+	 *  the hash without a `v2:` prefix. Read-only. Removed in v0.1.0. */
+	bareKey: string;
+}
+/** Fold the phase fingerprint into the base hash parts to form the cache keys.
+ *
+ *  Three keys are produced for backward compatibility (see
+ *  docs/internal/cache-migration.md):
+ *    - `key`      : `v2:flowdef:<hash>` — the current write key.
+ *    - `legacyKey`: the flowdef line omitted — pre-flowDefHash entries.
+ *    - `bareKey`  : bare `flowdef:<hash>` (unversioned) — pre-H1 entries that
+ *      folded the hash without the `v2:` prefix.
+ *  `cachedPhase` consults all three READ-ONLY on a miss; `recordCache` writes
+ *  only `key`. This means an upgrade never produces a miss-storm: existing
+ *  entries (whichever shape) still hit, and new writes converge on `key`.
+ *
+ *  The world-state fingerprint (`cc.fingerprint`) is appended as the last hash
+ *  part only when it is non-empty. An undefined `cc.flowDefHash` is folded as
+ *  the empty string, so the `v2:flowdef:` / `flowdef:` line is still present
+ *  in the inputs of `key` and `bareKey`.
+ *
+ *  @param cc        Phase cache context supplying flowName, flowDefHash,
+ *                   thinking, tools, preRead and fingerprint.
+ *  @param baseParts Branch-specific identity parts (e.g. phase id, agent,
+ *                   model, resolved task text).
+ *  @returns The write key plus the two read-only fallback keys. */
+export function cacheKeys(cc: PhaseCacheCtx, baseParts: string[]): CacheKeys {
 	// Fold the full cache identity into the hash: flow name (prevents collisions
 	// across different flows that share a phase.id + task + model), the per-phase
 	// thinking/tools config (changing either changes the subagent's output), the
 	// resolved context pre-read content, and the world-state fingerprint.
-	const parts = [
-		`flow:${cc.flowName}`,
+	const tail = [
 		...baseParts,
 		`think:${cc.thinking ?? ""}`,
 		`tools:${JSON.stringify(cc.tools ?? [])}`,
 		`ctx:${cc.preRead ?? ""}`,
 	];
-	return cc.fingerprint ? hashInput(...parts, cc.fingerprint) : hashInput(...parts);
+	const fold = (parts: string[]): string =>
+		cc.fingerprint ? hashInput(...parts, cc.fingerprint) : hashInput(...parts);
+	return {
+		key: fold([`flow:${cc.flowName}`, `v2:flowdef:${cc.flowDefHash ?? ""}`, ...tail]),
+		legacyKey: fold([`flow:${cc.flowName}`, ...tail]),
+		bareKey: fold([`flow:${cc.flowName}`, `flowdef:${cc.flowDefHash ?? ""}`, ...tail]),
+	};
 }
 /**
@@ -1533,23 +1695,39 @@ function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
  *   - "run-only": within-run resume only (historical behavior).
  *   - "cross-run": within-run first, then the persistent cross-run store.
  * On a cross-run hit, usage is zeroed and `cacheHit` records the source.
+ *
+ * The cross-run read is THREE-TIER and READ-ONLY for fallback keys: it tries
+ * `keys.key` (current `v2:flowdef:` shape) first, then `keys.bareKey` (pre-H1
+ * bare `flowdef:`), then `keys.legacyKey` (pre-flowDefHash, no flowdef line).
+ * A hit on ANY tier is restored as a cache hit; we do NOT write-through (no
+ * re-store under the new key) so the cache size stays stable and the legacy
+ * entry ages out naturally. See docs/internal/cache-migration.md.
  */
-function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
+function cachedPhase(cc: PhaseCacheCtx, keys: CacheKeys): PhaseState | null {
 	if (cc.scope === "off") return null;
+	if (cc.forceRerun) return null;
 	// 1. within-run resume (fastest; always allowed unless scope is off)
-	if (cc.prior && cc.prior.status === "done" && cc.prior.inputHash === inputHash) {
+	if (cc.prior && cc.prior.status === "done" && cc.prior.inputHash === keys.key) {
 		return { ...cc.prior, status: "done" };
 	}
-	// 2. cross-run memoization (opt-in)
+	// 2. cross-run memoization (opt-in) — three-tier read-only fallback.
 	if (cc.scope === "cross-run") {
-		const e = cc.store.get(inputHash, cc.ttlMs);
-		if (e) {
+		for (const k of [keys.key, keys.bareKey, keys.legacyKey]) {
+			const e = cc.store.get(k, cc.ttlMs);
+			if (!e) continue;
+			// If we stored the full PhaseState, restore it (preserving gate,
+			// approval, reads, loop/tournament metadata, warnings) and just mark
+			// the cache hit + zero usage. Fallback to the legacy trimmed surface
+			// for entries written before this change.
+			if (e.state) {
+				return { ...e.state, inputHash: keys.key, usage: emptyUsage(), cacheHit: "cross-run", endedAt: Date.now() };
+			}
 			return {
 				id: cc.phaseId,
 				status: "done",
-				inputHash,
+				inputHash: keys.key,
 				output: e.output,
 				json: e.json,
 				model: e.model,
@@ -1573,6 +1751,7 @@ function recordCache(cc: PhaseCacheCtx, ps: PhaseState): void {
 		output: ps.output,
 		json: ps.json,
 		model: ps.model,
+		state: ps,
 		flowName: cc.flowName,
 		phaseId: cc.phaseId,
 		runId: cc.runId,
@@ -1701,6 +1880,167 @@ function safeProgress(deps: RuntimeDeps, state: RunState): void {
 /**
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
  */
+/**
+ * Summary of a recompute pass: which phases were re-executed (or, for a dry
+ * run, would be) and which were carried over.
+ *
+ * `cutoff` is the payoff: frontier phases whose inputHash did not move, so
+ * the cached result stood in for a re-run (early cutoff). That is what makes
+ * a recompute cheaper than re-running the whole flow.
+ */
+export interface RecomputeReport {
+	/** Whether this report describes a dry run (nothing was executed). */
+	readonly dryRun: boolean;
+	/** Whether the recompute stopped early because it was aborted. */
+	readonly aborted: boolean;
+	/** The phase ids the recompute was seeded with. */
+	readonly seeds: ReadonlyArray<string>;
+	/** Phases re-executed (dry run: that would be), or whose result moved. */
+	readonly rerun: ReadonlyArray<string>;
+	/** Phases outside the frontier: untouched and reused verbatim. */
+	readonly reused: ReadonlyArray<string>;
+	/**
+	 * Frontier phases whose inputHash did NOT move, so the cached result was
+	 * reused without re-execution (early cutoff). Always empty for a dry run,
+	 * where this cannot be known.
+	 */
+	readonly cutoff: ReadonlyArray<string>;
+}
+/**
+ * Report whether a run's flow definition has dependencies that the readSet
+ * cannot observe: Shared Context Tree usage, sub-flow phases, `context:` file
+ * pre-reads, or interpolation placeholders that do not go through `steps.*`
+ * (previous, args, item). A dryRun:false recompute of such a flow risks
+ * silently reusing stale upstream state.
+ *
+ * @param state Run whose `def.phases` are scanned.
+ * @returns true as soon as any phase shows one of the untracked dependency kinds.
+ */
+function hasUnobservedDependencies(state: RunState): boolean {
+	// Placeholders that bypass `steps.*` and so never show up in the readSet.
+	const untrackedRef = /\{(previous\.output|args\.|item\b|item\.)/;
+	const mentionsUntracked = (text: string): boolean => untrackedRef.test(text);
+	return state.def.phases.some((phase) => {
+		// Structural signals first: shared context, sub-flows, file pre-reads.
+		if (phase.shareContext === true || state.def.contextSharing === true) return true;
+		if (phase.type === "flow") return true;
+		if (phase.context && phase.context.length > 0) return true;
+		// Then every interpolated text field of the phase.
+		if (mentionsUntracked(phase.task ?? "")) return true;
+		if (phase.when && mentionsUntracked(phase.when)) return true;
+		if (phase.until && mentionsUntracked(phase.until)) return true;
+		return Array.isArray(phase.eval) && phase.eval.some(mentionsUntracked);
+	});
+}
+/** Recompute a completed run minimally: force-rerun the `seeds`, then walk
+ *  their stale frontier in topological order. The cache provides early cutoff
+ *  for free — a downstream whose inputHash didn't move (because the seed's new
+ *  output happened to equal the old) hits its prior and is reused rather than
+ *  re-executed. A dry run computes the worst-case frontier without spending a
+ *  token.
+ *
+ *  @param state Completed run to replay. It is cloned, never mutated in place.
+ *  @param deps  Runtime dependencies handed to each phase execution;
+ *               `deps.signal` is checked before every phase to honour aborts.
+ *  @param seeds Phase ids to force-rerun.
+ *  @param opts  `dryRun` defaults to true, including when an options object
+ *               is passed without that field; only an explicit
+ *               `dryRun: false` performs a real recompute.
+ *  @returns The fresh (cloned) state plus a report. When `report.aborted` is
+ *           set the state is partial and must not be persisted.
+ *  @throws Error when `dryRun: false` is requested for a flow with unobserved
+ *          dependencies; callers should surface that as a user-facing error. */
+export async function recomputeTaskflow(
+	state: RunState,
+	deps: RuntimeDeps,
+	seeds: readonly string[],
+	// Fail-safe default: a real recompute overwrites the run and spends tokens.
+	// The tool/command wrappers can explicitly opt into dryRun:false.
+	opts: { dryRun?: boolean } = { dryRun: true },
+): Promise<{ report: RecomputeReport; state: RunState }> {
+	// The parameter default only applies when `opts` is omitted entirely. A
+	// caller passing `{}` (or `{ dryRun: undefined }`) must still get the
+	// fail-safe dry run, so resolve the flag explicitly.
+	const dryRun = opts.dryRun ?? true;
+	// Never mutate the caller's RunState in-place. Recompute is a speculative
+	// replay; only the caller decides whether to persist the new state.
+	const newState = structuredClone(state) as RunState;
+	const reads = readMapOf(newState.phases);
+	// M2: derive the declared read-map fresh from the def so the frontier uses
+	// the UNION (observed ∪ declared). Derived here (not read from the persisted
+	// `RunState.declaredDeps`) so old runs — pre-H1, no persisted declaredDeps —
+	// also get union semantics. The persisted field is audit/provenance only.
+	const declared = declaredReadMapOfDef(newState.def);
+	const frontier = computeStaleFrontier(reads, seeds, declared);
+	const allIds = Object.keys(newState.phases);
+	// Phases outside the frontier are reused verbatim in both modes.
+	const reused = allIds.filter((id) => !frontier.has(id));
+	if (dryRun) {
+		return {
+			report: {
+				dryRun: true,
+				aborted: false,
+				seeds,
+				rerun: [...frontier],
+				reused,
+				cutoff: [],
+			},
+			state: newState,
+		};
+	}
+	// Guard: observed readSet only tracks `{steps.X.*}` interpolation refs. It is
+	// blind to Shared Context Tree (ctx_read/ctx_write), sub-flow internals,
+	// context: file pre-reads, {previous.output}, and loop locals ({args.*},
+	// {item.*}). Recomputing such a run with dryRun:false could silently skip
+	// phases whose deps changed outside the observed frontier and then persist a
+	// corrupted run over the original.
+	if (hasUnobservedDependencies(newState)) {
+		throw new Error(
+			"recompute dryRun:false is unsafe for this run: it contains dependencies " +
+				"(shareContext, flow/ctx_spawn, context: files, {previous.output}, {args.*}, or {item.*}) " +
+				"that are not tracked by the observed readSet. Use dryRun:true to inspect " +
+				"the frontier, or change the upstream phase and re-run the whole flow.",
+		);
+	}
+	// Real recompute: topological order over the frontier so a downstream always
+	// sees its (already-refreshed) upstreams when it re-evaluates its cache key.
+	// The order must respect declared dependsOn, observed reads, AND declared
+	// reads (M2 union): pi-taskflow allows interpolation refs without an
+	// explicit dependsOn edge, and a declared-but-unobserved edge (e.g. a `when`
+	// ref that never fired) must still order the reader after its upstream so
+	// the reader evaluates its cache key against the refreshed upstream (no
+	// false early-cutoff).
+	const seedSet = new Set(seeds);
+	function depsFor(phaseId: string): string[] {
+		// A phase reading its own prior output (e.g. a loop `until` checking
+		// `{steps.thisId.output}`) must not create a self-edge in the scheduling
+		// graph — otherwise topoLayers would deadlock on the self-loop.
+		const observed = (newState.phases[phaseId]?.reads ?? [])
+			.map((r) => r.stepId)
+			.filter((id) => id !== phaseId);
+		const declared_ = (declared.get(phaseId) ?? []).filter((id) => id !== phaseId);
+		return [...new Set([...observed, ...declared_])];
+	}
+	const augmentedPhases = newState.def.phases.map((p) => ({
+		...p,
+		dependsOn: [...new Set([...(p.dependsOn ?? []), ...depsFor(p.id)])],
+	}));
+	const order = topoLayers(augmentedPhases)
+		.flat()
+		.map((p) => p.id)
+		.filter((id) => frontier.has(id));
+	// Index the ORIGINAL (un-augmented) phase defs once instead of scanning the
+	// list for every frontier entry. First definition wins, matching `find`.
+	const phaseById = new Map<string, (typeof newState.def.phases)[number]>();
+	for (const p of newState.def.phases) {
+		if (!phaseById.has(p.id)) phaseById.set(p.id, p);
+	}
+	const rerun: string[] = [];
+	const cutoff: string[] = [];
+	const noop = () => {};
+	let aborted = false;
+	for (const id of order) {
+		// A partial recompute must NOT be persisted over the original run — the
+		// caller discards `state` when `aborted` is set.
+		if (deps.signal?.aborted) {
+			aborted = true;
+			break;
+		}
+		const phase = phaseById.get(id);
+		if (!phase) continue;
+		const before = newState.phases[id]?.inputHash;
+		const execOpts = seedSet.has(id) ? { forceRerun: true } : undefined;
+		try {
+			const ps = await executePhase(phase, newState, deps, newState.phases[id], noop, 0, execOpts);
+			newState.phases[id] = ps;
+			// A phase counts as "rerun" if it was a forced seed OR its result moved;
+			// otherwise it hit its cache (inputHash unchanged) → early cutoff.
+			if (seedSet.has(id) || ps.inputHash !== before) rerun.push(id);
+			else cutoff.push(id);
+		} catch (e) {
+			// A failing recompute phase is recorded as rerun (it was attempted) and
+			// keeps its prior PhaseState. Surface the error instead of dropping it
+			// so the failure is at least visible in the logs.
+			console.warn(
+				`[taskflow] recompute of phase '${id}' failed: ${e instanceof Error ? e.message : String(e)}`,
+			);
+			rerun.push(id);
+		}
+	}
+	return {
+		report: {
+			dryRun: false,
+			aborted,
+			seeds,
+			rerun,
+			reused,
+			cutoff,
+		},
+		state: newState,
+	};
+}
 export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
 	const def: Taskflow = state.def;
 	try {
@@ -1726,6 +2066,38 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
 async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
 	const def: Taskflow = state.def;
 	const layers = topoLayers(def.phases);
+	// Content-fingerprint the desugared definition ONCE per run and fold it into
+	// every phase's cache key (overstory hash algorithm; see ./flowir/hash.ts).
+	// Reused by every phase, persisted on the RunState for audit/resume.
+	// Never throws into the run — a hash failure is caught and warned about, and
+	// the field is set to the "failed" sentinel rather than left unset, so the
+	// cache key does not silently degrade to the legacy flowName-only shape.
+	//
+	// Routed through the FlowIR compile seam (M1): `compileTaskflowToIR`
+	// produces the content-addressed IR whose `hash` (== flowDefHash in the
+	// stub) folds into the cache key, and whose `meta.declaredDeps` (M2 declared
+	// plane) is persisted for audit/provenance. The declared plane is also
+	// derived fresh from `def` in recompute (so old runs get union semantics
+	// too); the persisted copy is for display.
+	if (state.flowDefHash === undefined) {
+		try {
+			const ir = await compileTaskflowToIR(def);
+			state.flowDefHash = ir.hash ?? "failed";
+			state.declaredDeps = ir.meta.declaredDeps;
+			if (ir.errors.length) {
+				console.warn(
+					`[taskflow] IR compile errors for '${def.name}': ${ir.errors.map((e) => e.message).join("; ")}`,
+				);
+			}
+		} catch (e) {
+			// Fail-safe: warn loudly rather than silently degrading to the legacy
+			// flowName-only key, which would reopen the cross-flow collision hole.
+			console.warn(
+				`[taskflow] flowDefHash failed for '${def.name}': ${e instanceof Error ? e.message : String(e)}. ` +
+				"Cross-run cache is disabled for this run to prevent stale cross-flow hits.",
+			);
+			state.flowDefHash = "failed";
+		}
+	}
 	state.status = "running";
 	safeEmit(deps, state);
@@ -1770,10 +2142,6 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 			else if (budgetBlocked) skipReason = `Budget exceeded${budgetReason ? `: ${budgetReason}` : ""}`;
 			else if (!depsSatisfied)
 				skipReason = join === "any" ? "All dependencies failed or were skipped" : "Upstream dependency not satisfied";
-			else if (phase.when !== undefined) {
-				const condCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
-				if (!evaluateCondition(phase.when, condCtx)) skipReason = `Condition not met: ${phase.when}`;
-			}
 			if (skipReason) {
 				if (skipReason.startsWith("Budget exceeded")) budgetBlocked = true;