npm - pi-taskflow - Versions diffs - 0.0.24 → 0.0.25 - Mend

pi-taskflow 0.0.24 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/extensions/cache.ts +6 -1
package/extensions/flowir/hash.ts +97 -0
package/extensions/index.ts +188 -5
package/extensions/interpolate.ts +17 -0
package/extensions/runtime.ts +326 -27
package/extensions/stale.ts +137 -0
package/extensions/store.ts +14 -0
package/package.json +1 -1

package/extensions/cache.ts CHANGED Viewed

@@ -17,7 +17,7 @@ import { execFileSync } from "node:child_process";
 import * as crypto from "node:crypto";
 import * as fs from "node:fs";
 import * as path from "node:path";
-import { cacheDir, withLock, writeFileAtomic } from "./store.ts";
+import { cacheDir, withLock, writeFileAtomic, type PhaseState } from "./store.ts";
 // ---------------------------------------------------------------------------
 // Fingerprint resolution
@@ -144,6 +144,11 @@ export interface CacheEntry {
 	output?: string;
 	json?: unknown;
 	model?: string;
+	/** Full PhaseState payload preserved so cross-run reuse is semantically
+	 *  equivalent to within-run resume. Storing only output/json would drop
+	 *  `gate`, `approval`, `reads`, `loop`, `tournament`, `warnings`, etc.,
+	 *  breaking recompute soundness and gate-block detection. */
+	state?: PhaseState;
 	/** Provenance for audit / cleanup. */
 	flowName?: string;
 	phaseId?: string;

package/extensions/flowir/hash.ts ADDED Viewed

@@ -0,0 +1,97 @@
+/**
+ * Content-addressed hashing for flow definitions.
+ *
+ * The canonical-JSON + SHA-256-truncation algorithm here is **vendored from
+ * overstory `packages/core/src/ir/hash.ts`** (pinned commit) so that
+ * pi-taskflow and overstory share one byte-identical hashing contract. This is
+ * the `M1` slice of the overstory-convergence roadmap: we are *not* compiling
+ * to overstory FlowIR yet (the IR compiler expects an explicit inject/emits
+ * model pi-taskflow doesn't have), but we share the **hash algorithm** now —
+ * the cheapest, lowest-risk piece of the contract — and put it to immediate
+ * work folding the flow *definition* into the cross-run cache key (M2).
+ *
+ * Why this matters: previously the cache key folded only the flow **name**
+ * (`flow:${flowName}`), so two structurally-different flows that happened to
+ * share a name + phase id + task could collide in the cross-run cache, and a
+ * flow that changed structure (but not name) could serve a stale hit. Folding
+ * `flowDefHash` (a content fingerprint of the desugared definition) closes
+ * that hole and is the foundation of "identical re-run is free ($0.00)".
+ *
+ * Pure module: no IO. Uses Web Crypto (`globalThis.crypto.subtle`) — therefore
+ * async — exactly like overstory's `hashIR`, so the contract is identical.
+ *
+ * @see docs/internal/overstory-convergence-roadmap.md §3 (M1, "cut B")
+ * @see docs/internal/rfc-flowir-compilation.md
+ */
+import type { Taskflow } from "../schema.ts";
+// ---------------------------------------------------------------------------
+// Canonical JSON (vendored from overstory ir/hash.ts — byte-identical)
+// ---------------------------------------------------------------------------
+/**
+ * Deterministic JSON: recursively key-sorted (UTF-16 code units), no
+ * whitespace, `undefined` values dropped. Arrays keep their order (the
+ * desugared Taskflow is already in a canonical shape). Byte-identical to
+ * overstory's `canonicalJson` — do not diverge without bumping the contract
+ * and updating the parity test.
+ *
+ * Serialization rules, in the order they are checked:
+ * - `null`, numbers, booleans and strings are delegated to `JSON.stringify`
+ *   (so non-finite numbers come out as `null`).
+ * - Arrays keep their element order; an `undefined` element is emitted as
+ *   `null` rather than dropped, so indices stay stable.
+ * - Any other object is serialized from its own enumerable string keys,
+ *   skipping keys whose value is `undefined` and sorting the rest with the
+ *   default `Array.prototype.sort` order. `toJSON` methods are not consulted.
+ * - Everything else (`undefined`, functions, symbols, bigints) becomes the
+ *   literal `null`.
+ *
+ * @param value - Value to serialize.
+ * @returns The canonical JSON text for `value`.
+ */
+export function canonicalJson(value: unknown): string {
+	if (value === null || typeof value === "number" || typeof value === "boolean") {
+		return JSON.stringify(value);
+	}
+	if (typeof value === "string") {
+		return JSON.stringify(value);
+	}
+	if (Array.isArray(value)) {
+		return `[${value.map((item) => canonicalJson(item === undefined ? null : item)).join(",")}]`;
+	}
+	if (typeof value === "object") {
+		const record = value as Record<string, unknown>;
+		const keys = Object.keys(record)
+			.filter((key) => record[key] !== undefined)
+			.sort();
+		const body = keys.map((key) => `${JSON.stringify(key)}:${canonicalJson(record[key])}`);
+		return `{${body.join(",")}}`;
+	}
+	// undefined / function / symbol at the top level — not representable.
+	return "null";
+}
+// ---------------------------------------------------------------------------
+// Hashing (vendored from overstory ir/hash.ts — byte-identical)
+// ---------------------------------------------------------------------------
+/** SHA-256 of the canonical serialization, first 16 bytes, lowercase hex.
+ *  Same shape as overstory's `hashCanonical` / RFC-001 content hashes.
+ *
+ *  The input string is UTF-8 encoded with `TextEncoder` before hashing, and
+ *  each of the 16 retained bytes is rendered as two zero-padded hex digits,
+ *  so the result is always 32 characters long.
+ *
+ *  @param canonical - Text to hash (typically the output of `canonicalJson`).
+ *  @returns A promise for the 32-character lowercase hex digest prefix. */
+export async function hashCanonical(canonical: string): Promise<string> {
+	const bytes = new TextEncoder().encode(canonical);
+	const digest = await globalThis.crypto.subtle.digest("SHA-256", bytes);
+	const view = new Uint8Array(digest).slice(0, 16);
+	let hex = "";
+	for (const byte of view) {
+		hex += byte.toString(16).padStart(2, "0");
+	}
+	return hex;
+}
+// ---------------------------------------------------------------------------
+// Flow-definition fingerprint
+// ---------------------------------------------------------------------------
+/**
+ * Content fingerprint of a desugared `Taskflow` definition.
+ *
+ * Hashes the **definition** (structure + task text + declared deps), NOT the
+ * runtime `args` values — args vary per invocation and are already folded into
+ * each phase's `inputHash` via the interpolated task. `flowDefHash` answers a
+ * different question: "did the flow *itself* change?" Two flows are
+ * definitionally identical ⟺ this hash matches (key order / whitespace /
+ * optional-field presence do not affect it).
+ *
+ * Deterministic and async (Web Crypto), matching overstory's `hashIR` shape.
+ *
+ * Implemented as `hashCanonical(canonicalJson(def))`; the whole `def` object
+ * passed in is serialized, so callers decide what counts as "the definition".
+ *
+ * @param def - Flow definition to fingerprint.
+ * @returns A promise for the hex fingerprint produced by `hashCanonical`.
+ */
+export async function flowDefHash(def: Taskflow): Promise<string> {
+	return hashCanonical(canonicalJson(def));
+}

package/extensions/index.ts CHANGED Viewed

@@ -28,7 +28,8 @@ import { type AgentScope, discoverAgents, readSubagentSettings, shouldSyncBuilti
 import { renderRunResult, summarizeRun } from "./render.ts";
 import { RunHistoryComponent, type RunHistoryResult } from "./runs-view.ts";
 import { ApprovalViewComponent, type ApprovalChoice } from "./approval-view.ts";
-import { executeTaskflow, type ApprovalDecision, type ApprovalRequest, type RuntimeResult } from "./runtime.ts";
+import { executeTaskflow, recomputeTaskflow, type ApprovalDecision, type ApprovalRequest, type RecomputeReport, type RuntimeDeps, type RuntimeResult } from "./runtime.ts";
+import { type UsageStats } from "./usage.ts";
 import { finalPhase, resolveArgs, type Taskflow, validateTaskflow, desugar, isShorthand } from "./schema.ts";
 import {
 	getFlow,
@@ -44,6 +45,7 @@ import {
 } from "./store.ts";
 import { CacheStore } from "./cache.ts";
 import { safeParse } from "./interpolate.ts";
+import { formatWhyStale, readMapOf } from "./stale.ts";
 import {
 	isValidKey,
 	queueSpawn,
@@ -60,6 +62,7 @@ interface TaskflowDetails {
 	finalOutput?: string;
 	action: string;
 	message?: string;
+	cacheReport?: string;
 }
 /** pi reads `isError` at runtime to mark tool failures; it is not in the public type. */
@@ -83,8 +86,8 @@ const ShorthandStep = Type.Object(
 );
 const TaskflowParams = Type.Object({
-	action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "compile", "cache-clear"] as const, {
-		description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, verify the DAG, compile the DAG to a Mermaid diagram + verification report, or clear the cross-run memoization cache",
+	action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "compile", "provenance", "why-stale", "recompute", "cache-clear"] as const, {
+		description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, verify the DAG, compile the DAG to a Mermaid diagram + verification report, show observed readSet provenance, explain why a run is stale, minimally recompute a stale run, or clear the cross-run memoization cache",
 		default: "run",
 	}),
 	name: Type.Optional(Type.String({ description: "Name of a saved flow (for run/save without inline define)" })),
@@ -123,6 +126,8 @@ const TaskflowParams = Type.Object({
 	),
 	args: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Invocation arguments for the flow" })),
 	runId: Type.Optional(Type.String({ description: "Run id to resume (for action=resume)" })),
+	phaseId: Type.Optional(Type.String({ description: "Phase id — the assumed-changed seed for action=why-stale, or the phase to re-run for action=recompute" })),
+	dryRun: Type.Optional(Type.Boolean({ description: "For action=recompute: compute the stale frontier without re-executing anything (no tokens spent). Defaults to true (safe); set false to actually re-run the seed + stale frontier and persist the updated run" })),
 	scope: Type.Optional(
 		StringEnum(["user", "project"] as const, { description: "Where to save (action=save)", default: "project" }),
 	),
@@ -146,6 +151,45 @@ const TaskflowParams = Type.Object({
 	),
 });
+/**
+ * Renders the observed readSets recorded on a run as a plain-text report.
+ *
+ * The report starts with a header line (run id, flow name, run status) and a
+ * blank line. If no phase carries a non-empty `reads` array, a single
+ * explanatory note follows and nothing else is listed. Otherwise every phase
+ * in `run.phases` gets a line with its id and status — suffixed with
+ * " ★ final" when the matching definition phase has `final` set — followed by
+ * either its reads (`← stepId@version`, with `?` standing in for a missing
+ * version) or a "source" marker when it read nothing.
+ *
+ * @param run - Run state whose phases and definition are reported.
+ * @returns The report, with lines joined by a newline.
+ */
+function formatProvenance(run: RunState): string {
+	const lines: string[] = [];
+	lines.push(`Provenance — run ${run.runId} · flow "${run.flowName}" · ${run.status}`);
+	lines.push("");
+	const finalIds = new Set(run.def.phases.filter((p) => p.final).map((p) => p.id));
+	const phases = Object.values(run.phases);
+	const any = phases.some((p) => p.reads && p.reads.length > 0);
+	if (!any) {
+		lines.push(
+			"(No observed readSets recorded. Reads are captured for agent/gate/reduce phases that interpolate {steps.*} — the overstory \"observed readSet@version\" moat.)",
+		);
+		return lines.join("\n");
+	}
+	for (const p of phases) {
+		const reads = p.reads ?? [];
+		lines.push(`■ ${p.id}  [${p.status}]${finalIds.has(p.id) ? " ★ final" : ""}`);
+		if (reads.length) {
+			lines.push("   observed reads:");
+			for (const r of reads) lines.push(`     ← ${r.stepId}@${r.version ?? "?"}`);
+		} else {
+			lines.push("   (source — no upstream reads)");
+		}
+	}
+	return lines.join("\n");
+}
+/**
+ * Renders a recompute report as plain text.
+ *
+ * Emits the seed list (tagged as a dry run when `r.dryRun` is set), the
+ * re-run phases with their count, and the phases reused from outside the
+ * frontier. The early-cutoff section — including the "saved N
+ * re-execution(s)" line when the cutoff list is non-empty — is only printed
+ * for a real (non-dry) recompute. Empty lists are shown as "—".
+ *
+ * @param r - Report describing the recompute to summarize.
+ * @returns The summary, with lines joined by a newline.
+ */
+function formatRecompute(r: RecomputeReport): string {
+	const lines: string[] = [];
+	lines.push(`Recompute — seed: ${r.seeds.join(", ")}${r.dryRun ? "  (DRY RUN — worst-case, no execution)" : ""}`);
+	lines.push("");
+	lines.push(`▲ re-run (${r.rerun.length}): ${r.rerun.join(", ") || "—"}`);
+	if (!r.dryRun) {
+		lines.push(`✂ early-cutoff (cached — inputHash unchanged): ${r.cutoff.join(", ") || "—"}`);
+		if (r.cutoff.length > 0) lines.push(`   → saved ${r.cutoff.length} re-execution(s).`);
+	}
+	lines.push(`✓ reused (outside frontier): ${r.reused.join(", ") || "—"}`);
+	return lines.join("\n");
+}
 function makeRunState(def: Taskflow, args: Record<string, unknown>, cwd: string): RunState {
 	return {
 		runId: newRunId(def.name),
@@ -292,7 +336,18 @@ async function runFlow(
 			persist: persistThrottled,
 			requestApproval,
 			loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
+			// Cross-run cache is opt-in per phase (cache:{scope:"cross-run"}).
+			// Defaulting every real run to cross-run was reviewed out: it silently
+			// persists phase outputs and can serve stale results for phases whose
+			// agents read files at runtime (those files are not in the cache key).
+			cacheScopeDefault: "run-only",
 		});
+		// Auto-report cache savings at the end of a real run so the user sees the
+		// M1-M5 effect without running a separate /tf command.
+		if (result.ok) {
+			const report = formatCacheReport(result.state, result.totalUsage);
+			if (report) ctx.ui.notify(report, "info");
+		}
 		return result;
 	} finally {
 		if (heartbeat) clearInterval(heartbeat);
@@ -629,6 +684,60 @@ export default function (pi: ExtensionAPI) {
 				return finalResult(action, result);
 			}
+			if (action === "provenance") {
+				if (!params.runId)
+					return errorResult(action, "action=provenance requires 'runId'");
+				const run = loadRun(ctx.cwd, params.runId);
+				if (!run) return errorResult(action, `Run not found: ${params.runId}`);
+				return {
+					content: [{ type: "text", text: formatProvenance(run) }],
+					details: { action } satisfies TaskflowDetails,
+				};
+			}
+			if (action === "why-stale") {
+				if (!params.runId)
+					return errorResult(action, "action=why-stale requires 'runId'");
+				const run = loadRun(ctx.cwd, params.runId);
+				if (!run) return errorResult(action, `Run not found: ${params.runId}`);
+				const reads = readMapOf(run.phases);
+				const seeds = params.phaseId ? [String(params.phaseId)] : [];
+				return {
+					content: [{ type: "text", text: formatWhyStale(run.runId, run.flowName, reads, seeds) }],
+					details: { action } satisfies TaskflowDetails,
+				};
+			}
+			if (action === "recompute") {
+				if (!params.runId)
+					return errorResult(action, "action=recompute requires 'runId'");
+				if (!params.phaseId)
+					return errorResult(action, "action=recompute requires 'phaseId' (the seed phase to re-run)");
+				const prev = loadRun(ctx.cwd, params.runId);
+				if (!prev) return errorResult(action, `Run not found: ${params.runId}`);
+				// H1: the LLM-callable tool defaults to a SAFE dry-run (no tokens, no
+				// mutation). A real recompute — which spends money and overwrites the
+				// run — requires an explicit dryRun:false.
+				const dryRun = params.dryRun !== false;
+				const settings = readSubagentSettings();
+				const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
+				const deps: RuntimeDeps = {
+					cwd: ctx.cwd,
+					agents,
+					globalThinking: settings.globalThinking,
+					signal,
+					loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
+				};
+				const { report, state } = await recomputeTaskflow(prev, deps, [String(params.phaseId)], { dryRun });
+				// H2: never persist a partial/aborted recompute over the original run.
+				if (!dryRun && !report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
+				const prefix = report.aborted ? "⚠ ABORTED mid-recompute — original run left unchanged.\n\n" : "";
+				return {
+					content: [{ type: "text", text: prefix + formatRecompute(report) }],
+					details: { action } satisfies TaskflowDetails,
+				};
+			}
 			// resolve the definition: inline `define` / shorthand (single|parallel|chain), else saved `name`.
 			let def: Taskflow | undefined;
@@ -822,7 +931,7 @@ export default function (pi: ExtensionAPI) {
 	pi.registerCommand("tf", {
 		description: "Taskflow: list | run <name> | show <name> | compile <name> | runs | init",
 		getArgumentCompletions: (prefix) => {
-			const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify", "compile"];
+			const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify", "compile", "provenance", "why-stale", "recompute"];
 			const items = subs.map((s) => ({ value: s, label: s }));
 			const filtered = items.filter((i) => i.value.startsWith(prefix));
 			return filtered.length > 0 ? filtered : null;
@@ -878,6 +987,69 @@ export default function (pi: ExtensionAPI) {
 				return;
 			}
+			if (sub === "provenance") {
+				if (!arg) {
+					ctx.ui.notify("Usage: /tf provenance <runId>", "warning");
+					return;
+				}
+				const run = loadRun(ctx.cwd, arg);
+				if (!run) {
+					ctx.ui.notify(`Run not found: ${arg}`, "error");
+					return;
+				}
+				ctx.ui.notify(formatProvenance(run), "info");
+				return;
+			}
+			if (sub === "why-stale") {
+				if (!arg) {
+					ctx.ui.notify("Usage: /tf why-stale <runId> [phaseId]", "warning");
+					return;
+				}
+				const [rid, ...rest] = arg.trim().split(/\s+/);
+				const run = loadRun(ctx.cwd, rid);
+				if (!run) {
+					ctx.ui.notify(`Run not found: ${rid}`, "error");
+					return;
+				}
+				const reads = readMapOf(run.phases);
+				ctx.ui.notify(formatWhyStale(run.runId, run.flowName, reads, rest), "info");
+				return;
+			}
+			if (sub === "recompute") {
+				const tokens = (arg ?? "").trim().split(/\s+/).filter(Boolean);
+				const rid = tokens[0];
+				const seed = tokens.find((t) => t !== rid && !t.startsWith("--"));
+				const apply = tokens.includes("--apply");
+				if (!rid || !seed) {
+					ctx.ui.notify("Usage: /tf recompute <runId> <phaseId> [--apply]\n(default is a safe dry-run; --apply spends tokens)", "warning");
+					return;
+				}
+				const prev = loadRun(ctx.cwd, rid);
+				if (!prev) {
+					ctx.ui.notify(`Run not found: ${rid}`, "error");
+					return;
+				}
+				const settings = readSubagentSettings();
+				const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
+				const deps: RuntimeDeps = {
+					cwd: ctx.cwd,
+					agents,
+					globalThinking: settings.globalThinking,
+					loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
+				};
+				if (apply) {
+					const { report, state } = await recomputeTaskflow(prev, deps, [seed], { dryRun: false });
+					if (!report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
+					ctx.ui.notify(formatRecompute(report), report.aborted ? "warning" : "info");
+				} else {
+					const { report } = await recomputeTaskflow(prev, deps, [seed], { dryRun: true });
+					ctx.ui.notify(formatRecompute(report), "info");
+				}
+				return;
+			}
 			if (sub === "runs") {
 				const runs = listRuns(ctx.cwd, 50);
 				if (runs.length === 0) {
@@ -1123,6 +1295,17 @@ function errorResult(action: string, message: string): ToolResult {
 	};
 }
+/**
+ * Builds the one-line cross-run cache summary for a run.
+ *
+ * Counts the phases whose `cacheHit` is "cross-run" and sums the
+ * `usage.input` + `usage.output` recorded on those phases (missing values
+ * count as 0).
+ *
+ * NOTE(review): `totalUsage` is accepted but never read in this body.
+ * NOTE(review): the inline comment says cached phases spent 0 tokens this
+ * run, yet the message reports the usage stored on them — confirm what
+ * `usage` holds for a cache hit.
+ *
+ * @param state - Run state whose phases are inspected.
+ * @param totalUsage - Usage totals supplied by the caller (unused here).
+ * @returns The summary line, or "" when no phase was a cross-run cache hit.
+ */
+function formatCacheReport(state: RunState, totalUsage: UsageStats): string {
+	const cached = Object.values(state.phases).filter((p) => p.cacheHit === "cross-run");
+	if (cached.length === 0) return "";
+	// Honest reporting: we know these phases spent 0 tokens *this run* because
+	// they were served from cache. We do NOT estimate dollars/tokens "saved" —
+	// that requires guessing what a re-execution would have cost, and the mix of
+	// cheap vs expensive phases (tournament/loop) makes such a guess misleading.
+	const cachedTokens = cached.reduce((sum, p) => sum + ((p.usage?.input ?? 0) + (p.usage?.output ?? 0)), 0);
+	return `💾 ${cached.length} phase(s) reused from cross-run cache (${cachedTokens.toLocaleString()} tokens spent on them this run)`;
+}
 function finalResult(action: string, result: RuntimeResult): ToolResult {
 	const fp = finalPhase(result.state.def.phases);
 	const header = result.ok
@@ -1130,7 +1313,7 @@ function finalResult(action: string, result: RuntimeResult): ToolResult {
 		: `Taskflow '${result.state.flowName}' ${result.state.status} (${summarizeRun(result.state)}). Run id: ${result.state.runId} — resume with action=resume.`;
 	return {
 		content: [{ type: "text", text: `${header}\n\n--- ${fp.id} ---\n${result.finalOutput}` }],
-		details: { action, state: result.state, finalOutput: result.finalOutput },
+		details: { action, state: result.state, finalOutput: result.finalOutput, cacheReport: formatCacheReport(result.state, result.totalUsage) },
 		isError: !result.ok,
 	};
 }

package/extensions/interpolate.ts CHANGED Viewed

@@ -21,6 +21,12 @@ export interface InterpolationContext {
 	previousOutput?: string;
 	/** loop variable bindings, e.g. { item: {...} } */
 	locals?: Record<string, unknown>;
+	/** Observed-read hook (M3): invoked once per successfully-resolved
+	 *  placeholder path, so the runtime can capture which upstream phases a
+	 *  phase actually consumed (its observed readSet). Unresolved refs do NOT
+	 *  fire it (they become `missing` warnings instead). Default undefined →
+	 *  zero overhead, fully backward-compatible. */
+	onRead?: (ref: string) => void;
 }
 const PLACEHOLDER = /\{([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*)\}/g;
@@ -48,7 +54,18 @@ export function interpolate(
 	return { text, missing };
 }
+/** Resolve + record an observed read (M3 observed-readSet). Fires only on
+ *  successful resolution so an unresolved ref is NOT logged as a dependency
+ *  (it stays a `missing` warning). The runtime threads a collector here to
+ *  capture which upstream phases this phase actually consumed — the overstory
+ *  "observed readSet@version" moat (nobody else records this).
+ *
+ *  "Successful" means `_resolvePath` returned anything other than
+ *  `undefined`, so a `null` result still fires the hook. `ctx.onRead` is
+ *  optional; when it is absent the call is skipped.
+ *
+ *  @param path - Dotted placeholder path to resolve.
+ *  @param ctx - Interpolation context; its `onRead`, if set, receives `path`.
+ *  @returns Whatever `_resolvePath` returned for `path`. */
 function resolvePath(path: string, ctx: InterpolationContext): unknown {
+	const value = _resolvePath(path, ctx);
+	if (value !== undefined) ctx.onRead?.(path);
+	return value;
+}
+function _resolvePath(path: string, ctx: InterpolationContext): unknown {
 	const parts = path.split(".");
 	const head = parts[0];

package/extensions/runtime.ts CHANGED Viewed

@@ -20,6 +20,8 @@ import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_
 import { verifyTaskflow } from "./verify.ts";
 import { hashInput, newRunId, type PhaseState, type RunState, runsDir } from "./store.ts";
 import { CacheStore, resolveFingerprint } from "./cache.ts";
+import { flowDefHash } from "./flowir/hash.ts";
+import { computeStaleFrontier, readMapOf } from "./stale.ts";
 import { ctxDirFor, drainPendingSpawns, initCtxDir, registerNode, setNodeStatus, type SpawnAssignment } from "./context-store.ts";
 import { allocateWorkspace, isWorkspaceKeyword, type Workspace } from "./workspace.ts";
@@ -55,6 +57,8 @@ export interface RuntimeDeps {
 	loadFlow?: (name: string) => Taskflow | undefined;
 	/** Cross-run memoization store. Omit to construct a default one for `deps.cwd`. */
 	cacheStore?: CacheStore;
+	/** Default cache scope for phases that don't specify one. */
+	cacheScopeDefault?: CacheScope;
 	/** Internal: sub-flow call stack, for recursion detection. */
 	_stack?: string[];
 	/** Internal: pre-resolved Shared Context Tree dir for this run (sub-flows inherit the parent's). */
@@ -74,6 +78,7 @@ function buildInterpolationContext(
 	state: RunState,
 	previousOutput: string | undefined,
 	locals?: Record<string, unknown>,
+	onRead?: (ref: string) => void,
 ): InterpolationContext {
 	const steps: Record<string, { output: string; json?: unknown }> = {};
 	for (const [id, ps] of Object.entries(state.phases)) {
@@ -90,7 +95,7 @@ function buildInterpolationContext(
 			}
 		}
 	}
-	return { args: state.args, steps, previousOutput, locals };
+	return { args: state.args, steps, previousOutput, locals, onRead };
 }
 function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {	const failed = isFailed(r);
@@ -115,6 +120,27 @@ function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJs
 	};
 }
+/** Convert observed read refs (e.g. "steps.scout.output") into a structured
+ *  readSet keyed by upstream phase id, tagging each with the version
+ *  (= inputHash) that was current when read. Only `steps.*` refs are upstream
+ *  phase dependencies; args/item/previous are invocation/loop values.
+ *
+ *  Refs that do not start with `steps.<id>` are ignored. Each step id is
+ *  reported once, in first-seen order, no matter how many refs mention it.
+ *  `version` is `undefined` when `state.phases` has no entry (or no
+ *  `inputHash`) for that step.
+ *
+ *  NOTE(review): the id pattern ends in `\b`, which cannot match directly
+ *  after a `-`. A ref such as "steps.foo-.output" therefore backtracks and is
+ *  recorded under "foo". Confirm whether phase ids may end in a hyphen; if
+ *  they can, anchor on the following "." or end of string instead.
+ *
+ *  @param refs - Placeholder paths collected while interpolating a phase.
+ *  @param state - Run state used to look up each step's current `inputHash`.
+ *  @returns One `{ stepId, version }` entry per distinct upstream step. */
+function readRefsToReads(
+	refs: string[],
+	state: RunState,
+): Array<{ stepId: string; version?: string }> {
+	const out: Array<{ stepId: string; version?: string }> = [];
+	const seen = new Set<string>();
+	for (const ref of refs) {
+		const m = /^steps\.([A-Za-z0-9_-]+)\b/.exec(ref);
+		if (!m) continue;
+		const stepId = m[1] as string;
+		if (seen.has(stepId)) continue;
+		seen.add(stepId);
+		out.push({ stepId, version: state.phases[stepId]?.inputHash });
+	}
+	return out;
+}
 /**
  * Surface unresolved interpolation placeholders (the `missing[]` from
  * `interpolate()`). Without this they are silently left intact in the task —
@@ -551,6 +577,15 @@ async function runSpawnedChildren(
  * and tears it down afterwards. All allocation is fail-open: a failed allocation
  * degrades to the base cwd so a phase never fails to run because of isolation.
  */
+/** Optional per-invocation execution flags (e.g. M5 recompute forces a
+ *  phase to re-run, bypassing the cross-run cache so the result refreshes).
+ *  Every field is optional, so an omitted or empty options object leaves
+ *  every flag unset. */
+interface PhaseExecOpts {
+	/** Bypass the cache entirely (within-run prior AND cross-run store) and
+	 *  re-execute. Used by `/tf recompute` on the seeded phase so its new
+	 *  output — and only the downstream whose inputHash actually moves — refreshes. */
+	forceRerun?: boolean;
+}
 async function executePhase(
 	phase: Phase,
 	state: RunState,
@@ -558,10 +593,11 @@ async function executePhase(
 	prior: PhaseState | undefined,
 	emitProgress: () => void,
 	_retryDepth = 0,
+	opts?: PhaseExecOpts,
 ): Promise<PhaseState> {
 	// Non-keyword cwd (or none): no workspace lifecycle — run directly.
 	if (!isWorkspaceKeyword(phase.cwd)) {
-		return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth);
+		return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth, opts);
 	}
 	let ws: Workspace | undefined;
 	try {
@@ -576,7 +612,7 @@ async function executePhase(
 	}
 	const innerDeps: RuntimeDeps = ws ? { ...deps, _cwdOverride: ws.dir } : deps;
 	try {
-		const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth);
+		const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth, opts);
 		if (ws && (ws.kind !== "inherited" || ws.note)) {
 			const tag = ws.kind === "inherited" ? "workspace" : `workspace:${ws.kind}`;
 			const msg = ws.note ? `${tag} — ${ws.note}` : `${tag} at ${ws.dir}`;
@@ -599,6 +635,7 @@ async function executePhaseInner(
 	prior: PhaseState | undefined,
 	emitProgress: () => void,
 	_retryDepth = 0,
+	opts?: PhaseExecOpts,
 ): Promise<PhaseState> {
 	const type = phase.type ?? "agent";
 	const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
@@ -631,13 +668,49 @@ async function executePhaseInner(
 	// Resolve context pre-read files once, before any type branching.
 	// The content is prepended to every task so the subagent never spends
 	// turns on file exploration for files the flow author already knows.
-	const ctx = buildInterpolationContext(state, previousOutput);
+	// M3 observed-readSet: collect every upstream ref this phase resolves, so we
+	// can record what its result ACTUALLY depended on (not just its declared
+	// dependsOn). Shared by every interpolation in this phase (task / when / …).
+	const readRefs: string[] = [];
+	const onRead = (ref: string): void => {
+		readRefs.push(ref);
+	};
+	const ctx = buildInterpolationContext(state, previousOutput, undefined, onRead);
+	// M3 observed-readSet: when conditions are part of the phase's real
+	// dependencies. Evaluate them inside executePhaseInner so every upstream
+	// interpolation is captured by the shared onRead hook, not silently dropped
+	// by a separate out-of-band context.
+	if (phase.when !== undefined) {
+		if (!evaluateCondition(phase.when, ctx)) {
+			return {
+				id: phase.id,
+				status: "skipped",
+				error: `Condition not met: ${phase.when}`,
+				endedAt: Date.now(),
+				usage: emptyUsage(),
+				reads: readRefsToReads(readRefs, state),
+			};
+		}
+	}
 	const preRead = await resolvePhaseContext(phase, ctx);
 	// Resolve this phase's cache policy once. Default scope is "run-only" (the
 	// historical within-run resume behavior). Only "cross-run" phases resolve a
 	// fingerprint and consult the persistent store.
-	const cacheScope: CacheScope = (phase.cache?.scope ?? "run-only") as CacheScope;
+	let cacheScope: CacheScope = (phase.cache?.scope ?? deps.cacheScopeDefault ?? "run-only") as CacheScope;
+	// Defense in depth: gate/approval/loop/tournament must produce a fresh result
+	// each run (schema already rejects explicit cross-run, but the default-scope
+	// path must also be blocked). If flowDefHash failed, cross-run is unsafe
+	// because the key degrades to flowName-only and reopens cross-flow collisions.
+	const CROSS_RUN_BLOCKED_TYPES = new Set(["gate", "approval", "loop", "tournament"]);
+	if (cacheScope === "cross-run" && CROSS_RUN_BLOCKED_TYPES.has(type)) {
+		cacheScope = "run-only";
+	}
+	if (state.flowDefHash === "failed" && cacheScope === "cross-run") {
+		cacheScope = "run-only";
+	}
 	const cc: PhaseCacheCtx = {
 		scope: cacheScope,
 		ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
@@ -647,6 +720,8 @@ async function executePhaseInner(
 		phaseId: phase.id,
 		flowName: state.flowName,
 		runId: state.runId,
+		flowDefHash: state.flowDefHash === "failed" ? undefined : state.flowDefHash,
+		forceRerun: opts?.forceRerun,
 		thinking: phase.thinking,
 		tools: phase.tools,
 		preRead,
@@ -823,7 +898,7 @@ async function executePhaseInner(
 	if (type === "agent" || type === "gate" || type === "reduce") {
 		// Eval gate: zero-token machine checks before the LLM gate.
 		if (type === "gate" && Array.isArray(phase.eval) && phase.eval.length > 0) {
-			const evalCtx = buildInterpolationContext(state, previousOutput);
+			const evalCtx = buildInterpolationContext(state, previousOutput, undefined, onRead);
 			let allPassed = true;
 			for (const check of phase.eval) {
 				let expr = check;
@@ -858,6 +933,7 @@ async function executePhaseInner(
 					inputHash,
 					endedAt: Date.now(),
 				};
+				if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
 				recordCache(cc, ps);
 				return ps;
 			}
@@ -873,6 +949,7 @@ async function executePhaseInner(
 		const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress), nodeIdFor());
 		const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
+		if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
 		if (refWarning) ps.warnings = [...(ps.warnings ?? []), refWarning];
 		if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
@@ -919,7 +996,7 @@ async function executePhaseInner(
 					for (const depId of phase.dependsOn ?? []) {
 						const d = state.def.phases.find((p) => p.id === depId);
 						if (!d) continue;
-						const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1);
+						const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1, undefined);
 						state.phases[depId] = dPs;
 					}
 				}
@@ -954,6 +1031,7 @@ async function executePhaseInner(
 		const results = await runFanout(branches);
 		const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
+		if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
 		recordCache(cc, ps);
 		return ps;
 	}
@@ -982,7 +1060,7 @@ async function executePhaseInner(
 		}
 		const loopVar = phase.as ?? "item";
 		const tasks = arr.map((item) => {
-			const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
+			const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item }, onRead);
 			return {
 				agent: resolveAgent(phase.agent, deps, state),
 				task: preRead + interpolate(phase.task ?? "", localCtx).text,
@@ -994,6 +1072,7 @@ async function executePhaseInner(
 		const results = await runFanout(tasks);
 		const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
+		if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
 		if (mapTruncated) {
 			ps.warnings = [...(ps.warnings ?? []), `map fan-out truncated to MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) inside a dynamic sub-flow`];
 			// NB: do NOT set ps.budgetTruncated — that field drives the run-level
@@ -1005,9 +1084,10 @@ async function executePhaseInner(
 	}
 	if (type === "approval") {
-		const ctx = buildInterpolationContext(state, previousOutput);
+		const readRefs: string[] = [];
+		const ctx = buildInterpolationContext(state, previousOutput, undefined, (ref) => readRefs.push(ref));
 		const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
-		const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
+		const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", "approval", message]);
 		const cached = cachedPhase(cc, inputHash);
 		if (cached) return cached;
@@ -1023,6 +1103,7 @@ async function executePhaseInner(
 				gate: { verdict: "block", reason: "(auto-rejected: no interactive approver available)" },
 				usage: emptyUsage(),
 				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1035,6 +1116,7 @@ async function executePhaseInner(
 			approval: { decision: decision.decision, note },
 			usage: emptyUsage(),
 			inputHash,
+			reads: readRefsToReads(readRefs, state),
 			endedAt: Date.now(),
 		};
 		// A rejection halts the flow via the same mechanism as a blocking gate.
@@ -1045,7 +1127,8 @@ async function executePhaseInner(
 	}
 	if (type === "flow") {
-		const ctx = buildInterpolationContext(state, previousOutput);
+		const readRefs: string[] = [];
+		const ctx = buildInterpolationContext(state, previousOutput, undefined, (ref) => readRefs.push(ref));
 		const hasDef = (phase as { def?: unknown }).def !== undefined;
 		const stack = deps._stack ?? [];
@@ -1066,6 +1149,7 @@ async function executePhaseInner(
 				json: parseJson ? safeParse("") : undefined,
 				usage: emptyUsage(),
 				inputHash: hashInput(phase.id, `flow-def-error:${diag}`),
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 				defError: diag,
 			});
@@ -1101,6 +1185,7 @@ async function executePhaseInner(
 					json: parseJson ? safeParse("") : undefined,
 					usage: emptyUsage(),
 					inputHash: hashInput(phase.id, "flow-def-empty"),
+					reads: readRefsToReads(readRefs, state),
 					endedAt: Date.now(),
 				};
 			}
@@ -1222,6 +1307,7 @@ async function executePhaseInner(
 			},
 			error: subResult.ok ? undefined : `sub-flow '${name}' ${subResult.state.status}`,
 			inputHash,
+			reads: readRefsToReads(readRefs, state),
 			endedAt: Date.now(),
 		};
 		recordCache(cc, flowPs);
@@ -1231,11 +1317,21 @@ async function executePhaseInner(
 	// loop-until-done: run the body repeatedly until `until` is truthy, the output
 	// converges to a fixed point, or maxIterations is hit (always terminates).
 	if (type === "loop") {
+		const readRefs: string[] = [];
 		const agentName = resolveAgent(phase.agent, deps, state);
 		const rawMax = phase.maxIterations ?? LOOP_DEFAULT_MAX_ITERATIONS;
 		const maxIters = Math.max(1, Math.min(LOOP_HARD_MAX_ITERATIONS, Math.floor(rawMax)));
 		const convergence = phase.convergence ?? true;
+		// Canonical first-iteration body for the cache key. It must fold in the
+		// interpolated task/upstream refs so that a changed upstream changes the
+		// key and recompute no longer silently reuses a stale loop (critic finding).
+		const firstBodyCtx = buildInterpolationContext(state, previousOutput, {
+			loop: { iteration: 1, lastOutput: "", maxIterations: maxIters },
+		}, (ref) => readRefs.push(ref));
+		const firstBody = preRead + interpolate(phase.task ?? "", firstBodyCtx).text;
+		const inputHash = hashInput(phase.id, "loop", phase.until ?? "", firstBody, String(maxIters));
 		const usages: UsageStats[] = [];
 		const loopWarnings: string[] = [];
 		let lastOutput = "";
@@ -1253,7 +1349,7 @@ async function executePhaseInner(
 			// The body sees its iteration number and the prior iteration's output.
 			const bodyCtx = buildInterpolationContext(state, previousOutput, {
 				loop: { iteration: i, lastOutput, maxIterations: maxIters },
-			});
+			}, (ref) => readRefs.push(ref));
 			const body = preRead + interpolate(phase.task ?? "", bodyCtx).text;
 			const r = await runOne(agentName, body, liveSink(state, phase.id, emitProgress));
 			usages.push(r.usage);
@@ -1270,7 +1366,7 @@ async function executePhaseInner(
 			// Loop locals ({loop.iteration} etc.) are available to the condition too.
 			const untilCtx = buildInterpolationContext(state, previousOutput, {
 				loop: { iteration: i, lastOutput, maxIterations: maxIters },
-			});
+			}, (ref) => readRefs.push(ref));
 			untilCtx.steps[phase.id] = { output: lastOutput, json: safeParse(lastOutput) };
 			const { value: done, error: condErr } = tryEvaluateCondition(phase.until ?? "", untilCtx);
 			// A malformed condition must not spin forever: stop and surface a warning
@@ -1301,7 +1397,8 @@ async function executePhaseInner(
 				error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
 				loop: { iterations, stop },
 				warnings: loopWarnings.length ? loopWarnings : undefined,
-				inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1313,7 +1410,8 @@ async function executePhaseInner(
 			usage: aggUsage,
 			loop: { iterations, stop },
 			warnings: loopWarnings.length ? loopWarnings : undefined,
-			inputHash: hashInput(phase.id, "loop", phase.until ?? "", String(iterations)),
+			inputHash,
+			reads: readRefsToReads(readRefs, state),
 			endedAt: Date.now(),
 		};
 	}
@@ -1336,6 +1434,20 @@ async function executePhaseInner(
 			competitors = Array.from({ length: n }, () => ({ agent: resolveAgent(phase.agent, deps, state), task: body }));
 		}
+		// The inputHash must fold in the resolved competitors (which embed the
+		// interpolated task/upstream refs) and the judge rubric, otherwise a changed
+		// upstream produces the same key and recompute silently reuses a stale
+		// tournament (critic finding: unsound for cross-run/recompute).
+		const rubric = interpolate(phase.judge ?? "", ctx).text.trim();
+		const inputHash = hashInput(
+			phase.id,
+			"tournament",
+			mode,
+			String(competitors.length),
+			JSON.stringify(competitors.map((c) => ({ agent: c.agent, task: c.task }))),
+			rubric,
+		);
 		const results = await runFanout(competitors);
 		const ran = results.filter((r) => r.stopReason !== "budget-skipped");
 		const ok = ran.filter((r) => !isFailed(r));
@@ -1355,7 +1467,8 @@ async function executePhaseInner(
 				error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
 				budgetTruncated: budgetSkipCount > 0 || undefined,
 				tournament: { variants: competitors.length, winner: 0, mode },
-				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1370,7 +1483,8 @@ async function executePhaseInner(
 				model: ok[0].model,
 				budgetTruncated: budgetSkipCount > 0 || undefined,
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
-				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1387,7 +1501,8 @@ async function executePhaseInner(
 				budgetTruncated: budgetSkipCount > 0 || undefined,
 				warnings: ["judge skipped: run aborted or budget exceeded"],
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
-				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1396,14 +1511,14 @@ async function executePhaseInner(
 		const labelled = ran
 			.map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
 			.join("\n\n---\n\n");
-		const rubric =
-			interpolate(phase.judge ?? "", ctx).text.trim() ||
+		const finalRubric =
+			rubric ||
 			"You are judging competing answers to the same task. Pick the single best variant on correctness, completeness, and clarity.";
 		const directive =
 			mode === "best"
 				? `End your reply with a line exactly: WINNER: <number> (1–${ran.length}), choosing the strongest eligible variant.`
 				: `Synthesize the strongest possible answer by combining the best parts of the eligible variants. Then end with a line: WINNER: <number> indicating which variant contributed most.`;
-		const judgeTask = `${rubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
+		const judgeTask = `${finalRubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
 		const judgeAgent = resolveAgent(phase.judgeAgent ?? phase.agent, deps, state);
 		const judgeRes = await runOne(judgeAgent, judgeTask, liveSink(state, phase.id, emitProgress));
 		const judgeUsage = aggregateUsage([variantUsage, judgeRes.usage]);
@@ -1421,7 +1536,8 @@ async function executePhaseInner(
 				budgetTruncated: budgetSkipCount > 0 || undefined,
 				warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
 				tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
-				inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
+				inputHash,
+				reads: readRefsToReads(readRefs, state),
 				endedAt: Date.now(),
 			};
 		}
@@ -1444,7 +1560,8 @@ async function executePhaseInner(
 			budgetTruncated: budgetSkipCount > 0 || undefined,
 			warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
 			tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
-			inputHash: hashInput(phase.id, "tournament", String(competitors.length), mode),
+			inputHash,
+			reads: readRefsToReads(readRefs, state),
 			endedAt: Date.now(),
 		};
 	}
@@ -1509,6 +1626,15 @@ interface PhaseCacheCtx {
 	 *  whether a given branch happens to fold preRead into its task string
 	 *  (previously this was only incidentally true via `fullTask`). */
 	preRead?: string;
+	/** Content fingerprint of the desugared flow definition — folded into the
+	 *  key so two structurally-different flows that share a name can never
+	 *  collide, and a changed flow never serves a stale cross-run hit. */
+	flowDefHash?: string | "failed";
+	/** Force this phase to re-execute, ignoring the within-run prior AND the
+	 *  cross-run store (M5 recompute seed). Downstream phases are NOT forced —
+	 *  they re-evaluate naturally: if the seed's new output changed their
+	 *  inputHash they miss and re-run, otherwise they hit (early cutoff). */
+	forceRerun?: boolean;
 }
 /** Fold the phase fingerprint into the base hash parts to form the final cache key. */
@@ -1519,6 +1645,7 @@ function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
 	// resolved context pre-read content, and the world-state fingerprint.
 	const parts = [
 		`flow:${cc.flowName}`,
+		`flowdef:${cc.flowDefHash ?? ""}`,
 		...baseParts,
 		`think:${cc.thinking ?? ""}`,
 		`tools:${JSON.stringify(cc.tools ?? [])}`,
@@ -1536,6 +1663,7 @@ function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
  */
 function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
 	if (cc.scope === "off") return null;
+	if (cc.forceRerun) return null;
 	// 1. within-run resume (fastest; always allowed unless scope is off)
 	if (cc.prior && cc.prior.status === "done" && cc.prior.inputHash === inputHash) {
@@ -1546,6 +1674,13 @@ function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
 	if (cc.scope === "cross-run") {
 		const e = cc.store.get(inputHash, cc.ttlMs);
 		if (e) {
+			// If we stored the full PhaseState, restore it (preserving gate,
+			// approval, reads, loop/tournament metadata, warnings) and just mark
+			// the cache hit + zero usage. Fallback to the legacy trimmed surface
+			// for entries written before this change.
+			if (e.state) {
+				return { ...e.state, inputHash, usage: emptyUsage(), cacheHit: "cross-run", endedAt: Date.now() };
+			}
 			return {
 				id: cc.phaseId,
 				status: "done",
@@ -1573,6 +1708,7 @@ function recordCache(cc: PhaseCacheCtx, ps: PhaseState): void {
 		output: ps.output,
 		json: ps.json,
 		model: ps.model,
+		state: ps,
 		flowName: cc.flowName,
 		phaseId: cc.phaseId,
 		runId: cc.runId,
@@ -1701,6 +1837,155 @@ function safeProgress(deps: RuntimeDeps, state: RunState): void {
 /**
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
  */
+/**
+ * Outcome of a recompute: which phases were (or, in a dry run, would be)
+ * re-executed and which were reused.
+ *
+ * `cutoff` is the prize — frontier phases whose inputHash did NOT move, so
+ * they hit their cached result instead of re-running (early cutoff). That is
+ * what makes a recompute cheaper than a full re-run.
+ */
+export interface RecomputeReport {
+	/** True when nothing was executed and the report is the worst-case frontier. */
+	readonly dryRun: boolean;
+	/** True when the abort signal stopped the replay before the frontier was walked. */
+	readonly aborted: boolean;
+	/** The phase ids the caller asked to recompute, passed through unchanged. */
+	readonly seeds: ReadonlyArray<string>;
+	/** Phases that were (dry-run: would be) re-executed, or whose result moved. */
+	readonly rerun: ReadonlyArray<string>;
+	/** Phases outside the frontier — untouched, reused verbatim. */
+	readonly reused: ReadonlyArray<string>;
+	/**
+	 * Phases in the frontier whose inputHash did NOT move → cached result
+	 * reused, no re-execution (early cutoff). Empty in dry-run (unknowable).
+	 */
+	readonly cutoff: ReadonlyArray<string>;
+}
+/**
+ * Scan a flow for dependencies that cannot be observed through the readSet.
+ *
+ * These include the Shared Context Tree, sub-flows, `context:` file pre-reads,
+ * and interpolation placeholders that do not resolve through `steps.*`
+ * (`{previous.output}`, `{args.*}`, and the per-item loop variable). The loop
+ * variable is `item` by default but a phase may rename it with `as`, so the
+ * renamed placeholder is scanned for as well. Recomputing flows with such
+ * deps with dryRun:false risks silently reusing stale upstream state.
+ *
+ * @param state Run whose flow definition (`state.def`) is inspected.
+ * @returns true as soon as any phase shows one of the unobservable dependency
+ *  kinds; false when every phase is clean (or the flow has no phases).
+ */
+function hasUnobservedDependencies(state: RunState): boolean {
+	// Placeholders that bypass `steps.*`; compiled once per call, not per phase.
+	const builtinLocals = /\{(previous\.output|args\.|item\b|item\.)/;
+	for (const p of state.def.phases) {
+		if (p.shareContext === true) return true;
+		if (state.def.contextSharing === true) return true;
+		if (p.type === "flow") return true;
+		if (p.context && p.context.length > 0) return true;
+		// A renamed loop variable (`as: "file"` → `{file…}`) is just as invisible
+		// to the readSet as `{item…}`; escape it so the name is matched literally.
+		const loopVar = typeof p.as === "string" && p.as !== "" && p.as !== "item" ? p.as : undefined;
+		const escapedVar = loopVar?.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+		const customLocal = escapedVar === undefined ? undefined : new RegExp("\\{" + escapedVar + "\\b");
+		const scan = (text: string): boolean =>
+			builtinLocals.test(text) || (customLocal !== undefined && customLocal.test(text));
+		if (scan(p.task ?? "")) return true;
+		if (p.when && scan(p.when)) return true;
+		if (Array.isArray(p.eval) && p.eval.some(scan)) return true;
+	}
+	return false;
+}
+/**
+ * Recompute a completed run minimally: force-rerun the `seeds`, then walk
+ * their stale frontier in topological order. The cache provides early cutoff
+ * for free — a downstream whose inputHash didn't move (because the seed's new
+ * output happened to equal the old) hits its prior and is reused rather than
+ * re-executed.
+ *
+ * @param state Completed run to replay. Never mutated: the replay works on a
+ *  `structuredClone` and the clone is what gets returned.
+ * @param deps Runtime dependencies handed to each re-executed phase;
+ *  `deps.signal` is polled between phases to support aborting.
+ * @param seeds Phase ids assumed to have changed; these are force-rerun.
+ * @param opts `dryRun` computes the worst-case frontier without spending a
+ *  token. It defaults to true both when `opts` is omitted and when an options
+ *  object is passed without the field; only an explicit `dryRun: false`
+ *  performs a real recompute.
+ * @returns A fresh state plus a report of what was rerun, reused, or cut off.
+ * @throws Error when dryRun:false is requested for a flow with unobserved
+ *  dependencies; callers should surface that as a user-facing error.
+ */
+export async function recomputeTaskflow(
+	state: RunState,
+	deps: RuntimeDeps,
+	seeds: readonly string[],
+	// Fail-safe default: a real recompute overwrites the run and spends tokens.
+	// The tool/command wrappers can explicitly opt into dryRun:false.
+	opts: { dryRun?: boolean } = { dryRun: true },
+): Promise<{ report: RecomputeReport; state: RunState }> {
+	// The parameter default only covers an omitted `opts`. An options object
+	// without the field (e.g. `{}`) must stay on the fail-safe side as well.
+	const dryRun = opts.dryRun ?? true;
+	// Never mutate the caller's RunState in-place. Recompute is a speculative
+	// replay; only the caller decides whether to persist the new state.
+	const newState = structuredClone(state) as RunState;
+	const reads = readMapOf(newState.phases);
+	const frontier = computeStaleFrontier(reads, seeds);
+	const allIds = Object.keys(newState.phases);
+	if (dryRun) {
+		return {
+			report: {
+				dryRun: true,
+				aborted: false,
+				seeds,
+				rerun: [...frontier],
+				reused: allIds.filter((id) => !frontier.has(id)),
+				cutoff: [],
+			},
+			state: newState,
+		};
+	}
+	// Guard: observed readSet only tracks `{steps.X.*}` interpolation refs. It is
+	// blind to Shared Context Tree (ctx_read/ctx_write), sub-flow internals,
+	// context: file pre-reads, {previous.output}, and loop locals ({args.*},
+	// {item.*}). Recomputing such a run with dryRun:false could silently skip
+	// phases whose deps changed outside the observed frontier and then persist a
+	// corrupted run over the original.
+	if (hasUnobservedDependencies(newState)) {
+		throw new Error(
+			"recompute dryRun:false is unsafe for this run: it contains dependencies " +
+				"(shareContext, flow/ctx_spawn, context: files, {previous.output}, {args.*}, or {item.*}) " +
+				"that are not tracked by the observed readSet. Use dryRun:true to inspect " +
+				"the frontier, or change the upstream phase and re-run the whole flow.",
+		);
+	}
+	// Real recompute: topological order over the frontier so a downstream always
+	// sees its (already-refreshed) upstreams when it re-evaluates its cache key.
+	// The order must respect both declared dependsOn AND observed reads, because
+	// pi-taskflow allows interpolation refs without an explicit dependsOn edge.
+	const seedSet = new Set(seeds);
+	function observedDeps(phaseId: string): string[] {
+		// A phase reading its own prior output (e.g. a loop `until` checking
+		// `{steps.thisId.output}`) must not create a self-edge in the scheduling
+		// graph — otherwise topoLayers would deadlock on the self-loop.
+		return (newState.phases[phaseId]?.reads ?? [])
+			.map((r) => r.stepId)
+			.filter((id) => id !== phaseId);
+	}
+	const augmentedPhases = newState.def.phases.map((p) => ({
+		...p,
+		dependsOn: [...new Set([...(p.dependsOn ?? []), ...observedDeps(p.id)])],
+	}));
+	const order = topoLayers(augmentedPhases)
+		.flat()
+		.map((p) => p.id)
+		.filter((id) => frontier.has(id));
+	const rerun: string[] = [];
+	const cutoff: string[] = [];
+	const noop = () => {};
+	let aborted = false;
+	for (const id of order) {
+		// A partial recompute must NOT be persisted over the original run — the
+		// caller discards `state` when `aborted` is set.
+		if (deps.signal?.aborted) {
+			aborted = true;
+			break;
+		}
+		const phase = newState.def.phases.find((p) => p.id === id);
+		if (!phase) continue;
+		// Snapshot the hash before re-evaluating so a cache hit can be told
+		// apart from a result that moved.
+		const before = newState.phases[id]?.inputHash;
+		const execOpts = seedSet.has(id) ? { forceRerun: true } : undefined;
+		try {
+			const ps = await executePhase(phase, newState, deps, newState.phases[id], noop, 0, execOpts);
+			newState.phases[id] = ps;
+			// A phase counts as "rerun" if it was a forced seed OR its result moved;
+			// otherwise it hit its cache (inputHash unchanged) → early cutoff.
+			if (seedSet.has(id) || ps.inputHash !== before) rerun.push(id);
+			else cutoff.push(id);
+		} catch {
+			// A failing recompute phase is recorded as rerun (it was attempted).
+			// Its previous PhaseState is left in place in `newState`.
+			rerun.push(id);
+		}
+	}
+	return {
+		report: {
+			dryRun: false,
+			aborted,
+			seeds,
+			rerun,
+			reused: allIds.filter((id) => !frontier.has(id)),
+			cutoff,
+		},
+		state: newState,
+	};
+}
 export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
 	const def: Taskflow = state.def;
 	try {
@@ -1726,6 +2011,24 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
 async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
 	const def: Taskflow = state.def;
 	const layers = topoLayers(def.phases);
+	// Content-fingerprint the desugared definition ONCE per run and fold it into
+	// every phase's cache key (overstory hash algorithm; see ./flowir/hash.ts).
+	// Reused by every phase, persisted on the RunState for audit/resume.
+	// Never throws into the run — a hash failure is caught, reported with a
+	// warning, and recorded on the state as the sentinel value "failed".
+	if (state.flowDefHash === undefined) {
+		try {
+			state.flowDefHash = await flowDefHash(def);
+		} catch (e) {
+			// Fail-safe: warn loudly rather than silently degrading to the legacy
+			// flowName-only key, which would reopen the cross-flow collision hole.
+			console.warn(
+				`[taskflow] flowDefHash failed for '${def.name}': ${e instanceof Error ? e.message : String(e)}. ` +
+				"Cross-run cache is disabled for this run to prevent stale cross-flow hits.",
+			);
+			state.flowDefHash = "failed";
+		}
+	}
 	state.status = "running";
 	safeEmit(deps, state);
@@ -1770,10 +2073,6 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
 			else if (budgetBlocked) skipReason = `Budget exceeded${budgetReason ? `: ${budgetReason}` : ""}`;
 			else if (!depsSatisfied)
 				skipReason = join === "any" ? "All dependencies failed or were skipped" : "Upstream dependency not satisfied";
-			else if (phase.when !== undefined) {
-				const condCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
-				if (!evaluateCondition(phase.when, condCtx)) skipReason = `Condition not met: ${phase.when}`;
-			}
 			if (skipReason) {
 				if (skipReason.startsWith("Budget exceeded")) budgetBlocked = true;

package/extensions/stale.ts ADDED Viewed

@@ -0,0 +1,137 @@
+/**
+ * Stale-marking (M4) — conservative transitive invalidation over the observed
+ * readSet captured in M3.
+ *
+ * This is the "mark stale, don't rerun" half of overstory's cost-asymmetric
+ * reactivity (VISION §2.3): the cheap effects (figuring out what WOULD be
+ * invalidated) run for free; the expensive effects (actually re-running an LLM
+ * phase) are gated for M5. Given a run's observed readSets and a set of phases
+ * assumed to have changed, `computeStaleFrontier` returns the transitive
+ * closure of phases whose recorded dependencies are no longer trustworthy.
+ *
+ * Pure module: no IO, no Date, no randomness. Deterministic.
+ *
+ * Scope (honest): this is TOPOLOGICAL propagation only — a changed seed
+ * invalidates everything that (transitively) read it. The overstory
+ * "early cutoff" refinement (a re-run whose output HASH is unchanged does NOT
+ * invalidate, even if the version advanced) needs before/after content hashes,
+ * which only exist when a phase is actually re-run — that is the M5
+ * recomputation concern, deliberately out of scope here. Marking is the safe,
+ * conservative prerequisite that lets M5 rerun with confidence.
+ *
+ * @see docs/internal/overstory-convergence-roadmap.md §3 (M4)
+ */
+import type { PhaseState } from "./store.ts";
+// ---------------------------------------------------------------------------
+// Read graph
+// ---------------------------------------------------------------------------
+/** Maps a phaseId to the upstream stepIds it was observed reading (M3 PhaseState.reads). */
+export type ReadMap = Map<string, ReadonlyArray<string>>;
+/**
+ * Build the read map for a run from its PhaseStates.
+ *
+ * @param phases Phase states keyed by phase id.
+ * @returns One entry per phase that recorded at least one read, listing the
+ *  `stepId` of each read in recorded order; phases without reads are omitted.
+ */
+export function readMapOf(phases: Record<string, PhaseState>): ReadMap {
+	const result: ReadMap = new Map();
+	Object.entries(phases).forEach(([phaseId, phaseState]) => {
+		const upstream = (phaseState.reads ?? []).map((read) => read.stepId);
+		if (upstream.length > 0) {
+			result.set(phaseId, upstream);
+		}
+	});
+	return result;
+}
+/**
+ * List the immediate dependents of `phaseId`: every reader whose recorded
+ * reads include it, in the read map's iteration order.
+ */
+export function dependentsOf(reads: ReadMap, phaseId: string): string[] {
+	return [...reads]
+		.filter(([, upstream]) => upstream.includes(phaseId))
+		.map(([reader]) => reader);
+}
+// ---------------------------------------------------------------------------
+// Stale frontier (transitive closure, union semantics)
+// ---------------------------------------------------------------------------
+/**
+ * The set of phases that are stale if `seeds` change, transitively. A reader
+ * is stale if ANY phase it observed-reading is stale (union/I5: when in doubt,
+ * assume dependency). Includes the seeds themselves.
+ *
+ * Deterministic. O(phases + read-edges): the read map is inverted once into an
+ * upstream → readers index, and the work list is walked without shifting.
+ * Cycles in the read graph (which a correct DAG can't produce, but a
+ * pathological one could) terminate because a phase is processed at most once
+ * — later occurrences of it in the work list are skipped.
+ *
+ * @param reads Observed read map (reader → upstream stepIds).
+ * @param seeds Phase ids assumed to have changed.
+ * @returns The stale set, in breadth-first discovery order starting from the
+ *  seeds.
+ */
+export function computeStaleFrontier(reads: ReadMap, seeds: Iterable<string>): Set<string> {
+	// Invert once so each stale phase finds its readers without rescanning the
+	// whole map. Readers keep the map's iteration order; a reader that lists the
+	// same upstream twice is indexed once.
+	const readersOf = new Map<string, string[]>();
+	for (const [reader, deps] of reads) {
+		for (const dep of new Set(deps)) {
+			const readers = readersOf.get(dep);
+			if (readers) readers.push(reader);
+			else readersOf.set(dep, [reader]);
+		}
+	}
+	const stale = new Set<string>();
+	const queue: string[] = [...seeds];
+	// Array iteration observes elements appended during the loop, so this is a
+	// FIFO work list without the linear cost of `shift()`.
+	for (const s of queue) {
+		if (stale.has(s)) continue;
+		stale.add(s);
+		for (const reader of readersOf.get(s) ?? []) {
+			if (!stale.has(reader)) queue.push(reader);
+		}
+	}
+	return stale;
+}
+// ---------------------------------------------------------------------------
+// Rendering
+// ---------------------------------------------------------------------------
+/**
+ * Render either the full observed dependency graph (no seeds) or the stale
+ * frontier given assumed-changed seeds. Each stale phase lists the stale
+ * upstreams that caused it (its "why").
+ *
+ * @param runId Run identifier shown in the heading.
+ * @param flowName Flow name shown in the heading.
+ * @param reads Observed read map (reader → upstream stepIds).
+ * @param seeds Phase ids assumed to have changed; duplicates are collapsed so
+ *  each seed is listed once.
+ * @returns A multi-line, human-readable report.
+ */
+export function formatWhyStale(
+	runId: string,
+	flowName: string,
+	reads: ReadMap,
+	seeds: readonly string[],
+): string {
+	const lines: string[] = [];
+	lines.push(`why-stale — run ${runId} · flow "${flowName}"`);
+	lines.push("");
+	if (seeds.length === 0) {
+		// No seeds → show the full observed dependency graph (who reads what).
+		if (reads.size === 0) {
+			lines.push("(No observed readSets in this run — provenance is empty.)");
+			return lines.join("\n");
+		}
+		lines.push("Observed dependency graph (who reads what):");
+		lines.push("");
+		for (const [reader, deps] of reads) {
+			lines.push(`■ ${reader}  reads: ${deps.join(", ")}`);
+		}
+		lines.push("");
+		lines.push("Pass a phase id to compute its stale frontier: /tf why-stale <runId> <phaseId>");
+		return lines.join("\n");
+	}
+	const frontier = computeStaleFrontier(reads, seeds);
+	const seedSet = new Set(seeds);
+	lines.push(`Assuming changed: ${[...seedSet].join(", ")}`);
+	lines.push("");
+	// The frontier always contains every seed, so equal sizes mean nothing
+	// beyond the seeds was invalidated.
+	if (frontier.size <= seedSet.size) {
+		lines.push(`Stale frontier: only the seed(s) themselves — nothing else observed-reading them.`);
+		return lines.join("\n");
+	}
+	lines.push(`Stale frontier (transitive, ${frontier.size} phases):`);
+	// Order: seeds first, then the rest, for readability. Iterate the
+	// de-duplicated seeds so a repeated seed id is not printed twice and the
+	// listing agrees with the phase count above.
+	const ordered = [...seedSet, ...[...frontier].filter((s) => !seedSet.has(s))];
+	for (const id of ordered) {
+		if (seedSet.has(id)) {
+			lines.push(`  ■ ${id}  (changed — seed)`);
+		} else {
+			// Why is it stale? The stale upstreams it read.
+			const deps = reads.get(id) ?? [];
+			const causes = deps.filter((d) => frontier.has(d));
+			lines.push(`  ■ ${id}  ← reads ${causes.length ? causes.join(", ") : "(nothing stale?)"}`);
+		}
+	}
+	return lines.join("\n");
+}

package/extensions/store.ts CHANGED Viewed

@@ -70,6 +70,14 @@ export interface PhaseState {
 	/** Non-fatal diagnostic warnings accumulated during this phase (e.g.
 	 *  unresolved interpolation placeholders, suspicious templates). */
 	warnings?: string[];
+	/** Observed readSet (M3): the upstream phase outputs this phase actually
+	 *  consumed at interpolation time — not what it *declared* to depend on
+	 *  (dependsOn), but what it truly *read* (`{steps.X...}`). Each entry
+	 *  carries the version (= the read phase's inputHash) it consumed, so a
+	 *  later staleness check (M4/M5) can tell whether the upstream has moved.
+	 *  This is the overstory "observed readSet@version" moat: no other
+	 *  orchestrator records what a result actually depended on. */
+	reads?: Array<{ stepId: string; version?: string }>;
 	/** Truncated previews of interpolated strings used to execute this phase,
 	 *  useful when diagnosing why a model saw a literal placeholder. */
 	interpolation?: Array<{ source: string; text: string; missing?: string[] }>;
@@ -89,6 +97,12 @@ export interface RunState {
 	pid?: number;
 	/** True for runs spawned via `detach: true` (background execution). */
 	detached?: boolean;
+	/** Content fingerprint of the desugared flow definition (overstory hash
+	 *  algorithm). Folded into every phase's cache key so a structural change
+	 *  to the flow always invalidates cross-run cache hits — and an identical
+	 *  re-run always reuses them. Filled once at run start; persisted for
+	 *  audit/resume consistency. */
+	flowDefHash?: string | "failed";
 }
 // ---------------------------------------------------------------------------

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-taskflow",
-  "version": "0.0.24",
+  "version": "0.0.25",
   "description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
   "keywords": [
     "pi-package",