npm - pi-taskflow - Versions diffs - 0.0.23 → 0.0.25 - Mend

pi-taskflow 0.0.23 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/CHANGELOG.md +37 -0
package/README.md +5 -4
package/README.zh-CN.md +1 -1
package/extensions/cache.ts +6 -1
package/extensions/compile.ts +371 -0
package/extensions/flowir/hash.ts +97 -0
package/extensions/index.ts +257 -6
package/extensions/interpolate.ts +17 -0
package/extensions/runtime.ts +326 -27
package/extensions/stale.ts +137 -0
package/extensions/store.ts +14 -0
package/package.json +1 -1
package/skills/taskflow/SKILL.md +2 -2

package/extensions/index.ts CHANGED Viewed

@@ -28,7 +28,8 @@ import { type AgentScope, discoverAgents, readSubagentSettings, shouldSyncBuilti
 import { renderRunResult, summarizeRun } from "./render.ts";
 import { RunHistoryComponent, type RunHistoryResult } from "./runs-view.ts";
 import { ApprovalViewComponent, type ApprovalChoice } from "./approval-view.ts";
-import { executeTaskflow, type ApprovalDecision, type ApprovalRequest, type RuntimeResult } from "./runtime.ts";
+import { executeTaskflow, recomputeTaskflow, type ApprovalDecision, type ApprovalRequest, type RecomputeReport, type RuntimeDeps, type RuntimeResult } from "./runtime.ts";
+import { type UsageStats } from "./usage.ts";
 import { finalPhase, resolveArgs, type Taskflow, validateTaskflow, desugar, isShorthand } from "./schema.ts";
 import {
 	getFlow,
@@ -44,6 +45,7 @@ import {
 } from "./store.ts";
 import { CacheStore } from "./cache.ts";
 import { safeParse } from "./interpolate.ts";
+import { formatWhyStale, readMapOf } from "./stale.ts";
 import {
 	isValidKey,
 	queueSpawn,
@@ -60,6 +62,7 @@ interface TaskflowDetails {
 	finalOutput?: string;
 	action: string;
 	message?: string;
+	cacheReport?: string;
 }
 /** pi reads `isError` at runtime to mark tool failures; it is not in the public type. */
@@ -83,8 +86,8 @@ const ShorthandStep = Type.Object(
 );
 const TaskflowParams = Type.Object({
-	action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "cache-clear"] as const, {
-		description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, or clear the cross-run memoization cache",
+	action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "compile", "provenance", "why-stale", "recompute", "cache-clear"] as const, {
+		description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, verify the DAG, compile the DAG to a Mermaid diagram + verification report, show observed readSet provenance, explain why a run is stale, minimally recompute a stale run, or clear the cross-run memoization cache",
 		default: "run",
 	}),
 	name: Type.Optional(Type.String({ description: "Name of a saved flow (for run/save without inline define)" })),
@@ -123,6 +126,8 @@ const TaskflowParams = Type.Object({
 	),
 	args: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Invocation arguments for the flow" })),
 	runId: Type.Optional(Type.String({ description: "Run id to resume (for action=resume)" })),
+	phaseId: Type.Optional(Type.String({ description: "Phase id — the assumed-changed seed for action=why-stale, or the phase to re-run for action=recompute" })),
+	dryRun: Type.Optional(Type.Boolean({ description: "For action=recompute: compute the stale frontier without re-executing anything (no tokens spent). Defaults to true (safe); set false to actually re-run the seed + stale frontier and persist the updated run" })),
 	scope: Type.Optional(
 		StringEnum(["user", "project"] as const, { description: "Where to save (action=save)", default: "project" }),
 	),
@@ -146,6 +151,45 @@ const TaskflowParams = Type.Object({
 	),
 });
+function formatProvenance(run: RunState): string { // Builds a newline-joined text report of the reads recorded on each phase of the given run.
+	const lines: string[] = [];
+	lines.push(`Provenance — run ${run.runId} · flow "${run.flowName}" · ${run.status}`); // header: run id, flow name, run status
+	lines.push("");
+	const finalIds = new Set(run.def.phases.filter((p) => p.final).map((p) => p.id)); // ids of definition phases flagged final
+	const phases = Object.values(run.phases);
+	const any = phases.some((p) => p.reads && p.reads.length > 0); // true when at least one phase has a non-empty reads list
+	if (!any) { // nothing recorded: emit a single explanatory note and return early
+		lines.push(
+			"(No observed readSets recorded. Reads are captured for agent/gate/reduce phases that interpolate {steps.*} — the overstory \"observed readSet@version\" moat.)",
+		);
+		return lines.join("\n");
+	}
+	for (const p of phases) { // every phase is listed, including those without reads
+		const reads = p.reads ?? [];
+		lines.push(`■ ${p.id}  [${p.status}]${finalIds.has(p.id) ? " ★ final" : ""}`); // phases whose id is in finalIds get the star marker
+		if (reads.length) {
+			lines.push("   observed reads:");
+			for (const r of reads) lines.push(`     ← ${r.stepId}@${r.version ?? "?"}`); // a question mark stands in for a null or undefined version
+		} else {
+			lines.push("   (source — no upstream reads)");
+		}
+	}
+	return lines.join("\n");
+}
+function formatRecompute(r: RecomputeReport): string { // Renders a recompute report as newline-joined text.
+	const lines: string[] = [];
+	lines.push(`Recompute — seed: ${r.seeds.join(", ")}${r.dryRun ? "  (DRY RUN — worst-case, no execution)" : ""}`); // the dry-run marker is appended only when r.dryRun is truthy
+	lines.push("");
+	lines.push(`▲ re-run (${r.rerun.length}): ${r.rerun.join(", ") || "—"}`); // an empty id list prints as a dash
+	if (!r.dryRun) { // cutoff details are shown only when the report is not a dry run
+		lines.push(`✂ early-cutoff (cached — inputHash unchanged): ${r.cutoff.join(", ") || "—"}`);
+		if (r.cutoff.length > 0) lines.push(`   → saved ${r.cutoff.length} re-execution(s).`); // savings line appears only when at least one phase was cut off
+	}
+	lines.push(`✓ reused (outside frontier): ${r.reused.join(", ") || "—"}`); // emitted in both dry-run and real modes
+	return lines.join("\n");
+}
 function makeRunState(def: Taskflow, args: Record<string, unknown>, cwd: string): RunState {
 	return {
 		runId: newRunId(def.name),
@@ -292,7 +336,18 @@ async function runFlow(
 			persist: persistThrottled,
 			requestApproval,
 			loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
+			// Cross-run cache is opt-in per phase (cache:{scope:"cross-run"}).
+			// Defaulting every real run to cross-run was reviewed out: it silently
+			// persists phase outputs and can serve stale results for phases whose
+			// agents read files at runtime (those files are not in the cache key).
+			cacheScopeDefault: "run-only",
 		});
+		// Auto-report cache savings at the end of a real run so the user sees the
+		// M1-M5 effect without running a separate /tf command.
+		if (result.ok) {
+			const report = formatCacheReport(result.state, result.totalUsage);
+			if (report) ctx.ui.notify(report, "info");
+		}
 		return result;
 	} finally {
 		if (heartbeat) clearInterval(heartbeat);
@@ -402,6 +457,7 @@ export default function (pi: ExtensionAPI) {
 			"Every delegation is tracked (runId), resumable across sessions, and saveable as /tf:<name> via action=save.",
 			"Use action=agents to list the 18 built-in agents (executor, scout, planner, analyst, critic, reviewer, risk-reviewer, security-reviewer, plan-arbiter, final-arbiter, test-engineer, doc-writer, executor-code, executor-fast, executor-ui, recover, verifier, visual-explorer). Do NOT invent agent names.",
 			"Phase types: agent, parallel (static branches), map (dynamic fan-out over array), gate (VERDICT: PASS/BLOCK), reduce (aggregate from N), approval (human-in-the-loop), flow (run saved sub-flow), loop (iterate until condition/convergence/cap), tournament (N variants, judge picks best/aggregate).",
+			"Use action=compile to generate a Mermaid diagram + verification report from a saved or inline flow — 0 tokens.",
 			"Interpolation: {args.X}, {steps.ID.output}, {steps.ID.json}, {item} (map), {previous.output}.",
 		].join(" "),
 		parameters: TaskflowParams,
@@ -570,6 +626,46 @@ export default function (pi: ExtensionAPI) {
 				return { content: [{ type: "text", text: lines.join("\n") }], details: { action } satisfies TaskflowDetails };
 			}
+			if (action === "compile") {
+				const { compileTaskflow } = await import("./compile.ts");
+				// Resolve definition: inline define (object or JSON/fenced string) then saved name.
+				let def: Taskflow | undefined;
+				let resolvedDefine: unknown = params.define;
+				if (typeof resolvedDefine === "string") {
+					const parsed = safeParse(resolvedDefine);
+					if (parsed && typeof parsed === "object") resolvedDefine = parsed;
+				}
+				if (resolvedDefine) {
+					const d = resolvedDefine as Record<string, unknown>;
+					if (typeof d === "object" && d !== null && Array.isArray(d.phases)) {
+						def = d as unknown as Taskflow;
+					} else if (isShorthand(resolvedDefine)) {
+						try {
+							def = desugar(resolvedDefine) as Taskflow;
+						} catch (e) {
+							return errorResult(action, `Invalid shorthand: ${e instanceof Error ? e.message : String(e)}`);
+						}
+					}
+				} else if (params.name) {
+					const saved = getFlow(ctx.cwd, params.name);
+					if (saved) def = saved.def;
+				}
+				if (!def) {
+					return errorResult(action, "Provide 'define' (DSL) or 'name' (saved flow) to compile.");
+				}
+				// Schema validation first so a malformed graph gives a clean error
+				// rather than a half-rendered diagram.
+				const vr = validateTaskflow(def, { cwd: ctx.cwd ? String(ctx.cwd) : undefined });
+				if (!vr.ok) {
+					return errorResult(action, `Schema validation failed:\n${vr.errors.join("\n")}`);
+				}
+				const compiled = compileTaskflow(def);
+				return {
+					content: [{ type: "text", text: compiled.markdown }],
+					details: { action } satisfies TaskflowDetails,
+				};
+			}
 			if (action === "cache-clear") {
 				const removed = new CacheStore(ctx.cwd).clear();
 				return {
@@ -588,6 +684,60 @@ export default function (pi: ExtensionAPI) {
 				return finalResult(action, result);
 			}
+			if (action === "provenance") {
+				if (!params.runId)
+					return errorResult(action, "action=provenance requires 'runId'");
+				const run = loadRun(ctx.cwd, params.runId);
+				if (!run) return errorResult(action, `Run not found: ${params.runId}`);
+				return {
+					content: [{ type: "text", text: formatProvenance(run) }],
+					details: { action } satisfies TaskflowDetails,
+				};
+			}
+			if (action === "why-stale") {
+				if (!params.runId)
+					return errorResult(action, "action=why-stale requires 'runId'");
+				const run = loadRun(ctx.cwd, params.runId);
+				if (!run) return errorResult(action, `Run not found: ${params.runId}`);
+				const reads = readMapOf(run.phases);
+				const seeds = params.phaseId ? [String(params.phaseId)] : [];
+				return {
+					content: [{ type: "text", text: formatWhyStale(run.runId, run.flowName, reads, seeds) }],
+					details: { action } satisfies TaskflowDetails,
+				};
+			}
+			if (action === "recompute") {
+				if (!params.runId)
+					return errorResult(action, "action=recompute requires 'runId'");
+				if (!params.phaseId)
+					return errorResult(action, "action=recompute requires 'phaseId' (the seed phase to re-run)");
+				const prev = loadRun(ctx.cwd, params.runId);
+				if (!prev) return errorResult(action, `Run not found: ${params.runId}`);
+				// H1: the LLM-callable tool defaults to a SAFE dry-run (no tokens, no
+				// mutation). A real recompute — which spends money and overwrites the
+				// run — requires an explicit dryRun:false.
+				const dryRun = params.dryRun !== false;
+				const settings = readSubagentSettings();
+				const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
+				const deps: RuntimeDeps = {
+					cwd: ctx.cwd,
+					agents,
+					globalThinking: settings.globalThinking,
+					signal,
+					loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
+				};
+				const { report, state } = await recomputeTaskflow(prev, deps, [String(params.phaseId)], { dryRun });
+				// H2: never persist a partial/aborted recompute over the original run.
+				if (!dryRun && !report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
+				const prefix = report.aborted ? "⚠ ABORTED mid-recompute — original run left unchanged.\n\n" : "";
+				return {
+					content: [{ type: "text", text: prefix + formatRecompute(report) }],
+					details: { action } satisfies TaskflowDetails,
+				};
+			}
 			// resolve the definition: inline `define` / shorthand (single|parallel|chain), else saved `name`.
 			let def: Taskflow | undefined;
@@ -779,9 +929,9 @@ export default function (pi: ExtensionAPI) {
 	// ---- The /tf user command ----
 	pi.registerCommand("tf", {
-		description: "Taskflow: list | run <name> | show <name> | runs | init",
+		description: "Taskflow: list | run <name> | show <name> | compile <name> | runs | init",
 		getArgumentCompletions: (prefix) => {
-			const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify"];
+			const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify", "compile", "provenance", "why-stale", "recompute"];
 			const items = subs.map((s) => ({ value: s, label: s }));
 			const filtered = items.filter((i) => i.value.startsWith(prefix));
 			return filtered.length > 0 ? filtered : null;
@@ -810,6 +960,96 @@ export default function (pi: ExtensionAPI) {
 				return;
 			}
+			if (sub === "compile") {
+				if (!arg) {
+					ctx.ui.notify("Usage: /tf compile <name> [lr|td]", "warning");
+					return;
+				}
+				// `arg` may carry an optional direction suffix: "<name> lr" / "<name> td".
+				const parts = arg.trim().split(/\s+/);
+				const flowName = parts[0];
+				const direction = parts[1]?.toLowerCase() === "lr" ? "LR" : "TD";
+				const flow = getFlow(ctx.cwd, flowName);
+				if (!flow) {
+					ctx.ui.notify(`Flow not found: ${flowName}`, "error");
+					return;
+				}
+				// Schema-validate before compiling so a malformed saved flow yields a
+				// clean error rather than a half-rendered diagram (mirrors the tool action).
+				const vr = validateTaskflow(flow.def, { cwd: ctx.cwd ? String(ctx.cwd) : undefined });
+				if (!vr.ok) {
+					ctx.ui.notify(`Schema validation failed:\n${vr.errors.join("\n")}`, "error");
+					return;
+				}
+				const { compileTaskflow } = await import("./compile.ts");
+				const compiled = compileTaskflow(flow.def, { direction });
+				ctx.ui.notify(compiled.markdown, compiled.verification.ok ? "info" : "warning");
+				return;
+			}
+			if (sub === "provenance") {
+				if (!arg) {
+					ctx.ui.notify("Usage: /tf provenance <runId>", "warning");
+					return;
+				}
+				const run = loadRun(ctx.cwd, arg);
+				if (!run) {
+					ctx.ui.notify(`Run not found: ${arg}`, "error");
+					return;
+				}
+				ctx.ui.notify(formatProvenance(run), "info");
+				return;
+			}
+			if (sub === "why-stale") {
+				if (!arg) {
+					ctx.ui.notify("Usage: /tf why-stale <runId> [phaseId]", "warning");
+					return;
+				}
+				const [rid, ...rest] = arg.trim().split(/\s+/);
+				const run = loadRun(ctx.cwd, rid);
+				if (!run) {
+					ctx.ui.notify(`Run not found: ${rid}`, "error");
+					return;
+				}
+				const reads = readMapOf(run.phases);
+				ctx.ui.notify(formatWhyStale(run.runId, run.flowName, reads, rest), "info");
+				return;
+			}
+			if (sub === "recompute") {
+				const tokens = (arg ?? "").trim().split(/\s+/).filter(Boolean);
+				const rid = tokens[0];
+				const seed = tokens.find((t) => t !== rid && !t.startsWith("--"));
+				const apply = tokens.includes("--apply");
+				if (!rid || !seed) {
+					ctx.ui.notify("Usage: /tf recompute <runId> <phaseId> [--apply]\n(default is a safe dry-run; --apply spends tokens)", "warning");
+					return;
+				}
+				const prev = loadRun(ctx.cwd, rid);
+				if (!prev) {
+					ctx.ui.notify(`Run not found: ${rid}`, "error");
+					return;
+				}
+				const settings = readSubagentSettings();
+				const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
+				const deps: RuntimeDeps = {
+					cwd: ctx.cwd,
+					agents,
+					globalThinking: settings.globalThinking,
+					loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
+				};
+				if (apply) {
+					const { report, state } = await recomputeTaskflow(prev, deps, [seed], { dryRun: false });
+					if (!report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
+					ctx.ui.notify(formatRecompute(report), report.aborted ? "warning" : "info");
+				} else {
+					const { report } = await recomputeTaskflow(prev, deps, [seed], { dryRun: true });
+					ctx.ui.notify(formatRecompute(report), "info");
+				}
+				return;
+			}
 			if (sub === "runs") {
 				const runs = listRuns(ctx.cwd, 50);
 				if (runs.length === 0) {
@@ -1055,6 +1295,17 @@ function errorResult(action: string, message: string): ToolResult {
 	};
 }
+function formatCacheReport(state: RunState, totalUsage: UsageStats): string { // One-line summary of phases whose cacheHit equals cross-run. NOTE(review): totalUsage is not read anywhere in this body.
+	const cached = Object.values(state.phases).filter((p) => p.cacheHit === "cross-run");
+	if (cached.length === 0) return ""; // an empty string means there is nothing to report
+	// Honest reporting: we know these phases spent 0 tokens *this run* because
+	// they were served from cache. We do NOT estimate dollars/tokens "saved" —
+	// that requires guessing what a re-execution would have cost, and the mix of
+	// cheap vs expensive phases (tournament/loop) makes such a guess misleading.
+	const cachedTokens = cached.reduce((sum, p) => sum + ((p.usage?.input ?? 0) + (p.usage?.output ?? 0)), 0); // input plus output tokens over the cached phases; missing usage fields count as 0
+	return `💾 ${cached.length} phase(s) reused from cross-run cache (${cachedTokens.toLocaleString()} tokens spent on them this run)`; // NOTE(review): the comment above says 0 tokens were spent, yet this prints a summed token count; confirm the intended wording
+}
 function finalResult(action: string, result: RuntimeResult): ToolResult {
 	const fp = finalPhase(result.state.def.phases);
 	const header = result.ok
@@ -1062,7 +1313,7 @@ function finalResult(action: string, result: RuntimeResult): ToolResult {
 		: `Taskflow '${result.state.flowName}' ${result.state.status} (${summarizeRun(result.state)}). Run id: ${result.state.runId} — resume with action=resume.`;
 	return {
 		content: [{ type: "text", text: `${header}\n\n--- ${fp.id} ---\n${result.finalOutput}` }],
-		details: { action, state: result.state, finalOutput: result.finalOutput },
+		details: { action, state: result.state, finalOutput: result.finalOutput, cacheReport: formatCacheReport(result.state, result.totalUsage) },
 		isError: !result.ok,
 	};
 }

package/extensions/interpolate.ts CHANGED Viewed

@@ -21,6 +21,12 @@ export interface InterpolationContext {
 	previousOutput?: string;
 	/** loop variable bindings, e.g. { item: {...} } */
 	locals?: Record<string, unknown>;
+	/** Observed-read hook (M3): invoked once per successfully-resolved
+	 *  placeholder path, so the runtime can capture which upstream phases a
+	 *  phase actually consumed (its observed readSet). Unresolved refs do NOT
+	 *  fire it (they become `missing` warnings instead). Default undefined →
+	 *  zero overhead, fully backward-compatible. */
+	onRead?: (ref: string) => void;
 }
 const PLACEHOLDER = /\{([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*)\}/g;
@@ -48,7 +54,18 @@ export function interpolate(
 	return { text, missing };
 }
+/** Resolve + record an observed read (M3 observed-readSet). Fires only on
+ *  successful resolution so an unresolved ref is NOT logged as a dependency
+ *  (it stays a `missing` warning). The runtime threads a collector here to
+ *  capture which upstream phases this phase actually consumed — the overstory
+ *  "observed readSet@version" moat (nobody else records this). */
 function resolvePath(path: string, ctx: InterpolationContext): unknown {
+	const value = _resolvePath(path, ctx); // the lookup itself is delegated to _resolvePath
+	if (value !== undefined) ctx.onRead?.(path); // the hook fires for any result other than undefined, so a null result still counts as a read; an absent hook is skipped by the optional call
+	return value;
+}
+function _resolvePath(path: string, ctx: InterpolationContext): unknown {
 	const parts = path.split(".");
 	const head = parts[0];