pi-taskflow 0.0.24 → 0.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,8 @@ import { type AgentScope, discoverAgents, readSubagentSettings, shouldSyncBuilti
28
28
  import { renderRunResult, summarizeRun } from "./render.ts";
29
29
  import { RunHistoryComponent, type RunHistoryResult } from "./runs-view.ts";
30
30
  import { ApprovalViewComponent, type ApprovalChoice } from "./approval-view.ts";
31
- import { executeTaskflow, type ApprovalDecision, type ApprovalRequest, type RuntimeResult } from "./runtime.ts";
31
+ import { executeTaskflow, recomputeTaskflow, type ApprovalDecision, type ApprovalRequest, type RecomputeReport, type RuntimeDeps, type RuntimeResult } from "./runtime.ts";
32
+ import { type UsageStats } from "./usage.ts";
32
33
  import { finalPhase, resolveArgs, type Taskflow, validateTaskflow, desugar, isShorthand } from "./schema.ts";
33
34
  import {
34
35
  getFlow,
@@ -44,6 +45,8 @@ import {
44
45
  } from "./store.ts";
45
46
  import { CacheStore } from "./cache.ts";
46
47
  import { safeParse } from "./interpolate.ts";
48
+ import { declaredReadMapOfDef, formatWhyStale, readMapOf } from "./stale.ts";
49
+ import type { TaskflowIR } from "./flowir/index.ts";
47
50
  import {
48
51
  isValidKey,
49
52
  queueSpawn,
@@ -60,6 +63,7 @@ interface TaskflowDetails {
60
63
  finalOutput?: string;
61
64
  action: string;
62
65
  message?: string;
66
+ cacheReport?: string;
63
67
  }
64
68
 
65
69
  /** pi reads `isError` at runtime to mark tool failures; it is not in the public type. */
@@ -83,8 +87,8 @@ const ShorthandStep = Type.Object(
83
87
  );
84
88
 
85
89
  const TaskflowParams = Type.Object({
86
- action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "compile", "cache-clear"] as const, {
87
- description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, verify the DAG, compile the DAG to a Mermaid diagram + verification report, or clear the cross-run memoization cache",
90
+ action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "compile", "ir", "provenance", "why-stale", "recompute", "cache-clear"] as const, {
91
+ description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, verify the DAG, compile the DAG to a Mermaid diagram + verification report, compile to FlowIR + content hash, show observed readSet provenance, explain why a run is stale, minimally recompute a stale run, or clear the cross-run memoization cache",
88
92
  default: "run",
89
93
  }),
90
94
  name: Type.Optional(Type.String({ description: "Name of a saved flow (for run/save without inline define)" })),
@@ -123,6 +127,8 @@ const TaskflowParams = Type.Object({
123
127
  ),
124
128
  args: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Invocation arguments for the flow" })),
125
129
  runId: Type.Optional(Type.String({ description: "Run id to resume (for action=resume)" })),
130
+ phaseId: Type.Optional(Type.String({ description: "Phase id — the assumed-changed seed for action=why-stale, or the phase to re-run for action=recompute" })),
131
+ dryRun: Type.Optional(Type.Boolean({ description: "For action=recompute: compute the stale frontier without re-executing anything (no tokens spent). Defaults to true (safe); set false to actually re-run the seed + stale frontier and persist the updated run" })),
126
132
  scope: Type.Optional(
127
133
  StringEnum(["user", "project"] as const, { description: "Where to save (action=save)", default: "project" }),
128
134
  ),
@@ -146,6 +152,82 @@ const TaskflowParams = Type.Object({
146
152
  ),
147
153
  });
148
154
 
155
/**
 * Render a compiled FlowIR as a Markdown report.
 *
 * Sections, in order: title, content hash (or an "unavailable" notice when
 * `ir.hash` is falsy), optional Errors and Warnings lists (emitted only when
 * non-empty), the node list, and the declared-dependency table.
 *
 * @param ir Compiled FlowIR; `ir.ir` may be absent, in which case the node
 *   list is rendered empty.
 * @returns The report as a single newline-joined string.
 */
function formatFlowIR(ir: TaskflowIR): string {
  const hashLine = ir.hash
    ? `**content hash:** \`${ir.hash}\`${ir.usedFallbackHash ? " (fallback — stub projection)" : " (overstory-canonical)"}`
    : "**content hash:** _(unavailable — computation failed)_";
  const out: string[] = [`# FlowIR — "${ir.meta.sourceFlowName}"`, "", hashLine, ""];

  if (ir.errors.length > 0) {
    out.push(`## Errors (${ir.errors.length})`);
    for (const err of ir.errors) {
      const where = err.phaseId ? ` [${err.phaseId}]` : "";
      out.push(`- [${err.code}]${where}: ${err.message}`);
    }
    out.push("");
  }

  if (ir.warnings.length > 0) {
    out.push(`## Warnings (${ir.warnings.length})`);
    for (const warning of ir.warnings) {
      const where = warning.phaseId ? `[${warning.phaseId}] ` : "";
      out.push(`- ${where}${warning.message}`);
    }
    out.push("");
  }

  out.push("## Nodes (1:1 projection)", "");
  const nodes = ir.ir?.nodes ?? [];
  for (const node of nodes) {
    const condition = node.when ? ` when: \`${node.when}\`` : "";
    // An empty inject list renders as `inject:[]` (join of [] is "").
    out.push(
      `- **${node.id}** (kind: \`${node.kind}\`) inject:[${node.inject.join(", ")}] emits:[${node.emits.join(", ")}]${condition}`,
    );
  }

  out.push("", "## Declared dependencies (M2)", "", "| phase | reads | writes |", "|-------|-------|--------|");
  for (const [phaseId, dep] of Object.entries(ir.meta.declaredDeps)) {
    // Only the reads column gets the em-dash placeholder when empty.
    const readsCell = dep.reads.join(", ") || "—";
    out.push(`| ${phaseId} | ${readsCell} | ${dep.writes.join(", ")} |`);
  }

  return out.join("\n");
}
191
+
192
/**
 * Render the observed read sets recorded on a run as plain text.
 *
 * Output starts with a header line and a blank line. If no phase record has
 * a non-empty `reads` array, a single explanatory note follows and nothing
 * else. Otherwise every phase record gets a title line (id, status, and a
 * "★ final" marker when the definition flags that phase as final) followed
 * by either its observed reads or a "source" note.
 *
 * @param run The persisted run state to describe.
 * @returns The report as a single newline-joined string.
 */
function formatProvenance(run: RunState): string {
  const header = `Provenance — run ${run.runId} · flow "${run.flowName}" · ${run.status}`;

  // Ids of the phases the definition marks as final.
  const finalIds = new Set<string>();
  for (const phaseDef of run.def.phases) {
    if (phaseDef.final) finalIds.add(phaseDef.id);
  }

  const records = Object.values(run.phases);
  const hasObservedReads = records.some((rec) => (rec.reads?.length ?? 0) > 0);
  if (!hasObservedReads) {
    return [
      header,
      "",
      "(No observed readSets recorded. Reads are captured for agent/gate/reduce phases that interpolate {steps.*} — the overstory \"observed readSet@version\" moat.)",
    ].join("\n");
  }

  const body = records.flatMap((rec) => {
    const observed = rec.reads ?? [];
    const title = `■ ${rec.id} [${rec.status}]${finalIds.has(rec.id) ? " ★ final" : ""}`;
    if (observed.length === 0) return [title, " (source — no upstream reads)"];
    // A read without a recorded version is shown as "?".
    return [title, " observed reads:", ...observed.map((read) => ` ← ${read.stepId}@${read.version ?? "?"}`)];
  });

  return [header, "", ...body].join("\n");
}
217
+
218
/**
 * Render a recompute report as plain text.
 *
 * Always lists the seed(s), the re-run set and the reused set. The
 * early-cutoff line (and its "saved N" follow-up when N > 0) is emitted only
 * for a real recompute; on a dry run `r.cutoff` is never read.
 *
 * @param r The report produced by a recompute.
 * @returns The report as a single newline-joined string.
 */
function formatRecompute(r: RecomputeReport): string {
  // Comma-joined ids, or an em dash when the joined text is empty.
  const listOrDash = (ids: string[]): string => {
    const text = ids.join(", ");
    return text === "" ? "—" : text;
  };

  const dryRunTag = r.dryRun ? " (DRY RUN — worst-case, no execution)" : "";
  const out: string[] = [
    `Recompute — seed: ${r.seeds.join(", ")}${dryRunTag}`,
    "",
    `▲ re-run (${r.rerun.length}): ${listOrDash(r.rerun)}`,
  ];

  if (!r.dryRun) {
    out.push(`✂ early-cutoff (cached — inputHash unchanged): ${listOrDash(r.cutoff)}`);
    if (r.cutoff.length > 0) {
      out.push(` → saved ${r.cutoff.length} re-execution(s).`);
    }
  }

  out.push(`✓ reused (outside frontier): ${listOrDash(r.reused)}`);
  return out.join("\n");
}
230
+
149
231
  function makeRunState(def: Taskflow, args: Record<string, unknown>, cwd: string): RunState {
150
232
  return {
151
233
  runId: newRunId(def.name),
@@ -292,7 +374,18 @@ async function runFlow(
292
374
  persist: persistThrottled,
293
375
  requestApproval,
294
376
  loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
377
+ // Cross-run cache is opt-in per phase (cache:{scope:"cross-run"}).
378
+ // Defaulting every real run to cross-run was reviewed out: it silently
379
+ // persists phase outputs and can serve stale results for phases whose
380
+ // agents read files at runtime (those files are not in the cache key).
381
+ cacheScopeDefault: "run-only",
295
382
  });
383
+ // Auto-report cache savings at the end of a real run so the user sees the
384
+ // M1-M5 effect without running a separate /tf command.
385
+ if (result.ok) {
386
+ const report = formatCacheReport(result.state, result.totalUsage);
387
+ if (report) ctx.ui.notify(report, "info");
388
+ }
296
389
  return result;
297
390
  } finally {
298
391
  if (heartbeat) clearInterval(heartbeat);
@@ -611,6 +704,46 @@ export default function (pi: ExtensionAPI) {
611
704
  };
612
705
  }
613
706
 
707
+ if (action === "ir") {
708
+ const { compileTaskflowToIR } = await import("./flowir/index.ts");
709
+ // Resolve definition: inline define (object or JSON/fenced string), shorthand,
710
+ // or saved name. Mirrors action=compile / action=verify.
711
+ let def: Taskflow | undefined;
712
+ let resolvedDefine: unknown = params.define;
713
+ if (typeof resolvedDefine === "string") {
714
+ const parsed = safeParse(resolvedDefine);
715
+ if (parsed && typeof parsed === "object") resolvedDefine = parsed;
716
+ }
717
+ if (resolvedDefine) {
718
+ const d = resolvedDefine as Record<string, unknown>;
719
+ if (typeof d === "object" && d !== null && Array.isArray(d.phases)) {
720
+ def = d as unknown as Taskflow;
721
+ } else if (isShorthand(resolvedDefine)) {
722
+ try {
723
+ def = desugar(resolvedDefine) as Taskflow;
724
+ } catch (e) {
725
+ return errorResult(action, `Invalid shorthand: ${e instanceof Error ? e.message : String(e)}`);
726
+ }
727
+ }
728
+ } else if (params.name) {
729
+ const saved = getFlow(ctx.cwd, params.name);
730
+ if (saved) def = saved.def;
731
+ }
732
+ if (!def) {
733
+ return errorResult(action, "Provide 'define' (DSL) or 'name' (saved flow) to compile to IR.");
734
+ }
735
+ // Schema validation first so a malformed graph gives a clean error.
736
+ const vr = validateTaskflow(def, { cwd: ctx.cwd ? String(ctx.cwd) : undefined });
737
+ if (!vr.ok) {
738
+ return errorResult(action, `Schema validation failed:\n${vr.errors.join("\n")}`);
739
+ }
740
+ const ir = await compileTaskflowToIR(def) as TaskflowIR;
741
+ return {
742
+ content: [{ type: "text", text: formatFlowIR(ir) }],
743
+ details: { action } satisfies TaskflowDetails,
744
+ };
745
+ }
746
+
614
747
  if (action === "cache-clear") {
615
748
  const removed = new CacheStore(ctx.cwd).clear();
616
749
  return {
@@ -629,6 +762,61 @@ export default function (pi: ExtensionAPI) {
629
762
  return finalResult(action, result);
630
763
  }
631
764
 
765
+ if (action === "provenance") {
766
+ if (!params.runId)
767
+ return errorResult(action, "action=provenance requires 'runId'");
768
+ const run = loadRun(ctx.cwd, params.runId);
769
+ if (!run) return errorResult(action, `Run not found: ${params.runId}`);
770
+ return {
771
+ content: [{ type: "text", text: formatProvenance(run) }],
772
+ details: { action } satisfies TaskflowDetails,
773
+ };
774
+ }
775
+
776
+ if (action === "why-stale") {
777
+ if (!params.runId)
778
+ return errorResult(action, "action=why-stale requires 'runId'");
779
+ const run = loadRun(ctx.cwd, params.runId);
780
+ if (!run) return errorResult(action, `Run not found: ${params.runId}`);
781
+ const reads = readMapOf(run.phases);
782
+ const declared = declaredReadMapOfDef(run.def);
783
+ const seeds = params.phaseId ? [String(params.phaseId)] : [];
784
+ return {
785
+ content: [{ type: "text", text: formatWhyStale(run.runId, run.flowName, reads, seeds, declared) }],
786
+ details: { action } satisfies TaskflowDetails,
787
+ };
788
+ }
789
+
790
+ if (action === "recompute") {
791
+ if (!params.runId)
792
+ return errorResult(action, "action=recompute requires 'runId'");
793
+ if (!params.phaseId)
794
+ return errorResult(action, "action=recompute requires 'phaseId' (the seed phase to re-run)");
795
+ const prev = loadRun(ctx.cwd, params.runId);
796
+ if (!prev) return errorResult(action, `Run not found: ${params.runId}`);
797
+ // H1: the LLM-callable tool defaults to a SAFE dry-run (no tokens, no
798
+ // mutation). A real recompute — which spends money and overwrites the
799
+ // run — requires an explicit dryRun:false.
800
+ const dryRun = params.dryRun !== false;
801
+ const settings = readSubagentSettings();
802
+ const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
803
+ const deps: RuntimeDeps = {
804
+ cwd: ctx.cwd,
805
+ agents,
806
+ globalThinking: settings.globalThinking,
807
+ signal,
808
+ loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
809
+ };
810
+ const { report, state } = await recomputeTaskflow(prev, deps, [String(params.phaseId)], { dryRun });
811
+ // H2: never persist a partial/aborted recompute over the original run.
812
+ if (!dryRun && !report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
813
+ const prefix = report.aborted ? "⚠ ABORTED mid-recompute — original run left unchanged.\n\n" : "";
814
+ return {
815
+ content: [{ type: "text", text: prefix + formatRecompute(report) }],
816
+ details: { action } satisfies TaskflowDetails,
817
+ };
818
+ }
819
+
632
820
  // resolve the definition: inline `define` / shorthand (single|parallel|chain), else saved `name`.
633
821
  let def: Taskflow | undefined;
634
822
 
@@ -822,7 +1010,7 @@ export default function (pi: ExtensionAPI) {
822
1010
  pi.registerCommand("tf", {
823
1011
  description: "Taskflow: list | run <name> | show <name> | compile <name> | runs | init",
824
1012
  getArgumentCompletions: (prefix) => {
825
- const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify", "compile"];
1013
+ const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify", "compile", "ir", "provenance", "why-stale", "recompute"];
826
1014
  const items = subs.map((s) => ({ value: s, label: s }));
827
1015
  const filtered = items.filter((i) => i.value.startsWith(prefix));
828
1016
  return filtered.length > 0 ? filtered : null;
@@ -878,6 +1066,94 @@ export default function (pi: ExtensionAPI) {
878
1066
  return;
879
1067
  }
880
1068
 
1069
+ if (sub === "ir") {
1070
+ if (!arg) {
1071
+ ctx.ui.notify("Usage: /tf ir <name>", "warning");
1072
+ return;
1073
+ }
1074
+ const flowName = arg.trim().split(/\s+/)[0];
1075
+ const flow = getFlow(ctx.cwd, flowName);
1076
+ if (!flow) {
1077
+ ctx.ui.notify(`Flow not found: ${flowName}`, "error");
1078
+ return;
1079
+ }
1080
+ // Schema-validate before compiling so a malformed saved flow yields a
1081
+ // clean error rather than a half-rendered report (mirrors action=ir).
1082
+ const vr = validateTaskflow(flow.def, { cwd: ctx.cwd ? String(ctx.cwd) : undefined });
1083
+ if (!vr.ok) {
1084
+ ctx.ui.notify(`Schema validation failed:\n${vr.errors.join("\n")}`, "error");
1085
+ return;
1086
+ }
1087
+ const { compileTaskflowToIR } = await import("./flowir/index.ts");
1088
+ const ir = await compileTaskflowToIR(flow.def);
1089
+ ctx.ui.notify(formatFlowIR(ir), "info");
1090
+ return;
1091
+ }
1092
+
1093
+ if (sub === "provenance") {
1094
+ if (!arg) {
1095
+ ctx.ui.notify("Usage: /tf provenance <runId>", "warning");
1096
+ return;
1097
+ }
1098
+ const run = loadRun(ctx.cwd, arg);
1099
+ if (!run) {
1100
+ ctx.ui.notify(`Run not found: ${arg}`, "error");
1101
+ return;
1102
+ }
1103
+ ctx.ui.notify(formatProvenance(run), "info");
1104
+ return;
1105
+ }
1106
+
1107
+ if (sub === "why-stale") {
1108
+ if (!arg) {
1109
+ ctx.ui.notify("Usage: /tf why-stale <runId> [phaseId]", "warning");
1110
+ return;
1111
+ }
1112
+ const [rid, ...rest] = arg.trim().split(/\s+/);
1113
+ const run = loadRun(ctx.cwd, rid);
1114
+ if (!run) {
1115
+ ctx.ui.notify(`Run not found: ${rid}`, "error");
1116
+ return;
1117
+ }
1118
+ const reads = readMapOf(run.phases);
1119
+ const declared = declaredReadMapOfDef(run.def);
1120
+ ctx.ui.notify(formatWhyStale(run.runId, run.flowName, reads, rest, declared), "info");
1121
+ return;
1122
+ }
1123
+
1124
+ if (sub === "recompute") {
1125
+ const tokens = (arg ?? "").trim().split(/\s+/).filter(Boolean);
1126
+ const rid = tokens[0];
1127
+ const seed = tokens.find((t) => t !== rid && !t.startsWith("--"));
1128
+ const apply = tokens.includes("--apply");
1129
+ if (!rid || !seed) {
1130
+ ctx.ui.notify("Usage: /tf recompute <runId> <phaseId> [--apply]\n(default is a safe dry-run; --apply spends tokens)", "warning");
1131
+ return;
1132
+ }
1133
+ const prev = loadRun(ctx.cwd, rid);
1134
+ if (!prev) {
1135
+ ctx.ui.notify(`Run not found: ${rid}`, "error");
1136
+ return;
1137
+ }
1138
+ const settings = readSubagentSettings();
1139
+ const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
1140
+ const deps: RuntimeDeps = {
1141
+ cwd: ctx.cwd,
1142
+ agents,
1143
+ globalThinking: settings.globalThinking,
1144
+ loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
1145
+ };
1146
+ if (apply) {
1147
+ const { report, state } = await recomputeTaskflow(prev, deps, [seed], { dryRun: false });
1148
+ if (!report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
1149
+ ctx.ui.notify(formatRecompute(report), report.aborted ? "warning" : "info");
1150
+ } else {
1151
+ const { report } = await recomputeTaskflow(prev, deps, [seed], { dryRun: true });
1152
+ ctx.ui.notify(formatRecompute(report), "info");
1153
+ }
1154
+ return;
1155
+ }
1156
+
881
1157
  if (sub === "runs") {
882
1158
  const runs = listRuns(ctx.cwd, 50);
883
1159
  if (runs.length === 0) {
@@ -1123,6 +1399,17 @@ function errorResult(action: string, message: string): ToolResult {
1123
1399
  };
1124
1400
  }
1125
1401
 
1402
/**
 * Build the one-line cross-run cache summary for a finished run.
 *
 * Counts the phase records whose `cacheHit` is exactly "cross-run" and sums
 * the `usage.input` + `usage.output` recorded on them (missing values count
 * as 0). No estimate of tokens or dollars "saved" is attempted.
 *
 * @param state Run state whose phase records are inspected.
 * @param totalUsage Accepted for the caller's convenience.
 *   NOTE(review): currently not read by this function.
 * @returns The summary line, or "" when no phase was a cross-run cache hit.
 */
function formatCacheReport(state: RunState, totalUsage: UsageStats): string {
  let hitCount = 0;
  let tokenTotal = 0;
  for (const phase of Object.values(state.phases)) {
    if (phase.cacheHit !== "cross-run") continue;
    hitCount += 1;
    tokenTotal += (phase.usage?.input ?? 0) + (phase.usage?.output ?? 0);
  }
  if (hitCount === 0) {
    return "";
  }
  return `💾 ${hitCount} phase(s) reused from cross-run cache (${tokenTotal.toLocaleString()} tokens spent on them this run)`;
}
1412
+
1126
1413
  function finalResult(action: string, result: RuntimeResult): ToolResult {
1127
1414
  const fp = finalPhase(result.state.def.phases);
1128
1415
  const header = result.ok
@@ -1130,7 +1417,7 @@ function finalResult(action: string, result: RuntimeResult): ToolResult {
1130
1417
  : `Taskflow '${result.state.flowName}' ${result.state.status} (${summarizeRun(result.state)}). Run id: ${result.state.runId} — resume with action=resume.`;
1131
1418
  return {
1132
1419
  content: [{ type: "text", text: `${header}\n\n--- ${fp.id} ---\n${result.finalOutput}` }],
1133
- details: { action, state: result.state, finalOutput: result.finalOutput },
1420
+ details: { action, state: result.state, finalOutput: result.finalOutput, cacheReport: formatCacheReport(result.state, result.totalUsage) },
1134
1421
  isError: !result.ok,
1135
1422
  };
1136
1423
  }
@@ -21,6 +21,12 @@ export interface InterpolationContext {
21
21
  previousOutput?: string;
22
22
  /** loop variable bindings, e.g. { item: {...} } */
23
23
  locals?: Record<string, unknown>;
24
+ /** Observed-read hook (M3): invoked once per successfully-resolved
25
+ * placeholder path, so the runtime can capture which upstream phases a
26
+ * phase actually consumed (its observed readSet). Unresolved refs do NOT
27
+ * fire it (they become `missing` warnings instead). Default undefined →
28
+ * zero overhead, fully backward-compatible. */
29
+ onRead?: (ref: string) => void;
24
30
  }
25
31
 
26
32
  const PLACEHOLDER = /\{([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*)\}/g;
@@ -48,7 +54,18 @@ export function interpolate(
48
54
  return { text, missing };
49
55
  }
50
56
 
57
/**
 * Resolve a placeholder path and report it as an observed read.
 *
 * Delegates the lookup to `_resolvePath`. When the result is anything other
 * than `undefined`, `ctx.onRead` (if set) is invoked with the original path;
 * a path that resolves to `undefined` is not reported.
 *
 * @param path Dotted placeholder path to resolve.
 * @param ctx Interpolation context; its optional `onRead` hook receives the path.
 * @returns The resolved value, or `undefined` when the path does not resolve.
 */
function resolvePath(path: string, ctx: InterpolationContext): unknown {
  const resolved = _resolvePath(path, ctx);
  if (resolved === undefined) {
    return resolved;
  }
  ctx.onRead?.(path);
  return resolved;
}
67
+
68
+ function _resolvePath(path: string, ctx: InterpolationContext): unknown {
52
69
  const parts = path.split(".");
53
70
  const head = parts[0];
54
71