pi-taskflow 0.0.23 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,8 @@ import { type AgentScope, discoverAgents, readSubagentSettings, shouldSyncBuilti
28
28
  import { renderRunResult, summarizeRun } from "./render.ts";
29
29
  import { RunHistoryComponent, type RunHistoryResult } from "./runs-view.ts";
30
30
  import { ApprovalViewComponent, type ApprovalChoice } from "./approval-view.ts";
31
- import { executeTaskflow, type ApprovalDecision, type ApprovalRequest, type RuntimeResult } from "./runtime.ts";
31
+ import { executeTaskflow, recomputeTaskflow, type ApprovalDecision, type ApprovalRequest, type RecomputeReport, type RuntimeDeps, type RuntimeResult } from "./runtime.ts";
32
+ import { type UsageStats } from "./usage.ts";
32
33
  import { finalPhase, resolveArgs, type Taskflow, validateTaskflow, desugar, isShorthand } from "./schema.ts";
33
34
  import {
34
35
  getFlow,
@@ -44,6 +45,7 @@ import {
44
45
  } from "./store.ts";
45
46
  import { CacheStore } from "./cache.ts";
46
47
  import { safeParse } from "./interpolate.ts";
48
+ import { formatWhyStale, readMapOf } from "./stale.ts";
47
49
  import {
48
50
  isValidKey,
49
51
  queueSpawn,
@@ -60,6 +62,7 @@ interface TaskflowDetails {
60
62
  finalOutput?: string;
61
63
  action: string;
62
64
  message?: string;
65
+ cacheReport?: string;
63
66
  }
64
67
 
65
68
  /** pi reads `isError` at runtime to mark tool failures; it is not in the public type. */
@@ -83,8 +86,8 @@ const ShorthandStep = Type.Object(
83
86
  );
84
87
 
85
88
  const TaskflowParams = Type.Object({
86
- action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "cache-clear"] as const, {
87
- description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, or clear the cross-run memoization cache",
89
+ action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "compile", "provenance", "why-stale", "recompute", "cache-clear"] as const, {
90
+ description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, verify the DAG, compile the DAG to a Mermaid diagram + verification report, show observed readSet provenance, explain why a run is stale, minimally recompute a stale run, or clear the cross-run memoization cache",
88
91
  default: "run",
89
92
  }),
90
93
  name: Type.Optional(Type.String({ description: "Name of a saved flow (for run/save without inline define)" })),
@@ -123,6 +126,8 @@ const TaskflowParams = Type.Object({
123
126
  ),
124
127
  args: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Invocation arguments for the flow" })),
125
128
  runId: Type.Optional(Type.String({ description: "Run id to resume (for action=resume)" })),
129
+ phaseId: Type.Optional(Type.String({ description: "Phase id — the assumed-changed seed for action=why-stale, or the phase to re-run for action=recompute" })),
130
+ dryRun: Type.Optional(Type.Boolean({ description: "For action=recompute: compute the stale frontier without re-executing anything (no tokens spent). Defaults to true (safe); set false to actually re-run the seed + stale frontier and persist the updated run" })),
126
131
  scope: Type.Optional(
127
132
  StringEnum(["user", "project"] as const, { description: "Where to save (action=save)", default: "project" }),
128
133
  ),
@@ -146,6 +151,45 @@ const TaskflowParams = Type.Object({
146
151
  ),
147
152
  });
148
153
 
154
/**
 * Render the observed readSets recorded on a run as a plain-text report.
 *
 * The report opens with a header (run id, flow name, status) followed by a
 * blank line. If no phase carries a non-empty `reads` list, a single
 * explanatory note is emitted instead of the per-phase listing. Otherwise each
 * phase gets a heading (flagged when the definition marks it `final`) and
 * either its reads as `stepId@version` or a "source" marker.
 *
 * @param run Persisted run state to describe.
 * @returns Newline-joined report text.
 */
function formatProvenance(run: RunState): string {
  const out: string[] = [`Provenance — run ${run.runId} · flow "${run.flowName}" · ${run.status}`, ""];
  const finalIds = new Set(run.def.phases.filter((p) => p.final).map((p) => p.id));
  const phases = Object.values(run.phases);

  // Nothing recorded anywhere: explain why rather than printing empty entries.
  const nothingRecorded = phases.every((phase) => !phase.reads || phase.reads.length === 0);
  if (nothingRecorded) {
    out.push(
      "(No observed readSets recorded. Reads are captured for agent/gate/reduce phases that interpolate {steps.*} — the overstory \"observed readSet@version\" moat.)",
    );
    return out.join("\n");
  }

  for (const phase of phases) {
    const finalMarker = finalIds.has(phase.id) ? " ★ final" : "";
    out.push(`■ ${phase.id} [${phase.status}]${finalMarker}`);

    const observed = phase.reads ?? [];
    if (observed.length === 0) {
      out.push(" (source — no upstream reads)");
      continue;
    }
    out.push(" observed reads:");
    // A read without a recorded version is shown as "?".
    observed.forEach((read) => {
      out.push(` ← ${read.stepId}@${read.version ?? "?"}`);
    });
  }
  return out.join("\n");
}
179
+
180
/**
 * Render a recompute report as plain text.
 *
 * Lists the seed phases, the phases that were (or, in a dry run, would be)
 * re-run, and the phases reused from outside the frontier. The early-cutoff
 * section is only printed for a real (non-dry) recompute; `r.cutoff` is not
 * read at all in dry-run mode.
 *
 * @param r Report returned by the recompute operation.
 * @returns Newline-joined report text.
 */
function formatRecompute(r: RecomputeReport): string {
  // Empty id lists are shown as an em dash rather than an empty string.
  const idsOrDash = (ids: readonly unknown[]): string => ids.join(", ") || "—";

  const dryRunTag = r.dryRun ? " (DRY RUN — worst-case, no execution)" : "";
  const out: string[] = [
    `Recompute — seed: ${r.seeds.join(", ")}${dryRunTag}`,
    "",
    `▲ re-run (${r.rerun.length}): ${idsOrDash(r.rerun)}`,
  ];

  if (!r.dryRun) {
    out.push(`✂ early-cutoff (cached — inputHash unchanged): ${idsOrDash(r.cutoff)}`);
    if (r.cutoff.length > 0) {
      out.push(` → saved ${r.cutoff.length} re-execution(s).`);
    }
  }

  out.push(`✓ reused (outside frontier): ${idsOrDash(r.reused)}`);
  return out.join("\n");
}
192
+
149
193
  function makeRunState(def: Taskflow, args: Record<string, unknown>, cwd: string): RunState {
150
194
  return {
151
195
  runId: newRunId(def.name),
@@ -292,7 +336,18 @@ async function runFlow(
292
336
  persist: persistThrottled,
293
337
  requestApproval,
294
338
  loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
339
+ // Cross-run cache is opt-in per phase (cache:{scope:"cross-run"}).
340
+ // Defaulting every real run to cross-run was reviewed out: it silently
341
+ // persists phase outputs and can serve stale results for phases whose
342
+ // agents read files at runtime (those files are not in the cache key).
343
+ cacheScopeDefault: "run-only",
295
344
  });
345
+ // Auto-report cache savings at the end of a real run so the user sees the
346
+ // M1-M5 effect without running a separate /tf command.
347
+ if (result.ok) {
348
+ const report = formatCacheReport(result.state, result.totalUsage);
349
+ if (report) ctx.ui.notify(report, "info");
350
+ }
296
351
  return result;
297
352
  } finally {
298
353
  if (heartbeat) clearInterval(heartbeat);
@@ -402,6 +457,7 @@ export default function (pi: ExtensionAPI) {
402
457
  "Every delegation is tracked (runId), resumable across sessions, and saveable as /tf:<name> via action=save.",
403
458
  "Use action=agents to list the 18 built-in agents (executor, scout, planner, analyst, critic, reviewer, risk-reviewer, security-reviewer, plan-arbiter, final-arbiter, test-engineer, doc-writer, executor-code, executor-fast, executor-ui, recover, verifier, visual-explorer). Do NOT invent agent names.",
404
459
  "Phase types: agent, parallel (static branches), map (dynamic fan-out over array), gate (VERDICT: PASS/BLOCK), reduce (aggregate from N), approval (human-in-the-loop), flow (run saved sub-flow), loop (iterate until condition/convergence/cap), tournament (N variants, judge picks best/aggregate).",
460
+ "Use action=compile to generate a Mermaid diagram + verification report from a saved or inline flow — 0 tokens.",
405
461
  "Interpolation: {args.X}, {steps.ID.output}, {steps.ID.json}, {item} (map), {previous.output}.",
406
462
  ].join(" "),
407
463
  parameters: TaskflowParams,
@@ -570,6 +626,46 @@ export default function (pi: ExtensionAPI) {
570
626
  return { content: [{ type: "text", text: lines.join("\n") }], details: { action } satisfies TaskflowDetails };
571
627
  }
572
628
 
629
+ if (action === "compile") {
630
+ const { compileTaskflow } = await import("./compile.ts");
631
+ // Resolve definition: inline define (object or JSON/fenced string) then saved name.
632
+ let def: Taskflow | undefined;
633
+ let resolvedDefine: unknown = params.define;
634
+ if (typeof resolvedDefine === "string") {
635
+ const parsed = safeParse(resolvedDefine);
636
+ if (parsed && typeof parsed === "object") resolvedDefine = parsed;
637
+ }
638
+ if (resolvedDefine) {
639
+ const d = resolvedDefine as Record<string, unknown>;
640
+ if (typeof d === "object" && d !== null && Array.isArray(d.phases)) {
641
+ def = d as unknown as Taskflow;
642
+ } else if (isShorthand(resolvedDefine)) {
643
+ try {
644
+ def = desugar(resolvedDefine) as Taskflow;
645
+ } catch (e) {
646
+ return errorResult(action, `Invalid shorthand: ${e instanceof Error ? e.message : String(e)}`);
647
+ }
648
+ }
649
+ } else if (params.name) {
650
+ const saved = getFlow(ctx.cwd, params.name);
651
+ if (saved) def = saved.def;
652
+ }
653
+ if (!def) {
654
+ return errorResult(action, "Provide 'define' (DSL) or 'name' (saved flow) to compile.");
655
+ }
656
+ // Schema validation first so a malformed graph gives a clean error
657
+ // rather than a half-rendered diagram.
658
+ const vr = validateTaskflow(def, { cwd: ctx.cwd ? String(ctx.cwd) : undefined });
659
+ if (!vr.ok) {
660
+ return errorResult(action, `Schema validation failed:\n${vr.errors.join("\n")}`);
661
+ }
662
+ const compiled = compileTaskflow(def);
663
+ return {
664
+ content: [{ type: "text", text: compiled.markdown }],
665
+ details: { action } satisfies TaskflowDetails,
666
+ };
667
+ }
668
+
573
669
  if (action === "cache-clear") {
574
670
  const removed = new CacheStore(ctx.cwd).clear();
575
671
  return {
@@ -588,6 +684,60 @@ export default function (pi: ExtensionAPI) {
588
684
  return finalResult(action, result);
589
685
  }
590
686
 
687
+ if (action === "provenance") {
688
+ if (!params.runId)
689
+ return errorResult(action, "action=provenance requires 'runId'");
690
+ const run = loadRun(ctx.cwd, params.runId);
691
+ if (!run) return errorResult(action, `Run not found: ${params.runId}`);
692
+ return {
693
+ content: [{ type: "text", text: formatProvenance(run) }],
694
+ details: { action } satisfies TaskflowDetails,
695
+ };
696
+ }
697
+
698
+ if (action === "why-stale") {
699
+ if (!params.runId)
700
+ return errorResult(action, "action=why-stale requires 'runId'");
701
+ const run = loadRun(ctx.cwd, params.runId);
702
+ if (!run) return errorResult(action, `Run not found: ${params.runId}`);
703
+ const reads = readMapOf(run.phases);
704
+ const seeds = params.phaseId ? [String(params.phaseId)] : [];
705
+ return {
706
+ content: [{ type: "text", text: formatWhyStale(run.runId, run.flowName, reads, seeds) }],
707
+ details: { action } satisfies TaskflowDetails,
708
+ };
709
+ }
710
+
711
+ if (action === "recompute") {
712
+ if (!params.runId)
713
+ return errorResult(action, "action=recompute requires 'runId'");
714
+ if (!params.phaseId)
715
+ return errorResult(action, "action=recompute requires 'phaseId' (the seed phase to re-run)");
716
+ const prev = loadRun(ctx.cwd, params.runId);
717
+ if (!prev) return errorResult(action, `Run not found: ${params.runId}`);
718
+ // H1: the LLM-callable tool defaults to a SAFE dry-run (no tokens, no
719
+ // mutation). A real recompute — which spends money and overwrites the
720
+ // run — requires an explicit dryRun:false.
721
+ const dryRun = params.dryRun !== false;
722
+ const settings = readSubagentSettings();
723
+ const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
724
+ const deps: RuntimeDeps = {
725
+ cwd: ctx.cwd,
726
+ agents,
727
+ globalThinking: settings.globalThinking,
728
+ signal,
729
+ loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
730
+ };
731
+ const { report, state } = await recomputeTaskflow(prev, deps, [String(params.phaseId)], { dryRun });
732
+ // H2: never persist a partial/aborted recompute over the original run.
733
+ if (!dryRun && !report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
734
+ const prefix = report.aborted ? "⚠ ABORTED mid-recompute — original run left unchanged.\n\n" : "";
735
+ return {
736
+ content: [{ type: "text", text: prefix + formatRecompute(report) }],
737
+ details: { action } satisfies TaskflowDetails,
738
+ };
739
+ }
740
+
591
741
  // resolve the definition: inline `define` / shorthand (single|parallel|chain), else saved `name`.
592
742
  let def: Taskflow | undefined;
593
743
 
@@ -779,9 +929,9 @@ export default function (pi: ExtensionAPI) {
779
929
 
780
930
  // ---- The /tf user command ----
781
931
  pi.registerCommand("tf", {
782
- description: "Taskflow: list | run <name> | show <name> | runs | init",
932
+ description: "Taskflow: list | run <name> | show <name> | compile <name> | runs | init",
783
933
  getArgumentCompletions: (prefix) => {
784
- const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify"];
934
+ const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify", "compile", "provenance", "why-stale", "recompute"];
785
935
  const items = subs.map((s) => ({ value: s, label: s }));
786
936
  const filtered = items.filter((i) => i.value.startsWith(prefix));
787
937
  return filtered.length > 0 ? filtered : null;
@@ -810,6 +960,96 @@ export default function (pi: ExtensionAPI) {
810
960
  return;
811
961
  }
812
962
 
963
+ if (sub === "compile") {
964
+ if (!arg) {
965
+ ctx.ui.notify("Usage: /tf compile <name> [lr|td]", "warning");
966
+ return;
967
+ }
968
+ // `arg` may carry an optional direction suffix: "<name> lr" / "<name> td".
969
+ const parts = arg.trim().split(/\s+/);
970
+ const flowName = parts[0];
971
+ const direction = parts[1]?.toLowerCase() === "lr" ? "LR" : "TD";
972
+ const flow = getFlow(ctx.cwd, flowName);
973
+ if (!flow) {
974
+ ctx.ui.notify(`Flow not found: ${flowName}`, "error");
975
+ return;
976
+ }
977
+ // Schema-validate before compiling so a malformed saved flow yields a
978
+ // clean error rather than a half-rendered diagram (mirrors the tool action).
979
+ const vr = validateTaskflow(flow.def, { cwd: ctx.cwd ? String(ctx.cwd) : undefined });
980
+ if (!vr.ok) {
981
+ ctx.ui.notify(`Schema validation failed:\n${vr.errors.join("\n")}`, "error");
982
+ return;
983
+ }
984
+ const { compileTaskflow } = await import("./compile.ts");
985
+ const compiled = compileTaskflow(flow.def, { direction });
986
+ ctx.ui.notify(compiled.markdown, compiled.verification.ok ? "info" : "warning");
987
+ return;
988
+ }
989
+
990
+ if (sub === "provenance") {
991
+ if (!arg) {
992
+ ctx.ui.notify("Usage: /tf provenance <runId>", "warning");
993
+ return;
994
+ }
995
+ const run = loadRun(ctx.cwd, arg);
996
+ if (!run) {
997
+ ctx.ui.notify(`Run not found: ${arg}`, "error");
998
+ return;
999
+ }
1000
+ ctx.ui.notify(formatProvenance(run), "info");
1001
+ return;
1002
+ }
1003
+
1004
+ if (sub === "why-stale") {
1005
+ if (!arg) {
1006
+ ctx.ui.notify("Usage: /tf why-stale <runId> [phaseId]", "warning");
1007
+ return;
1008
+ }
1009
+ const [rid, ...rest] = arg.trim().split(/\s+/);
1010
+ const run = loadRun(ctx.cwd, rid);
1011
+ if (!run) {
1012
+ ctx.ui.notify(`Run not found: ${rid}`, "error");
1013
+ return;
1014
+ }
1015
+ const reads = readMapOf(run.phases);
1016
+ ctx.ui.notify(formatWhyStale(run.runId, run.flowName, reads, rest), "info");
1017
+ return;
1018
+ }
1019
+
1020
+ if (sub === "recompute") {
1021
+ const tokens = (arg ?? "").trim().split(/\s+/).filter(Boolean);
1022
+ const rid = tokens[0];
1023
+ const seed = tokens.find((t) => t !== rid && !t.startsWith("--"));
1024
+ const apply = tokens.includes("--apply");
1025
+ if (!rid || !seed) {
1026
+ ctx.ui.notify("Usage: /tf recompute <runId> <phaseId> [--apply]\n(default is a safe dry-run; --apply spends tokens)", "warning");
1027
+ return;
1028
+ }
1029
+ const prev = loadRun(ctx.cwd, rid);
1030
+ if (!prev) {
1031
+ ctx.ui.notify(`Run not found: ${rid}`, "error");
1032
+ return;
1033
+ }
1034
+ const settings = readSubagentSettings();
1035
+ const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
1036
+ const deps: RuntimeDeps = {
1037
+ cwd: ctx.cwd,
1038
+ agents,
1039
+ globalThinking: settings.globalThinking,
1040
+ loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
1041
+ };
1042
+ if (apply) {
1043
+ const { report, state } = await recomputeTaskflow(prev, deps, [seed], { dryRun: false });
1044
+ if (!report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
1045
+ ctx.ui.notify(formatRecompute(report), report.aborted ? "warning" : "info");
1046
+ } else {
1047
+ const { report } = await recomputeTaskflow(prev, deps, [seed], { dryRun: true });
1048
+ ctx.ui.notify(formatRecompute(report), "info");
1049
+ }
1050
+ return;
1051
+ }
1052
+
813
1053
  if (sub === "runs") {
814
1054
  const runs = listRuns(ctx.cwd, 50);
815
1055
  if (runs.length === 0) {
@@ -1055,6 +1295,17 @@ function errorResult(action: string, message: string): ToolResult {
1055
1295
  };
1056
1296
  }
1057
1297
 
1298
/**
 * Build the one-line summary of phases served from the cross-run cache.
 *
 * Only phases whose `cacheHit` is exactly `"cross-run"` are counted. The token
 * figure is the sum of each such phase's recorded `usage.input` and
 * `usage.output` (missing values count as 0). No estimate of "saved" tokens or
 * cost is attempted.
 *
 * NOTE(review): the message says the tokens were "spent on them this run";
 * whether a cache-hit phase's `usage` reflects this run or the originally
 * cached execution is not visible from here — confirm the wording.
 *
 * @param state Run state whose phases are inspected.
 * @param totalUsage Currently unused by this function.
 * @returns The summary line, or an empty string when no phase was a cross-run cache hit.
 */
function formatCacheReport(state: RunState, totalUsage: UsageStats): string {
  let hitCount = 0;
  let tokenSum = 0;
  for (const phase of Object.values(state.phases)) {
    if (phase.cacheHit !== "cross-run") continue;
    hitCount += 1;
    tokenSum += (phase.usage?.input ?? 0) + (phase.usage?.output ?? 0);
  }
  if (hitCount === 0) {
    return "";
  }
  return `💾 ${hitCount} phase(s) reused from cross-run cache (${tokenSum.toLocaleString()} tokens spent on them this run)`;
}
1308
+
1058
1309
  function finalResult(action: string, result: RuntimeResult): ToolResult {
1059
1310
  const fp = finalPhase(result.state.def.phases);
1060
1311
  const header = result.ok
@@ -1062,7 +1313,7 @@ function finalResult(action: string, result: RuntimeResult): ToolResult {
1062
1313
  : `Taskflow '${result.state.flowName}' ${result.state.status} (${summarizeRun(result.state)}). Run id: ${result.state.runId} — resume with action=resume.`;
1063
1314
  return {
1064
1315
  content: [{ type: "text", text: `${header}\n\n--- ${fp.id} ---\n${result.finalOutput}` }],
1065
- details: { action, state: result.state, finalOutput: result.finalOutput },
1316
+ details: { action, state: result.state, finalOutput: result.finalOutput, cacheReport: formatCacheReport(result.state, result.totalUsage) },
1066
1317
  isError: !result.ok,
1067
1318
  };
1068
1319
  }
@@ -21,6 +21,12 @@ export interface InterpolationContext {
21
21
  previousOutput?: string;
22
22
  /** loop variable bindings, e.g. { item: {...} } */
23
23
  locals?: Record<string, unknown>;
24
+ /** Observed-read hook (M3): invoked once per successfully-resolved
25
+ * placeholder path, so the runtime can capture which upstream phases a
26
+ * phase actually consumed (its observed readSet). Unresolved refs do NOT
27
+ * fire it (they become `missing` warnings instead). Default undefined →
28
+ * zero overhead, fully backward-compatible. */
29
+ onRead?: (ref: string) => void;
24
30
  }
25
31
 
26
32
  const PLACEHOLDER = /\{([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*)\}/g;
@@ -48,7 +54,18 @@ export function interpolate(
48
54
  return { text, missing };
49
55
  }
50
56
 
57
/**
 * Resolve a placeholder path and report it as an observed read.
 *
 * Delegates the lookup to `_resolvePath`. When the lookup yields anything
 * other than `undefined`, the optional `ctx.onRead` hook is invoked with the
 * path so a caller-supplied collector can record it. An `undefined` result
 * never triggers the hook, so unresolved references are not recorded as reads.
 *
 * @param path Dotted placeholder path to resolve.
 * @param ctx Interpolation context; `onRead` may be absent.
 * @returns The resolved value, or `undefined` when the path did not resolve.
 */
function resolvePath(path: string, ctx: InterpolationContext): unknown {
  const resolved = _resolvePath(path, ctx);
  if (resolved === undefined) {
    return resolved;
  }
  ctx.onRead?.(path);
  return resolved;
}
67
+
68
+ function _resolvePath(path: string, ctx: InterpolationContext): unknown {
52
69
  const parts = path.split(".");
53
70
  const head = parts[0];
54
71