pi-taskflow 0.0.24 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/extensions/cache.ts +6 -1
- package/extensions/flowir/hash.ts +97 -0
- package/extensions/index.ts +188 -5
- package/extensions/interpolate.ts +17 -0
- package/extensions/runtime.ts +326 -27
- package/extensions/stale.ts +137 -0
- package/extensions/store.ts +14 -0
- package/package.json +1 -1
package/extensions/cache.ts
CHANGED
|
@@ -17,7 +17,7 @@ import { execFileSync } from "node:child_process";
|
|
|
17
17
|
import * as crypto from "node:crypto";
|
|
18
18
|
import * as fs from "node:fs";
|
|
19
19
|
import * as path from "node:path";
|
|
20
|
-
import { cacheDir, withLock, writeFileAtomic } from "./store.ts";
|
|
20
|
+
import { cacheDir, withLock, writeFileAtomic, type PhaseState } from "./store.ts";
|
|
21
21
|
|
|
22
22
|
// ---------------------------------------------------------------------------
|
|
23
23
|
// Fingerprint resolution
|
|
@@ -144,6 +144,11 @@ export interface CacheEntry {
|
|
|
144
144
|
output?: string;
|
|
145
145
|
json?: unknown;
|
|
146
146
|
model?: string;
|
|
147
|
+
/** Full PhaseState payload preserved so cross-run reuse is semantically
|
|
148
|
+
* equivalent to within-run resume. Storing only output/json would drop
|
|
149
|
+
* `gate`, `approval`, `reads`, `loop`, `tournament`, `warnings`, etc.,
|
|
150
|
+
* breaking recompute soundness and gate-block detection. */
|
|
151
|
+
state?: PhaseState;
|
|
147
152
|
/** Provenance for audit / cleanup. */
|
|
148
153
|
flowName?: string;
|
|
149
154
|
phaseId?: string;
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content-addressed hashing for flow definitions.
|
|
3
|
+
*
|
|
4
|
+
* The canonical-JSON + SHA-256-truncation algorithm here is **vendored from
|
|
5
|
+
* overstory `packages/core/src/ir/hash.ts`** (pinned commit) so that
|
|
6
|
+
* pi-taskflow and overstory share one byte-identical hashing contract. This is
|
|
7
|
+
* the `M1` slice of the overstory-convergence roadmap: we are *not* compiling
|
|
8
|
+
* to overstory FlowIR yet (the IR compiler expects an explicit inject/emits
|
|
9
|
+
* model pi-taskflow doesn't have), but we share the **hash algorithm** now —
|
|
10
|
+
* the cheapest, lowest-risk piece of the contract — and put it to immediate
|
|
11
|
+
* work folding the flow *definition* into the cross-run cache key (M2).
|
|
12
|
+
*
|
|
13
|
+
* Why this matters: previously the cache key folded only the flow **name**
|
|
14
|
+
* (`flow:${flowName}`), so two structurally-different flows that happened to
|
|
15
|
+
* share a name + phase id + task could collide in the cross-run cache, and a
|
|
16
|
+
* flow that changed structure (but not name) could serve a stale hit. Folding
|
|
17
|
+
* `flowDefHash` (a content fingerprint of the desugared definition) closes
|
|
18
|
+
* that hole and is the foundation of "identical re-run is free ($0.00)".
|
|
19
|
+
*
|
|
20
|
+
* Pure module: no IO. Uses Web Crypto (`globalThis.crypto.subtle`) — therefore
|
|
21
|
+
* async — exactly like overstory's `hashIR`, so the contract is identical.
|
|
22
|
+
*
|
|
23
|
+
* @see docs/internal/overstory-convergence-roadmap.md §3 (M1, "cut B")
|
|
24
|
+
* @see docs/internal/rfc-flowir-compilation.md
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import type { Taskflow } from "../schema.ts";
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Canonical JSON (vendored from overstory ir/hash.ts — byte-identical)
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
/**
 * Serialize `value` as canonical JSON.
 *
 * Rules (vendored contract — must stay byte-identical to overstory's
 * `canonicalJson`; do not diverge without bumping the contract and updating
 * the parity test):
 * - object keys are ordered by the default `Array.prototype.sort` comparison
 *   (UTF-16 code units);
 * - no whitespace is emitted;
 * - object properties whose value is `undefined` are omitted;
 * - arrays keep their order, and an `undefined` element is written as `null`.
 *
 * @param value - The value to serialize.
 * @returns The canonical JSON text. Anything that is not `null`, a number,
 *   boolean, string, array or object serializes as `"null"`.
 */
export function canonicalJson(value: unknown): string {
	switch (typeof value) {
		case "number":
		case "boolean":
		case "string":
			// Primitives defer to JSON.stringify (escaping, non-finite numbers → null).
			return JSON.stringify(value);
		case "object":
			break;
		default:
			// undefined / function / symbol at the top level — not representable.
			return "null";
	}

	if (value === null) {
		return JSON.stringify(value);
	}

	if (Array.isArray(value)) {
		// `map` + `join` is kept deliberately so array handling matches the vendored original exactly.
		const items = value.map((item) => canonicalJson(item === undefined ? null : item));
		return `[${items.join(",")}]`;
	}

	const record = value as Record<string, unknown>;
	const members: string[] = [];
	for (const key of Object.keys(record).sort()) {
		const member = record[key];
		if (member === undefined) continue; // dropped, as if the key were absent
		members.push(`${JSON.stringify(key)}:${canonicalJson(member)}`);
	}
	return `{${members.join(",")}}`;
}
|
|
61
|
+
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
// Hashing (vendored from overstory ir/hash.ts — byte-identical)
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
/**
 * Hash an already-canonicalized string.
 *
 * Encodes `canonical` with `TextEncoder`, digests it with SHA-256 through Web
 * Crypto, and keeps the first 16 bytes of the digest. Same shape as
 * overstory's `hashCanonical` / RFC-001 content hashes.
 *
 * @param canonical - The canonical serialization to hash.
 * @returns 32 lowercase hex characters (16 bytes).
 */
export async function hashCanonical(canonical: string): Promise<string> {
	const encoded = new TextEncoder().encode(canonical);
	const sha256 = await globalThis.crypto.subtle.digest("SHA-256", encoded);
	// Truncate to the leading 16 bytes, then render each byte as two hex digits.
	const head = new Uint8Array(sha256).slice(0, 16);
	return Array.from(head, (octet) => octet.toString(16).padStart(2, "0")).join("");
}
|
|
78
|
+
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// Flow-definition fingerprint
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
/**
 * Content fingerprint of a desugared `Taskflow` definition.
 *
 * The hash covers the **definition** passed in (structure, task text,
 * declared deps) — NOT runtime `args` values, which vary per invocation and
 * are already folded into each phase's `inputHash` via the interpolated task.
 * It answers "did the flow *itself* change?": because the definition is
 * canonicalized first, key order, whitespace and absent-vs-`undefined`
 * optional fields do not affect the result.
 *
 * Deterministic and async (Web Crypto), matching overstory's `hashIR` shape.
 *
 * @param def - The flow definition to fingerprint.
 * @returns The truncated SHA-256 hex digest of the canonical serialization.
 */
export async function flowDefHash(def: Taskflow): Promise<string> {
	const canonical = canonicalJson(def);
	return hashCanonical(canonical);
}
|
package/extensions/index.ts
CHANGED
|
@@ -28,7 +28,8 @@ import { type AgentScope, discoverAgents, readSubagentSettings, shouldSyncBuilti
|
|
|
28
28
|
import { renderRunResult, summarizeRun } from "./render.ts";
|
|
29
29
|
import { RunHistoryComponent, type RunHistoryResult } from "./runs-view.ts";
|
|
30
30
|
import { ApprovalViewComponent, type ApprovalChoice } from "./approval-view.ts";
|
|
31
|
-
import { executeTaskflow, type ApprovalDecision, type ApprovalRequest, type RuntimeResult } from "./runtime.ts";
|
|
31
|
+
import { executeTaskflow, recomputeTaskflow, type ApprovalDecision, type ApprovalRequest, type RecomputeReport, type RuntimeDeps, type RuntimeResult } from "./runtime.ts";
|
|
32
|
+
import { type UsageStats } from "./usage.ts";
|
|
32
33
|
import { finalPhase, resolveArgs, type Taskflow, validateTaskflow, desugar, isShorthand } from "./schema.ts";
|
|
33
34
|
import {
|
|
34
35
|
getFlow,
|
|
@@ -44,6 +45,7 @@ import {
|
|
|
44
45
|
} from "./store.ts";
|
|
45
46
|
import { CacheStore } from "./cache.ts";
|
|
46
47
|
import { safeParse } from "./interpolate.ts";
|
|
48
|
+
import { formatWhyStale, readMapOf } from "./stale.ts";
|
|
47
49
|
import {
|
|
48
50
|
isValidKey,
|
|
49
51
|
queueSpawn,
|
|
@@ -60,6 +62,7 @@ interface TaskflowDetails {
|
|
|
60
62
|
finalOutput?: string;
|
|
61
63
|
action: string;
|
|
62
64
|
message?: string;
|
|
65
|
+
cacheReport?: string;
|
|
63
66
|
}
|
|
64
67
|
|
|
65
68
|
/** pi reads `isError` at runtime to mark tool failures; it is not in the public type. */
|
|
@@ -83,8 +86,8 @@ const ShorthandStep = Type.Object(
|
|
|
83
86
|
);
|
|
84
87
|
|
|
85
88
|
const TaskflowParams = Type.Object({
|
|
86
|
-
action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "compile", "cache-clear"] as const, {
|
|
87
|
-
description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, verify the DAG, compile the DAG to a Mermaid diagram + verification report, or clear the cross-run memoization cache",
|
|
89
|
+
action: StringEnum(["run", "save", "resume", "list", "agents", "init", "verify", "compile", "provenance", "why-stale", "recompute", "cache-clear"] as const, {
|
|
90
|
+
description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, list available agents, init model role configuration, verify the DAG, compile the DAG to a Mermaid diagram + verification report, show observed readSet provenance, explain why a run is stale, minimally recompute a stale run, or clear the cross-run memoization cache",
|
|
88
91
|
default: "run",
|
|
89
92
|
}),
|
|
90
93
|
name: Type.Optional(Type.String({ description: "Name of a saved flow (for run/save without inline define)" })),
|
|
@@ -123,6 +126,8 @@ const TaskflowParams = Type.Object({
|
|
|
123
126
|
),
|
|
124
127
|
args: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Invocation arguments for the flow" })),
|
|
125
128
|
runId: Type.Optional(Type.String({ description: "Run id to resume (for action=resume)" })),
|
|
129
|
+
phaseId: Type.Optional(Type.String({ description: "Phase id — the assumed-changed seed for action=why-stale, or the phase to re-run for action=recompute" })),
|
|
130
|
+
dryRun: Type.Optional(Type.Boolean({ description: "For action=recompute: compute the stale frontier without re-executing anything (no tokens spent). Defaults to true (safe); set false to actually re-run the seed + stale frontier and persist the updated run" })),
|
|
126
131
|
scope: Type.Optional(
|
|
127
132
|
StringEnum(["user", "project"] as const, { description: "Where to save (action=save)", default: "project" }),
|
|
128
133
|
),
|
|
@@ -146,6 +151,45 @@ const TaskflowParams = Type.Object({
|
|
|
146
151
|
),
|
|
147
152
|
});
|
|
148
153
|
|
|
154
|
+
/**
 * Render the observed readSets of a run as plain text.
 *
 * Output is a header line, a blank line, then either a single explanatory
 * note (when no phase recorded any reads) or one entry per phase state listing
 * each `stepId@version` it read. Phases flagged `final` in the definition are
 * marked with a star.
 *
 * @param run - The run whose phase states are reported.
 * @returns The newline-joined report.
 */
function formatProvenance(run: RunState): string {
	const out: string[] = [
		`Provenance — run ${run.runId} · flow "${run.flowName}" · ${run.status}`,
		"",
	];

	// Ids of the phases the definition marks as final.
	const finals = new Set<string>();
	for (const phase of run.def.phases) {
		if (phase.final) finals.add(phase.id);
	}

	const phaseStates = Object.values(run.phases);
	const hasReads = phaseStates.some((ps) => (ps.reads?.length ?? 0) > 0);
	if (!hasReads) {
		out.push(
			"(No observed readSets recorded. Reads are captured for agent/gate/reduce phases that interpolate {steps.*} — the overstory \"observed readSet@version\" moat.)",
		);
		return out.join("\n");
	}

	for (const ps of phaseStates) {
		const star = finals.has(ps.id) ? " ★ final" : "";
		out.push(`■ ${ps.id} [${ps.status}]${star}`);

		const observed = ps.reads ?? [];
		if (observed.length === 0) {
			out.push(" (source — no upstream reads)");
			continue;
		}
		out.push(" observed reads:");
		for (const read of observed) {
			// A read without a recorded version is shown as "?".
			out.push(` ← ${read.stepId}@${read.version ?? "?"}`);
		}
	}
	return out.join("\n");
}
|
|
179
|
+
|
|
180
|
+
/**
 * Render a recompute report as plain text.
 *
 * Always lists the seeds, the re-run set and the reused set. The early-cutoff
 * section is only printed for a real (non-dry) run. Empty id lists are shown
 * as an em dash.
 *
 * @param r - The report to render.
 * @returns The newline-joined summary.
 */
function formatRecompute(r: RecomputeReport): string {
	const idList = (ids: readonly string[]): string => ids.join(", ") || "—";
	const dryTag = r.dryRun ? " (DRY RUN — worst-case, no execution)" : "";

	const out: string[] = [
		`Recompute — seed: ${r.seeds.join(", ")}${dryTag}`,
		"",
		`▲ re-run (${r.rerun.length}): ${idList(r.rerun)}`,
	];

	if (!r.dryRun) {
		out.push(`✂ early-cutoff (cached — inputHash unchanged): ${idList(r.cutoff)}`);
		if (r.cutoff.length > 0) {
			out.push(` → saved ${r.cutoff.length} re-execution(s).`);
		}
	}

	out.push(`✓ reused (outside frontier): ${idList(r.reused)}`);
	return out.join("\n");
}
|
|
192
|
+
|
|
149
193
|
function makeRunState(def: Taskflow, args: Record<string, unknown>, cwd: string): RunState {
|
|
150
194
|
return {
|
|
151
195
|
runId: newRunId(def.name),
|
|
@@ -292,7 +336,18 @@ async function runFlow(
|
|
|
292
336
|
persist: persistThrottled,
|
|
293
337
|
requestApproval,
|
|
294
338
|
loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
|
|
339
|
+
// Cross-run cache is opt-in per phase (cache:{scope:"cross-run"}).
|
|
340
|
+
// Defaulting every real run to cross-run was reviewed out: it silently
|
|
341
|
+
// persists phase outputs and can serve stale results for phases whose
|
|
342
|
+
// agents read files at runtime (those files are not in the cache key).
|
|
343
|
+
cacheScopeDefault: "run-only",
|
|
295
344
|
});
|
|
345
|
+
// Auto-report cache savings at the end of a real run so the user sees the
|
|
346
|
+
// M1-M5 effect without running a separate /tf command.
|
|
347
|
+
if (result.ok) {
|
|
348
|
+
const report = formatCacheReport(result.state, result.totalUsage);
|
|
349
|
+
if (report) ctx.ui.notify(report, "info");
|
|
350
|
+
}
|
|
296
351
|
return result;
|
|
297
352
|
} finally {
|
|
298
353
|
if (heartbeat) clearInterval(heartbeat);
|
|
@@ -629,6 +684,60 @@ export default function (pi: ExtensionAPI) {
|
|
|
629
684
|
return finalResult(action, result);
|
|
630
685
|
}
|
|
631
686
|
|
|
687
|
+
if (action === "provenance") {
|
|
688
|
+
if (!params.runId)
|
|
689
|
+
return errorResult(action, "action=provenance requires 'runId'");
|
|
690
|
+
const run = loadRun(ctx.cwd, params.runId);
|
|
691
|
+
if (!run) return errorResult(action, `Run not found: ${params.runId}`);
|
|
692
|
+
return {
|
|
693
|
+
content: [{ type: "text", text: formatProvenance(run) }],
|
|
694
|
+
details: { action } satisfies TaskflowDetails,
|
|
695
|
+
};
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
if (action === "why-stale") {
|
|
699
|
+
if (!params.runId)
|
|
700
|
+
return errorResult(action, "action=why-stale requires 'runId'");
|
|
701
|
+
const run = loadRun(ctx.cwd, params.runId);
|
|
702
|
+
if (!run) return errorResult(action, `Run not found: ${params.runId}`);
|
|
703
|
+
const reads = readMapOf(run.phases);
|
|
704
|
+
const seeds = params.phaseId ? [String(params.phaseId)] : [];
|
|
705
|
+
return {
|
|
706
|
+
content: [{ type: "text", text: formatWhyStale(run.runId, run.flowName, reads, seeds) }],
|
|
707
|
+
details: { action } satisfies TaskflowDetails,
|
|
708
|
+
};
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
if (action === "recompute") {
|
|
712
|
+
if (!params.runId)
|
|
713
|
+
return errorResult(action, "action=recompute requires 'runId'");
|
|
714
|
+
if (!params.phaseId)
|
|
715
|
+
return errorResult(action, "action=recompute requires 'phaseId' (the seed phase to re-run)");
|
|
716
|
+
const prev = loadRun(ctx.cwd, params.runId);
|
|
717
|
+
if (!prev) return errorResult(action, `Run not found: ${params.runId}`);
|
|
718
|
+
// H1: the LLM-callable tool defaults to a SAFE dry-run (no tokens, no
|
|
719
|
+
// mutation). A real recompute — which spends money and overwrites the
|
|
720
|
+
// run — requires an explicit dryRun:false.
|
|
721
|
+
const dryRun = params.dryRun !== false;
|
|
722
|
+
const settings = readSubagentSettings();
|
|
723
|
+
const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
|
|
724
|
+
const deps: RuntimeDeps = {
|
|
725
|
+
cwd: ctx.cwd,
|
|
726
|
+
agents,
|
|
727
|
+
globalThinking: settings.globalThinking,
|
|
728
|
+
signal,
|
|
729
|
+
loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
|
|
730
|
+
};
|
|
731
|
+
const { report, state } = await recomputeTaskflow(prev, deps, [String(params.phaseId)], { dryRun });
|
|
732
|
+
// H2: never persist a partial/aborted recompute over the original run.
|
|
733
|
+
if (!dryRun && !report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
|
|
734
|
+
const prefix = report.aborted ? "⚠ ABORTED mid-recompute — original run left unchanged.\n\n" : "";
|
|
735
|
+
return {
|
|
736
|
+
content: [{ type: "text", text: prefix + formatRecompute(report) }],
|
|
737
|
+
details: { action } satisfies TaskflowDetails,
|
|
738
|
+
};
|
|
739
|
+
}
|
|
740
|
+
|
|
632
741
|
// resolve the definition: inline `define` / shorthand (single|parallel|chain), else saved `name`.
|
|
633
742
|
let def: Taskflow | undefined;
|
|
634
743
|
|
|
@@ -822,7 +931,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
822
931
|
pi.registerCommand("tf", {
|
|
823
932
|
description: "Taskflow: list | run <name> | show <name> | compile <name> | runs | init",
|
|
824
933
|
getArgumentCompletions: (prefix) => {
|
|
825
|
-
const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify", "compile"];
|
|
934
|
+
const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify", "compile", "provenance", "why-stale", "recompute"];
|
|
826
935
|
const items = subs.map((s) => ({ value: s, label: s }));
|
|
827
936
|
const filtered = items.filter((i) => i.value.startsWith(prefix));
|
|
828
937
|
return filtered.length > 0 ? filtered : null;
|
|
@@ -878,6 +987,69 @@ export default function (pi: ExtensionAPI) {
|
|
|
878
987
|
return;
|
|
879
988
|
}
|
|
880
989
|
|
|
990
|
+
if (sub === "provenance") {
|
|
991
|
+
if (!arg) {
|
|
992
|
+
ctx.ui.notify("Usage: /tf provenance <runId>", "warning");
|
|
993
|
+
return;
|
|
994
|
+
}
|
|
995
|
+
const run = loadRun(ctx.cwd, arg);
|
|
996
|
+
if (!run) {
|
|
997
|
+
ctx.ui.notify(`Run not found: ${arg}`, "error");
|
|
998
|
+
return;
|
|
999
|
+
}
|
|
1000
|
+
ctx.ui.notify(formatProvenance(run), "info");
|
|
1001
|
+
return;
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
if (sub === "why-stale") {
|
|
1005
|
+
if (!arg) {
|
|
1006
|
+
ctx.ui.notify("Usage: /tf why-stale <runId> [phaseId]", "warning");
|
|
1007
|
+
return;
|
|
1008
|
+
}
|
|
1009
|
+
const [rid, ...rest] = arg.trim().split(/\s+/);
|
|
1010
|
+
const run = loadRun(ctx.cwd, rid);
|
|
1011
|
+
if (!run) {
|
|
1012
|
+
ctx.ui.notify(`Run not found: ${rid}`, "error");
|
|
1013
|
+
return;
|
|
1014
|
+
}
|
|
1015
|
+
const reads = readMapOf(run.phases);
|
|
1016
|
+
ctx.ui.notify(formatWhyStale(run.runId, run.flowName, reads, rest), "info");
|
|
1017
|
+
return;
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
if (sub === "recompute") {
	// Usage: /tf recompute <runId> <phaseId> [--apply]
	// Flags and positionals are separated up front so `--apply` may appear in
	// any position, and so a phase id that is textually equal to the run id is
	// still picked up as the second positional.
	const tokens = (arg ?? "").trim().split(/\s+/).filter(Boolean);
	const positional = tokens.filter((t) => !t.startsWith("--"));
	const rid = positional[0];
	const seed = positional[1];
	const apply = tokens.includes("--apply");
	if (!rid || !seed) {
		ctx.ui.notify("Usage: /tf recompute <runId> <phaseId> [--apply]\n(default is a safe dry-run; --apply spends tokens)", "warning");
		return;
	}
	const prev = loadRun(ctx.cwd, rid);
	if (!prev) {
		ctx.ui.notify(`Run not found: ${rid}`, "error");
		return;
	}
	const settings = readSubagentSettings();
	const { agents } = discoverAgents(ctx.cwd, prev.def.agentScope ?? "user", settings.modelRoles, settings.taskflow);
	const deps: RuntimeDeps = {
		cwd: ctx.cwd,
		agents,
		globalThinking: settings.globalThinking,
		loadFlow: (name: string) => getFlow(ctx.cwd, name)?.def,
	};
	// Without --apply this is a dry run, so nothing is persisted.
	const dryRun = !apply;
	const { report, state } = await recomputeTaskflow(prev, deps, [seed], { dryRun });
	// Never persist a partial/aborted recompute over the original run.
	if (!dryRun && !report.aborted) saveRun(state, { maxKeep: settings.taskflow.maxKeptRuns, maxAgeDays: settings.taskflow.maxRunAgeDays });
	// Same abort notice as the tool's action=recompute, so the user knows the saved run is intact.
	const prefix = report.aborted ? "⚠ ABORTED mid-recompute — original run left unchanged.\n\n" : "";
	ctx.ui.notify(prefix + formatRecompute(report), report.aborted ? "warning" : "info");
	return;
}
|
|
1052
|
+
|
|
881
1053
|
if (sub === "runs") {
|
|
882
1054
|
const runs = listRuns(ctx.cwd, 50);
|
|
883
1055
|
if (runs.length === 0) {
|
|
@@ -1123,6 +1295,17 @@ function errorResult(action: string, message: string): ToolResult {
|
|
|
1123
1295
|
};
|
|
1124
1296
|
}
|
|
1125
1297
|
|
|
1298
|
+
/**
 * One-line summary of the phases that were served from the cross-run cache.
 *
 * Counts phase states whose `cacheHit` is `"cross-run"` and sums their
 * recorded `usage.input + usage.output` (missing values count as 0). No
 * dollars/tokens "saved" figure is estimated: that would require guessing
 * what a re-execution would have cost, and the mix of cheap vs expensive
 * phases (tournament/loop) makes such a guess misleading.
 *
 * NOTE(review): the message calls the summed usage "spent on them this run",
 * while the phases are reported as reused from cache — confirm whether `usage`
 * on a cache-served phase is fresh spend or the replayed usage of the run that
 * produced the entry.
 *
 * @param state - The run state whose phases are inspected.
 * @param totalUsage - Accepted for callers; not read by this function.
 * @returns The summary line, or `""` when no phase was a cross-run hit.
 */
function formatCacheReport(state: RunState, totalUsage: UsageStats): string {
	let reusedCount = 0;
	let tokenTotal = 0;
	for (const ps of Object.values(state.phases)) {
		if (ps.cacheHit !== "cross-run") continue;
		reusedCount += 1;
		tokenTotal += (ps.usage?.input ?? 0) + (ps.usage?.output ?? 0);
	}
	if (reusedCount === 0) return "";
	return `💾 ${reusedCount} phase(s) reused from cross-run cache (${tokenTotal.toLocaleString()} tokens spent on them this run)`;
}
|
|
1308
|
+
|
|
1126
1309
|
function finalResult(action: string, result: RuntimeResult): ToolResult {
|
|
1127
1310
|
const fp = finalPhase(result.state.def.phases);
|
|
1128
1311
|
const header = result.ok
|
|
@@ -1130,7 +1313,7 @@ function finalResult(action: string, result: RuntimeResult): ToolResult {
|
|
|
1130
1313
|
: `Taskflow '${result.state.flowName}' ${result.state.status} (${summarizeRun(result.state)}). Run id: ${result.state.runId} — resume with action=resume.`;
|
|
1131
1314
|
return {
|
|
1132
1315
|
content: [{ type: "text", text: `${header}\n\n--- ${fp.id} ---\n${result.finalOutput}` }],
|
|
1133
|
-
details: { action, state: result.state, finalOutput: result.finalOutput },
|
|
1316
|
+
details: { action, state: result.state, finalOutput: result.finalOutput, cacheReport: formatCacheReport(result.state, result.totalUsage) },
|
|
1134
1317
|
isError: !result.ok,
|
|
1135
1318
|
};
|
|
1136
1319
|
}
|
|
@@ -21,6 +21,12 @@ export interface InterpolationContext {
|
|
|
21
21
|
previousOutput?: string;
|
|
22
22
|
/** loop variable bindings, e.g. { item: {...} } */
|
|
23
23
|
locals?: Record<string, unknown>;
|
|
24
|
+
/** Observed-read hook (M3): invoked once per successfully-resolved
|
|
25
|
+
* placeholder path, so the runtime can capture which upstream phases a
|
|
26
|
+
* phase actually consumed (its observed readSet). Unresolved refs do NOT
|
|
27
|
+
* fire it (they become `missing` warnings instead). Default undefined →
|
|
28
|
+
* zero overhead, fully backward-compatible. */
|
|
29
|
+
onRead?: (ref: string) => void;
|
|
24
30
|
}
|
|
25
31
|
|
|
26
32
|
const PLACEHOLDER = /\{([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*)\}/g;
|
|
@@ -48,7 +54,18 @@ export function interpolate(
|
|
|
48
54
|
return { text, missing };
|
|
49
55
|
}
|
|
50
56
|
|
|
57
|
+
/** Resolve + record an observed read (M3 observed-readSet). Fires only on
|
|
58
|
+
* successful resolution so an unresolved ref is NOT logged as a dependency
|
|
59
|
+
* (it stays a `missing` warning). The runtime threads a collector here to
|
|
60
|
+
* capture which upstream phases this phase actually consumed — the overstory
|
|
61
|
+
* "observed readSet@version" moat (nobody else records this). */
|
|
51
62
|
function resolvePath(path: string, ctx: InterpolationContext): unknown {
	const resolved = _resolvePath(path, ctx);
	// An unresolved ref is not an observed read: hand back `undefined` without
	// notifying the collector.
	if (resolved === undefined) {
		return resolved;
	}
	// Successful resolution: report the path to the optional observed-read hook.
	ctx.onRead?.(path);
	return resolved;
}
|
|
67
|
+
|
|
68
|
+
function _resolvePath(path: string, ctx: InterpolationContext): unknown {
|
|
52
69
|
const parts = path.split(".");
|
|
53
70
|
const head = parts[0];
|
|
54
71
|
|
package/extensions/runtime.ts
CHANGED
|
@@ -20,6 +20,8 @@ import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_
|
|
|
20
20
|
import { verifyTaskflow } from "./verify.ts";
|
|
21
21
|
import { hashInput, newRunId, type PhaseState, type RunState, runsDir } from "./store.ts";
|
|
22
22
|
import { CacheStore, resolveFingerprint } from "./cache.ts";
|
|
23
|
+
import { flowDefHash } from "./flowir/hash.ts";
|
|
24
|
+
import { computeStaleFrontier, readMapOf } from "./stale.ts";
|
|
23
25
|
import { ctxDirFor, drainPendingSpawns, initCtxDir, registerNode, setNodeStatus, type SpawnAssignment } from "./context-store.ts";
|
|
24
26
|
import { allocateWorkspace, isWorkspaceKeyword, type Workspace } from "./workspace.ts";
|
|
25
27
|
|
|
@@ -55,6 +57,8 @@ export interface RuntimeDeps {
|
|
|
55
57
|
loadFlow?: (name: string) => Taskflow | undefined;
|
|
56
58
|
/** Cross-run memoization store. Omit to construct a default one for `deps.cwd`. */
|
|
57
59
|
cacheStore?: CacheStore;
|
|
60
|
+
/** Default cache scope for phases that don't specify one. */
|
|
61
|
+
cacheScopeDefault?: CacheScope;
|
|
58
62
|
/** Internal: sub-flow call stack, for recursion detection. */
|
|
59
63
|
_stack?: string[];
|
|
60
64
|
/** Internal: pre-resolved Shared Context Tree dir for this run (sub-flows inherit the parent's). */
|
|
@@ -74,6 +78,7 @@ function buildInterpolationContext(
|
|
|
74
78
|
state: RunState,
|
|
75
79
|
previousOutput: string | undefined,
|
|
76
80
|
locals?: Record<string, unknown>,
|
|
81
|
+
onRead?: (ref: string) => void,
|
|
77
82
|
): InterpolationContext {
|
|
78
83
|
const steps: Record<string, { output: string; json?: unknown }> = {};
|
|
79
84
|
for (const [id, ps] of Object.entries(state.phases)) {
|
|
@@ -90,7 +95,7 @@ function buildInterpolationContext(
|
|
|
90
95
|
}
|
|
91
96
|
}
|
|
92
97
|
}
|
|
93
|
-
return { args: state.args, steps, previousOutput, locals };
|
|
98
|
+
return { args: state.args, steps, previousOutput, locals, onRead };
|
|
94
99
|
}
|
|
95
100
|
|
|
96
101
|
function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState { const failed = isFailed(r);
|
|
@@ -115,6 +120,27 @@ function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJs
|
|
|
115
120
|
};
|
|
116
121
|
}
|
|
117
122
|
|
|
123
|
+
/**
 * Convert observed read refs (e.g. "steps.scout.output") into a structured
 * readSet keyed by upstream phase id, tagging each entry with the version
 * (= that phase's `inputHash` in `state`) current at the time of the call.
 *
 * Only `steps.*` refs are upstream phase dependencies; every other ref
 * (args / item / previous …) is ignored. Each step id is reported once, in
 * first-seen order.
 *
 * @param refs - Placeholder paths that were successfully resolved.
 * @param state - Run state used to look up each upstream phase's `inputHash`.
 * @returns One `{ stepId, version }` per distinct upstream step; `version` is
 *   `undefined` when the step has no phase state or no `inputHash`.
 */
function readRefsToReads(
	refs: string[],
	state: RunState,
): Array<{ stepId: string; version?: string }> {
	// The id is the whole second path segment, so it must be followed by "." or
	// the end of the ref. A `\b` terminator is wrong here: for an id ending in
	// "-" the engine backtracks to the last word character and records a
	// truncated id ("steps.draft-.output" → "draft").
	const stepRef = /^steps\.([A-Za-z0-9_-]+)(?:\.|$)/;
	const reads: Array<{ stepId: string; version?: string }> = [];
	const seen = new Set<string>();
	for (const ref of refs) {
		const stepId = stepRef.exec(ref)?.[1];
		if (stepId === undefined || seen.has(stepId)) continue;
		seen.add(stepId);
		reads.push({ stepId, version: state.phases[stepId]?.inputHash });
	}
	return reads;
}
|
|
143
|
+
|
|
118
144
|
/**
|
|
119
145
|
* Surface unresolved interpolation placeholders (the `missing[]` from
|
|
120
146
|
* `interpolate()`). Without this they are silently left intact in the task —
|
|
@@ -551,6 +577,15 @@ async function runSpawnedChildren(
|
|
|
551
577
|
* and tears it down afterwards. All allocation is fail-open: a failed allocation
|
|
552
578
|
* degrades to the base cwd so a phase never fails to run because of isolation.
|
|
553
579
|
*/
|
|
580
|
+
/**
 * Per-invocation execution flags for a single phase run. Every field is
 * optional; omitting the options object leaves execution unchanged.
 */
interface PhaseExecOpts {
	/**
	 * When true, skip the cache entirely — both the within-run prior result and
	 * the cross-run store — and execute the phase again. `/tf recompute` sets
	 * this on the seeded phase so that its output is refreshed, along with only
	 * those downstream phases whose inputHash actually moves.
	 */
	forceRerun?: boolean;
}
|
|
588
|
+
|
|
554
589
|
async function executePhase(
|
|
555
590
|
phase: Phase,
|
|
556
591
|
state: RunState,
|
|
@@ -558,10 +593,11 @@ async function executePhase(
|
|
|
558
593
|
prior: PhaseState | undefined,
|
|
559
594
|
emitProgress: () => void,
|
|
560
595
|
_retryDepth = 0,
|
|
596
|
+
opts?: PhaseExecOpts,
|
|
561
597
|
): Promise<PhaseState> {
|
|
562
598
|
// Non-keyword cwd (or none): no workspace lifecycle — run directly.
|
|
563
599
|
if (!isWorkspaceKeyword(phase.cwd)) {
|
|
564
|
-
return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth);
|
|
600
|
+
return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth, opts);
|
|
565
601
|
}
|
|
566
602
|
let ws: Workspace | undefined;
|
|
567
603
|
try {
|
|
@@ -576,7 +612,7 @@ async function executePhase(
|
|
|
576
612
|
}
|
|
577
613
|
const innerDeps: RuntimeDeps = ws ? { ...deps, _cwdOverride: ws.dir } : deps;
|
|
578
614
|
try {
|
|
579
|
-
const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth);
|
|
615
|
+
const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth, opts);
|
|
580
616
|
if (ws && (ws.kind !== "inherited" || ws.note)) {
|
|
581
617
|
const tag = ws.kind === "inherited" ? "workspace" : `workspace:${ws.kind}`;
|
|
582
618
|
const msg = ws.note ? `${tag} — ${ws.note}` : `${tag} at ${ws.dir}`;
|
|
@@ -599,6 +635,7 @@ async function executePhaseInner(
|
|
|
599
635
|
prior: PhaseState | undefined,
|
|
600
636
|
emitProgress: () => void,
|
|
601
637
|
_retryDepth = 0,
|
|
638
|
+
opts?: PhaseExecOpts,
|
|
602
639
|
): Promise<PhaseState> {
|
|
603
640
|
const type = phase.type ?? "agent";
|
|
604
641
|
const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
|
|
@@ -631,13 +668,49 @@ async function executePhaseInner(
|
|
|
631
668
|
// Resolve context pre-read files once, before any type branching.
|
|
632
669
|
// The content is prepended to every task so the subagent never spends
|
|
633
670
|
// turns on file exploration for files the flow author already knows.
|
|
634
|
-
|
|
671
|
+
// M3 observed-readSet: collect every upstream ref this phase resolves, so we
|
|
672
|
+
// can record what its result ACTUALLY depended on (not just its declared
|
|
673
|
+
// dependsOn). Shared by every interpolation in this phase (task / when / …).
|
|
674
|
+
const readRefs: string[] = [];
|
|
675
|
+
const onRead = (ref: string): void => {
|
|
676
|
+
readRefs.push(ref);
|
|
677
|
+
};
|
|
678
|
+
const ctx = buildInterpolationContext(state, previousOutput, undefined, onRead);
|
|
679
|
+
|
|
680
|
+
// M3 observed-readSet: when conditions are part of the phase's real
|
|
681
|
+
// dependencies. Evaluate them inside executePhaseInner so every upstream
|
|
682
|
+
// interpolation is captured by the shared onRead hook, not silently dropped
|
|
683
|
+
// by a separate out-of-band context.
|
|
684
|
+
if (phase.when !== undefined) {
|
|
685
|
+
if (!evaluateCondition(phase.when, ctx)) {
|
|
686
|
+
return {
|
|
687
|
+
id: phase.id,
|
|
688
|
+
status: "skipped",
|
|
689
|
+
error: `Condition not met: ${phase.when}`,
|
|
690
|
+
endedAt: Date.now(),
|
|
691
|
+
usage: emptyUsage(),
|
|
692
|
+
reads: readRefsToReads(readRefs, state),
|
|
693
|
+
};
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
|
|
635
697
|
const preRead = await resolvePhaseContext(phase, ctx);
|
|
636
698
|
|
|
637
699
|
// Resolve this phase's cache policy once. Default scope is "run-only" (the
|
|
638
700
|
// historical within-run resume behavior). Only "cross-run" phases resolve a
|
|
639
701
|
// fingerprint and consult the persistent store.
|
|
640
|
-
|
|
702
|
+
let cacheScope: CacheScope = (phase.cache?.scope ?? deps.cacheScopeDefault ?? "run-only") as CacheScope;
|
|
703
|
+
// Defense in depth: gate/approval/loop/tournament must produce a fresh result
|
|
704
|
+
// each run (schema already rejects explicit cross-run, but the default-scope
|
|
705
|
+
// path must also be blocked). If flowDefHash failed, cross-run is unsafe
|
|
706
|
+
// because the key degrades to flowName-only and reopens cross-flow collisions.
|
|
707
|
+
const CROSS_RUN_BLOCKED_TYPES = new Set(["gate", "approval", "loop", "tournament"]);
|
|
708
|
+
if (cacheScope === "cross-run" && CROSS_RUN_BLOCKED_TYPES.has(type)) {
|
|
709
|
+
cacheScope = "run-only";
|
|
710
|
+
}
|
|
711
|
+
if (state.flowDefHash === "failed" && cacheScope === "cross-run") {
|
|
712
|
+
cacheScope = "run-only";
|
|
713
|
+
}
|
|
641
714
|
const cc: PhaseCacheCtx = {
|
|
642
715
|
scope: cacheScope,
|
|
643
716
|
ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
|
|
@@ -647,6 +720,8 @@ async function executePhaseInner(
|
|
|
647
720
|
phaseId: phase.id,
|
|
648
721
|
flowName: state.flowName,
|
|
649
722
|
runId: state.runId,
|
|
723
|
+
flowDefHash: state.flowDefHash === "failed" ? undefined : state.flowDefHash,
|
|
724
|
+
forceRerun: opts?.forceRerun,
|
|
650
725
|
thinking: phase.thinking,
|
|
651
726
|
tools: phase.tools,
|
|
652
727
|
preRead,
|
|
@@ -823,7 +898,7 @@ async function executePhaseInner(
|
|
|
823
898
|
if (type === "agent" || type === "gate" || type === "reduce") {
|
|
824
899
|
// Eval gate: zero-token machine checks before the LLM gate.
|
|
825
900
|
if (type === "gate" && Array.isArray(phase.eval) && phase.eval.length > 0) {
|
|
826
|
-
const evalCtx = buildInterpolationContext(state, previousOutput);
|
|
901
|
+
const evalCtx = buildInterpolationContext(state, previousOutput, undefined, onRead);
|
|
827
902
|
let allPassed = true;
|
|
828
903
|
for (const check of phase.eval) {
|
|
829
904
|
let expr = check;
|
|
@@ -858,6 +933,7 @@ async function executePhaseInner(
|
|
|
858
933
|
inputHash,
|
|
859
934
|
endedAt: Date.now(),
|
|
860
935
|
};
|
|
936
|
+
if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
|
|
861
937
|
recordCache(cc, ps);
|
|
862
938
|
return ps;
|
|
863
939
|
}
|
|
@@ -873,6 +949,7 @@ async function executePhaseInner(
|
|
|
873
949
|
|
|
874
950
|
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress), nodeIdFor());
|
|
875
951
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
952
|
+
if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
|
|
876
953
|
if (refWarning) ps.warnings = [...(ps.warnings ?? []), refWarning];
|
|
877
954
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
878
955
|
|
|
@@ -919,7 +996,7 @@ async function executePhaseInner(
|
|
|
919
996
|
for (const depId of phase.dependsOn ?? []) {
|
|
920
997
|
const d = state.def.phases.find((p) => p.id === depId);
|
|
921
998
|
if (!d) continue;
|
|
922
|
-
const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1);
|
|
999
|
+
const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1, undefined);
|
|
923
1000
|
state.phases[depId] = dPs;
|
|
924
1001
|
}
|
|
925
1002
|
}
|
|
@@ -954,6 +1031,7 @@ async function executePhaseInner(
|
|
|
954
1031
|
|
|
955
1032
|
const results = await runFanout(branches);
|
|
956
1033
|
const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
|
|
1034
|
+
if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
|
|
957
1035
|
recordCache(cc, ps);
|
|
958
1036
|
return ps;
|
|
959
1037
|
}
|
|
@@ -982,7 +1060,7 @@ async function executePhaseInner(
|
|
|
982
1060
|
}
|
|
983
1061
|
const loopVar = phase.as ?? "item";
|
|
984
1062
|
const tasks = arr.map((item) => {
|
|
985
|
-
const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
|
|
1063
|
+
const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item }, onRead);
|
|
986
1064
|
return {
|
|
987
1065
|
agent: resolveAgent(phase.agent, deps, state),
|
|
988
1066
|
task: preRead + interpolate(phase.task ?? "", localCtx).text,
|
|
@@ -994,6 +1072,7 @@ async function executePhaseInner(
|
|
|
994
1072
|
|
|
995
1073
|
const results = await runFanout(tasks);
|
|
996
1074
|
const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
|
|
1075
|
+
if (readRefs.length) ps.reads = readRefsToReads(readRefs, state);
|
|
997
1076
|
if (mapTruncated) {
|
|
998
1077
|
ps.warnings = [...(ps.warnings ?? []), `map fan-out truncated to MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) inside a dynamic sub-flow`];
|
|
999
1078
|
// NB: do NOT set ps.budgetTruncated — that field drives the run-level
|
|
@@ -1005,9 +1084,10 @@ async function executePhaseInner(
|
|
|
1005
1084
|
}
|
|
1006
1085
|
|
|
1007
1086
|
if (type === "approval") {
|
|
1008
|
-
const
|
|
1087
|
+
const readRefs: string[] = [];
|
|
1088
|
+
const ctx = buildInterpolationContext(state, previousOutput, undefined, (ref) => readRefs.push(ref));
|
|
1009
1089
|
const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
|
|
1010
|
-
const inputHash =
|
|
1090
|
+
const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", "approval", message]);
|
|
1011
1091
|
const cached = cachedPhase(cc, inputHash);
|
|
1012
1092
|
if (cached) return cached;
|
|
1013
1093
|
|
|
@@ -1023,6 +1103,7 @@ async function executePhaseInner(
|
|
|
1023
1103
|
gate: { verdict: "block", reason: "(auto-rejected: no interactive approver available)" },
|
|
1024
1104
|
usage: emptyUsage(),
|
|
1025
1105
|
inputHash,
|
|
1106
|
+
reads: readRefsToReads(readRefs, state),
|
|
1026
1107
|
endedAt: Date.now(),
|
|
1027
1108
|
};
|
|
1028
1109
|
}
|
|
@@ -1035,6 +1116,7 @@ async function executePhaseInner(
|
|
|
1035
1116
|
approval: { decision: decision.decision, note },
|
|
1036
1117
|
usage: emptyUsage(),
|
|
1037
1118
|
inputHash,
|
|
1119
|
+
reads: readRefsToReads(readRefs, state),
|
|
1038
1120
|
endedAt: Date.now(),
|
|
1039
1121
|
};
|
|
1040
1122
|
// A rejection halts the flow via the same mechanism as a blocking gate.
|
|
@@ -1045,7 +1127,8 @@ async function executePhaseInner(
|
|
|
1045
1127
|
}
|
|
1046
1128
|
|
|
1047
1129
|
if (type === "flow") {
|
|
1048
|
-
const
|
|
1130
|
+
const readRefs: string[] = [];
|
|
1131
|
+
const ctx = buildInterpolationContext(state, previousOutput, undefined, (ref) => readRefs.push(ref));
|
|
1049
1132
|
const hasDef = (phase as { def?: unknown }).def !== undefined;
|
|
1050
1133
|
const stack = deps._stack ?? [];
|
|
1051
1134
|
|
|
@@ -1066,6 +1149,7 @@ async function executePhaseInner(
|
|
|
1066
1149
|
json: parseJson ? safeParse("") : undefined,
|
|
1067
1150
|
usage: emptyUsage(),
|
|
1068
1151
|
inputHash: hashInput(phase.id, `flow-def-error:${diag}`),
|
|
1152
|
+
reads: readRefsToReads(readRefs, state),
|
|
1069
1153
|
endedAt: Date.now(),
|
|
1070
1154
|
defError: diag,
|
|
1071
1155
|
});
|
|
@@ -1101,6 +1185,7 @@ async function executePhaseInner(
|
|
|
1101
1185
|
json: parseJson ? safeParse("") : undefined,
|
|
1102
1186
|
usage: emptyUsage(),
|
|
1103
1187
|
inputHash: hashInput(phase.id, "flow-def-empty"),
|
|
1188
|
+
reads: readRefsToReads(readRefs, state),
|
|
1104
1189
|
endedAt: Date.now(),
|
|
1105
1190
|
};
|
|
1106
1191
|
}
|
|
@@ -1222,6 +1307,7 @@ async function executePhaseInner(
|
|
|
1222
1307
|
},
|
|
1223
1308
|
error: subResult.ok ? undefined : `sub-flow '${name}' ${subResult.state.status}`,
|
|
1224
1309
|
inputHash,
|
|
1310
|
+
reads: readRefsToReads(readRefs, state),
|
|
1225
1311
|
endedAt: Date.now(),
|
|
1226
1312
|
};
|
|
1227
1313
|
recordCache(cc, flowPs);
|
|
@@ -1231,11 +1317,21 @@ async function executePhaseInner(
|
|
|
1231
1317
|
// loop-until-done: run the body repeatedly until `until` is truthy, the output
|
|
1232
1318
|
// converges to a fixed point, or maxIterations is hit (always terminates).
|
|
1233
1319
|
if (type === "loop") {
|
|
1320
|
+
const readRefs: string[] = [];
|
|
1234
1321
|
const agentName = resolveAgent(phase.agent, deps, state);
|
|
1235
1322
|
const rawMax = phase.maxIterations ?? LOOP_DEFAULT_MAX_ITERATIONS;
|
|
1236
1323
|
const maxIters = Math.max(1, Math.min(LOOP_HARD_MAX_ITERATIONS, Math.floor(rawMax)));
|
|
1237
1324
|
const convergence = phase.convergence ?? true;
|
|
1238
1325
|
|
|
1326
|
+
// Canonical first-iteration body for the cache key. It must fold in the
|
|
1327
|
+
// interpolated task/upstream refs so that a changed upstream changes the
|
|
1328
|
+
// key and recompute no longer silently reuses a stale loop (critic finding).
|
|
1329
|
+
const firstBodyCtx = buildInterpolationContext(state, previousOutput, {
|
|
1330
|
+
loop: { iteration: 1, lastOutput: "", maxIterations: maxIters },
|
|
1331
|
+
}, (ref) => readRefs.push(ref));
|
|
1332
|
+
const firstBody = preRead + interpolate(phase.task ?? "", firstBodyCtx).text;
|
|
1333
|
+
const inputHash = hashInput(phase.id, "loop", phase.until ?? "", firstBody, String(maxIters));
|
|
1334
|
+
|
|
1239
1335
|
const usages: UsageStats[] = [];
|
|
1240
1336
|
const loopWarnings: string[] = [];
|
|
1241
1337
|
let lastOutput = "";
|
|
@@ -1253,7 +1349,7 @@ async function executePhaseInner(
|
|
|
1253
1349
|
// The body sees its iteration number and the prior iteration's output.
|
|
1254
1350
|
const bodyCtx = buildInterpolationContext(state, previousOutput, {
|
|
1255
1351
|
loop: { iteration: i, lastOutput, maxIterations: maxIters },
|
|
1256
|
-
});
|
|
1352
|
+
}, (ref) => readRefs.push(ref));
|
|
1257
1353
|
const body = preRead + interpolate(phase.task ?? "", bodyCtx).text;
|
|
1258
1354
|
const r = await runOne(agentName, body, liveSink(state, phase.id, emitProgress));
|
|
1259
1355
|
usages.push(r.usage);
|
|
@@ -1270,7 +1366,7 @@ async function executePhaseInner(
|
|
|
1270
1366
|
// Loop locals ({loop.iteration} etc.) are available to the condition too.
|
|
1271
1367
|
const untilCtx = buildInterpolationContext(state, previousOutput, {
|
|
1272
1368
|
loop: { iteration: i, lastOutput, maxIterations: maxIters },
|
|
1273
|
-
});
|
|
1369
|
+
}, (ref) => readRefs.push(ref));
|
|
1274
1370
|
untilCtx.steps[phase.id] = { output: lastOutput, json: safeParse(lastOutput) };
|
|
1275
1371
|
const { value: done, error: condErr } = tryEvaluateCondition(phase.until ?? "", untilCtx);
|
|
1276
1372
|
// A malformed condition must not spin forever: stop and surface a warning
|
|
@@ -1301,7 +1397,8 @@ async function executePhaseInner(
|
|
|
1301
1397
|
error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
|
|
1302
1398
|
loop: { iterations, stop },
|
|
1303
1399
|
warnings: loopWarnings.length ? loopWarnings : undefined,
|
|
1304
|
-
inputHash
|
|
1400
|
+
inputHash,
|
|
1401
|
+
reads: readRefsToReads(readRefs, state),
|
|
1305
1402
|
endedAt: Date.now(),
|
|
1306
1403
|
};
|
|
1307
1404
|
}
|
|
@@ -1313,7 +1410,8 @@ async function executePhaseInner(
|
|
|
1313
1410
|
usage: aggUsage,
|
|
1314
1411
|
loop: { iterations, stop },
|
|
1315
1412
|
warnings: loopWarnings.length ? loopWarnings : undefined,
|
|
1316
|
-
inputHash
|
|
1413
|
+
inputHash,
|
|
1414
|
+
reads: readRefsToReads(readRefs, state),
|
|
1317
1415
|
endedAt: Date.now(),
|
|
1318
1416
|
};
|
|
1319
1417
|
}
|
|
@@ -1336,6 +1434,20 @@ async function executePhaseInner(
|
|
|
1336
1434
|
competitors = Array.from({ length: n }, () => ({ agent: resolveAgent(phase.agent, deps, state), task: body }));
|
|
1337
1435
|
}
|
|
1338
1436
|
|
|
1437
|
+
// The inputHash must fold in the resolved competitors (which embed the
|
|
1438
|
+
// interpolated task/upstream refs) and the judge rubric, otherwise a changed
|
|
1439
|
+
// upstream produces the same key and recompute silently reuses a stale
|
|
1440
|
+
// tournament (critic finding: unsound for cross-run/recompute).
|
|
1441
|
+
const rubric = interpolate(phase.judge ?? "", ctx).text.trim();
|
|
1442
|
+
const inputHash = hashInput(
|
|
1443
|
+
phase.id,
|
|
1444
|
+
"tournament",
|
|
1445
|
+
mode,
|
|
1446
|
+
String(competitors.length),
|
|
1447
|
+
JSON.stringify(competitors.map((c) => ({ agent: c.agent, task: c.task }))),
|
|
1448
|
+
rubric,
|
|
1449
|
+
);
|
|
1450
|
+
|
|
1339
1451
|
const results = await runFanout(competitors);
|
|
1340
1452
|
const ran = results.filter((r) => r.stopReason !== "budget-skipped");
|
|
1341
1453
|
const ok = ran.filter((r) => !isFailed(r));
|
|
@@ -1355,7 +1467,8 @@ async function executePhaseInner(
|
|
|
1355
1467
|
error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
|
|
1356
1468
|
budgetTruncated: budgetSkipCount > 0 || undefined,
|
|
1357
1469
|
tournament: { variants: competitors.length, winner: 0, mode },
|
|
1358
|
-
inputHash
|
|
1470
|
+
inputHash,
|
|
1471
|
+
reads: readRefsToReads(readRefs, state),
|
|
1359
1472
|
endedAt: Date.now(),
|
|
1360
1473
|
};
|
|
1361
1474
|
}
|
|
@@ -1370,7 +1483,8 @@ async function executePhaseInner(
|
|
|
1370
1483
|
model: ok[0].model,
|
|
1371
1484
|
budgetTruncated: budgetSkipCount > 0 || undefined,
|
|
1372
1485
|
tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
|
|
1373
|
-
inputHash
|
|
1486
|
+
inputHash,
|
|
1487
|
+
reads: readRefsToReads(readRefs, state),
|
|
1374
1488
|
endedAt: Date.now(),
|
|
1375
1489
|
};
|
|
1376
1490
|
}
|
|
@@ -1387,7 +1501,8 @@ async function executePhaseInner(
|
|
|
1387
1501
|
budgetTruncated: budgetSkipCount > 0 || undefined,
|
|
1388
1502
|
warnings: ["judge skipped: run aborted or budget exceeded"],
|
|
1389
1503
|
tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
|
|
1390
|
-
inputHash
|
|
1504
|
+
inputHash,
|
|
1505
|
+
reads: readRefsToReads(readRefs, state),
|
|
1391
1506
|
endedAt: Date.now(),
|
|
1392
1507
|
};
|
|
1393
1508
|
}
|
|
@@ -1396,14 +1511,14 @@ async function executePhaseInner(
|
|
|
1396
1511
|
const labelled = ran
|
|
1397
1512
|
.map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
|
|
1398
1513
|
.join("\n\n---\n\n");
|
|
1399
|
-
const
|
|
1400
|
-
|
|
1514
|
+
const finalRubric =
|
|
1515
|
+
rubric ||
|
|
1401
1516
|
"You are judging competing answers to the same task. Pick the single best variant on correctness, completeness, and clarity.";
|
|
1402
1517
|
const directive =
|
|
1403
1518
|
mode === "best"
|
|
1404
1519
|
? `End your reply with a line exactly: WINNER: <number> (1–${ran.length}), choosing the strongest eligible variant.`
|
|
1405
1520
|
: `Synthesize the strongest possible answer by combining the best parts of the eligible variants. Then end with a line: WINNER: <number> indicating which variant contributed most.`;
|
|
1406
|
-
const judgeTask = `${
|
|
1521
|
+
const judgeTask = `${finalRubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
|
|
1407
1522
|
const judgeAgent = resolveAgent(phase.judgeAgent ?? phase.agent, deps, state);
|
|
1408
1523
|
const judgeRes = await runOne(judgeAgent, judgeTask, liveSink(state, phase.id, emitProgress));
|
|
1409
1524
|
const judgeUsage = aggregateUsage([variantUsage, judgeRes.usage]);
|
|
@@ -1421,7 +1536,8 @@ async function executePhaseInner(
|
|
|
1421
1536
|
budgetTruncated: budgetSkipCount > 0 || undefined,
|
|
1422
1537
|
warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
|
|
1423
1538
|
tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
|
|
1424
|
-
inputHash
|
|
1539
|
+
inputHash,
|
|
1540
|
+
reads: readRefsToReads(readRefs, state),
|
|
1425
1541
|
endedAt: Date.now(),
|
|
1426
1542
|
};
|
|
1427
1543
|
}
|
|
@@ -1444,7 +1560,8 @@ async function executePhaseInner(
|
|
|
1444
1560
|
budgetTruncated: budgetSkipCount > 0 || undefined,
|
|
1445
1561
|
warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
|
|
1446
1562
|
tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
|
|
1447
|
-
inputHash
|
|
1563
|
+
inputHash,
|
|
1564
|
+
reads: readRefsToReads(readRefs, state),
|
|
1448
1565
|
endedAt: Date.now(),
|
|
1449
1566
|
};
|
|
1450
1567
|
}
|
|
@@ -1509,6 +1626,15 @@ interface PhaseCacheCtx {
|
|
|
1509
1626
|
* whether a given branch happens to fold preRead into its task string
|
|
1510
1627
|
* (previously this was only incidentally true via `fullTask`). */
|
|
1511
1628
|
preRead?: string;
|
|
1629
|
+
/** Content fingerprint of the desugared flow definition — folded into the
|
|
1630
|
+
* key so two structurally-different flows that share a name can never
|
|
1631
|
+
* collide, and a changed flow never serves a stale cross-run hit. */
|
|
1632
|
+
flowDefHash?: string | "failed";
|
|
1633
|
+
/** Force this phase to re-execute, ignoring the within-run prior AND the
|
|
1634
|
+
* cross-run store (M5 recompute seed). Downstream phases are NOT forced —
|
|
1635
|
+
* they re-evaluate naturally: if the seed's new output changed their
|
|
1636
|
+
* inputHash they miss and re-run, otherwise they hit (early cutoff). */
|
|
1637
|
+
forceRerun?: boolean;
|
|
1512
1638
|
}
|
|
1513
1639
|
|
|
1514
1640
|
/** Fold the phase fingerprint into the base hash parts to form the final cache key. */
|
|
@@ -1519,6 +1645,7 @@ function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
|
|
|
1519
1645
|
// resolved context pre-read content, and the world-state fingerprint.
|
|
1520
1646
|
const parts = [
|
|
1521
1647
|
`flow:${cc.flowName}`,
|
|
1648
|
+
`flowdef:${cc.flowDefHash ?? ""}`,
|
|
1522
1649
|
...baseParts,
|
|
1523
1650
|
`think:${cc.thinking ?? ""}`,
|
|
1524
1651
|
`tools:${JSON.stringify(cc.tools ?? [])}`,
|
|
@@ -1536,6 +1663,7 @@ function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
|
|
|
1536
1663
|
*/
|
|
1537
1664
|
function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
|
|
1538
1665
|
if (cc.scope === "off") return null;
|
|
1666
|
+
if (cc.forceRerun) return null;
|
|
1539
1667
|
|
|
1540
1668
|
// 1. within-run resume (fastest; always allowed unless scope is off)
|
|
1541
1669
|
if (cc.prior && cc.prior.status === "done" && cc.prior.inputHash === inputHash) {
|
|
@@ -1546,6 +1674,13 @@ function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
|
|
|
1546
1674
|
if (cc.scope === "cross-run") {
|
|
1547
1675
|
const e = cc.store.get(inputHash, cc.ttlMs);
|
|
1548
1676
|
if (e) {
|
|
1677
|
+
// If we stored the full PhaseState, restore it (preserving gate,
|
|
1678
|
+
// approval, reads, loop/tournament metadata, warnings) and just mark
|
|
1679
|
+
// the cache hit + zero usage. Fallback to the legacy trimmed surface
|
|
1680
|
+
// for entries written before this change.
|
|
1681
|
+
if (e.state) {
|
|
1682
|
+
return { ...e.state, inputHash, usage: emptyUsage(), cacheHit: "cross-run", endedAt: Date.now() };
|
|
1683
|
+
}
|
|
1549
1684
|
return {
|
|
1550
1685
|
id: cc.phaseId,
|
|
1551
1686
|
status: "done",
|
|
@@ -1573,6 +1708,7 @@ function recordCache(cc: PhaseCacheCtx, ps: PhaseState): void {
|
|
|
1573
1708
|
output: ps.output,
|
|
1574
1709
|
json: ps.json,
|
|
1575
1710
|
model: ps.model,
|
|
1711
|
+
state: ps,
|
|
1576
1712
|
flowName: cc.flowName,
|
|
1577
1713
|
phaseId: cc.phaseId,
|
|
1578
1714
|
runId: cc.runId,
|
|
@@ -1701,6 +1837,155 @@ function safeProgress(deps: RuntimeDeps, state: RunState): void {
|
|
|
1701
1837
|
/**
|
|
1702
1838
|
* Execute a full taskflow. Mutates and persists `state` as it progresses.
|
|
1703
1839
|
*/
|
|
1840
|
+
/**
 * Outcome of a recompute: which phases were re-executed (or, for a dry run,
 * would be) and which were reused.
 *
 * `cutoff` is the payoff: phases inside the stale frontier whose inputHash did
 * NOT move, so their cached result was reused instead of re-running (early
 * cutoff). That is what makes a recompute cheaper than a full re-run.
 */
export interface RecomputeReport {
  /** True when nothing was executed and the report is only a projection. */
  readonly dryRun: boolean;
  /** True when the recompute stopped early because the abort signal fired. */
  readonly aborted: boolean;
  /** The phase ids the recompute was seeded with. */
  readonly seeds: ReadonlyArray<string>;
  /** Phases that were (dry-run: would be) re-executed, or whose result moved. */
  readonly rerun: ReadonlyArray<string>;
  /** Phases outside the frontier — untouched, reused verbatim. */
  readonly reused: ReadonlyArray<string>;
  /**
   * Phases in the frontier whose inputHash did NOT move → cached result
   * reused, no re-execution (early cutoff). Empty in dry-run (unknowable).
   */
  readonly cutoff: ReadonlyArray<string>;
}
|
|
1856
|
+
|
|
1857
|
+
/**
 * Report whether a flow has dependencies the observed readSet cannot see.
 *
 * The following make a real (dryRun:false) recompute unsafe, because a change
 * in them never shows up in a phase's recorded reads:
 *  - flow-level `contextSharing` or a phase with `shareContext`
 *    (Shared Context Tree),
 *  - a `flow` phase (sub-flow),
 *  - a phase with `context:` file pre-reads,
 *  - a `{previous.output}`, `{args.*}` or `{item…}` placeholder in any
 *    interpolated phase text (`task`, `when`, `until`, `judge`, `eval` checks).
 *
 * @param state Run whose flow definition (`state.def`) is scanned.
 * @returns `true` as soon as one unobserved dependency is found, else `false`.
 */
function hasUnobservedDependencies(state: RunState): boolean {
  // Flow-level switch: it does not depend on any phase, so test it once up
  // front. This also covers a definition that has no phases at all.
  if (state.def.contextSharing === true) return true;

  // `item\b` matches both `{item}` and `{item.x}` but not `{items…}`.
  const unobservedRef = /\{(?:previous\.output|args\.|item\b)/;
  const mentionsUnobserved = (text: unknown): boolean =>
    typeof text === "string" && unobservedRef.test(text);

  // NOTE(review): a map phase can rename its loop variable via `as`; such a
  // custom name is not matched by the pattern above — confirm whether it should be.
  return state.def.phases.some((p) => {
    if (p.shareContext === true || p.type === "flow") return true;
    if (p.context && p.context.length > 0) return true;
    // Every phase field that goes through interpolation is a place where an
    // unobserved placeholder can hide, not just the task body.
    const texts: unknown[] = [p.task, p.when, p.until, p.judge];
    if (Array.isArray(p.eval)) texts.push(...p.eval);
    return texts.some(mentionsUnobserved);
  });
}
|
|
1875
|
+
|
|
1876
|
+
/**
 * Recompute a completed run minimally: force-rerun the `seeds`, then walk
 * their stale frontier in topological order. The cache provides early cutoff
 * for free — a downstream whose inputHash didn't move (because the seed's new
 * output happened to equal the old) hits its prior and is reused rather than
 * re-executed.
 *
 * The caller's `state` is never mutated: the work happens on a structured
 * clone, which is returned so the caller decides whether to persist it. When
 * `report.aborted` is set the returned state is partial and must be discarded.
 *
 * @param state Completed run to recompute.
 * @param deps  Runtime dependencies; `deps.signal` is polled between phases.
 * @param seeds Phase ids assumed to have changed; they are force-rerun.
 * @param opts  `dryRun` defaults to `true`: the worst-case frontier is computed
 *              without executing anything. Only an explicit `dryRun: false`
 *              performs a real recompute.
 * @returns The fresh state plus a report of what was rerun / reused / cut off.
 * @throws Error when `dryRun: false` is requested for a flow with unobserved
 *         dependencies; callers should surface that as a user-facing error.
 */
export async function recomputeTaskflow(
  state: RunState,
  deps: RuntimeDeps,
  seeds: readonly string[],
  // Fail-safe default: a real recompute overwrites the run and spends tokens.
  // The tool/command wrappers can explicitly opt into dryRun:false.
  opts: { dryRun?: boolean } = { dryRun: true },
): Promise<{ report: RecomputeReport; state: RunState }> {
  // The parameter default only applies when `opts` is omitted entirely. Read
  // the flag with `??` so `{}` or `{ dryRun: undefined }` stays a dry run too
  // instead of silently becoming a real, token-spending recompute.
  const dryRun = opts.dryRun ?? true;

  // Never mutate the caller's RunState in-place. Recompute is a speculative
  // replay; only the caller decides whether to persist the new state.
  const newState = structuredClone(state) as RunState;
  const frontier = computeStaleFrontier(readMapOf(newState.phases), seeds);
  // Phases outside the frontier are untouched in both modes.
  const reused = Object.keys(newState.phases).filter((id) => !frontier.has(id));

  if (dryRun) {
    return {
      report: {
        dryRun: true,
        aborted: false,
        seeds,
        rerun: [...frontier],
        reused,
        cutoff: [],
      },
      state: newState,
    };
  }

  // Guard: observed readSet only tracks `{steps.X.*}` interpolation refs. It is
  // blind to Shared Context Tree (ctx_read/ctx_write), sub-flow internals,
  // context: file pre-reads, and the {previous.output}, {args.*} and {item.*}
  // placeholders. Recomputing such a run with dryRun:false could silently skip
  // phases whose deps changed outside the observed frontier and then persist a
  // corrupted run over the original.
  if (hasUnobservedDependencies(newState)) {
    throw new Error(
      "recompute dryRun:false is unsafe for this run: it contains dependencies " +
        "(shareContext, flow/ctx_spawn, context: files, {previous.output}, {args.*}, or {item.*}) " +
        "that are not tracked by the observed readSet. Use dryRun:true to inspect " +
        "the frontier, or change the upstream phase and re-run the whole flow.",
    );
  }

  // Real recompute: topological order over the frontier so a downstream always
  // sees its (already-refreshed) upstreams when it re-evaluates its cache key.
  // The order must respect both declared dependsOn AND observed reads, because
  // pi-taskflow allows interpolation refs without an explicit dependsOn edge.
  const seedSet = new Set(seeds);

  // A phase reading its own prior output (e.g. a loop `until` checking
  // `{steps.thisId.output}`) must not create a self-edge in the scheduling
  // graph — otherwise topoLayers would deadlock on the self-loop.
  const observedDeps = (phaseId: string): string[] =>
    (newState.phases[phaseId]?.reads ?? [])
      .map((r) => r.stepId)
      .filter((id) => id !== phaseId);

  const augmentedPhases = newState.def.phases.map((p) => ({
    ...p,
    dependsOn: [...new Set([...(p.dependsOn ?? []), ...observedDeps(p.id)])],
  }));
  const order = topoLayers(augmentedPhases)
    .flat()
    .map((p) => p.id)
    .filter((id) => frontier.has(id));

  const rerun: string[] = [];
  const cutoff: string[] = [];
  const noop = () => {};
  let aborted = false;

  for (const id of order) {
    // A partial recompute must NOT be persisted over the original run — the
    // caller discards `state` when `aborted` is set.
    if (deps.signal?.aborted) {
      aborted = true;
      break;
    }
    // Execute the ORIGINAL phase definition, not the augmented copy that was
    // built only for ordering.
    const phase = newState.def.phases.find((p) => p.id === id);
    if (!phase) continue;

    const before = newState.phases[id]?.inputHash;
    const forced = seedSet.has(id);
    const execOpts = forced ? { forceRerun: true } : undefined;
    try {
      const ps = await executePhase(phase, newState, deps, newState.phases[id], noop, 0, execOpts);
      newState.phases[id] = ps;
      // A phase counts as "rerun" if it was a forced seed OR its result moved;
      // otherwise it hit its cache (inputHash unchanged) → early cutoff.
      if (forced || ps.inputHash !== before) rerun.push(id);
      else cutoff.push(id);
    } catch {
      // Deliberate best-effort: a failing recompute phase is recorded as rerun
      // (it was attempted) and the walk continues with the remaining frontier.
      rerun.push(id);
    }
  }

  return {
    report: {
      dryRun: false,
      aborted,
      seeds,
      rerun,
      reused,
      cutoff,
    },
    state: newState,
  };
}
|
|
1988
|
+
|
|
1704
1989
|
export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
|
|
1705
1990
|
const def: Taskflow = state.def;
|
|
1706
1991
|
try {
|
|
@@ -1726,6 +2011,24 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
|
|
|
1726
2011
|
async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
|
|
1727
2012
|
const def: Taskflow = state.def;
|
|
1728
2013
|
const layers = topoLayers(def.phases);
|
|
2014
|
+
// Content-fingerprint the desugared definition ONCE per run and fold it into
|
|
2015
|
+
// every phase's cache key (overstory hash algorithm; see ./flowir/hash.ts).
|
|
2016
|
+
// Reused by every phase, persisted on the RunState for audit/resume.
|
|
2017
|
+
// Never throws into the run — a hash failure marks the field as "failed" and
|
|
2018
|
+
// cross-run caching is disabled for the run rather than keyed by flowName alone.
|
|
2019
|
+
if (state.flowDefHash === undefined) {
|
|
2020
|
+
try {
|
|
2021
|
+
state.flowDefHash = await flowDefHash(def);
|
|
2022
|
+
} catch (e) {
|
|
2023
|
+
// Fail-safe: warn loudly rather than silently degrading to the legacy
|
|
2024
|
+
// flowName-only key, which would reopen the cross-flow collision hole.
|
|
2025
|
+
console.warn(
|
|
2026
|
+
`[taskflow] flowDefHash failed for '${def.name}': ${e instanceof Error ? e.message : String(e)}. ` +
|
|
2027
|
+
"Cross-run cache is disabled for this run to prevent stale cross-flow hits.",
|
|
2028
|
+
);
|
|
2029
|
+
state.flowDefHash = "failed";
|
|
2030
|
+
}
|
|
2031
|
+
}
|
|
1729
2032
|
|
|
1730
2033
|
state.status = "running";
|
|
1731
2034
|
safeEmit(deps, state);
|
|
@@ -1770,10 +2073,6 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
1770
2073
|
else if (budgetBlocked) skipReason = `Budget exceeded${budgetReason ? `: ${budgetReason}` : ""}`;
|
|
1771
2074
|
else if (!depsSatisfied)
|
|
1772
2075
|
skipReason = join === "any" ? "All dependencies failed or were skipped" : "Upstream dependency not satisfied";
|
|
1773
|
-
else if (phase.when !== undefined) {
|
|
1774
|
-
const condCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
|
|
1775
|
-
if (!evaluateCondition(phase.when, condCtx)) skipReason = `Condition not met: ${phase.when}`;
|
|
1776
|
-
}
|
|
1777
2076
|
|
|
1778
2077
|
if (skipReason) {
|
|
1779
2078
|
if (skipReason.startsWith("Budget exceeded")) budgetBlocked = true;
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stale-marking (M4) — conservative transitive invalidation over the observed
|
|
3
|
+
* readSet captured in M3.
|
|
4
|
+
*
|
|
5
|
+
* This is the "mark stale, don't rerun" half of overstory's cost-asymmetric
|
|
6
|
+
* reactivity (VISION §2.3): the cheap effects (figuring out what WOULD be
|
|
7
|
+
* invalidated) run for free; the expensive effects (actually re-running an LLM
|
|
8
|
+
* phase) are gated for M5. Given a run's observed readSets and a set of phases
|
|
9
|
+
* assumed to have changed, `computeStaleFrontier` returns the transitive
|
|
10
|
+
* closure of phases whose recorded dependencies are no longer trustworthy.
|
|
11
|
+
*
|
|
12
|
+
* Pure module: no IO, no Date, no randomness. Deterministic.
|
|
13
|
+
*
|
|
14
|
+
* Scope (honest): this is TOPOLOGICAL propagation only — a changed seed
|
|
15
|
+
* invalidates everything that (transitively) read it. The overstory
|
|
16
|
+
* "early cutoff" refinement (a re-run whose output HASH is unchanged does NOT
|
|
17
|
+
* invalidate, even if the version advanced) needs before/after content hashes,
|
|
18
|
+
* which only exist when a phase is actually re-run — that is the M5
|
|
19
|
+
* recomputation concern, deliberately out of scope here. Marking is the safe,
|
|
20
|
+
* conservative prerequisite that lets M5 rerun with confidence.
|
|
21
|
+
*
|
|
22
|
+
* @see docs/internal/overstory-convergence-roadmap.md §3 (M4)
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import type { PhaseState } from "./store.ts";
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Read graph
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
/**
 * Observed read graph for one run: each key is a phase id and its value lists
 * the upstream step ids that phase was recorded as reading (the M3
 * `PhaseState.reads` entries).
 */
export type ReadMap = Map<string, ReadonlyArray<string>>;
|
|
33
|
+
|
|
34
|
+
/**
 * Fold a run's PhaseStates into a read map.
 *
 * Each phase's `reads` entries are reduced to their `stepId`s. Repeated
 * stepIds are collapsed (first occurrence wins, order preserved) so a single
 * upstream never shows up as two edges. Phases with no reads are omitted.
 *
 * @param phases - PhaseStates keyed by phase id.
 * @returns A map from each reading phase id to the distinct step ids it read.
 */
export function readMapOf(phases: Record<string, PhaseState>): ReadMap {
  const m: ReadMap = new Map();
  for (const [id, ps] of Object.entries(phases)) {
    // A Set iterates in insertion order, so deduplication keeps first-seen order.
    const deps = [...new Set((ps.reads ?? []).map((r) => r.stepId))];
    if (deps.length > 0) m.set(id, deps);
  }
  return m;
}
|
|
43
|
+
|
|
44
|
+
/**
 * List the immediate dependents of `phaseId`: every phase whose recorded reads
 * include it. Results follow the iteration order of `reads`.
 */
export function dependentsOf(reads: ReadMap, phaseId: string): string[] {
  return [...reads.entries()]
    .filter(([, upstream]) => upstream.includes(phaseId))
    .map(([reader]) => reader);
}
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Stale frontier (transitive closure, union semantics)
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
/**
 * The set of phases that are stale if `seeds` change, transitively. A reader
 * is stale if ANY phase it was observed reading is stale (union/I5: when in
 * doubt, assume dependency). Includes the seeds themselves.
 *
 * Deterministic. The read map is inverted once into an upstream → readers
 * index, so the walk is O(phases + read-edges) instead of rescanning every
 * edge for each visited phase. Cycles in the read graph (which a correct DAG
 * can't produce, but a pathological one could) terminate because a phase is
 * expanded at most once.
 *
 * The returned set iterates breadth-first: seeds in the order given, then
 * readers in read-map order.
 *
 * @param reads - Observed read graph (reader → upstream step ids).
 * @param seeds - Phase ids assumed to have changed; duplicates are tolerated.
 * @returns Every seed plus every phase that transitively read one.
 */
export function computeStaleFrontier(reads: ReadMap, seeds: Iterable<string>): Set<string> {
  // Invert reader → upstreams into upstream → readers, once.
  const readersOf = new Map<string, string[]>();
  for (const [reader, deps] of reads) {
    // Dedup so a reader that lists the same upstream twice is indexed once.
    for (const dep of new Set(deps)) {
      const readers = readersOf.get(dep);
      if (readers) readers.push(reader);
      else readersOf.set(dep, [reader]);
    }
  }

  const stale = new Set<string>();
  const queue: string[] = [...seeds];
  // Cursor-based dequeue: avoids re-indexing the array on every shift().
  for (let head = 0; head < queue.length; head++) {
    const current = queue[head] as string;
    if (stale.has(current)) continue;
    stale.add(current);
    for (const reader of readersOf.get(current) ?? []) {
      if (!stale.has(reader)) queue.push(reader);
    }
  }
  return stale;
}
|
|
79
|
+
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
// Rendering
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
/**
 * Render either the full observed dependency graph (no seeds) or the stale
 * frontier given assumed-changed seeds. Each stale phase lists the stale
 * upstreams that caused it (its "why").
 *
 * Seeds are deduplicated before rendering, so a phase id passed more than once
 * is listed a single time and the listing matches the phase count in the
 * header.
 *
 * @param runId - Run identifier shown in the header line.
 * @param flowName - Flow name shown in the header line.
 * @param reads - Observed read graph (reader → upstream step ids).
 * @param seeds - Phase ids assumed to have changed; empty means "show the graph".
 * @returns The report as newline-joined text.
 */
export function formatWhyStale(
  runId: string,
  flowName: string,
  reads: ReadMap,
  seeds: readonly string[],
): string {
  const lines: string[] = [];
  lines.push(`why-stale — run ${runId} · flow "${flowName}"`);
  lines.push("");

  if (seeds.length === 0) {
    // No seeds → show the full observed dependency graph (who reads what).
    if (reads.size === 0) {
      lines.push("(No observed readSets in this run — provenance is empty.)");
      return lines.join("\n");
    }
    lines.push("Observed dependency graph (who reads what):");
    lines.push("");
    for (const [reader, deps] of reads) {
      lines.push(`■ ${reader} reads: ${deps.join(", ")}`);
    }
    lines.push("");
    lines.push("Pass a phase id to compute its stale frontier: /tf why-stale <runId> <phaseId>");
    return lines.join("\n");
  }

  const frontier = computeStaleFrontier(reads, seeds);
  const seedSet = new Set(seeds);
  lines.push(`Assuming changed: ${[...seedSet].join(", ")}`);
  lines.push("");
  // The frontier always contains every distinct seed, so equal sizes means
  // nothing beyond the seeds was reached.
  if (frontier.size <= seedSet.size) {
    lines.push(`Stale frontier: only the seed(s) themselves — nothing else observed-reading them.`);
    return lines.join("\n");
  }
  lines.push(`Stale frontier (transitive, ${frontier.size} phases):`);
  // Order: seeds first (each once, in first-given order), then the rest.
  const ordered = [
    ...[...seedSet].filter((s) => frontier.has(s)),
    ...[...frontier].filter((s) => !seedSet.has(s)),
  ];
  for (const id of ordered) {
    if (seedSet.has(id)) {
      lines.push(` ■ ${id} (changed — seed)`);
    } else {
      // Why is it stale? The distinct stale upstreams it read.
      const causes = [...new Set(reads.get(id) ?? [])].filter((d) => frontier.has(d));
      lines.push(` ■ ${id} ← reads ${causes.length ? causes.join(", ") : "(nothing stale?)"}`);
    }
  }
  return lines.join("\n");
}
|
package/extensions/store.ts
CHANGED
|
@@ -70,6 +70,14 @@ export interface PhaseState {
|
|
|
70
70
|
/** Non-fatal diagnostic warnings accumulated during this phase (e.g.
|
|
71
71
|
* unresolved interpolation placeholders, suspicious templates). */
|
|
72
72
|
warnings?: string[];
|
|
73
|
+
/** Observed readSet (M3): the upstream phase outputs this phase actually
|
|
74
|
+
* consumed at interpolation time — not what it *declared* to depend on
|
|
75
|
+
* (dependsOn), but what it truly *read* (`{steps.X...}`). Each entry
|
|
76
|
+
* carries the version (= the read phase's inputHash) it consumed, so a
|
|
77
|
+
* later staleness check (M4/M5) can tell whether the upstream has moved.
|
|
78
|
+
* This is the overstory "observed readSet@version" moat: no other
|
|
79
|
+
* orchestrator records what a result actually depended on. */
|
|
80
|
+
reads?: Array<{ stepId: string; version?: string }>;
|
|
73
81
|
/** Truncated previews of interpolated strings used to execute this phase,
|
|
74
82
|
* useful when diagnosing why a model saw a literal placeholder. */
|
|
75
83
|
interpolation?: Array<{ source: string; text: string; missing?: string[] }>;
|
|
@@ -89,6 +97,12 @@ export interface RunState {
|
|
|
89
97
|
pid?: number;
|
|
90
98
|
/** True for runs spawned via `detach: true` (background execution). */
|
|
91
99
|
detached?: boolean;
|
|
100
|
+
/** Content fingerprint of the desugared flow definition (overstory hash
|
|
101
|
+
* algorithm). Folded into every phase's cache key so a structural change
|
|
102
|
+
* to the flow always invalidates cross-run cache hits — and an identical
|
|
103
|
+
* re-run always reuses them. Filled once at run start; persisted for
|
|
104
|
+
* audit/resume consistency. */
|
|
105
|
+
flowDefHash?: string | "failed";
|
|
92
106
|
}
|
|
93
107
|
|
|
94
108
|
// ---------------------------------------------------------------------------
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-taskflow",
|
|
3
|
-
"version": "0.0.24",
|
|
3
|
+
"version": "0.0.25",
|
|
4
4
|
"description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|