pi-taskflow 0.0.17 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,8 @@ import type { AgentConfig } from "./agents.ts";
16
16
  import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
17
17
  import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
18
18
  import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
19
- import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
19
+ import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, MAX_DYNAMIC_MAP_ITEMS, MAX_DYNAMIC_NESTING, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode, validateTaskflow } from "./schema.ts";
20
+ import { verifyTaskflow } from "./verify.ts";
20
21
  import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
21
22
  import { CacheStore, resolveFingerprint } from "./cache.ts";
22
23
 
@@ -142,6 +143,63 @@ function failPhase(id: string, error: string): PhaseState {
142
143
  return { id, status: "failed", error, inputHash: hashInput(id, error), endedAt: Date.now(), usage: emptyUsage() };
143
144
  }
144
145
 
146
/**
 * Normalize an inline `flow.def` payload into a full Taskflow shape.
 *
 * Accepted shapes: a full Taskflow (`{name?, phases: [...]}`), `{phases: [...]}`,
 * or a bare phases array. When no non-empty string `name` is present the flow is
 * named `${phaseId}-inline`. A recognized shape with ZERO phases is returned
 * as-is (the caller treats it as a no-op) so the empty-plan case stays
 * distinguishable from a malformed one.
 *
 * The payload is deep-cloned through a JSON round-trip so the runtime never
 * shares references with (or mutates) the upstream phase's parsed JSON.
 *
 * Security note: a JSON round-trip does NOT remove an own `__proto__` key —
 * `JSON.parse` creates it as an ordinary own data property and `JSON.stringify`
 * serializes it again. It is therefore stripped explicitly, at every depth, by
 * the reviver below, so a later merge/assign of any nested object cannot
 * trigger the `__proto__` setter.
 *
 * @param parsed  The untrusted, already-parsed `def` value.
 * @param phaseId Id of the owning `flow` phase; used to derive the default name.
 * @returns The cloned Taskflow, or `undefined` when the shape is unrecognized or
 *          the payload cannot be serialized (e.g. it contains a cycle).
 */
function normalizeInlineDef(parsed: unknown, phaseId: string): Taskflow | undefined {
  const fallbackName = `${phaseId}-inline`;
  let shaped: Taskflow | undefined;
  if (Array.isArray(parsed)) {
    shaped = { name: fallbackName, phases: parsed as Taskflow["phases"] };
  } else if (parsed && typeof parsed === "object") {
    const o = parsed as Record<string, unknown>;
    if (Array.isArray(o.phases)) {
      const name = typeof o.name === "string" && o.name.length > 0 ? (o.name as string) : fallbackName;
      shaped = { ...(o as object), name, phases: o.phases as Taskflow["phases"] } as Taskflow;
    }
  }
  if (!shaped) return undefined;
  try {
    // Returning `undefined` from a reviver deletes that property, so this drops
    // every own "__proto__" key anywhere in the tree (top level included).
    const clone = JSON.parse(
      JSON.stringify(shaped),
      (key: string, value: unknown) => (key === "__proto__" ? undefined : value),
    ) as Record<string, unknown>;
    // Belt-and-suspenders: also remove inert top-level `constructor`/`prototype`
    // own-keys that a crafted payload could carry.
    for (const k of ["constructor", "prototype"]) {
      if (Object.prototype.hasOwnProperty.call(clone, k)) delete clone[k];
    }
    return clone as unknown as Taskflow;
  } catch {
    // JSON.stringify throws on cycles / BigInt: treat as an unrecognized def.
    return undefined;
  }
}
183
+
184
/**
 * Clamp a runtime-generated sub-flow's budget so it can only ever be TIGHTER
 * than the parent's, never looser. A generated def cannot raise the spend cap by
 * declaring its own large budget. Each dimension becomes min(child, parent).
 *
 * Values that do not coerce to a finite number (missing, `null`, `NaN`,
 * a non-numeric string from an untrusted def) count as "no cap on this side".
 * Without that, `Math.min` would yield `NaN` and the dimension — including the
 * parent's cap — would be dropped entirely.
 *
 * @param sub          The (already normalized) sub-flow definition.
 * @param parentBudget The parent run's budget; when absent `sub` is returned unchanged.
 * @returns A shallow copy of `sub` whose `budget` holds only the finite, clamped
 *          dimensions, or `budget: undefined` when neither side caps anything.
 */
function clampSubFlowBudget(sub: Taskflow, parentBudget: Budget | undefined): Taskflow {
  if (!parentBudget) return sub;
  // Coerce like Math.min would, but map every non-finite result to Infinity (= uncapped).
  const capOf = (value: unknown): number => {
    if (value === undefined || value === null) return Infinity;
    const n = Number(value);
    return Number.isFinite(n) ? n : Infinity;
  };
  const child = sub.budget;
  const maxUSD = Math.min(capOf(child?.maxUSD), capOf(parentBudget.maxUSD));
  const maxTokens = Math.min(capOf(child?.maxTokens), capOf(parentBudget.maxTokens));
  // Drop Infinity dimensions (no cap on that axis).
  const budget: Budget = {};
  if (Number.isFinite(maxUSD)) budget.maxUSD = maxUSD;
  if (Number.isFinite(maxTokens)) budget.maxTokens = maxTokens;
  const uncapped = budget.maxUSD === undefined && budget.maxTokens === undefined;
  return { ...sub, budget: uncapped ? undefined : budget };
}
202
+
145
203
  /** Aggregate run cost/tokens so far and test against the budget. */
146
204
  function overBudget(state: RunState): { over: boolean; reason: string } {
147
205
  const budget: Budget | undefined = state.def.budget;
@@ -592,7 +650,15 @@ async function executePhase(
592
650
  if (type === "map") {
593
651
  const overResolved = interpolate(phase.over ?? "", ctx).text;
594
652
  // `over` may itself be a placeholder that resolved to a JSON string.
595
- const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
653
+ let arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
654
+ // Breadth cap for untrusted dynamic sub-flows: a `def:` frame in the stack
655
+ // means we are inside a runtime-generated flow. Truncate giant fan-outs to
656
+ // bound subprocess blast radius (fail-open: keep the first N rather than abort).
657
+ let mapTruncated = false;
658
+ if (arr && (deps._stack ?? []).some((s) => s.startsWith("def:")) && arr.length > MAX_DYNAMIC_MAP_ITEMS) {
659
+ arr = arr.slice(0, MAX_DYNAMIC_MAP_ITEMS);
660
+ mapTruncated = true;
661
+ }
596
662
  if (!arr) {
597
663
  return {
598
664
  id: phase.id,
@@ -617,6 +683,12 @@ async function executePhase(
617
683
 
618
684
  const results = await runFanout(tasks);
619
685
  const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
686
+ if (mapTruncated) {
687
+ ps.warnings = [...(ps.warnings ?? []), `map fan-out truncated to MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) inside a dynamic sub-flow`];
688
+ // NB: do NOT set ps.budgetTruncated — that field drives the run-level
689
+ // budget-blocked path and would mislabel the run as "budget exceeded".
690
+ // This is a safety fan-out cap, not a cost overrun; a warning is enough.
691
+ }
620
692
  recordCache(cc, ps);
621
693
  return ps;
622
694
  }
@@ -660,14 +732,96 @@ async function executePhase(
660
732
 
661
733
  if (type === "flow") {
662
734
  const ctx = buildInterpolationContext(state, previousOutput);
663
- const name = phase.use;
664
- if (!name) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use'`);
665
- if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
666
- const subDef = deps.loadFlow(name);
667
- if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${name}'`);
735
+ const hasDef = (phase as { def?: unknown }).def !== undefined;
668
736
  const stack = deps._stack ?? [];
669
- if (name === state.flowName || stack.includes(name)) {
670
- return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, name].join(" -> ")}`);
737
+
738
+ let subDef: Taskflow | undefined;
739
+ let name: string;
740
+ let recursionKey: string; // identity used for cache key + recursion guard
741
+
742
+ if (hasDef) {
743
+ // --- Inline `def`: resolve at runtime, validate, fail-OPEN on any error. ---
744
+ // Fail-open contract: a bad def NEVER aborts the run. The phase resolves
745
+ // as `done` with empty output and a `defError` diagnostic, and the
746
+ // upstream output is preserved for downstream phases. (Authors who want
747
+ // a bad plan to be a hard failure can add their own gate downstream.)
748
+ const defFailOpen = (diag: string): PhaseState => ({
749
+ id: phase.id,
750
+ status: "done",
751
+ output: "",
752
+ json: parseJson ? safeParse("") : undefined,
753
+ usage: emptyUsage(),
754
+ inputHash: hashInput(phase.id, `flow-def-error:${diag}`),
755
+ endedAt: Date.now(),
756
+ defError: diag,
757
+ });
758
+ // Nesting guard: each `flow{def}` adds a frame to _stack; cap inline depth.
759
+ const inlineDepth = stack.filter((s) => s.startsWith("def:")).length;
760
+ if (inlineDepth >= MAX_DYNAMIC_NESTING) {
761
+ return defFailOpen(`inline sub-flow nesting exceeded MAX_DYNAMIC_NESTING (${MAX_DYNAMIC_NESTING}): depth ${inlineDepth}`);
762
+ }
763
+ const rawDef = (phase as { def?: unknown }).def;
764
+ // String defs are interpolated then JSON-parsed; objects are used directly.
765
+ let parsed: unknown;
766
+ if (typeof rawDef === "string") {
767
+ const resolved = interpolate(rawDef, ctx).text;
768
+ parsed = safeParse(resolved);
769
+ if (parsed === undefined) {
770
+ return defFailOpen("inline def string did not parse as JSON");
771
+ }
772
+ } else {
773
+ parsed = rawDef;
774
+ }
775
+ // Accept a full Taskflow, a bare phases array, or {phases:[...]}; wrap the latter two.
776
+ const wrapped = normalizeInlineDef(parsed, phase.id);
777
+ if (!wrapped) {
778
+ return defFailOpen("inline def is not a Taskflow, phases array, or {phases:[...]}");
779
+ }
780
+ // Empty plan is a valid no-op (a planner deciding there is nothing to do):
781
+ // succeed with empty output instead of failing validation on zero phases.
782
+ if (wrapped.phases.length === 0) {
783
+ return {
784
+ id: phase.id,
785
+ status: "done",
786
+ output: "",
787
+ json: parseJson ? safeParse("") : undefined,
788
+ usage: emptyUsage(),
789
+ inputHash: hashInput(phase.id, "flow-def-empty"),
790
+ endedAt: Date.now(),
791
+ };
792
+ }
793
+ // Validate with `dynamic` hardening (breadth caps + cwd containment) since
794
+ // this content is LLM-authored / untrusted. cwd anchors containment checks.
795
+ const dynCwd = phase.cwd ?? deps.cwd;
796
+ const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
797
+ if (!v.ok) {
798
+ return defFailOpen(`inline def failed validation: ${v.errors.join("; ")}`);
799
+ }
800
+ // Static verification (dead-ends, unreachable, gate-exhaustion, budget,
801
+ // concurrency). Only error-severity issues block; warnings are advisory.
802
+ const ver = verifyTaskflow({ name: wrapped.name, phases: wrapped.phases as Phase[], budget: wrapped.budget, concurrency: wrapped.concurrency });
803
+ if (!ver.ok) {
804
+ const errs = ver.issues.filter((i) => i.severity === "error").map((i) => i.message);
805
+ return defFailOpen(`inline def failed verification: ${errs.join("; ")}`);
806
+ }
807
+ // Budget containment: a generated def may not raise the parent's cap. Clamp
808
+ // each dimension to min(child, parent) so it can only ever be tighter.
809
+ subDef = clampSubFlowBudget(wrapped, state.def.budget);
810
+ name = subDef.name;
811
+ recursionKey = `def:${name}`;
812
+ } else {
813
+ // --- Saved flow via `use` (unchanged behavior). ---
814
+ const useName = phase.use;
815
+ if (!useName) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use' or 'def'`);
816
+ if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
817
+ subDef = deps.loadFlow(useName);
818
+ if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${useName}'`);
819
+ name = useName;
820
+ recursionKey = useName;
821
+ }
822
+
823
+ if (recursionKey === state.flowName || stack.includes(recursionKey)) {
824
+ return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, recursionKey].join(" -> ")}`);
671
825
  }
672
826
  // Resolve sub-flow args (interpolate string values), then apply declared defaults.
673
827
  const provided: Record<string, unknown> = {};
@@ -675,7 +829,11 @@ async function executePhase(
675
829
  provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
676
830
  }
677
831
  const subArgs = resolveArgs(subDef, provided);
678
- const inputHash = cacheKey(cc, [phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs)]);
832
+ // For inline defs the cache identity must include the resolved def content so
833
+ // that a different generated plan yields a different key (and an identical plan
834
+ // hits cache). For saved flows the name is the identity (historical behavior).
835
+ const flowIdentity = hasDef ? `def:${JSON.stringify(subDef)}` : `flow:${name}`;
836
+ const inputHash = cacheKey(cc, [phase.id, flowIdentity, preRead, JSON.stringify(subArgs)]);
679
837
  const cached = cachedPhase(cc, inputHash);
680
838
  if (cached) return cached;
681
839
 
@@ -707,7 +865,7 @@ async function executePhase(
707
865
  // flow's cwd (not the caller's cwd).
708
866
  cwd: phase.cwd ?? deps.cwd,
709
867
  runTask: subRunTask,
710
- _stack: [...stack, state.flowName],
868
+ _stack: hasDef ? [...stack, state.flowName, recursionKey] : [...stack, state.flowName],
711
869
  persist: undefined,
712
870
  onProgress: () => {
713
871
  if (live) {
@@ -20,6 +20,19 @@ export type PhaseType = (typeof PHASE_TYPES)[number];
20
20
  export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
21
21
  export const LOOP_HARD_MAX_ITERATIONS = 100;
22
22
 
23
+ /** Max depth of runtime `flow { def }` sub-flow nesting (runaway guard for
24
+ * LLM-generated sub-flows that themselves spawn more sub-flows). The existing
25
+ * `_stack` recursion check guards saved-flow cycles; this bounds inline depth. */
26
+ export const MAX_DYNAMIC_NESTING = 5;
27
+
28
+ /** Breadth caps applied ONLY to runtime-generated (`flow { def }`) sub-flows,
29
+ * whose content is LLM-authored and therefore untrusted. Authored/saved flows
30
+ * are not subject to these (a human reviewed them). They bound DoS blast radius
31
+ * from a model emitting a graph with thousands of phases / a giant fan-out. */
32
+ export const MAX_DYNAMIC_PHASES = 100;
33
+ export const MAX_DYNAMIC_MAP_ITEMS = 200;
34
+ export const MAX_DYNAMIC_CONCURRENCY = 16;
35
+
23
36
  /** Tournament competitor bounds. */
24
37
  export const TOURNAMENT_DEFAULT_VARIANTS = 3;
25
38
  export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
@@ -119,6 +132,12 @@ const PhaseSchema = Type.Object(
119
132
 
120
133
  // sub-workflow (flow)
121
134
  use: Type.Optional(Type.String({ description: "[flow] Name of a saved taskflow to run as this phase" })),
135
+ def: Type.Optional(
136
+ Type.Unknown({
137
+ description:
138
+ "[flow] Inline sub-flow definition, resolved at runtime. Mutually exclusive with 'use'. A string is interpolated (e.g. '{steps.plan.json}') then JSON-parsed; an object is used directly. The result must be a Taskflow ({name,phases}) or a bare phases array / {phases:[...]} (auto-wrapped). Validated + verified before execution; on any failure the phase fails-open (defError) without aborting the run.",
139
+ }),
140
+ ),
122
141
  with: Type.Optional(
123
142
  Type.Record(Type.String(), Type.Unknown(), {
124
143
  description: "[flow] Args passed to the sub-flow (string values support interpolation)",
@@ -388,6 +407,10 @@ export interface ValidationOptions {
388
407
  cwd?: string;
389
408
  /** Override the flow's own `strictInterpolation` flag for this validation call. */
390
409
  strict?: boolean;
410
+ /** When true, this flow is a runtime-generated (`flow { def }`) sub-flow whose
411
+ * content is LLM-authored / untrusted. Enables hardening checks: breadth caps
412
+ * (phase count, map items, concurrency) and cwd containment under `cwd`. */
413
+ dynamic?: boolean;
391
414
  }
392
415
 
393
416
  export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): ValidationResult {
@@ -406,6 +429,32 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
406
429
  return { ok: false, errors, warnings };
407
430
  }
408
431
 
432
+ // Hardening for runtime-generated (untrusted) sub-flows: bound breadth and
433
+ // contain filesystem access. These do NOT apply to authored/saved flows.
434
+ if (opts.dynamic) {
435
+ if (flow.phases.length > MAX_DYNAMIC_PHASES) {
436
+ errors.push(`Dynamic sub-flow has too many phases (${flow.phases.length}, max ${MAX_DYNAMIC_PHASES})`);
437
+ }
438
+ if (typeof flow.concurrency === "number" && flow.concurrency > MAX_DYNAMIC_CONCURRENCY) {
439
+ errors.push(`Dynamic sub-flow concurrency too high (${flow.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
440
+ }
441
+ const root = opts.cwd ? path.resolve(opts.cwd) : undefined;
442
+ for (const p of flow.phases) {
443
+ if (!p || typeof p !== "object") continue;
444
+ // Per-phase concurrency override is also capped.
445
+ if (typeof p.concurrency === "number" && p.concurrency > MAX_DYNAMIC_CONCURRENCY) {
446
+ errors.push(`Dynamic sub-flow phase '${p.id}': concurrency too high (${p.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
447
+ }
448
+ // cwd containment: a generated phase may not escape the run's cwd.
449
+ if (typeof p.cwd === "string" && root) {
450
+ const resolved = path.resolve(root, p.cwd);
451
+ if (resolved !== root && !resolved.startsWith(root + path.sep)) {
452
+ errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
453
+ }
454
+ }
455
+ }
456
+ }
457
+
409
458
  const ids = new Set<string>();
410
459
  for (const p of flow.phases) {
411
460
  if (!p || typeof p !== "object") {
@@ -439,7 +488,13 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
439
488
  if (!p.task) errors.push(`Phase '${p.id}' (reduce) requires 'task'`);
440
489
  }
441
490
  if (type === "flow") {
442
- if (!p.use) errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name)`);
491
+ const hasUse = typeof p.use === "string" && p.use.length > 0;
492
+ const hasDef = (p as { def?: unknown }).def !== undefined;
493
+ if (!hasUse && !hasDef) {
494
+ errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name) or 'def' (an inline definition)`);
495
+ } else if (hasUse && hasDef) {
496
+ errors.push(`Phase '${p.id}' (flow): 'use' and 'def' are mutually exclusive — provide exactly one`);
497
+ }
443
498
  }
444
499
  if (type === "loop") {
445
500
  if (!p.task) errors.push(`Phase '${p.id}' (loop) requires 'task' (the iteration body)`);
@@ -54,7 +54,8 @@ export interface PhaseState {
54
54
  gate?: { verdict: "pass" | "block"; reason?: string };
55
55
  /** Total subagent attempts incl. retries (when > calls, a retry happened). */
56
56
  attempts?: number;
57
- /** True when a map/parallel fan-out was cut short by the budget cap. */
57
+ /** True when a map/parallel fan-out was cut short by the budget cap. NOT set for the
58
+ * dynamic sub-flow fan-out safety limit (MAX_DYNAMIC_MAP_ITEMS); that is reported via `warnings`. */
58
59
  budgetTruncated?: boolean;
59
60
  /** Human-in-the-loop outcome (approval phases only). */
60
61
  approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
@@ -62,6 +63,9 @@ export interface PhaseState {
62
63
  loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
63
64
  /** Tournament outcome (tournament phases only). */
64
65
  tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
66
+ /** Set when a `flow { def }` inline sub-flow definition could not be resolved,
67
+ * parsed, validated, or verified. The phase fails-open: this records why. */
68
+ defError?: string;
65
69
  /** Non-fatal diagnostic warnings accumulated during this phase (e.g.
66
70
  * unresolved interpolation placeholders, suspicious templates). */
67
71
  warnings?: string[];
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-taskflow",
3
- "version": "0.0.17",
4
- "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
3
+ "version": "0.0.19",
4
+ "description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
5
5
  "keywords": [
6
6
  "pi-package",
7
7
  "pi",
@@ -33,12 +33,11 @@
33
33
  "README.md",
34
34
  "README.zh-CN.md",
35
35
  "CHANGELOG.md",
36
- "DESIGN.md",
37
36
  "LICENSE"
38
37
  ],
39
38
  "scripts": {
40
39
  "typecheck": "tsc --noEmit",
41
- "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts",
40
+ "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts",
42
41
  "test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
43
42
  "test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
44
43
  },
@@ -79,15 +79,17 @@ Call the `taskflow` tool. To run a brand-new flow you write inline, pass
79
79
 
80
80
  ### Phase types
81
81
 
82
- | type | meaning |
83
- |------|---------|
84
- | `agent` | one subagent runs `task` |
85
- | `parallel` | run `branches[]` concurrently |
86
- | `map` | fan out over `over` (an array) — one subagent per item, `{item}` bound |
87
- | `gate` | quality/review step that can **halt the flow** (see below) |
88
- | `reduce` | aggregate `from[]` phases into one output |
89
- | `approval` | **human-in-the-loop** pause: ask a person to approve / reject / edit before continuing |
90
- | `flow` | run a **saved sub-flow** (by `use`) as a single phase composition/reuse |
82
+ | type | meaning | details |
83
+ |------|---------|---------|
84
+ | `agent` | one subagent runs `task` | DSL shape |
85
+ | `parallel` | run `branches[]` concurrently | Conditional routing |
86
+ | `map` | fan out over `over` (an array) — one subagent per item, `{item}` bound | DSL shape |
87
+ | `gate` | quality/review step that can **halt the flow** | Gate phases |
88
+ | `reduce` | aggregate `from[]` phases into one output | DSL shape |
89
+ | `approval` | **human-in-the-loop** pause: ask a person to approve / reject / edit before continuing | Approval phases |
90
+ | `flow` | run a **sub-flow** as one phase — **saved** (`use`) or **runtime-generated** (`def`) | Sub-flows |
91
+ | `loop` | repeat a body until a condition / convergence / `maxIterations` | Loop phases |
92
+ | `tournament` | run N competing `variants`, a `judge` picks the best or aggregates | Tournament phases |
91
93
 
92
94
  ### Control-flow fields (any phase)
93
95
 
@@ -100,7 +102,9 @@ Call the `taskflow` tool. To run a brand-new flow you write inline, pass
100
102
  ### Conditional routing (when + gate/branches)
101
103
 
102
104
  Pair `when` with an upstream phase that emits a decision to build real if/else
103
- routing. Use `join: "any"` on the merge phase so it runs whichever branch fired:
105
+ routing. Use `join: "any"` on the merge phase so it runs whichever branch fired. For
106
+ static (non-conditional) concurrency, a `parallel` phase runs fixed `branches[]`
107
+ instead — `{ "type": "parallel", "branches": [{"task":"..."}, {"task":"...","agent":"reviewer"}] }`.
104
108
 
105
109
  ```jsonc
106
110
  { "id": "triage", "type": "agent", "agent": "analyst", "output": "json",
@@ -133,15 +137,105 @@ deciding. The (interpolated) `task` is the prompt shown.
133
137
 
134
138
  ### Sub-flows (composition)
135
139
 
136
- A `flow` phase runs another **saved** taskflow by name and bubbles up its final
137
- output. Pass args via `with` (string values interpolate). Recursion is detected
138
- and rejected.
140
+ A `flow` phase runs another taskflow as a single phase and bubbles up its final
141
+ output. Two sources, **mutually exclusive**:
142
+
143
+ **Saved** (`use`) — run a previously saved flow by name. Pass args via `with`
144
+ (string values interpolate). Recursion is detected and rejected.
139
145
 
140
146
  ```jsonc
141
147
  { "id": "research", "type": "flow", "use": "deep-research",
142
148
  "with": { "topic": "{item}" }, "dependsOn": ["plan"] }
143
149
  ```
144
150
 
151
+ **Runtime-generated** (`def`) — resolve a sub-flow *at runtime*, usually from an
152
+ upstream phase's JSON output. The runtime interpolates + JSON-parses the `def`,
153
+ **validates it** (cycles / dangling refs / duplicate ids), then runs it as a
154
+ nested sub-flow. This is how a planner decides *at runtime* what work to spawn —
155
+ the declarative answer to a code-mode `for`/`if` loop, with each generated plan
156
+ checked before it spends a token.
157
+
158
+ ```jsonc
159
+ // 1) A planner emits a plan as JSON. 2) flow{def} runs it.
160
+ { "id": "plan", "type": "agent", "agent": "planner", "output": "json",
161
+ "task": "Scan the repo. Output ONLY JSON {\"name\":\"audit\",\"phases\":[...]} — one audit phase per file." },
162
+ { "id": "run", "type": "flow", "def": "{steps.plan.json}", "dependsOn": ["plan"], "final": true }
163
+ ```
164
+
165
+ **LLM output contract for `def`:** the upstream phase must output a *full*
166
+ Taskflow `{"name":"...","phases":[...]}`, a bare `phases` array, or
167
+ `{"phases":[...]}` — pure JSON (a ```json fence is tolerated and stripped).
168
+ Use hyphens in ids, never underscores. Sub-flow phases reference each other in
169
+ their **own** `{steps.x.output}` namespace (no parent-id prefixing needed).
170
+
171
+ **Fail-open & limits:** if the `def` doesn't parse, has the wrong shape, or fails
172
+ validation or verification, the phase fails *open* — it resolves as `done` with empty
173
+ output and a `defError` diagnostic, the upstream output is preserved, and the run continues
174
+ (add your own gate downstream if a bad plan should be a hard failure). An **empty** `phases` array is a
175
+ valid no-op (the planner decided there's nothing to do). Inline nesting is capped
176
+ at `MAX_DYNAMIC_NESTING` (5) to bound runaway self-spawning.
177
+
178
+ **Iterative replanning** — pair `flow{def}` (or a JSON-emitting body) with `loop`
179
+ so round N's plan depends on round N-1's **result** (not a one-shot fan-out):
180
+ the declarative equivalent of `for (...) { read result; decide next }`. See
181
+ `examples/dynamic-plan-execute.json` and `examples/iterative-replan.json`.
182
+
183
+ ### Loop phases (iterate until done)
184
+
185
+ A `loop` phase runs its body repeatedly, exposing each iteration's output as
186
+ `{steps.<thisId>.output}` / `.json` so the next round can react to the last. It
187
+ stops on the first of: `until` truthy, **convergence** (output stops changing),
188
+ or `maxIterations` (hard cap). This is the declarative "keep going until good
189
+ enough" — the runtime always terminates (the cap is mandatory).
190
+
191
+ - `until` — stop condition, same operators as `when` (a parse error stops the loop, fail-safe).
192
+ - `maxIterations` — hard iteration cap (required to bound the loop).
193
+ - `convergence` — `true` to stop early when an iteration's output equals the previous one.
194
+
195
+ ```jsonc
196
+ {
197
+ "id": "refine",
198
+ "type": "loop",
199
+ "agent": "executor",
200
+ "maxIterations": 5,
201
+ "until": "{steps.refine.json.done} == true",
202
+ "convergence": true,
203
+ "task": "Improve the draft. When nothing else needs fixing, output JSON {\"done\":true,\"draft\":\"...\"}; otherwise {\"done\":false,\"draft\":\"...\"}.",
204
+ "output": "json",
205
+ "final": true
206
+ }
207
+ ```
208
+
209
+ For data-dependent **replanning** each round, pair a `loop` body that emits a
210
+ plan with `flow{def}` (see Sub-flows above). See `examples/iterative-replan.json`.
211
+
212
+ ### Tournament phases (N variants, judge picks best)
213
+
214
+ A `tournament` phase runs `variants` competing attempts in parallel, then a
215
+ **judge** sub-phase selects the winner (`mode: "best"`) or merges them
216
+ (`mode: "aggregate"`). Use it when one shot is unreliable and you want the best
217
+ of several drafts, or a synthesis of diverse approaches.
218
+
219
+ - `variants` — the competing attempts: a number (run the same `task` N times) or an array of `{task, agent?}` for genuinely different approaches.
220
+ - `mode` — `"best"` (judge picks one winner, default) or `"aggregate"` (judge merges all into one output).
221
+ - `judge` — the judge's rubric/instructions (how to choose or merge).
222
+ - `judgeAgent` — *(optional)* the agent that runs the judge step; defaults to the phase `agent`.
223
+ - Fail-open: if the judge's pick is unparseable, variant 1 is returned (work is never lost).
224
+
225
+ ```jsonc
226
+ {
227
+ "id": "headline",
228
+ "type": "tournament",
229
+ "agent": "executor",
230
+ "variants": 3,
231
+ "mode": "best",
232
+ "judge": "Pick the clearest, most accurate headline. End with: WINNER: <n>.",
233
+ "task": "Write one headline for the article below.\n\n{steps.draft.output}",
234
+ "dependsOn": ["draft"],
235
+ "final": true
236
+ }
237
+ ```
238
+
145
239
  ### Budget (cost / token caps)
146
240
 
147
241
  Add a run-wide ceiling at the top level. When accumulated cost/tokens exceed it,
@@ -172,6 +266,30 @@ Review the audit results below. If any endpoint is missing auth, end with
172
266
  {steps.audit.output}
173
267
  ```
174
268
 
269
+ **Zero-token machine checks (`eval`).** Before spending a token on the LLM gate,
270
+ list machine-checkable assertions in `eval`. If **all** pass, the gate
271
+ auto-passes with **no LLM call**; if any fails, it falls through to the LLM
272
+ `task` (the qualitative residue). Each entry supports the `when` operators plus
273
+ `X contains Y` (substring). A parse error fails **open** (consistent with the
274
+ gate invariant).
275
+
276
+ ```jsonc
277
+ { "id": "quality", "type": "gate", "dependsOn": ["build","test"],
278
+ "eval": ["{steps.build.output} contains BUILD SUCCESS", "{steps.test.json.failures} == 0"],
279
+ "task": "Review the diff for subtle logic errors a linter can't catch. VERDICT: PASS or BLOCK." }
280
+ ```
281
+
282
+ **Self-healing (`onBlock: "retry"`).** By default a blocking gate halts the run
283
+ (`onBlock: "halt"`). With `onBlock: "retry"` the gate instead **re-runs its
284
+ upstream `dependsOn` phases and re-evaluates**, up to `retry.max` rounds (or
285
+ until PASS / budget / abort) — a generate→critique→regenerate rework loop.
286
+
287
+ ```jsonc
288
+ { "id": "spec-gate", "type": "gate", "onBlock": "retry", "retry": { "max": 3 },
289
+ "dependsOn": ["implement"],
290
+ "task": "Does the implementation satisfy ALL acceptance criteria? VERDICT: PASS or BLOCK with reasons." }
291
+ ```
292
+
175
293
  ### Structured-verify phases (v0.0.8.1)
176
294
 
177
295
  A "verify" phase typically runs `npx tsc --noEmit && npm test && git diff --stat`
@@ -309,16 +427,26 @@ variables, and storage paths — read `configuration.md` (next to this file).
309
427
  Quick reference:
310
428
 
311
429
  - **Flow:** `name`, `description`, `concurrency` (default 8), `budget` (`maxUSD`/`maxTokens`), `agentScope` (user|project|both), `args`, `strictInterpolation`.
312
- - **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`with` (flow), `final`.
430
+ - **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`def`/`with` (flow), `optional` (fail-soft — a failed/blocked phase won't abort the run), `final`.
431
+ - **Cross-run caching:** add `cache: { "scope": "cross-run" }` to a phase to memoize its output across runs (same input → instant reuse, zero tokens). See `configuration.md` for `ttl`, `fingerprint` (git/glob/file/env invalidation), and scope options.
313
432
  - **Precedence (model/thinking/tools):** phase value → agent frontmatter (resolved via `modelRoles`) → global/default.
314
433
  - **Concurrency:** same-layer phases use `flow.concurrency`; a `map`/`parallel` phase uses `phase.concurrency ?? flow.concurrency ?? 8`.
315
434
 
316
435
  ## Actions
317
436
 
318
- - `action: "run"` — run inline `define` or a saved `name` (with optional `args`).
319
- - `action: "save"` — persist `define` (scope `project` or `user`); becomes `/tf:<name>`.
320
- - `action: "resume"` — continue a paused/failed run by `runId` (completed phases are cached).
321
- - `action: "list"` — list saved flows.
437
+ - `action: "run"` — run an inline `define` (a one-off DAG) **or** a saved `name` (with optional `args`). Use `define` for an ad-hoc flow; use `name` to invoke something previously saved.
438
+ - `action: "save"` — persist `define` (scope `project` — default, committed/shared — or `user`); it becomes `/tf:<name>`. On a name collision, project overrides user.
439
+ - `action: "resume"` — continue a paused/failed run by `runId`.
440
+ - `action: "list"` — list saved flows. `action: "verify"` — static-check a `define` (zero tokens). `action: "agents"` — list available agents.
441
+
442
+ ## Operating a run (lifecycle, resume, inspection)
443
+
444
+ A run moves through: **running →** `completed` (a `final` phase produced output) **/** `blocked` (a gate emitted BLOCK, an `approval` was rejected, or the `budget` cap was hit) **/** `failed` (a non-`optional` phase errored) **/** `paused` (the run was aborted). `failed` and `paused` runs are resumable; `blocked` is terminal (fix the gate/budget and re-run).
445
+
446
+ - **Resume is cache-aware.** `action: "resume"` re-runs only what didn't finish: every phase already `done` is reused from its recorded output (within-run cache), so resuming after a crash or a `blocked`/`failed` stop never repeats completed work. A phase that was mid-flight is re-executed cleanly (stale `error`/`endedAt` are cleared first).
447
+ - **When to resume vs. re-run.** Resume when the inputs are unchanged and you just want to continue/retry the tail (fixed a gate, raised the budget, approved a checkpoint). Re-run from scratch when the task or upstream inputs changed — resume would reuse now-stale outputs. (For reuse *across* runs, opt a phase into `cache: {scope:"cross-run"}` — see configuration.md.)
448
+ - **Budget mid-run.** When the run-wide `budget` is exceeded, remaining phases are skipped and an in-flight `map`/`parallel` stops spawning new items; the run ends `blocked` with the partial outputs preserved.
449
+ - **Inspect runs.** `/tf runs` lists recent runs with status; `/tf show <name>` prints a saved flow's definition. Run state lives at `<project .pi>/taskflows/runs/<runId>.json` (gitignored).
322
450
 
323
451
  ## User commands
324
452