pi-taskflow 0.0.17 → 0.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +48 -0
- package/README.md +57 -24
- package/README.zh-CN.md +50 -17
- package/examples/dynamic-plan-execute.json +34 -0
- package/examples/iterative-replan.json +30 -0
- package/extensions/index.ts +2 -2
- package/extensions/runtime.ts +169 -11
- package/extensions/schema.ts +56 -1
- package/extensions/store.ts +5 -1
- package/package.json +3 -4
- package/skills/taskflow/SKILL.md +146 -18
- package/skills/taskflow/configuration.md +115 -5
package/extensions/runtime.ts
CHANGED
|
@@ -16,7 +16,8 @@ import type { AgentConfig } from "./agents.ts";
|
|
|
16
16
|
import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
|
|
17
17
|
import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
|
|
18
18
|
import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
|
|
19
|
-
import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
|
|
19
|
+
import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, MAX_DYNAMIC_MAP_ITEMS, MAX_DYNAMIC_NESTING, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode, validateTaskflow } from "./schema.ts";
|
|
20
|
+
import { verifyTaskflow } from "./verify.ts";
|
|
20
21
|
import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
|
|
21
22
|
import { CacheStore, resolveFingerprint } from "./cache.ts";
|
|
22
23
|
|
|
@@ -142,6 +143,63 @@ function failPhase(id: string, error: string): PhaseState {
|
|
|
142
143
|
return { id, status: "failed", error, inputHash: hashInput(id, error), endedAt: Date.now(), usage: emptyUsage() };
|
|
143
144
|
}
|
|
144
145
|
|
|
146
|
+
/**
|
|
147
|
+
* Normalize an inline `flow.def` payload into a full Taskflow shape.
|
|
148
|
+
* Accepts: a full Taskflow ({name?,phases:[...]}), a bare phases array, or
|
|
149
|
+
* {phases:[...]}. Returns undefined if the shape is unrecognized. A recognized
|
|
150
|
+
* shape with ZERO phases is returned as-is (caller treats it as a no-op) so the
|
|
151
|
+
* empty-plan case is distinguishable from a malformed one.
|
|
152
|
+
*
|
|
153
|
+
* The payload is deep-cloned so the runtime never shares references with (or
|
|
154
|
+
* mutates) the upstream phase's parsed JSON. After cloning, any top-level
|
|
155
|
+
* `__proto__` / `constructor` / `prototype` own-property a crafted JSON could carry is deleted.
|
|
156
|
+
*/
|
|
157
|
+
function normalizeInlineDef(parsed: unknown, phaseId: string): Taskflow | undefined {
|
|
158
|
+
let shaped: Taskflow | undefined;
|
|
159
|
+
if (Array.isArray(parsed)) {
|
|
160
|
+
shaped = { name: `${phaseId}-inline`, phases: parsed as Taskflow["phases"] };
|
|
161
|
+
} else if (parsed && typeof parsed === "object") {
|
|
162
|
+
const o = parsed as Record<string, unknown>;
|
|
163
|
+
if (Array.isArray(o.phases)) {
|
|
164
|
+
const name = typeof o.name === "string" && o.name.length > 0 ? (o.name as string) : `${phaseId}-inline`;
|
|
165
|
+
shaped = { ...(o as object), name, phases: o.phases as Taskflow["phases"] } as Taskflow;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
if (!shaped) return undefined;
|
|
169
|
+
// Deep clone via JSON round-trip: severs shared references with upstream output.
|
|
170
|
+
// The round-trip alone does NOT strip an own "__proto__" key (JSON.parse creates
|
|
171
|
+
// it as a plain own property and JSON.stringify keeps it), so explicitly delete
|
|
172
|
+
// top-level `__proto__`/`constructor`/`prototype` own-keys a crafted payload could carry.
|
|
173
|
+
try {
|
|
174
|
+
const clone = JSON.parse(JSON.stringify(shaped)) as Record<string, unknown>;
|
|
175
|
+
for (const k of ["__proto__", "constructor", "prototype"]) {
|
|
176
|
+
if (Object.prototype.hasOwnProperty.call(clone, k)) delete clone[k];
|
|
177
|
+
}
|
|
178
|
+
return clone as unknown as Taskflow;
|
|
179
|
+
} catch {
|
|
180
|
+
return undefined;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* Clamp a runtime-generated sub-flow's budget so it can only ever be TIGHTER
|
|
186
|
+
* than the parent's, never looser. A generated def cannot raise the spend cap by
|
|
187
|
+
* declaring its own large budget. Each dimension becomes min(child, parent).
|
|
188
|
+
*/
|
|
189
|
+
function clampSubFlowBudget(sub: Taskflow, parentBudget: Budget | undefined): Taskflow {
|
|
190
|
+
if (!parentBudget) return sub;
|
|
191
|
+
const child = sub.budget;
|
|
192
|
+
const clamped: Budget = {
|
|
193
|
+
maxUSD: Math.min(child?.maxUSD ?? Infinity, parentBudget.maxUSD ?? Infinity),
|
|
194
|
+
maxTokens: Math.min(child?.maxTokens ?? Infinity, parentBudget.maxTokens ?? Infinity),
|
|
195
|
+
};
|
|
196
|
+
// Drop Infinity dimensions (no cap on that axis).
|
|
197
|
+
const budget: Budget = {};
|
|
198
|
+
if (Number.isFinite(clamped.maxUSD)) budget.maxUSD = clamped.maxUSD;
|
|
199
|
+
if (Number.isFinite(clamped.maxTokens)) budget.maxTokens = clamped.maxTokens;
|
|
200
|
+
return { ...sub, budget: budget.maxUSD === undefined && budget.maxTokens === undefined ? undefined : budget };
|
|
201
|
+
}
|
|
202
|
+
|
|
145
203
|
/** Aggregate run cost/tokens so far and test against the budget. */
|
|
146
204
|
function overBudget(state: RunState): { over: boolean; reason: string } {
|
|
147
205
|
const budget: Budget | undefined = state.def.budget;
|
|
@@ -592,7 +650,15 @@ async function executePhase(
|
|
|
592
650
|
if (type === "map") {
|
|
593
651
|
const overResolved = interpolate(phase.over ?? "", ctx).text;
|
|
594
652
|
// `over` may itself be a placeholder that resolved to a JSON string.
|
|
595
|
-
|
|
653
|
+
let arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
|
|
654
|
+
// Breadth cap for untrusted dynamic sub-flows: a `def:` frame in the stack
|
|
655
|
+
// means we are inside a runtime-generated flow. Truncate giant fan-outs to
|
|
656
|
+
// bound subprocess blast radius (fail-open: keep the first N rather than abort).
|
|
657
|
+
let mapTruncated = false;
|
|
658
|
+
if (arr && (deps._stack ?? []).some((s) => s.startsWith("def:")) && arr.length > MAX_DYNAMIC_MAP_ITEMS) {
|
|
659
|
+
arr = arr.slice(0, MAX_DYNAMIC_MAP_ITEMS);
|
|
660
|
+
mapTruncated = true;
|
|
661
|
+
}
|
|
596
662
|
if (!arr) {
|
|
597
663
|
return {
|
|
598
664
|
id: phase.id,
|
|
@@ -617,6 +683,12 @@ async function executePhase(
|
|
|
617
683
|
|
|
618
684
|
const results = await runFanout(tasks);
|
|
619
685
|
const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
|
|
686
|
+
if (mapTruncated) {
|
|
687
|
+
ps.warnings = [...(ps.warnings ?? []), `map fan-out truncated to MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) inside a dynamic sub-flow`];
|
|
688
|
+
// NB: do NOT set ps.budgetTruncated — that field drives the run-level
|
|
689
|
+
// budget-blocked path and would mislabel the run as "budget exceeded".
|
|
690
|
+
// This is a safety fan-out cap, not a cost overrun; a warning is enough.
|
|
691
|
+
}
|
|
620
692
|
recordCache(cc, ps);
|
|
621
693
|
return ps;
|
|
622
694
|
}
|
|
@@ -660,14 +732,96 @@ async function executePhase(
|
|
|
660
732
|
|
|
661
733
|
if (type === "flow") {
|
|
662
734
|
const ctx = buildInterpolationContext(state, previousOutput);
|
|
663
|
-
const
|
|
664
|
-
if (!name) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use'`);
|
|
665
|
-
if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
|
|
666
|
-
const subDef = deps.loadFlow(name);
|
|
667
|
-
if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${name}'`);
|
|
735
|
+
const hasDef = (phase as { def?: unknown }).def !== undefined;
|
|
668
736
|
const stack = deps._stack ?? [];
|
|
669
|
-
|
|
670
|
-
|
|
737
|
+
|
|
738
|
+
let subDef: Taskflow | undefined;
|
|
739
|
+
let name: string;
|
|
740
|
+
let recursionKey: string; // identity used for cache key + recursion guard
|
|
741
|
+
|
|
742
|
+
if (hasDef) {
|
|
743
|
+
// --- Inline `def`: resolve at runtime, validate, fail-OPEN on any error. ---
|
|
744
|
+
// Fail-open contract: a bad def NEVER aborts the run. The phase resolves
|
|
745
|
+
// as `done` with empty output and a `defError` diagnostic, and the
|
|
746
|
+
// upstream output is preserved for downstream phases. (Authors who want
|
|
747
|
+
// a bad plan to be a hard failure can add their own gate downstream.)
|
|
748
|
+
const defFailOpen = (diag: string): PhaseState => ({
|
|
749
|
+
id: phase.id,
|
|
750
|
+
status: "done",
|
|
751
|
+
output: "",
|
|
752
|
+
json: parseJson ? safeParse("") : undefined,
|
|
753
|
+
usage: emptyUsage(),
|
|
754
|
+
inputHash: hashInput(phase.id, `flow-def-error:${diag}`),
|
|
755
|
+
endedAt: Date.now(),
|
|
756
|
+
defError: diag,
|
|
757
|
+
});
|
|
758
|
+
// Nesting guard: each `flow{def}` adds a frame to _stack; cap inline depth.
|
|
759
|
+
const inlineDepth = stack.filter((s) => s.startsWith("def:")).length;
|
|
760
|
+
if (inlineDepth >= MAX_DYNAMIC_NESTING) {
|
|
761
|
+
return defFailOpen(`inline sub-flow nesting exceeded MAX_DYNAMIC_NESTING (${MAX_DYNAMIC_NESTING}): depth ${inlineDepth}`);
|
|
762
|
+
}
|
|
763
|
+
const rawDef = (phase as { def?: unknown }).def;
|
|
764
|
+
// String defs are interpolated then JSON-parsed; objects are used directly.
|
|
765
|
+
let parsed: unknown;
|
|
766
|
+
if (typeof rawDef === "string") {
|
|
767
|
+
const resolved = interpolate(rawDef, ctx).text;
|
|
768
|
+
parsed = safeParse(resolved);
|
|
769
|
+
if (parsed === undefined) {
|
|
770
|
+
return defFailOpen("inline def string did not parse as JSON");
|
|
771
|
+
}
|
|
772
|
+
} else {
|
|
773
|
+
parsed = rawDef;
|
|
774
|
+
}
|
|
775
|
+
// Accept a full Taskflow, a bare phases array, or {phases:[...]}; wrap the latter two.
|
|
776
|
+
const wrapped = normalizeInlineDef(parsed, phase.id);
|
|
777
|
+
if (!wrapped) {
|
|
778
|
+
return defFailOpen("inline def is not a Taskflow, phases array, or {phases:[...]}");
|
|
779
|
+
}
|
|
780
|
+
// Empty plan is a valid no-op (a planner deciding there is nothing to do):
|
|
781
|
+
// succeed with empty output instead of failing validation on zero phases.
|
|
782
|
+
if (wrapped.phases.length === 0) {
|
|
783
|
+
return {
|
|
784
|
+
id: phase.id,
|
|
785
|
+
status: "done",
|
|
786
|
+
output: "",
|
|
787
|
+
json: parseJson ? safeParse("") : undefined,
|
|
788
|
+
usage: emptyUsage(),
|
|
789
|
+
inputHash: hashInput(phase.id, "flow-def-empty"),
|
|
790
|
+
endedAt: Date.now(),
|
|
791
|
+
};
|
|
792
|
+
}
|
|
793
|
+
// Validate with `dynamic` hardening (breadth caps + cwd containment) since
|
|
794
|
+
// this content is LLM-authored / untrusted. cwd anchors containment checks.
|
|
795
|
+
const dynCwd = phase.cwd ?? deps.cwd;
|
|
796
|
+
const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
|
|
797
|
+
if (!v.ok) {
|
|
798
|
+
return defFailOpen(`inline def failed validation: ${v.errors.join("; ")}`);
|
|
799
|
+
}
|
|
800
|
+
// Static verification (dead-ends, unreachable, gate-exhaustion, budget,
|
|
801
|
+
// concurrency). Only error-severity issues block; warnings are advisory.
|
|
802
|
+
const ver = verifyTaskflow({ name: wrapped.name, phases: wrapped.phases as Phase[], budget: wrapped.budget, concurrency: wrapped.concurrency });
|
|
803
|
+
if (!ver.ok) {
|
|
804
|
+
const errs = ver.issues.filter((i) => i.severity === "error").map((i) => i.message);
|
|
805
|
+
return defFailOpen(`inline def failed verification: ${errs.join("; ")}`);
|
|
806
|
+
}
|
|
807
|
+
// Budget containment: a generated def may not raise the parent's cap. Clamp
|
|
808
|
+
// each dimension to min(child, parent) so it can only ever be tighter.
|
|
809
|
+
subDef = clampSubFlowBudget(wrapped, state.def.budget);
|
|
810
|
+
name = subDef.name;
|
|
811
|
+
recursionKey = `def:${name}`;
|
|
812
|
+
} else {
|
|
813
|
+
// --- Saved flow via `use` (unchanged behavior). ---
|
|
814
|
+
const useName = phase.use;
|
|
815
|
+
if (!useName) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use' or 'def'`);
|
|
816
|
+
if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
|
|
817
|
+
subDef = deps.loadFlow(useName);
|
|
818
|
+
if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${useName}'`);
|
|
819
|
+
name = useName;
|
|
820
|
+
recursionKey = useName;
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
if (recursionKey === state.flowName || stack.includes(recursionKey)) {
|
|
824
|
+
return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, recursionKey].join(" -> ")}`);
|
|
671
825
|
}
|
|
672
826
|
// Resolve sub-flow args (interpolate string values), then apply declared defaults.
|
|
673
827
|
const provided: Record<string, unknown> = {};
|
|
@@ -675,7 +829,11 @@ async function executePhase(
|
|
|
675
829
|
provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
|
|
676
830
|
}
|
|
677
831
|
const subArgs = resolveArgs(subDef, provided);
|
|
678
|
-
|
|
832
|
+
// For inline defs the cache identity must include the resolved def content so
|
|
833
|
+
// that a different generated plan yields a different key (and an identical plan
|
|
834
|
+
// hits cache). For saved flows the name is the identity (historical behavior).
|
|
835
|
+
const flowIdentity = hasDef ? `def:${JSON.stringify(subDef)}` : `flow:${name}`;
|
|
836
|
+
const inputHash = cacheKey(cc, [phase.id, flowIdentity, preRead, JSON.stringify(subArgs)]);
|
|
679
837
|
const cached = cachedPhase(cc, inputHash);
|
|
680
838
|
if (cached) return cached;
|
|
681
839
|
|
|
@@ -707,7 +865,7 @@ async function executePhase(
|
|
|
707
865
|
// flow's cwd (not the caller's cwd).
|
|
708
866
|
cwd: phase.cwd ?? deps.cwd,
|
|
709
867
|
runTask: subRunTask,
|
|
710
|
-
_stack: [...stack, state.flowName],
|
|
868
|
+
_stack: hasDef ? [...stack, state.flowName, recursionKey] : [...stack, state.flowName],
|
|
711
869
|
persist: undefined,
|
|
712
870
|
onProgress: () => {
|
|
713
871
|
if (live) {
|
package/extensions/schema.ts
CHANGED
|
@@ -20,6 +20,19 @@ export type PhaseType = (typeof PHASE_TYPES)[number];
|
|
|
20
20
|
export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
|
|
21
21
|
export const LOOP_HARD_MAX_ITERATIONS = 100;
|
|
22
22
|
|
|
23
|
+
/** Max depth of runtime `flow { def }` sub-flow nesting (runaway guard for
|
|
24
|
+
* LLM-generated sub-flows that themselves spawn more sub-flows). The existing
|
|
25
|
+
* `_stack` recursion check guards saved-flow cycles; this bounds inline depth. */
|
|
26
|
+
export const MAX_DYNAMIC_NESTING = 5;
|
|
27
|
+
|
|
28
|
+
/** Breadth caps applied ONLY to runtime-generated (`flow { def }`) sub-flows,
|
|
29
|
+
* whose content is LLM-authored and therefore untrusted. Authored/saved flows
|
|
30
|
+
* are not subject to these (a human reviewed them). They bound DoS blast radius
|
|
31
|
+
* from a model emitting a graph with thousands of phases / a giant fan-out. */
|
|
32
|
+
export const MAX_DYNAMIC_PHASES = 100;
|
|
33
|
+
export const MAX_DYNAMIC_MAP_ITEMS = 200;
|
|
34
|
+
export const MAX_DYNAMIC_CONCURRENCY = 16;
|
|
35
|
+
|
|
23
36
|
/** Tournament competitor bounds. */
|
|
24
37
|
export const TOURNAMENT_DEFAULT_VARIANTS = 3;
|
|
25
38
|
export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
|
|
@@ -119,6 +132,12 @@ const PhaseSchema = Type.Object(
|
|
|
119
132
|
|
|
120
133
|
// sub-workflow (flow)
|
|
121
134
|
use: Type.Optional(Type.String({ description: "[flow] Name of a saved taskflow to run as this phase" })),
|
|
135
|
+
def: Type.Optional(
|
|
136
|
+
Type.Unknown({
|
|
137
|
+
description:
|
|
138
|
+
"[flow] Inline sub-flow definition, resolved at runtime. Mutually exclusive with 'use'. A string is interpolated (e.g. '{steps.plan.json}') then JSON-parsed; an object is used directly. The result must be a Taskflow ({name,phases}) or a bare phases array / {phases:[...]} (auto-wrapped). Validated + verified before execution; on any failure the phase fails-open (defError) without aborting the run.",
|
|
139
|
+
}),
|
|
140
|
+
),
|
|
122
141
|
with: Type.Optional(
|
|
123
142
|
Type.Record(Type.String(), Type.Unknown(), {
|
|
124
143
|
description: "[flow] Args passed to the sub-flow (string values support interpolation)",
|
|
@@ -388,6 +407,10 @@ export interface ValidationOptions {
|
|
|
388
407
|
cwd?: string;
|
|
389
408
|
/** Override the flow's own `strictInterpolation` flag for this validation call. */
|
|
390
409
|
strict?: boolean;
|
|
410
|
+
/** When true, this flow is a runtime-generated (`flow { def }`) sub-flow whose
|
|
411
|
+
* content is LLM-authored / untrusted. Enables hardening checks: breadth caps
|
|
412
|
+
* (phase count, map items, concurrency) and cwd containment under `cwd`. */
|
|
413
|
+
dynamic?: boolean;
|
|
391
414
|
}
|
|
392
415
|
|
|
393
416
|
export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): ValidationResult {
|
|
@@ -406,6 +429,32 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
406
429
|
return { ok: false, errors, warnings };
|
|
407
430
|
}
|
|
408
431
|
|
|
432
|
+
// Hardening for runtime-generated (untrusted) sub-flows: bound breadth and
|
|
433
|
+
// contain filesystem access. These do NOT apply to authored/saved flows.
|
|
434
|
+
if (opts.dynamic) {
|
|
435
|
+
if (flow.phases.length > MAX_DYNAMIC_PHASES) {
|
|
436
|
+
errors.push(`Dynamic sub-flow has too many phases (${flow.phases.length}, max ${MAX_DYNAMIC_PHASES})`);
|
|
437
|
+
}
|
|
438
|
+
if (typeof flow.concurrency === "number" && flow.concurrency > MAX_DYNAMIC_CONCURRENCY) {
|
|
439
|
+
errors.push(`Dynamic sub-flow concurrency too high (${flow.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
|
|
440
|
+
}
|
|
441
|
+
const root = opts.cwd ? path.resolve(opts.cwd) : undefined;
|
|
442
|
+
for (const p of flow.phases) {
|
|
443
|
+
if (!p || typeof p !== "object") continue;
|
|
444
|
+
// Per-phase concurrency override is also capped.
|
|
445
|
+
if (typeof p.concurrency === "number" && p.concurrency > MAX_DYNAMIC_CONCURRENCY) {
|
|
446
|
+
errors.push(`Dynamic sub-flow phase '${p.id}': concurrency too high (${p.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
|
|
447
|
+
}
|
|
448
|
+
// cwd containment: a generated phase may not escape the run's cwd.
|
|
449
|
+
if (typeof p.cwd === "string" && root) {
|
|
450
|
+
const resolved = path.resolve(root, p.cwd);
|
|
451
|
+
if (resolved !== root && !resolved.startsWith(root + path.sep)) {
|
|
452
|
+
errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
|
|
409
458
|
const ids = new Set<string>();
|
|
410
459
|
for (const p of flow.phases) {
|
|
411
460
|
if (!p || typeof p !== "object") {
|
|
@@ -439,7 +488,13 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
439
488
|
if (!p.task) errors.push(`Phase '${p.id}' (reduce) requires 'task'`);
|
|
440
489
|
}
|
|
441
490
|
if (type === "flow") {
|
|
442
|
-
|
|
491
|
+
const hasUse = typeof p.use === "string" && p.use.length > 0;
|
|
492
|
+
const hasDef = (p as { def?: unknown }).def !== undefined;
|
|
493
|
+
if (!hasUse && !hasDef) {
|
|
494
|
+
errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name) or 'def' (an inline definition)`);
|
|
495
|
+
} else if (hasUse && hasDef) {
|
|
496
|
+
errors.push(`Phase '${p.id}' (flow): 'use' and 'def' are mutually exclusive — provide exactly one`);
|
|
497
|
+
}
|
|
443
498
|
}
|
|
444
499
|
if (type === "loop") {
|
|
445
500
|
if (!p.task) errors.push(`Phase '${p.id}' (loop) requires 'task' (the iteration body)`);
|
package/extensions/store.ts
CHANGED
|
@@ -54,7 +54,8 @@ export interface PhaseState {
|
|
|
54
54
|
gate?: { verdict: "pass" | "block"; reason?: string };
|
|
55
55
|
/** Total subagent attempts incl. retries (when > calls, a retry happened). */
|
|
56
56
|
attempts?: number;
|
|
57
|
-
/** True when a map/parallel fan-out was cut short by the budget cap
|
|
57
|
+
/** True when a map/parallel fan-out was cut short by the budget cap. (The
|
|
58
|
+
* dynamic sub-flow MAX_DYNAMIC_MAP_ITEMS fan-out limit records a warning instead.) */
|
|
58
59
|
budgetTruncated?: boolean;
|
|
59
60
|
/** Human-in-the-loop outcome (approval phases only). */
|
|
60
61
|
approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
|
|
@@ -62,6 +63,9 @@ export interface PhaseState {
|
|
|
62
63
|
loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
|
|
63
64
|
/** Tournament outcome (tournament phases only). */
|
|
64
65
|
tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
|
|
66
|
+
/** Set when a `flow { def }` inline sub-flow definition could not be resolved,
|
|
67
|
+
* parsed, validated, or verified. The phase fails-open: this records why. */
|
|
68
|
+
defError?: string;
|
|
65
69
|
/** Non-fatal diagnostic warnings accumulated during this phase (e.g.
|
|
66
70
|
* unresolved interpolation placeholders, suspicious templates). */
|
|
67
71
|
warnings?: string[];
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-taskflow",
|
|
3
|
-
"version": "0.0.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.0.19",
|
|
4
|
+
"description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
7
7
|
"pi",
|
|
@@ -33,12 +33,11 @@
|
|
|
33
33
|
"README.md",
|
|
34
34
|
"README.zh-CN.md",
|
|
35
35
|
"CHANGELOG.md",
|
|
36
|
-
"DESIGN.md",
|
|
37
36
|
"LICENSE"
|
|
38
37
|
],
|
|
39
38
|
"scripts": {
|
|
40
39
|
"typecheck": "tsc --noEmit",
|
|
41
|
-
"test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts",
|
|
40
|
+
"test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts",
|
|
42
41
|
"test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
|
|
43
42
|
"test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
|
|
44
43
|
},
|
package/skills/taskflow/SKILL.md
CHANGED
|
@@ -79,15 +79,17 @@ Call the `taskflow` tool. To run a brand-new flow you write inline, pass
|
|
|
79
79
|
|
|
80
80
|
### Phase types
|
|
81
81
|
|
|
82
|
-
| type | meaning |
|
|
83
|
-
|
|
84
|
-
| `agent` | one subagent runs `task` |
|
|
85
|
-
| `parallel` | run `branches[]` concurrently |
|
|
86
|
-
| `map` | fan out over `over` (an array) — one subagent per item, `{item}` bound |
|
|
87
|
-
| `gate` | quality/review step that can **halt the flow**
|
|
88
|
-
| `reduce` | aggregate `from[]` phases into one output |
|
|
89
|
-
| `approval` | **human-in-the-loop** pause: ask a person to approve / reject / edit before continuing |
|
|
90
|
-
| `flow` | run a **
|
|
82
|
+
| type | meaning | details |
|
|
83
|
+
|------|---------|---------|
|
|
84
|
+
| `agent` | one subagent runs `task` | DSL shape |
|
|
85
|
+
| `parallel` | run `branches[]` concurrently | Conditional routing |
|
|
86
|
+
| `map` | fan out over `over` (an array) — one subagent per item, `{item}` bound | DSL shape |
|
|
87
|
+
| `gate` | quality/review step that can **halt the flow** | Gate phases |
|
|
88
|
+
| `reduce` | aggregate `from[]` phases into one output | DSL shape |
|
|
89
|
+
| `approval` | **human-in-the-loop** pause: ask a person to approve / reject / edit before continuing | Approval phases |
|
|
90
|
+
| `flow` | run a **sub-flow** as one phase — **saved** (`use`) or **runtime-generated** (`def`) | Sub-flows |
|
|
91
|
+
| `loop` | repeat a body until a condition / convergence / `maxIterations` | Loop phases |
|
|
92
|
+
| `tournament` | run N competing `variants`, a `judge` picks the best or aggregates | Tournament phases |
|
|
91
93
|
|
|
92
94
|
### Control-flow fields (any phase)
|
|
93
95
|
|
|
@@ -100,7 +102,9 @@ Call the `taskflow` tool. To run a brand-new flow you write inline, pass
|
|
|
100
102
|
### Conditional routing (when + gate/branches)
|
|
101
103
|
|
|
102
104
|
Pair `when` with an upstream phase that emits a decision to build real if/else
|
|
103
|
-
routing. Use `join: "any"` on the merge phase so it runs whichever branch fired
|
|
105
|
+
routing. Use `join: "any"` on the merge phase so it runs whichever branch fired. For
|
|
106
|
+
static (non-conditional) concurrency, a `parallel` phase runs fixed `branches[]`
|
|
107
|
+
instead — `{ "type": "parallel", "branches": [{"task":"..."}, {"task":"...","agent":"reviewer"}] }`.
|
|
104
108
|
|
|
105
109
|
```jsonc
|
|
106
110
|
{ "id": "triage", "type": "agent", "agent": "analyst", "output": "json",
|
|
@@ -133,15 +137,105 @@ deciding. The (interpolated) `task` is the prompt shown.
|
|
|
133
137
|
|
|
134
138
|
### Sub-flows (composition)
|
|
135
139
|
|
|
136
|
-
A `flow` phase runs another
|
|
137
|
-
output.
|
|
138
|
-
|
|
140
|
+
A `flow` phase runs another taskflow as a single phase and bubbles up its final
|
|
141
|
+
output. Two sources, **mutually exclusive**:
|
|
142
|
+
|
|
143
|
+
**Saved** (`use`) — run a previously saved flow by name. Pass args via `with`
|
|
144
|
+
(string values interpolate). Recursion is detected and rejected.
|
|
139
145
|
|
|
140
146
|
```jsonc
|
|
141
147
|
{ "id": "research", "type": "flow", "use": "deep-research",
|
|
142
148
|
"with": { "topic": "{item}" }, "dependsOn": ["plan"] }
|
|
143
149
|
```
|
|
144
150
|
|
|
151
|
+
**Runtime-generated** (`def`) — resolve a sub-flow *at runtime*, usually from an
|
|
152
|
+
upstream phase's JSON output. The runtime interpolates + JSON-parses the `def`,
|
|
153
|
+
**validates it** (cycles / dangling refs / duplicate ids), then runs it as a
|
|
154
|
+
nested sub-flow. This is how a planner decides *at runtime* what work to spawn —
|
|
155
|
+
the declarative answer to a code-mode `for`/`if` loop, with each generated plan
|
|
156
|
+
checked before it spends a token.
|
|
157
|
+
|
|
158
|
+
```jsonc
|
|
159
|
+
// 1) A planner emits a plan as JSON. 2) flow{def} runs it.
|
|
160
|
+
{ "id": "plan", "type": "agent", "agent": "planner", "output": "json",
|
|
161
|
+
"task": "Scan the repo. Output ONLY JSON {\"name\":\"audit\",\"phases\":[...]} — one audit phase per file." },
|
|
162
|
+
{ "id": "run", "type": "flow", "def": "{steps.plan.json}", "dependsOn": ["plan"], "final": true }
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
**LLM output contract for `def`:** the upstream phase must output a *full*
|
|
166
|
+
Taskflow `{"name":"...","phases":[...]}`, a bare `phases` array, or
|
|
167
|
+
`{"phases":[...]}` — pure JSON (a ```json fence is tolerated and stripped).
|
|
168
|
+
Use hyphens in ids, never underscores. Sub-flow phases reference each other in
|
|
169
|
+
their **own** `{steps.x.output}` namespace (no parent-id prefixing needed).
|
|
170
|
+
|
|
171
|
+
**Fail-open & limits:** if the `def` doesn't parse, has the wrong shape, or fails
|
|
172
|
+
validation, the phase fails *open* — it resolves as `done` with empty output and a `defError`, the
|
|
173
|
+
upstream output is preserved, and the run continues (use `optional: true` on the
|
|
174
|
+
flow phase so a bad plan never aborts the run). An **empty** `phases` array is a
|
|
175
|
+
valid no-op (the planner decided there's nothing to do). Inline nesting is capped
|
|
176
|
+
at `MAX_DYNAMIC_NESTING` (5) to bound runaway self-spawning.
|
|
177
|
+
|
|
178
|
+
**Iterative replanning** — pair `flow{def}` (or a JSON-emitting body) with `loop`
|
|
179
|
+
so round N's plan depends on round N-1's **result** (not a one-shot fan-out):
|
|
180
|
+
the declarative equivalent of `for (...) { read result; decide next }`. See
|
|
181
|
+
`examples/dynamic-plan-execute.json` and `examples/iterative-replan.json`.
|
|
182
|
+
|
|
183
|
+
### Loop phases (iterate until done)
|
|
184
|
+
|
|
185
|
+
A `loop` phase runs its body repeatedly, exposing each iteration's output as
|
|
186
|
+
`{steps.<thisId>.output}` / `.json` so the next round can react to the last. It
|
|
187
|
+
stops on the first of: `until` truthy, **convergence** (output stops changing),
|
|
188
|
+
or `maxIterations` (hard cap). This is the declarative "keep going until good
|
|
189
|
+
enough" — the runtime always terminates (the cap is mandatory).
|
|
190
|
+
|
|
191
|
+
- `until` — stop condition, same operators as `when` (a parse error stops the loop, fail-safe).
|
|
192
|
+
- `maxIterations` — hard iteration cap (required to bound the loop).
|
|
193
|
+
- `convergence` — `true` to stop early when an iteration's output equals the previous one.
|
|
194
|
+
|
|
195
|
+
```jsonc
|
|
196
|
+
{
|
|
197
|
+
"id": "refine",
|
|
198
|
+
"type": "loop",
|
|
199
|
+
"agent": "executor",
|
|
200
|
+
"maxIterations": 5,
|
|
201
|
+
"until": "{steps.refine.json.done} == true",
|
|
202
|
+
"convergence": true,
|
|
203
|
+
"task": "Improve the draft. When nothing else needs fixing, output JSON {\"done\":true,\"draft\":\"...\"}; otherwise {\"done\":false,\"draft\":\"...\"}.",
|
|
204
|
+
"output": "json",
|
|
205
|
+
"final": true
|
|
206
|
+
}
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
For data-dependent **replanning** each round, pair a `loop` body that emits a
|
|
210
|
+
plan with `flow{def}` (see Sub-flows above). See `examples/iterative-replan.json`.
|
|
211
|
+
|
|
212
|
+
### Tournament phases (N variants, judge picks best)
|
|
213
|
+
|
|
214
|
+
A `tournament` phase runs `variants` competing attempts in parallel, then a
|
|
215
|
+
**judge** sub-phase selects the winner (`mode: "best"`) or merges them
|
|
216
|
+
(`mode: "aggregate"`). Use it when one shot is unreliable and you want the best
|
|
217
|
+
of several drafts, or a synthesis of diverse approaches.
|
|
218
|
+
|
|
219
|
+
- `variants` — the competing attempts: a number (run the same `task` N times) or an array of `{task, agent?}` for genuinely different approaches.
|
|
220
|
+
- `mode` — `"best"` (judge picks one winner, default) or `"aggregate"` (judge merges all into one output).
|
|
221
|
+
- `judge` — the judge's rubric/instructions (how to choose or merge).
|
|
222
|
+
- `judgeAgent` — *(optional)* the agent that runs the judge step; defaults to the phase `agent`.
|
|
223
|
+
- Fail-open: if the judge's pick is unparseable, variant 1 is returned (work is never lost).
|
|
224
|
+
|
|
225
|
+
```jsonc
|
|
226
|
+
{
|
|
227
|
+
"id": "headline",
|
|
228
|
+
"type": "tournament",
|
|
229
|
+
"agent": "executor",
|
|
230
|
+
"variants": 3,
|
|
231
|
+
"mode": "best",
|
|
232
|
+
"judge": "Pick the clearest, most accurate headline. End with: WINNER: <n>.",
|
|
233
|
+
"task": "Write one headline for the article below.\n\n{steps.draft.output}",
|
|
234
|
+
"dependsOn": ["draft"],
|
|
235
|
+
"final": true
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
145
239
|
### Budget (cost / token caps)
|
|
146
240
|
|
|
147
241
|
Add a run-wide ceiling at the top level. When accumulated cost/tokens exceed it,
|
|
@@ -172,6 +266,30 @@ Review the audit results below. If any endpoint is missing auth, end with
|
|
|
172
266
|
{steps.audit.output}
|
|
173
267
|
```
|
|
174
268
|
|
|
269
|
+
**Zero-token machine checks (`eval`).** Before spending a token on the LLM gate,
|
|
270
|
+
list machine-checkable assertions in `eval`. If **all** pass, the gate
|
|
271
|
+
auto-passes with **no LLM call**; if any fails, it falls through to the LLM
|
|
272
|
+
`task` (the qualitative residue). Each entry supports the `when` operators plus
|
|
273
|
+
`X contains Y` (substring). A parse error fails **open** (consistent with the
|
|
274
|
+
gate invariant).
|
|
275
|
+
|
|
276
|
+
```jsonc
|
|
277
|
+
{ "id": "quality", "type": "gate", "dependsOn": ["build","test"],
|
|
278
|
+
"eval": ["{steps.build.output} contains BUILD SUCCESS", "{steps.test.json.failures} == 0"],
|
|
279
|
+
"task": "Review the diff for subtle logic errors a linter can't catch. VERDICT: PASS or BLOCK." }
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
**Self-healing (`onBlock: "retry"`).** By default a blocking gate halts the run
|
|
283
|
+
(`onBlock: "halt"`). With `onBlock: "retry"` the gate instead **re-runs its
|
|
284
|
+
upstream `dependsOn` phases and re-evaluates**, up to `retry.max` rounds (or
|
|
285
|
+
until PASS / budget / abort) — a generate→critique→regenerate rework loop.
|
|
286
|
+
|
|
287
|
+
```jsonc
|
|
288
|
+
{ "id": "spec-gate", "type": "gate", "onBlock": "retry", "retry": { "max": 3 },
|
|
289
|
+
"dependsOn": ["implement"],
|
|
290
|
+
"task": "Does the implementation satisfy ALL acceptance criteria? VERDICT: PASS or BLOCK with reasons." }
|
|
291
|
+
```
|
|
292
|
+
|
|
175
293
|
### Structured-verify phases (v0.0.8.1)
|
|
176
294
|
|
|
177
295
|
A "verify" phase typically runs `npx tsc --noEmit && npm test && git diff --stat`
|
|
@@ -309,16 +427,26 @@ variables, and storage paths — read `configuration.md` (next to this file).
|
|
|
309
427
|
Quick reference:
|
|
310
428
|
|
|
311
429
|
- **Flow:** `name`, `description`, `concurrency` (default 8), `budget` (`maxUSD`/`maxTokens`), `agentScope` (user|project|both), `args`, `strictInterpolation`.
|
|
312
|
-
- **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`with` (flow), `final`.
|
|
430
|
+
- **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`with` (flow), `optional` (fail-soft — a failed/blocked phase won't abort the run), `final`.
|
|
431
|
+
- **Cross-run caching:** add `cache: { "scope": "cross-run" }` to a phase to memoize its output across runs (same input → instant reuse, zero tokens). See `configuration.md` for `ttl`, `fingerprint` (git/glob/file/env invalidation), and scope options.
|
|
313
432
|
- **Precedence (model/thinking/tools):** phase value → agent frontmatter (resolved via `modelRoles`) → global/default.
|
|
314
433
|
- **Concurrency:** same-layer phases use `flow.concurrency`; a `map`/`parallel` phase uses `phase.concurrency ?? flow.concurrency ?? 8`.
|
|
315
434
|
|
|
316
435
|
## Actions
|
|
317
436
|
|
|
318
|
-
- `action: "run"` — run inline `define` or a saved `name` (with optional `args`).
|
|
319
|
-
- `action: "save"` — persist `define` (scope `project` or `user`); becomes `/tf:<name>`.
|
|
320
|
-
- `action: "resume"` — continue a paused/failed run by `runId
|
|
321
|
-
- `action: "list"` — list saved flows.
|
|
437
|
+
- `action: "run"` — run an inline `define` (a one-off DAG) **or** a saved `name` (with optional `args`). Use `define` for an ad-hoc flow; use `name` to invoke something previously saved.
|
|
438
|
+
- `action: "save"` — persist `define` (scope `project` — default, committed/shared — or `user`); it becomes `/tf:<name>`. On a name collision, project overrides user.
|
|
439
|
+
- `action: "resume"` — continue a paused/failed run by `runId`.
|
|
440
|
+
- `action: "list"` — list saved flows. `action: "verify"` — static-check a `define` (zero tokens). `action: "agents"` — list available agents.
|
|
441
|
+
|
|
442
|
+
## Operating a run (lifecycle, resume, inspection)
|
|
443
|
+
|
|
444
|
+
A run starts as `running` and ends in one of four states: `completed` (a `final` phase produced output), `blocked` (a gate emitted BLOCK, an `approval` was rejected, or the `budget` cap was hit), `failed` (a non-`optional` phase errored), or `paused` (the run was aborted). `failed` and `paused` runs are resumable; `blocked` is terminal (fix the gate/budget and re-run).
|
|
445
|
+
|
|
446
|
+
- **Resume is cache-aware.** `action: "resume"` re-runs only what didn't finish: every phase already `done` is reused from its recorded output (within-run cache), so resuming after a crash or a `failed`/`paused` stop never repeats completed work. A phase that was mid-flight is re-executed cleanly (stale `error`/`endedAt` are cleared first).
|
|
447
|
+
- **When to resume vs. re-run.** Resume when the inputs are unchanged and you just want to continue/retry the tail (fixed a gate, raised the budget, approved a checkpoint). Re-run from scratch when the task or upstream inputs changed — resume would reuse now-stale outputs. (For reuse *across* runs, opt a phase into `cache: {scope:"cross-run"}` — see configuration.md.)
|
|
448
|
+
- **Budget mid-run.** When the run-wide `budget` is exceeded, remaining phases are skipped and an in-flight `map`/`parallel` stops spawning new items; the run ends `blocked` with the partial outputs preserved.
|
|
449
|
+
- **Inspect runs.** `/tf runs` lists recent runs with status; `/tf show <name>` prints a saved flow's definition. Run state lives at `<project .pi>/taskflows/runs/<runId>.json` (gitignored).
|
|
322
450
|
|
|
323
451
|
## User commands
|
|
324
452
|
|