pi-taskflow 0.0.16 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,8 @@ import type { AgentConfig } from "./agents.ts";
16
16
  import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
17
17
  import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
18
18
  import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
19
- import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
19
+ import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, MAX_DYNAMIC_MAP_ITEMS, MAX_DYNAMIC_NESTING, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode, validateTaskflow } from "./schema.ts";
20
+ import { verifyTaskflow } from "./verify.ts";
20
21
  import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
21
22
  import { CacheStore, resolveFingerprint } from "./cache.ts";
22
23
 
@@ -70,8 +71,17 @@ function buildInterpolationContext(
70
71
  ): InterpolationContext {
71
72
  const steps: Record<string, { output: string; json?: unknown }> = {};
72
73
  for (const [id, ps] of Object.entries(state.phases)) {
73
- if (ps.status === "done" && ps.output !== undefined) {
74
- steps[id] = { output: ps.output, json: ps.json };
74
+ // Include both done AND failed phases so downstream phases can see
75
+ // error info. Skipped phases (upstream failure cascade) are excluded.
76
+ if (ps.status === "done" || ps.status === "failed") {
77
+ if (ps.output !== undefined) {
78
+ steps[id] = { output: ps.output, json: ps.json };
79
+ } else if (ps.status === "failed") {
80
+ // M-3: Failed phases without output get a placeholder so
81
+ // downstream references like {steps.X.output} resolve to a
82
+ // sensible value instead of leaving the raw placeholder intact.
83
+ steps[id] = { output: "[previous phase failed]", json: undefined };
84
+ }
75
85
  }
76
86
  }
77
87
  return { args: state.args, steps, previousOutput, locals };
@@ -80,10 +90,16 @@ function buildInterpolationContext(
80
90
  function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
81
91
  const failed = isFailed(r);
82
92
  const attempts = attemptsOf(r);
93
+ // For failed phases, embed the error info in the output so downstream
94
+ // phases (and the user) can see what went wrong. The raw r.output is
95
+ // often a useless placeholder like "(upstream error: subagent failed)".
96
+ const output = failed
97
+ ? r.errorMessage || r.stderr || r.output
98
+ : r.output;
83
99
  return {
84
100
  id,
85
101
  status: failed ? "failed" : "done",
86
- output: r.output,
102
+ output,
87
103
  json: parseJson && !failed ? safeParse(r.output) : undefined,
88
104
  usage: r.usage,
89
105
  model: r.model,
@@ -127,6 +143,63 @@ function failPhase(id: string, error: string): PhaseState {
127
143
  return { id, status: "failed", error, inputHash: hashInput(id, error), endedAt: Date.now(), usage: emptyUsage() };
128
144
  }
129
145
 
146
+ /**
147
+ * Normalize an inline `flow.def` payload into a full Taskflow shape.
148
+ * Accepts: a full Taskflow ({name?,phases:[...]}), a bare phases array, or
149
+ * {phases:[...]}. Returns undefined if the shape is unrecognized. A recognized
150
+ * shape with ZERO phases is returned as-is (caller treats it as a no-op) so the
151
+ * empty-plan case is distinguishable from a malformed one.
152
+ *
153
+ * The payload is deep-cloned so the runtime never shares references with (or
154
+ * mutates) the upstream phase's parsed JSON. After cloning, any top-level
154
+ * `__proto__`/`constructor`/`prototype` own-property a crafted JSON could carry is deleted.
156
+ */
157
+ function normalizeInlineDef(parsed: unknown, phaseId: string): Taskflow | undefined {
158
+ let shaped: Taskflow | undefined;
159
+ if (Array.isArray(parsed)) {
160
+ shaped = { name: `${phaseId}-inline`, phases: parsed as Taskflow["phases"] };
161
+ } else if (parsed && typeof parsed === "object") {
162
+ const o = parsed as Record<string, unknown>;
163
+ if (Array.isArray(o.phases)) {
164
+ const name = typeof o.name === "string" && o.name.length > 0 ? (o.name as string) : `${phaseId}-inline`;
165
+ shaped = { ...(o as object), name, phases: o.phases as Taskflow["phases"] } as Taskflow;
166
+ }
167
+ }
168
+ if (!shaped) return undefined;
169
+ // Deep clone via JSON round-trip: severs shared references with upstream output.
170
+ // The round-trip alone does NOT drop an own "__proto__" key (JSON.parse creates it
171
+ // as a plain own property and JSON.stringify serializes it), so explicitly delete
172
+ // top-level `__proto__`/`constructor`/`prototype` own-keys a crafted payload could carry.
173
+ try {
174
+ const clone = JSON.parse(JSON.stringify(shaped)) as Record<string, unknown>;
175
+ for (const k of ["__proto__", "constructor", "prototype"]) {
176
+ if (Object.prototype.hasOwnProperty.call(clone, k)) delete clone[k];
177
+ }
178
+ return clone as unknown as Taskflow;
179
+ } catch {
180
+ return undefined;
181
+ }
182
+ }
183
+
184
+ /**
185
+ * Clamp a runtime-generated sub-flow's budget so it can only ever be TIGHTER
186
+ * than the parent's, never looser. A generated def cannot raise the spend cap by
187
+ * declaring its own large budget. Each dimension becomes min(child, parent).
188
+ */
189
+ function clampSubFlowBudget(sub: Taskflow, parentBudget: Budget | undefined): Taskflow {
190
+ if (!parentBudget) return sub;
191
+ const child = sub.budget;
192
+ const clamped: Budget = {
193
+ maxUSD: Math.min(child?.maxUSD ?? Infinity, parentBudget.maxUSD ?? Infinity),
194
+ maxTokens: Math.min(child?.maxTokens ?? Infinity, parentBudget.maxTokens ?? Infinity),
195
+ };
196
+ // Drop Infinity dimensions (no cap on that axis).
197
+ const budget: Budget = {};
198
+ if (Number.isFinite(clamped.maxUSD)) budget.maxUSD = clamped.maxUSD;
199
+ if (Number.isFinite(clamped.maxTokens)) budget.maxTokens = clamped.maxTokens;
200
+ return { ...sub, budget: budget.maxUSD === undefined && budget.maxTokens === undefined ? undefined : budget };
201
+ }
202
+
130
203
  /** Aggregate run cost/tokens so far and test against the budget. */
131
204
  function overBudget(state: RunState): { over: boolean; reason: string } {
132
205
  const budget: Budget | undefined = state.def.budget;
@@ -156,8 +229,13 @@ function mergePhaseState(
156
229
  // which model produced the merged output.
157
230
  const model = ran.find((r) => r.model !== undefined)?.model;
158
231
  // Combine outputs as a labelled list; also expose a JSON array of outputs.
232
+ // For failed items, use the error message instead of the useless placeholder.
159
233
  const combinedText = ran
160
- .map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
234
+ .map((r, i) => {
235
+ const label = `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}`;
236
+ const content = isFailed(r) ? (r.errorMessage || r.stderr || r.output) : r.output;
237
+ return `${label}\n\n${content}`;
238
+ })
161
239
  .join("\n\n---\n\n");
162
240
  // Only successful runs feed the parsed JSON array (no error/skip strings).
163
241
  const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
@@ -373,7 +451,14 @@ async function executePhase(
373
451
  // Backoff: prefer the explicit policy's curve when the phase defines one
374
452
  // (covers transient retries too, and keeps tests fast with backoffMs:0),
375
453
  // otherwise use the transient defaults.
376
- const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
454
+ const baseMs = retry?.backoffMs != null ? retry.backoffMs : DEFAULT_TRANSIENT_BACKOFF_MS;
455
+ // Factor asymmetry is intentional:
456
+ // - Explicit retry: backoffMs * (factor ?? 1) ^ attempt — user's
457
+ // curve, defaults to flat (factor=1 → constant backoff).
458
+ // - Transient fallback: backoffMs * 2 ^ attempt — exponential.
459
+ // This lets users opt into flat retry with retry: {max:3} without
460
+ // specifying factor, while transient errors get proper exponential
461
+ // backoff.
377
462
  const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
378
463
  const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
379
464
  if (wait > 0) await delay(wait, deps.signal);
@@ -565,7 +650,15 @@ async function executePhase(
565
650
  if (type === "map") {
566
651
  const overResolved = interpolate(phase.over ?? "", ctx).text;
567
652
  // `over` may itself be a placeholder that resolved to a JSON string.
568
- const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
653
+ let arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
654
+ // Breadth cap for untrusted dynamic sub-flows: a `def:` frame in the stack
655
+ // means we are inside a runtime-generated flow. Truncate giant fan-outs to
656
+ // bound subprocess blast radius (fail-open: keep the first N rather than abort).
657
+ let mapTruncated = false;
658
+ if (arr && (deps._stack ?? []).some((s) => s.startsWith("def:")) && arr.length > MAX_DYNAMIC_MAP_ITEMS) {
659
+ arr = arr.slice(0, MAX_DYNAMIC_MAP_ITEMS);
660
+ mapTruncated = true;
661
+ }
569
662
  if (!arr) {
570
663
  return {
571
664
  id: phase.id,
@@ -590,6 +683,12 @@ async function executePhase(
590
683
 
591
684
  const results = await runFanout(tasks);
592
685
  const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
686
+ if (mapTruncated) {
687
+ ps.warnings = [...(ps.warnings ?? []), `map fan-out truncated to MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) inside a dynamic sub-flow`];
688
+ // NB: do NOT set ps.budgetTruncated — that field drives the run-level
689
+ // budget-blocked path and would mislabel the run as "budget exceeded".
690
+ // This is a safety fan-out cap, not a cost overrun; a warning is enough.
691
+ }
593
692
  recordCache(cc, ps);
594
693
  return ps;
595
694
  }
@@ -633,14 +732,96 @@ async function executePhase(
633
732
 
634
733
  if (type === "flow") {
635
734
  const ctx = buildInterpolationContext(state, previousOutput);
636
- const name = phase.use;
637
- if (!name) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use'`);
638
- if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
639
- const subDef = deps.loadFlow(name);
640
- if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${name}'`);
735
+ const hasDef = (phase as { def?: unknown }).def !== undefined;
641
736
  const stack = deps._stack ?? [];
642
- if (name === state.flowName || stack.includes(name)) {
643
- return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, name].join(" -> ")}`);
737
+
738
+ let subDef: Taskflow | undefined;
739
+ let name: string;
740
+ let recursionKey: string; // identity used for cache key + recursion guard
741
+
742
+ if (hasDef) {
743
+ // --- Inline `def`: resolve at runtime, validate, fail-OPEN on any error. ---
744
+ // Fail-open contract: a bad def NEVER aborts the run. The phase resolves
745
+ // as `done` with empty output and a `defError` diagnostic, and the
746
+ // upstream output is preserved for downstream phases. (Authors who want
747
+ // a bad plan to be a hard failure can add their own gate downstream.)
748
+ const defFailOpen = (diag: string): PhaseState => ({
749
+ id: phase.id,
750
+ status: "done",
751
+ output: "",
752
+ json: parseJson ? safeParse("") : undefined,
753
+ usage: emptyUsage(),
754
+ inputHash: hashInput(phase.id, `flow-def-error:${diag}`),
755
+ endedAt: Date.now(),
756
+ defError: diag,
757
+ });
758
+ // Nesting guard: each `flow{def}` adds a frame to _stack; cap inline depth.
759
+ const inlineDepth = stack.filter((s) => s.startsWith("def:")).length;
760
+ if (inlineDepth >= MAX_DYNAMIC_NESTING) {
761
+ return defFailOpen(`inline sub-flow nesting exceeded MAX_DYNAMIC_NESTING (${MAX_DYNAMIC_NESTING}): depth ${inlineDepth}`);
762
+ }
763
+ const rawDef = (phase as { def?: unknown }).def;
764
+ // String defs are interpolated then JSON-parsed; objects are used directly.
765
+ let parsed: unknown;
766
+ if (typeof rawDef === "string") {
767
+ const resolved = interpolate(rawDef, ctx).text;
768
+ parsed = safeParse(resolved);
769
+ if (parsed === undefined) {
770
+ return defFailOpen("inline def string did not parse as JSON");
771
+ }
772
+ } else {
773
+ parsed = rawDef;
774
+ }
775
+ // Accept a full Taskflow, a bare phases array, or {phases:[...]}; wrap the latter two.
776
+ const wrapped = normalizeInlineDef(parsed, phase.id);
777
+ if (!wrapped) {
778
+ return defFailOpen("inline def is not a Taskflow, phases array, or {phases:[...]}");
779
+ }
780
+ // Empty plan is a valid no-op (a planner deciding there is nothing to do):
781
+ // succeed with empty output instead of failing validation on zero phases.
782
+ if (wrapped.phases.length === 0) {
783
+ return {
784
+ id: phase.id,
785
+ status: "done",
786
+ output: "",
787
+ json: parseJson ? safeParse("") : undefined,
788
+ usage: emptyUsage(),
789
+ inputHash: hashInput(phase.id, "flow-def-empty"),
790
+ endedAt: Date.now(),
791
+ };
792
+ }
793
+ // Validate with `dynamic` hardening (breadth caps + cwd containment) since
794
+ // this content is LLM-authored / untrusted. cwd anchors containment checks.
795
+ const dynCwd = phase.cwd ?? deps.cwd;
796
+ const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
797
+ if (!v.ok) {
798
+ return defFailOpen(`inline def failed validation: ${v.errors.join("; ")}`);
799
+ }
800
+ // Static verification (dead-ends, unreachable, gate-exhaustion, budget,
801
+ // concurrency). Only error-severity issues block; warnings are advisory.
802
+ const ver = verifyTaskflow({ name: wrapped.name, phases: wrapped.phases as Phase[], budget: wrapped.budget, concurrency: wrapped.concurrency });
803
+ if (!ver.ok) {
804
+ const errs = ver.issues.filter((i) => i.severity === "error").map((i) => i.message);
805
+ return defFailOpen(`inline def failed verification: ${errs.join("; ")}`);
806
+ }
807
+ // Budget containment: a generated def may not raise the parent's cap. Clamp
808
+ // each dimension to min(child, parent) so it can only ever be tighter.
809
+ subDef = clampSubFlowBudget(wrapped, state.def.budget);
810
+ name = subDef.name;
811
+ recursionKey = `def:${name}`;
812
+ } else {
813
+ // --- Saved flow via `use` (unchanged behavior). ---
814
+ const useName = phase.use;
815
+ if (!useName) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use' or 'def'`);
816
+ if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
817
+ subDef = deps.loadFlow(useName);
818
+ if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${useName}'`);
819
+ name = useName;
820
+ recursionKey = useName;
821
+ }
822
+
823
+ if (recursionKey === state.flowName || stack.includes(recursionKey)) {
824
+ return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, recursionKey].join(" -> ")}`);
644
825
  }
645
826
  // Resolve sub-flow args (interpolate string values), then apply declared defaults.
646
827
  const provided: Record<string, unknown> = {};
@@ -648,7 +829,11 @@ async function executePhase(
648
829
  provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
649
830
  }
650
831
  const subArgs = resolveArgs(subDef, provided);
651
- const inputHash = cacheKey(cc, [phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs)]);
832
+ // For inline defs the cache identity must include the resolved def content so
833
+ // that a different generated plan yields a different key (and an identical plan
834
+ // hits cache). For saved flows the name is the identity (historical behavior).
835
+ const flowIdentity = hasDef ? `def:${JSON.stringify(subDef)}` : `flow:${name}`;
836
+ const inputHash = cacheKey(cc, [phase.id, flowIdentity, preRead, JSON.stringify(subArgs)]);
652
837
  const cached = cachedPhase(cc, inputHash);
653
838
  if (cached) return cached;
654
839
 
@@ -680,7 +865,7 @@ async function executePhase(
680
865
  // flow's cwd (not the caller's cwd).
681
866
  cwd: phase.cwd ?? deps.cwd,
682
867
  runTask: subRunTask,
683
- _stack: [...stack, state.flowName],
868
+ _stack: hasDef ? [...stack, state.flowName, recursionKey] : [...stack, state.flowName],
684
869
  persist: undefined,
685
870
  onProgress: () => {
686
871
  if (live) {
@@ -742,7 +927,7 @@ async function executePhase(
742
927
 
743
928
  for (let i = 1; i <= maxIters; i++) {
744
929
  if (deps.signal?.aborted) {
745
- stop = "failed";
930
+ stop = "aborted";
746
931
  break;
747
932
  }
748
933
  iterations = i;
@@ -788,14 +973,14 @@ async function executePhase(
788
973
  }
789
974
 
790
975
  const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
791
- if (failedResult) {
976
+ if (failedResult || stop === "failed" || stop === "aborted") {
792
977
  return {
793
978
  id: phase.id,
794
979
  status: "failed",
795
980
  output: lastOutput || undefined,
796
981
  usage: aggUsage,
797
- error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
798
- loop: { iterations, stop: "failed" },
982
+ error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
983
+ loop: { iterations, stop },
799
984
  warnings: loopWarnings.length ? loopWarnings : undefined,
800
985
  inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
801
986
  endedAt: Date.now(),
@@ -868,6 +1053,22 @@ async function executePhase(
868
1053
  };
869
1054
  }
870
1055
 
1056
+ // Guard: skip the judge if the run is over budget or aborted.
1057
+ if (deps.signal?.aborted || overBudget(state).over) {
1058
+ return {
1059
+ id: phase.id,
1060
+ status: "done",
1061
+ output: ok[0].output,
1062
+ json: parseJson ? safeParse(ok[0].output) : undefined,
1063
+ usage: variantUsage,
1064
+ model: ok[0].model,
1065
+ warnings: ["judge skipped: run aborted or budget exceeded"],
1066
+ tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
1067
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
1068
+ endedAt: Date.now(),
1069
+ };
1070
+ }
1071
+
871
1072
  // Build the judge prompt: label every variant output, then the rubric.
872
1073
  const labelled = ran
873
1074
  .map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
@@ -1288,6 +1489,10 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1288
1489
  if (!budgetReason) budgetReason = "fan-out truncated by budget";
1289
1490
  }
1290
1491
  // Budget ceiling: once exceeded, remaining phases are skipped.
1492
+ // For concurrent same-layer phases, the check runs after each phase
1493
+ // completes, so at most (concurrency - 1) extra phases may run before
1494
+ // the budget is detected as exceeded. This bounded overshoot is
1495
+ // acceptable: budgetBlocked prevents cascading into subsequent layers.
1291
1496
  const ob = overBudget(state);
1292
1497
  if (ob.over && !budgetBlocked) {
1293
1498
  budgetBlocked = true;
@@ -20,6 +20,19 @@ export type PhaseType = (typeof PHASE_TYPES)[number];
20
20
  export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
21
21
  export const LOOP_HARD_MAX_ITERATIONS = 100;
22
22
 
23
+ /** Max depth of runtime `flow { def }` sub-flow nesting (runaway guard for
24
+ * LLM-generated sub-flows that themselves spawn more sub-flows). The existing
25
+ * `_stack` recursion check guards saved-flow cycles; this bounds inline depth. */
26
+ export const MAX_DYNAMIC_NESTING = 5;
27
+
28
+ /** Breadth caps applied ONLY to runtime-generated (`flow { def }`) sub-flows,
29
+ * whose content is LLM-authored and therefore untrusted. Authored/saved flows
30
+ * are not subject to these (a human reviewed them). They bound DoS blast radius
31
+ * from a model emitting a graph with thousands of phases / a giant fan-out. */
32
+ export const MAX_DYNAMIC_PHASES = 100;
33
+ export const MAX_DYNAMIC_MAP_ITEMS = 200;
34
+ export const MAX_DYNAMIC_CONCURRENCY = 16;
35
+
23
36
  /** Tournament competitor bounds. */
24
37
  export const TOURNAMENT_DEFAULT_VARIANTS = 3;
25
38
  export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
@@ -119,6 +132,12 @@ const PhaseSchema = Type.Object(
119
132
 
120
133
  // sub-workflow (flow)
121
134
  use: Type.Optional(Type.String({ description: "[flow] Name of a saved taskflow to run as this phase" })),
135
+ def: Type.Optional(
136
+ Type.Unknown({
137
+ description:
138
+ "[flow] Inline sub-flow definition, resolved at runtime. Mutually exclusive with 'use'. A string is interpolated (e.g. '{steps.plan.json}') then JSON-parsed; an object is used directly. The result must be a Taskflow ({name,phases}) or a bare phases array / {phases:[...]} (auto-wrapped). Validated + verified before execution; on any failure the phase fails-open (defError) without aborting the run.",
139
+ }),
140
+ ),
122
141
  with: Type.Optional(
123
142
  Type.Record(Type.String(), Type.Unknown(), {
124
143
  description: "[flow] Args passed to the sub-flow (string values support interpolation)",
@@ -235,7 +254,7 @@ const ArgSpecSchema = Type.Object(
235
254
 
236
255
  export const TaskflowSchema = Type.Object(
237
256
  {
238
- name: Type.String({ description: "Workflow name (becomes /tf:<name> command when saved)" }),
257
+ name: Type.String({ minLength: 1, description: "Workflow name (becomes /tf:<name> command when saved)" }),
239
258
  description: Type.Optional(Type.String()),
240
259
  version: Type.Optional(Type.Number({ default: 1 })),
241
260
  args: Type.Optional(Type.Record(Type.String(), ArgSpecSchema, { description: "Declared invocation arguments" })),
@@ -388,6 +407,10 @@ export interface ValidationOptions {
388
407
  cwd?: string;
389
408
  /** Override the flow's own `strictInterpolation` flag for this validation call. */
390
409
  strict?: boolean;
410
+ /** When true, this flow is a runtime-generated (`flow { def }`) sub-flow whose
411
+ * content is LLM-authored / untrusted. Enables hardening checks: breadth caps
412
+ * (phase count, map items, concurrency) and cwd containment under `cwd`. */
413
+ dynamic?: boolean;
391
414
  }
392
415
 
393
416
  export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): ValidationResult {
@@ -406,6 +429,32 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
406
429
  return { ok: false, errors, warnings };
407
430
  }
408
431
 
432
+ // Hardening for runtime-generated (untrusted) sub-flows: bound breadth and
433
+ // contain filesystem access. These do NOT apply to authored/saved flows.
434
+ if (opts.dynamic) {
435
+ if (flow.phases.length > MAX_DYNAMIC_PHASES) {
436
+ errors.push(`Dynamic sub-flow has too many phases (${flow.phases.length}, max ${MAX_DYNAMIC_PHASES})`);
437
+ }
438
+ if (typeof flow.concurrency === "number" && flow.concurrency > MAX_DYNAMIC_CONCURRENCY) {
439
+ errors.push(`Dynamic sub-flow concurrency too high (${flow.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
440
+ }
441
+ const root = opts.cwd ? path.resolve(opts.cwd) : undefined;
442
+ for (const p of flow.phases) {
443
+ if (!p || typeof p !== "object") continue;
444
+ // Per-phase concurrency override is also capped.
445
+ if (typeof p.concurrency === "number" && p.concurrency > MAX_DYNAMIC_CONCURRENCY) {
446
+ errors.push(`Dynamic sub-flow phase '${p.id}': concurrency too high (${p.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
447
+ }
448
+ // cwd containment: a generated phase may not escape the run's cwd.
449
+ if (typeof p.cwd === "string" && root) {
450
+ const resolved = path.resolve(root, p.cwd);
451
+ if (resolved !== root && !resolved.startsWith(root + path.sep)) {
452
+ errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
453
+ }
454
+ }
455
+ }
456
+ }
457
+
409
458
  const ids = new Set<string>();
410
459
  for (const p of flow.phases) {
411
460
  if (!p || typeof p !== "object") {
@@ -439,7 +488,13 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
439
488
  if (!p.task) errors.push(`Phase '${p.id}' (reduce) requires 'task'`);
440
489
  }
441
490
  if (type === "flow") {
442
- if (!p.use) errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name)`);
491
+ const hasUse = typeof p.use === "string" && p.use.length > 0;
492
+ const hasDef = (p as { def?: unknown }).def !== undefined;
493
+ if (!hasUse && !hasDef) {
494
+ errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name) or 'def' (an inline definition)`);
495
+ } else if (hasUse && hasDef) {
496
+ errors.push(`Phase '${p.id}' (flow): 'use' and 'def' are mutually exclusive — provide exactly one`);
497
+ }
443
498
  }
444
499
  if (type === "loop") {
445
500
  if (!p.task) errors.push(`Phase '${p.id}' (loop) requires 'task' (the iteration body)`);
@@ -54,14 +54,18 @@ export interface PhaseState {
54
54
  gate?: { verdict: "pass" | "block"; reason?: string };
55
55
  /** Total subagent attempts incl. retries (when > calls, a retry happened). */
56
56
  attempts?: number;
57
- /** True when a map/parallel fan-out was cut short by the budget cap. */
57
+ /** True when a map/parallel fan-out was cut short by the budget cap. The dynamic
58
+ * sub-flow fan-out limit (MAX_DYNAMIC_MAP_ITEMS) is reported via `warnings` instead. */
58
59
  budgetTruncated?: boolean;
59
60
  /** Human-in-the-loop outcome (approval phases only). */
60
61
  approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
61
62
  /** Loop iteration accounting (loop phases only). */
62
- loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" };
63
+ loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
63
64
  /** Tournament outcome (tournament phases only). */
64
65
  tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
66
+ /** Set when a `flow { def }` inline sub-flow definition could not be resolved,
67
+ * parsed, validated, or verified. The phase fails-open: this records why. */
68
+ defError?: string;
65
69
  /** Non-fatal diagnostic warnings accumulated during this phase (e.g.
66
70
  * unresolved interpolation placeholders, suspicious templates). */
67
71
  warnings?: string[];
@@ -128,6 +132,9 @@ export const DEFAULT_RUN_AGE_DAYS = DEFAULT_MAX_AGE_DAYS;
128
132
  /** Last cleanup timestamp — module-level so it persists across calls. */
129
133
  let lastCleanupAt = 0;
130
134
 
135
+ /** Shared buffer for Atomics.wait in acquireLock busy-wait (Finding 6). */
136
+ const LOCK_WAIT_BUF = new Int32Array(new SharedArrayBuffer(4));
137
+
131
138
  // ---------------------------------------------------------------------------
132
139
  // Internal helpers — path construction & sanitisation
133
140
  // ---------------------------------------------------------------------------
@@ -142,7 +149,7 @@ let lastCleanupAt = 0;
142
149
  * bare-dot / leading-dot components after the character substitution so the
143
150
  * write path can never escape runs/ (risk-reviewer v0.0.9 audit, H1).
144
151
  */
145
- function safeFlowDirName(flowName: string): string {
152
+ export function safeFlowDirName(flowName: string): string {
146
153
  let safe = flowName.replace(/[^\w.-]+/g, "_");
147
154
  // Collapse leading dots: blocks ".", "..", and hidden-dir names like ".git".
148
155
  safe = safe.replace(/^\.+/, "_");
@@ -245,7 +252,7 @@ function acquireLock(lockPath: string, timeoutMs: number = LOCK_TIMEOUT_MS): voi
245
252
  throw new Error(`Lock timeout after ${timeoutMs}ms waiting for ${path.basename(lockPath)}`);
246
253
  }
247
254
  // Busy-wait with Atomics.wait (CPU-efficient sleep).
248
- Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, LOCK_POLL_MS);
255
+ Atomics.wait(LOCK_WAIT_BUF, 0, 0, LOCK_POLL_MS);
249
256
  }
250
257
  }
251
258
  }
@@ -392,11 +399,18 @@ function rebuildIndex(runsRoot: string): RunIndexEntry[] {
392
399
  } catch { /* skip corrupt */ }
393
400
  }
394
401
 
395
- const result = Array.from(entries.values());
396
- // Persist the rebuilt index under the index lock so it does not race a
397
- // concurrent updateIndexEntry / cleanup write (M1).
398
- withLock(indexLockPath(runsRoot), () => writeIndex(runsRoot, result));
399
- return result;
402
+ const scanned = Array.from(entries.values());
403
+ // Persist the rebuilt index under the index lock. Re-read the current
404
+ // index inside the lock and merge by runId so concurrent writes are not
405
+ // clobbered; scanned entries win on conflict (Finding 5).
406
+ withLock(indexLockPath(runsRoot), () => {
407
+ const currentIndex = readIndex(runsRoot);
408
+ const merged = new Map<string, RunIndexEntry>();
409
+ for (const e of currentIndex) merged.set(e.runId, e);
410
+ for (const e of scanned) merged.set(e.runId, e); // scanned wins
411
+ writeIndex(runsRoot, Array.from(merged.values()));
412
+ });
413
+ return scanned;
400
414
  }
401
415
 
402
416
  // ---------------------------------------------------------------------------
@@ -422,7 +436,8 @@ function cleanupTerminalRuns(
422
436
  maxKeep: number = DEFAULT_MAX_KEPT_TERMINAL,
423
437
  maxAgeDays: number = DEFAULT_MAX_AGE_DAYS,
424
438
  ): void {
425
- const now = Date.now();
439
+ const cleanupStarted = Date.now();
440
+ const now = cleanupStarted;
426
441
  if (now - lastCleanupAt < CLEANUP_INTERVAL_MS) return;
427
442
  lastCleanupAt = now;
428
443
 
@@ -473,6 +488,8 @@ function cleanupTerminalRuns(
473
488
  // Delete run files + lock files (outside the index lock).
474
489
  for (const e of toRemove) {
475
490
  const filePath = path.join(runsRoot, e.relPath);
491
+ // Race guard: skip files modified after cleanup started (Finding 2).
492
+ try { if (fs.statSync(filePath).mtimeMs > cleanupStarted) continue; } catch { continue; }
476
493
  try { fs.unlinkSync(filePath); } catch { /* already gone */ }
477
494
  // Also remove any orphaned lock file.
478
495
  try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
@@ -566,16 +583,19 @@ export function saveFlow(
566
583
  scope: "user" | "project" = "project",
567
584
  ): { filePath: string } {
568
585
  const dir = scope === "user" ? userFlowsDir() : (findProjectFlowsDir(cwd, true) ?? path.join(cwd, ".pi", "taskflows"));
586
+ if (!def.name || def.name.trim().length === 0) throw new Error("Flow name must not be empty");
569
587
  fs.mkdirSync(dir, { recursive: true });
570
- const safe = def.name.replace(/[^\w.-]+/g, "_");
588
+ const safe = safeFlowDirName(def.name);
571
589
  const filePath = path.join(dir, `${safe}.json`);
572
- writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`);
590
+ const fileLockPath = filePath + ".lock";
591
+ withLock(fileLockPath, () => { writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`); });
573
592
 
574
- // One-shot: let the user know we're creating a .pi/ directory on first save.
593
+ // One-shot: let the user know about .pi/ directory on first save (Finding 8).
575
594
  if (!_piCreationHinted) {
576
595
  _piCreationHinted = true;
596
+ const piExisted = fs.existsSync(path.join(dir, "..", ".."));
577
597
  console.warn(
578
- `[taskflow] Created .pi/taskflows/ for project-scoped flow storage. ` +
598
+ `[taskflow] ${piExisted ? "Using" : "Created"} .pi/taskflows/ for project-scoped flow storage. ` +
579
599
  `Add .pi/ to .gitignore if desired.`,
580
600
  );
581
601
  }
@@ -587,6 +607,8 @@ export function saveFlow(
587
607
  // --- Run state ---
588
608
 
589
609
  function runsDir(cwd: string): string {
610
+ // Safe non-null assertion: create=true guarantees a non-null return because
611
+ // findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
590
612
  const projDir = findProjectFlowsDir(cwd, true)!;
591
613
  return path.join(projDir, "runs");
592
614
  }
@@ -614,6 +636,9 @@ export function newRunId(flowName: string): string {
614
636
  * caller's reference.
615
637
  */
616
638
  export function saveRun(state: RunState, cleanup?: { maxKeep?: number; maxAgeDays?: number }): void {
639
+ // Reject unsafe runIds before any filesystem access (Finding 1).
640
+ if (!validateRunId(state.runId)) return;
641
+
617
642
  const root = runsDir(state.cwd);
618
643
  const flowDir = flowRunDir(root, state.flowName);
619
644
  fs.mkdirSync(flowDir, { recursive: true });
@@ -253,6 +253,7 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
253
253
  }
254
254
  }
255
255
 
256
+ const ESTIMATED_COST_PER_PHASE = 0.001; // $0.001 minimum per subagent call
256
257
  if (budget.maxTokens !== undefined && budget.maxTokens > 0 && minTokens > budget.maxTokens) {
257
258
  issues.push({
258
259
  message:
@@ -263,6 +264,16 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
263
264
  category: "budget-overflow",
264
265
  });
265
266
  }
267
+ if (budget.maxUSD !== undefined && budget.maxUSD > 0 && minTokens * ESTIMATED_COST_PER_PHASE > budget.maxUSD) {
268
+ issues.push({
269
+ message:
270
+ `Budget cap ($${budget.maxUSD}) is below the estimated minimum of ~$${(minTokens * ESTIMATED_COST_PER_PHASE).toFixed(3)} ` +
271
+ `for ${flow.phases.length} phase(s). The flow will likely be truncated before completion. ` +
272
+ `Increase maxUSD or reduce the number of phases.`,
273
+ severity: "warning",
274
+ category: "budget-overflow",
275
+ });
276
+ }
266
277
 
267
278
  return issues;
268
279
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-taskflow",
3
- "version": "0.0.16",
4
- "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
3
+ "version": "0.0.18",
4
+ "description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
5
5
  "keywords": [
6
6
  "pi-package",
7
7
  "pi",
@@ -33,12 +33,11 @@
33
33
  "README.md",
34
34
  "README.zh-CN.md",
35
35
  "CHANGELOG.md",
36
- "DESIGN.md",
37
36
  "LICENSE"
38
37
  ],
39
38
  "scripts": {
40
39
  "typecheck": "tsc --noEmit",
41
- "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts",
40
+ "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts",
42
41
  "test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
43
42
  "test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
44
43
  },