pi-taskflow 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,11 +10,29 @@
10
10
  * result are skipped.
11
11
  */
12
12
 
13
+ import * as path from "node:path";
14
+ import * as fs from "node:fs";
13
15
  import type { AgentConfig } from "./agents.ts";
14
- import { coerceArray, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
15
- import { aggregateUsage, emptyUsage, isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult, type UsageStats } from "./runner.ts";
16
- import { dependenciesOf, finalPhase, type Phase, type Taskflow, topoLayers } from "./schema.ts";
17
- import { hashInput, type PhaseState, type RunState } from "./store.ts";
16
+ import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
17
+ import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
18
+ import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
19
+ import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
20
+ import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
21
+
22
/**
 * Payload handed to the approver when an `approval` phase needs a human
 * decision before the flow may continue.
 */
export interface ApprovalRequest {
  /** Id of the phase that raised the request. */
  phaseId: string;
  /** Prompt shown to the human, after interpolation. */
  message: string;
  /** Output of the immediately-upstream phase, supplied for context. */
  upstream?: string;
}
30
+
31
/**
 * Answer returned by the approver for an approval request.
 * With `edit`, the note carries guidance that is passed downstream as the
 * phase output.
 */
export interface ApprovalDecision {
  /** Which way the human decided. */
  decision: "approve" | "reject" | "edit";
  /** Optional free-text accompanying the decision. */
  note?: string;
}
18
36
 
19
37
  export interface RuntimeDeps {
20
38
  cwd: string;
@@ -27,6 +45,12 @@ export interface RuntimeDeps {
27
45
  onProgress?: (state: RunState) => void;
28
46
  /** Injectable task runner (defaults to spawning a real subagent). Enables testing. */
29
47
  runTask?: typeof runAgentTask;
48
+ /** Resolve an `approval` phase. Omit for non-interactive runs (auto-approve). */
49
+ requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
50
+ /** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
51
+ loadFlow?: (name: string) => Taskflow | undefined;
52
+ /** Internal: sub-flow call stack, for recursion detection. */
53
+ _stack?: string[];
30
54
  }
31
55
 
32
56
  export interface RuntimeResult {
@@ -52,6 +76,7 @@ function buildInterpolationContext(
52
76
 
53
77
  function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
54
78
  const failed = isFailed(r);
79
+ const attempts = attemptsOf(r);
55
80
  return {
56
81
  id,
57
82
  status: failed ? "failed" : "done",
@@ -59,12 +84,60 @@ function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJs
59
84
  json: parseJson && !failed ? safeParse(r.output) : undefined,
60
85
  usage: r.usage,
61
86
  model: r.model,
87
+ attempts: attempts > 1 ? attempts : undefined,
62
88
  error: failed ? r.errorMessage || r.stderr || r.output : undefined,
63
89
  inputHash,
64
90
  endedAt: Date.now(),
65
91
  };
66
92
  }
67
93
 
94
/**
 * Number of attempts the retry wrapper recorded on a result.
 * Anything that is not a positive number (missing, zero, negative, NaN,
 * non-numeric) is reported as a single attempt.
 */
function attemptsOf(r: RunResult): number {
  const recorded = r.attempts;
  if (typeof recorded !== "number") return 1;
  if (recorded > 0) return recorded;
  return 1;
}
99
+
100
/**
 * Wait `ms` milliseconds, or until `signal` aborts — whichever happens first.
 * The promise always resolves (never rejects); a non-positive `ms` or an
 * already-aborted signal resolves immediately.
 */
function delay(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve) => {
    if (ms <= 0) {
      resolve();
      return;
    }

    let abortHandler: (() => void) | undefined;
    const timer = setTimeout(() => {
      // Timer won the race: detach the abort listener so it does not linger on the signal.
      if (signal && abortHandler) signal.removeEventListener("abort", abortHandler);
      resolve();
    }, ms);

    if (!signal) return;

    if (signal.aborted) {
      clearTimeout(timer);
      resolve();
      return;
    }

    abortHandler = () => {
      clearTimeout(timer);
      resolve();
    };
    signal.addEventListener("abort", abortHandler, { once: true });
  });
}
122
+
123
/**
 * Build a `failed` PhaseState for a phase that could not run.
 * The input hash is derived from the phase id and the error text, and usage
 * is the empty usage record.
 */
function failPhase(id: string, error: string): PhaseState {
  const failed: PhaseState = {
    id,
    status: "failed",
    error,
    inputHash: hashInput(id, error),
    endedAt: Date.now(),
    usage: emptyUsage(),
  };
  return failed;
}
126
+
127
/**
 * Sum the usage of every phase recorded so far and compare it with the
 * flow's declared budget.
 *
 * @returns `over: true` plus a human-readable reason when the cost cap or
 *   the token cap (input + output) is strictly exceeded; otherwise
 *   `over: false` with an empty reason. A flow without a budget is never over.
 */
function overBudget(state: RunState): { over: boolean; reason: string } {
  const within = { over: false, reason: "" };

  const budget: Budget | undefined = state.def.budget;
  if (!budget) return within;

  const perPhase = Object.values(state.phases).map((p) => p.usage ?? emptyUsage());
  const spent = aggregateUsage(perPhase);
  const { maxUSD, maxTokens } = budget;

  // The cost cap is checked first, so it wins when both caps are exceeded.
  if (maxUSD !== undefined && spent.cost > maxUSD) {
    return { over: true, reason: `cost $${spent.cost.toFixed(3)} exceeded cap $${maxUSD}` };
  }

  const tokens = spent.input + spent.output;
  if (maxTokens !== undefined && tokens > maxTokens) {
    return { over: true, reason: `tokens ${tokens} exceeded cap ${maxTokens}` };
  }

  return within;
}
140
+
68
141
  /** Merge several sub-results into a single PhaseState (for map/parallel). */
69
142
  function mergePhaseState(
70
143
  id: string,
@@ -72,27 +145,138 @@ function mergePhaseState(
72
145
  inputHash: string,
73
146
  parseJson: boolean,
74
147
  ): PhaseState {
75
- const anyFailed = results.some(isFailed);
148
+ const budgetSkips = results.filter((r) => r.stopReason === "budget-skipped");
149
+ const ran = results.filter((r) => r.stopReason !== "budget-skipped");
150
+ const anyFailed = ran.some(isFailed);
76
151
  const usage = aggregateUsage(results.map((r) => r.usage));
152
+ // B12: surface the model(s) used in the fan-out so consumers can show
153
+ // which model produced the merged output.
154
+ const model = ran.find((r) => r.model !== undefined)?.model;
77
155
  // Combine outputs as a labelled list; also expose a JSON array of outputs.
78
- const combinedText = results
79
- .map((r, i) => `### [${i + 1}/${results.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
156
+ const combinedText = ran
157
+ .map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
80
158
  .join("\n\n---\n\n");
81
- const jsonArray = parseJson ? results.map((r) => safeParse(r.output) ?? r.output) : undefined;
82
- const failedCount = results.filter(isFailed).length;
159
+ // Only successful runs feed the parsed JSON array (no error/skip strings).
160
+ const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
161
+ const failedCount = ran.filter(isFailed).length;
162
+ const attempts = results.reduce((sum, r) => sum + attemptsOf(r), 0);
163
+ const errors = ran.filter(isFailed).map((r) => `${r.agent}: ${r.errorMessage ?? r.stderr}`);
164
+ if (budgetSkips.length) errors.push(`${budgetSkips.length} item(s) skipped: budget exceeded`);
83
165
  return {
84
166
  id,
85
167
  status: anyFailed ? "failed" : "done",
86
168
  output: combinedText,
87
169
  json: jsonArray,
88
170
  usage,
89
- subProgress: { done: results.length, total: results.length, running: 0, failed: failedCount },
90
- error: anyFailed ? results.filter(isFailed).map((r) => `${r.agent}: ${r.errorMessage ?? r.stderr}`).join("; ") : undefined,
171
+ model,
172
+ attempts: attempts > results.length ? attempts : undefined,
173
+ budgetTruncated: budgetSkips.length > 0 || undefined,
174
+ subProgress: { done: ran.length, total: results.length, running: 0, failed: failedCount },
175
+ error: errors.length ? errors.join("; ") : undefined,
91
176
  inputHash,
92
177
  endedAt: Date.now(),
93
178
  };
94
179
  }
95
180
 
181
/**
 * Create the live-update callback for a single-agent phase.
 *
 * Each update copies the subagent's streaming text, usage and model onto the
 * phase's row in `state.phases` (when such a row exists) and then calls
 * `emitProgress`. The row is looked up on every update rather than captured
 * once, so a row replaced mid-run still receives the updates.
 */
function liveSink(state: RunState, phaseId: string, emitProgress: () => void): (l: LiveUpdate) => void {
  return function onLiveUpdate(update: LiveUpdate): void {
    const row = state.phases[phaseId];
    if (row) {
      Object.assign(row, { liveText: update.text, usage: update.usage, model: update.model });
    }
    // Progress is emitted even when the row is missing.
    emitProgress();
  };
}
196
+
197
+
198
/** Files larger than this many bytes are skipped instead of being read. */
const CONTEXT_MAX_FILE_BYTES = 10 * 1024 * 1024; // 10 MB
/** Upper bound, in characters, on the combined pre-read context string. */
const MAX_TOTAL_CONTEXT_CHARS = 200_000;

/**
 * Pre-read the files listed in a phase's `context` field and return them as
 * markdown code blocks. Handles:
 *   - literal paths
 *   - interpolation refs (e.g. `{steps.scout.json}` resolving to `["a.ts"]`)
 *   - per-file truncation via `contextLimit` (default 8000 characters)
 *
 * The result is a single string meant to be prepended to the phase task so
 * the subagent does not need to spend turns on file exploration.
 *
 * @param phase   Phase whose `context` / `contextLimit` are consulted.
 * @param ctx     Interpolation context used to resolve placeholder entries.
 * @param baseDir Directory that relative paths are resolved against.
 *                Defaults to the process working directory.
 * @returns The joined file blocks followed by a blank line, or `""` when the
 *          phase lists no context or none of the listed files could be read.
 */
async function resolvePhaseContext(
  phase: Phase,
  ctx: InterpolationContext,
  baseDir: string = process.cwd(),
): Promise<string> {
  const entries = phase.context;
  if (!entries || entries.length === 0) return "";
  // A negative limit would make `slice` cut from the end; treat it as zero.
  const limit = Math.max(0, phase.contextLimit ?? 8000);

  const paths: string[] = [];
  for (const entry of entries) {
    const resolved = interpolate(entry, ctx).text;
    if (resolved === entry) {
      // Unchanged by interpolation — a literal path.
      paths.push(entry);
      continue;
    }
    // Resolved — may be a JSON array of paths (e.g. from {steps.X.json}).
    const parsed = safeParse(resolved);
    if (Array.isArray(parsed)) {
      for (const item of parsed) {
        if (typeof item === "string" && item.trim()) paths.push(item.trim());
      }
    } else if (typeof resolved === "string" && resolved.trim()) {
      paths.push(resolved.trim());
    }
  }

  // Diagnose JSON blobs masquerading as file paths — common when a context
  // entry like {steps.discover.output} resolves to {"files":[...]} instead
  // of a flat path or JSON array. The author should use {steps.discover.json.files}.
  const candidates: string[] = [];
  for (const p of new Set(paths)) {
    if (p.startsWith("{")) {
      console.warn(
        `[taskflow] Context entry "${p.slice(0, 80)}…" looks like a JSON object, not a file path. ` +
          `Use {steps.<id>.json.<field>} to extract a specific field.`,
      );
      continue;
    }
    candidates.push(p);
  }

  // NOTE(review): entries can originate from a previous phase's output, i.e.
  // model-generated text. Nothing here confines reads to the project tree;
  // consider restricting paths to `baseDir` if flows run on untrusted input.
  const blocks: string[] = [];
  for (const p of candidates) {
    try {
      const abs = path.resolve(baseDir, p);
      // Async fs calls keep the event loop free for phases running concurrently.
      const stat = await fs.promises.stat(abs);
      if (!stat.isFile()) continue;
      if (stat.size > CONTEXT_MAX_FILE_BYTES) continue;
      const content = await fs.promises.readFile(abs, "utf-8");
      const truncated =
        content.length > limit
          ? content.slice(0, limit) + `\n... [truncated ${content.length - limit} chars]`
          : content;
      const ext = path.extname(p).slice(1) || "txt";
      blocks.push(`## File: ${p}\n\n\`\`\`${ext}\n${truncated}\n\`\`\``);
    } catch {
      // Best-effort: a missing or unreadable file must not fail the phase.
      console.warn(`[taskflow] Skipped unreadable context file: ${p}`);
    }
  }

  // Nothing readable: return an empty prefix rather than stray blank lines,
  // so the task text (and the hash derived from it) is left untouched.
  if (blocks.length === 0) return "";

  // Safety cap: truncate total context when too many files are listed.
  let result = blocks.join("\n\n") + "\n\n";
  if (result.length > MAX_TOTAL_CONTEXT_CHARS) {
    result =
      result.slice(0, MAX_TOTAL_CONTEXT_CHARS) +
      `\n\n... [truncated ${result.length - MAX_TOTAL_CONTEXT_CHARS} total chars]`;
  }
  return result;
}
278
+
279
+
96
280
  async function executePhase(
97
281
  phase: Phase,
98
282
  state: RunState,
@@ -105,7 +289,13 @@ async function executePhase(
105
289
  const previousOutput = lastCompletedOutput(state, phase);
106
290
  const run = deps.runTask ?? runAgentTask;
107
291
 
108
- const runOne = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
292
+ // Resolve context pre-read files once, before any type branching.
293
+ // The content is prepended to every task so the subagent never spends
294
+ // turns on file exploration for files the flow author already knows.
295
+ const ctx = buildInterpolationContext(state, previousOutput);
296
+ const preRead = await resolvePhaseContext(phase, ctx);
297
+
298
+ const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
109
299
  run(
110
300
  deps.cwd,
111
301
  deps.agents,
@@ -122,6 +312,48 @@ async function executePhase(
122
312
  deps.globalThinking,
123
313
  );
124
314
 
315
+ // Wrap each subagent call in the phase's retry policy. Usage is summed across
316
+ // attempts; the attempt count rides along on the result for the TUI.
317
+ const retry = phase.retry;
318
+ const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
319
+ const maxAttempts = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
320
+ const usages: UsageStats[] = [];
321
+ let last: RunResult | undefined;
322
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
323
+ if (deps.signal?.aborted) break;
324
+ last = await baseRun(agentName, task, onLive);
325
+ usages.push(last.usage);
326
+ // B6: aggregate and surface cumulative usage before the retry decision,
327
+ // so the TUI / budget guard see the in-flight spend on every attempt.
328
+ const liveRetry = state.phases[phase.id];
329
+ if (liveRetry) liveRetry.usage = aggregateUsage(usages);
330
+ if (!isFailed(last)) break;
331
+ // Stop retrying on abort or once the run is over budget.
332
+ if (deps.signal?.aborted || overBudget(state).over) break;
333
+ if (attempt < maxAttempts - 1) {
334
+ const wait = Math.min(60000, Math.round((retry?.backoffMs ?? 0) * (retry?.factor ?? 1) ** attempt));
335
+ await delay(wait, deps.signal);
336
+ }
337
+ }
338
+ // Aborted before any attempt ran → return a clean aborted result (no crash).
339
+ if (!last) {
340
+ return {
341
+ agent: agentName,
342
+ task,
343
+ exitCode: 1,
344
+ output: "",
345
+ stderr: "Aborted before execution",
346
+ usage: emptyUsage(),
347
+ stopReason: "aborted",
348
+ errorMessage: "Aborted before execution",
349
+ attempts: 0,
350
+ };
351
+ }
352
+ if (usages.length > 1) last.usage = aggregateUsage(usages);
353
+ last.attempts = usages.length;
354
+ return last;
355
+ };
356
+
125
357
  const parseJson = phase.output === "json";
126
358
 
127
359
  // Runs a list of sub-tasks with live fan-out progress + aggregate live usage/activity.
@@ -145,6 +377,20 @@ async function executePhase(
145
377
  };
146
378
  refresh();
147
379
  return mapWithConcurrencyLimit(items, concurrency, async (it, idx) => {
380
+ // Budget guard: stop spawning new fan-out items once the run is over budget.
381
+ if (overBudget(state).over) {
382
+ done++;
383
+ refresh();
384
+ return {
385
+ agent: it.agent,
386
+ task: it.task,
387
+ exitCode: 0,
388
+ output: "(skipped: budget exceeded)",
389
+ stderr: "",
390
+ usage: emptyUsage(),
391
+ stopReason: "budget-skipped",
392
+ } satisfies RunResult;
393
+ }
148
394
  running++;
149
395
  refresh();
150
396
  const r = await runOne(it.agent, it.task, (l) => {
@@ -162,33 +408,30 @@ async function executePhase(
162
408
  });
163
409
  };
164
410
 
165
- if (type === "agent" || type === "gate") {
166
- const ctx = buildInterpolationContext(state, previousOutput);
411
+ // Single-agent phases: agent, gate, and reduce all run one subagent on an
412
+ // interpolated task. gate additionally parses a verdict; reduce simply pulls
413
+ // its inputs from `from` phases (already exposed via interpolation).
414
+ if (type === "agent" || type === "gate" || type === "reduce") {
167
415
  const { text } = interpolate(phase.task ?? "", ctx);
168
- const inputHash = hashInput(phase.id, phase.agent ?? "", text);
416
+ const fullTask = preRead + text;
417
+ const inputHash = hashInput(phase.id, phase.agent ?? "", fullTask);
169
418
  const cached = cachedPhase(prior, inputHash);
170
419
  if (cached) return cached;
171
420
 
172
- const live = state.phases[phase.id];
173
- const r = await runOne(phase.agent ?? defaultAgent(deps), text, (l) => {
174
- if (live) {
175
- live.liveText = l.text;
176
- live.usage = l.usage;
177
- live.model = l.model;
178
- }
179
- emitProgress();
180
- });
421
+ const r = await runOne(phase.agent ?? defaultAgent(deps), fullTask, liveSink(state, phase.id, emitProgress));
181
422
  const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
182
423
  if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
183
424
  return ps;
184
425
  }
185
426
 
186
427
  if (type === "parallel") {
187
- const ctx = buildInterpolationContext(state, previousOutput);
188
- const branches = (phase.branches ?? []).map((b) => ({
189
- agent: b.agent ?? phase.agent ?? defaultAgent(deps),
190
- task: interpolate(b.task, ctx).text,
191
- }));
428
+ const branches = (phase.branches ?? []).map((b) => {
429
+ const r = interpolate(b.task, ctx);
430
+ return {
431
+ agent: b.agent ?? phase.agent ?? defaultAgent(deps),
432
+ task: preRead + r.text,
433
+ };
434
+ });
192
435
  const inputHash = hashInput(phase.id, JSON.stringify(branches));
193
436
  const cached = cachedPhase(prior, inputHash);
194
437
  if (cached) return cached;
@@ -198,7 +441,6 @@ async function executePhase(
198
441
  }
199
442
 
200
443
  if (type === "map") {
201
- const ctx = buildInterpolationContext(state, previousOutput);
202
444
  const overResolved = interpolate(phase.over ?? "", ctx).text;
203
445
  // `over` may itself be a placeholder that resolved to a JSON string.
204
446
  const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
@@ -217,7 +459,7 @@ async function executePhase(
217
459
  const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
218
460
  return {
219
461
  agent: phase.agent ?? defaultAgent(deps),
220
- task: interpolate(phase.task ?? "", localCtx).text,
462
+ task: preRead + interpolate(phase.task ?? "", localCtx).text,
221
463
  };
222
464
  });
223
465
  const inputHash = hashInput(phase.id, JSON.stringify(tasks));
@@ -228,24 +470,123 @@ async function executePhase(
228
470
  return mergePhaseState(phase.id, results, inputHash, parseJson);
229
471
  }
230
472
 
231
- if (type === "reduce") {
473
+ if (type === "approval") {
232
474
  const ctx = buildInterpolationContext(state, previousOutput);
233
- // Inputs for reduce come from `from` phases; interpolation already exposes them.
234
- const { text } = interpolate(phase.task ?? "", ctx);
235
- const inputHash = hashInput(phase.id, text);
475
+ const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
476
+ const inputHash = hashInput(phase.id, "approval", message);
477
+ const cached = cachedPhase(prior, inputHash);
478
+ if (cached) return cached;
479
+
480
+ // Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
481
+ if (!deps.requestApproval) {
482
+ return {
483
+ id: phase.id,
484
+ status: "done",
485
+ output: "(auto-approved: no interactive approver available)",
486
+ approval: { decision: "approve", auto: true },
487
+ usage: emptyUsage(),
488
+ inputHash,
489
+ endedAt: Date.now(),
490
+ };
491
+ }
492
+ const decision = await deps.requestApproval({ phaseId: phase.id, message, upstream: previousOutput });
493
+ const note = decision.note?.trim();
494
+ const ps: PhaseState = {
495
+ id: phase.id,
496
+ status: "done",
497
+ output: note || `(${decision.decision})`,
498
+ approval: { decision: decision.decision, note },
499
+ usage: emptyUsage(),
500
+ inputHash,
501
+ endedAt: Date.now(),
502
+ };
503
+ // A rejection halts the flow via the same mechanism as a blocking gate.
504
+ if (decision.decision === "reject") {
505
+ ps.gate = { verdict: "block", reason: note || "Rejected by user" };
506
+ }
507
+ return ps;
508
+ }
509
+
510
+ if (type === "flow") {
511
+ const ctx = buildInterpolationContext(state, previousOutput);
512
+ const name = phase.use;
513
+ if (!name) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use'`);
514
+ if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
515
+ const subDef = deps.loadFlow(name);
516
+ if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${name}'`);
517
+ const stack = deps._stack ?? [];
518
+ if (name === state.flowName || stack.includes(name)) {
519
+ return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, name].join(" -> ")}`);
520
+ }
521
+ // Resolve sub-flow args (interpolate string values), then apply declared defaults.
522
+ const provided: Record<string, unknown> = {};
523
+ for (const [k, v] of Object.entries(phase.with ?? {})) {
524
+ provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
525
+ }
526
+ const subArgs = resolveArgs(subDef, provided);
527
+ const inputHash = hashInput(phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs));
236
528
  const cached = cachedPhase(prior, inputHash);
237
529
  if (cached) return cached;
238
530
 
239
531
  const live = state.phases[phase.id];
240
- const r = await runOne(phase.agent ?? defaultAgent(deps), text, (l) => {
241
- if (live) {
242
- live.liveText = l.text;
243
- live.usage = l.usage;
244
- live.model = l.model;
245
- }
246
- emitProgress();
532
+ // Sub-flows enforce their own budget; if they declare none, inherit the
533
+ // parent cap as a soft per-flow ceiling (best-effort — spend does not cross
534
+ // flow boundaries, so the parent's already-spent total is not subtracted).
535
+ const subDefEffective = subDef.budget || !state.def.budget ? subDef : { ...subDef, budget: state.def.budget };
536
+ const subState: RunState = {
537
+ runId: newRunId(subDef.name),
538
+ flowName: subDef.name,
539
+ def: subDefEffective,
540
+ args: subArgs,
541
+ status: "running",
542
+ phases: {},
543
+ createdAt: Date.now(),
544
+ updatedAt: Date.now(),
545
+ cwd: phase.cwd ?? deps.cwd,
546
+ };
547
+ // B8: pass this flow phase's preRead content to every sub-flow phase by
548
+ // wrapping runTask — sub-phase preRead still gets prepended on top of it.
549
+ const baseRunTask = deps.runTask ?? runAgentTask;
550
+ const subRunTask: typeof runAgentTask = (cwd, agents, agentName, subTask, opts, globalThinking) =>
551
+ baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
552
+ const subResult = await executeTaskflow(subState, {
553
+ ...deps,
554
+ runTask: subRunTask,
555
+ _stack: [...stack, state.flowName],
556
+ persist: undefined,
557
+ onProgress: () => {
558
+ if (live) {
559
+ const ph = Object.values(subState.phases);
560
+ live.subProgress = {
561
+ done: ph.filter((p) => p.status === "done").length,
562
+ total: subDef.phases.length,
563
+ running: ph.filter((p) => p.status === "running").length,
564
+ failed: ph.filter((p) => p.status === "failed").length,
565
+ };
566
+ const cur = ph.find((p) => p.status === "running");
567
+ if (cur) live.liveText = `↳ ${cur.id}${cur.liveText ? `: ${cur.liveText}` : ""}`;
568
+ live.usage = aggregateUsage(ph.map((p) => p.usage ?? emptyUsage()));
569
+ }
570
+ emitProgress();
571
+ },
247
572
  });
248
- return resultToPhaseState(phase.id, r, inputHash, parseJson);
573
+ const sp = Object.values(subState.phases);
574
+ return {
575
+ id: phase.id,
576
+ status: subResult.ok ? "done" : "failed",
577
+ output: subResult.finalOutput,
578
+ json: parseJson ? safeParse(subResult.finalOutput) : undefined,
579
+ usage: subResult.totalUsage,
580
+ subProgress: {
581
+ done: sp.filter((p) => p.status === "done").length,
582
+ total: subDef.phases.length,
583
+ running: 0,
584
+ failed: sp.filter((p) => p.status === "failed").length,
585
+ },
586
+ error: subResult.ok ? undefined : `sub-flow '${name}' ${subResult.state.status}`,
587
+ inputHash,
588
+ endedAt: Date.now(),
589
+ };
249
590
  }
250
591
 
251
592
  return {
@@ -259,7 +600,7 @@ async function executePhase(
259
600
 
260
601
  /** Resolve a `{steps.x.json}`-style ref directly to its parsed value (bypassing stringify). */
261
602
  function directRef(over: string, state: RunState): unknown {
262
- const m = over.match(/^\{steps\.([a-zA-Z0-9_]+)\.(output|json)(?:\.([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*))?\}$/);
603
+ const m = over.match(/^\{steps\.([a-zA-Z0-9_-]+)\.(output|json)(?:\.([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*))?\}$/);
263
604
  if (!m) return undefined;
264
605
  const step = state.phases[m[1]];
265
606
  if (!step || step.status !== "done") return undefined;
@@ -329,6 +670,29 @@ function asReason(v: unknown): string | undefined {
329
670
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
330
671
  */
331
672
  export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
673
+ const def: Taskflow = state.def;
674
+ try {
675
+ return await runTaskflowLayers(state, deps);
676
+ } catch (e) {
677
+ // A thrown phase must not leave the run wedged in "running" (which breaks
678
+ // resume). Mark any in-flight phase + the run as failed, persist, and return.
679
+ const message = e instanceof Error ? e.message : String(e);
680
+ for (const p of Object.values(state.phases)) {
681
+ if (p.status === "running") {
682
+ p.status = "failed";
683
+ p.error = p.error ?? message;
684
+ p.endedAt = Date.now();
685
+ }
686
+ }
687
+ state.status = "failed";
688
+ deps.persist?.(state);
689
+ deps.onProgress?.(state);
690
+ const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
691
+ return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
692
+ }
693
+ }
694
+
695
+ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
332
696
  const def: Taskflow = state.def;
333
697
  const layers = topoLayers(def.phases);
334
698
 
@@ -340,6 +704,14 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
340
704
  let gateBlocked = false;
341
705
  let gateReason = "";
342
706
  let gateOutput = "";
707
+ // `budgetBlocked` gates the skipping of remaining phases once the cap is hit.
708
+ // `budgetSkipped` records that a phase was *actually* skipped/truncated for
709
+ // budget — only then is the run terminal-status "blocked" (a cap crossed by the
710
+ // very last phase, with nothing left to skip, must NOT mark a good run failed).
711
+ let budgetBlocked = false;
712
+ let budgetSkipped = false;
713
+ let budgetReason = "";
714
+ const byId = new Map(def.phases.map((p) => [p.id, p]));
343
715
 
344
716
  for (const layer of layers) {
345
717
  if (deps.signal?.aborted) {
@@ -351,13 +723,36 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
351
723
  await mapWithConcurrencyLimit(layer, layerConcurrency, async (phase) => {
352
724
  // Snapshot prior state BEFORE marking running, so resume cache checks work.
353
725
  const prior = state.phases[phase.id];
354
- // Skip if a dependency failed, or an upstream gate blocked the flow.
355
- const failedDep = dependenciesOf(phase).some((d) => state.phases[d]?.status === "failed");
356
- if (gateBlocked || failedDep) {
726
+
727
+ // Determine whether this phase should run, or be skipped (and why).
728
+ const deps_ = dependenciesOf(phase);
729
+ const join = phase.join ?? "all";
730
+ // An `optional` dependency that failed still counts as satisfied.
731
+ const depOk = (d: string): boolean => {
732
+ const s = state.phases[d]?.status;
733
+ if (s === "done") return true;
734
+ if (s === "failed" && byId.get(d)?.optional) return true;
735
+ return false;
736
+ };
737
+ const depsSatisfied =
738
+ deps_.length === 0 ? true : join === "any" ? deps_.some(depOk) : deps_.every(depOk);
739
+
740
+ let skipReason: string | undefined;
741
+ if (gateBlocked) skipReason = `Gate blocked${gateReason ? `: ${gateReason}` : ""}`;
742
+ else if (budgetBlocked) skipReason = `Budget exceeded${budgetReason ? `: ${budgetReason}` : ""}`;
743
+ else if (!depsSatisfied)
744
+ skipReason = join === "any" ? "All dependencies failed or were skipped" : "Upstream dependency not satisfied";
745
+ else if (phase.when !== undefined) {
746
+ const condCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
747
+ if (!evaluateCondition(phase.when, condCtx)) skipReason = `Condition not met: ${phase.when}`;
748
+ }
749
+
750
+ if (skipReason) {
751
+ if (skipReason.startsWith("Budget exceeded")) budgetSkipped = true;
357
752
  state.phases[phase.id] = {
358
753
  id: phase.id,
359
754
  status: "skipped",
360
- error: gateBlocked ? `Gate blocked${gateReason ? `: ${gateReason}` : ""}` : "Upstream dependency failed",
755
+ error: skipReason,
361
756
  endedAt: Date.now(),
362
757
  usage: emptyUsage(),
363
758
  };
@@ -379,27 +774,58 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
379
774
  // Preserve the phase start time: executePhase returns a fresh PhaseState
380
775
  // that omits startedAt (cached/resumed results carry their own).
381
776
  state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
382
- if ((phase.type ?? "agent") === "gate" && ps.gate?.verdict === "block") {
777
+ // A blocking verdict (gate phase OR a rejected approval) halts the flow.
778
+ const ptype = phase.type ?? "agent";
779
+ if (ps.gate?.verdict === "block" && (ptype === "gate" || ptype === "approval")) {
383
780
  gateBlocked = true;
384
781
  gateReason = ps.gate.reason ?? "";
385
782
  gateOutput = ps.output ?? "";
386
783
  }
784
+ // A fan-out cut short by the cap is itself a budget skip.
785
+ if (ps.budgetTruncated) {
786
+ budgetBlocked = true;
787
+ budgetSkipped = true;
788
+ if (!budgetReason) budgetReason = "fan-out truncated by budget";
789
+ }
790
+ // Budget ceiling: once exceeded, remaining phases are skipped.
791
+ const ob = overBudget(state);
792
+ if (ob.over && !budgetBlocked) {
793
+ budgetBlocked = true;
794
+ budgetReason = ob.reason;
795
+ }
387
796
  deps.persist?.(state);
388
797
  deps.onProgress?.(state);
389
798
  });
390
799
  }
391
800
 
392
801
  const fp = finalPhase(def.phases);
393
- const finalState = state.phases[fp.id];
394
- const anyFailed = Object.values(state.phases).some((p) => p.status === "failed");
395
-
396
- state.status = aborted ? "paused" : gateBlocked ? "blocked" : anyFailed ? "failed" : "completed";
802
+ let finalState = state.phases[fp.id];
803
+ // If the designated final phase produced no output (skipped/blocked), fall
804
+ // back to the last phase (in definition order) that actually completed.
805
+ if (!finalState || finalState.status !== "done") {
806
+ const doneInOrder = def.phases.map((p) => state.phases[p.id]).filter((p) => p?.status === "done");
807
+ if (doneInOrder.length) finalState = doneInOrder[doneInOrder.length - 1];
808
+ }
809
+ // A failed non-optional phase fails the run; optional failures are tolerated.
810
+ const anyFailed = Object.entries(state.phases).some(
811
+ ([id, p]) => p.status === "failed" && !byId.get(id)?.optional,
812
+ );
813
+
814
+ state.status = aborted
815
+ ? "paused"
816
+ : gateBlocked || budgetSkipped
817
+ ? "blocked"
818
+ : anyFailed
819
+ ? "failed"
820
+ : "completed";
397
821
  deps.persist?.(state);
398
822
  deps.onProgress?.(state);
399
823
 
400
824
  let finalOutput = finalState?.output ?? "(no output)";
401
- if (gateBlocked && (!finalState || finalState.status === "skipped")) {
825
+ if (gateBlocked) {
402
826
  finalOutput = `Gate blocked the workflow.${gateReason ? `\nReason: ${gateReason}` : ""}${gateOutput ? `\n\n${gateOutput}` : ""}`;
827
+ } else if (budgetSkipped) {
828
+ finalOutput = `Budget exceeded — run halted.${budgetReason ? `\nReason: ${budgetReason}` : ""}${finalState?.output ? `\n\n${finalState.output}` : ""}`;
403
829
  }
404
830
 
405
831
  const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));