pi-taskflow 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,10 +11,26 @@
11
11
  */
12
12
 
13
13
  import type { AgentConfig } from "./agents.ts";
14
- import { coerceArray, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
15
- import { aggregateUsage, emptyUsage, isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult, type UsageStats } from "./runner.ts";
16
- import { dependenciesOf, finalPhase, type Phase, type Taskflow, topoLayers } from "./schema.ts";
17
- import { hashInput, type PhaseState, type RunState } from "./store.ts";
14
+ import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
15
+ import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
16
+ import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
17
+ import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
18
+ import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
19
+
20
/**
 * Payload handed to the approver when an `approval` phase pauses the run to
 * wait for a human decision.
 */
export interface ApprovalRequest {
  /** Id of the `approval` phase that raised the request. */
  phaseId: string;
  /** Prompt shown to the human, after interpolation. */
  message: string;
  /** Output of the immediately-upstream phase, supplied as context when there is one. */
  upstream?: string;
}
28
+
29
/**
 * Answer returned by the approver for an `approval` phase. With `edit`, the
 * note carries guidance that is passed downstream as the phase output.
 */
export interface ApprovalDecision {
  /** Outcome chosen by the human. */
  decision: "approve" | "reject" | "edit";
  /** Optional free-text remark accompanying the decision. */
  note?: string;
}
18
34
 
19
35
  export interface RuntimeDeps {
20
36
  cwd: string;
@@ -27,6 +43,12 @@ export interface RuntimeDeps {
27
43
  onProgress?: (state: RunState) => void;
28
44
  /** Injectable task runner (defaults to spawning a real subagent). Enables testing. */
29
45
  runTask?: typeof runAgentTask;
46
+ /** Resolve an `approval` phase. Omit for non-interactive runs (auto-approve). */
47
+ requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
48
+ /** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
49
+ loadFlow?: (name: string) => Taskflow | undefined;
50
+ /** Internal: sub-flow call stack, for recursion detection. */
51
+ _stack?: string[];
30
52
  }
31
53
 
32
54
  export interface RuntimeResult {
@@ -52,6 +74,7 @@ function buildInterpolationContext(
52
74
 
53
75
  function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
54
76
  const failed = isFailed(r);
77
+ const attempts = attemptsOf(r);
55
78
  return {
56
79
  id,
57
80
  status: failed ? "failed" : "done",
@@ -59,12 +82,60 @@ function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJs
59
82
  json: parseJson && !failed ? safeParse(r.output) : undefined,
60
83
  usage: r.usage,
61
84
  model: r.model,
85
+ attempts: attempts > 1 ? attempts : undefined,
62
86
  error: failed ? r.errorMessage || r.stderr || r.output : undefined,
63
87
  inputHash,
64
88
  endedAt: Date.now(),
65
89
  };
66
90
  }
67
91
 
92
/**
 * Number of attempts the retry wrapper recorded on `r`. Anything that is not
 * a positive number (missing, zero, negative, NaN, non-numeric) counts as 1.
 */
function attemptsOf(r: RunResult): number {
  const recorded = r.attempts;
  if (typeof recorded !== "number") return 1;
  return recorded > 0 ? recorded : 1;
}
97
+
98
/**
 * Cancellable delay used between retry attempts.
 *
 * The returned promise never rejects. It resolves after `ms` milliseconds, or
 * as soon as `signal` aborts, whichever happens first.
 *
 * @param ms Time to wait in milliseconds. Zero, negative and NaN values
 *   resolve immediately without scheduling a timer.
 * @param signal Optional abort signal. If it is already aborted the promise
 *   resolves immediately; if it aborts while waiting, the pending timer is
 *   cleared and the promise resolves at once.
 * @returns A promise that resolves once the wait is over or was cancelled.
 */
function delay(ms: number, signal?: AbortSignal): Promise<void> {
  // `!(ms > 0)` also rejects NaN, which a plain `ms <= 0` test lets through to
  // setTimeout. Checking the signal up front avoids creating a timer only to
  // clear it again straight away.
  if (!(ms > 0) || signal?.aborted) return Promise.resolve();

  // Timers store the delay as a signed 32-bit integer; anything larger is
  // coerced to 1 ms by the runtime, so clamp to the longest representable wait.
  const maxTimerDelayMs = 2147483647;

  return new Promise<void>((resolve) => {
    const onAbort = (): void => {
      clearTimeout(timer);
      resolve();
    };
    const timer = setTimeout(() => {
      // Timer won the race: detach the abort listener so it does not linger on
      // a long-lived signal.
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, Math.min(ms, maxTimerDelayMs));
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
120
+
121
/**
 * Build a terminal `failed` PhaseState for phase `id` carrying `error`.
 * The record has empty usage, is stamped with the current time, and its
 * input hash is derived from the id and the error message.
 */
function failPhase(id: string, error: string): PhaseState {
  const failed: PhaseState = {
    id,
    status: "failed",
    error,
    inputHash: hashInput(id, error),
    endedAt: Date.now(),
    usage: emptyUsage(),
  };
  return failed;
}
124
+
125
/**
 * Sum the usage of every phase recorded so far and compare it with the flow's
 * budget. The cost cap is checked before the token cap; tokens are counted as
 * input + output. A flow without a budget is never over.
 *
 * @returns `over: true` with a human-readable `reason` when a cap has been
 *   exceeded (strictly greater than), otherwise `over: false` and an empty reason.
 */
function overBudget(state: RunState): { over: boolean; reason: string } {
  const withinBudget = { over: false, reason: "" };

  const cap: Budget | undefined = state.def.budget;
  if (!cap) return withinBudget;

  const perPhase = Object.values(state.phases).map((phase) => phase.usage ?? emptyUsage());
  const spent = aggregateUsage(perPhase);

  const { maxUSD, maxTokens } = cap;
  if (maxUSD !== undefined && spent.cost > maxUSD) {
    return { over: true, reason: `cost $${spent.cost.toFixed(3)} exceeded cap $${maxUSD}` };
  }

  const tokens = spent.input + spent.output;
  if (maxTokens !== undefined && tokens > maxTokens) {
    return { over: true, reason: `tokens ${tokens} exceeded cap ${maxTokens}` };
  }

  return withinBudget;
}
138
+
68
139
  /** Merge several sub-results into a single PhaseState (for map/parallel). */
69
140
  function mergePhaseState(
70
141
  id: string,
@@ -72,27 +143,51 @@ function mergePhaseState(
72
143
  inputHash: string,
73
144
  parseJson: boolean,
74
145
  ): PhaseState {
75
- const anyFailed = results.some(isFailed);
146
+ const budgetSkips = results.filter((r) => r.stopReason === "budget-skipped");
147
+ const ran = results.filter((r) => r.stopReason !== "budget-skipped");
148
+ const anyFailed = ran.some(isFailed);
76
149
  const usage = aggregateUsage(results.map((r) => r.usage));
77
150
  // Combine outputs as a labelled list; also expose a JSON array of outputs.
78
- const combinedText = results
79
- .map((r, i) => `### [${i + 1}/${results.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
151
+ const combinedText = ran
152
+ .map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
80
153
  .join("\n\n---\n\n");
81
- const jsonArray = parseJson ? results.map((r) => safeParse(r.output) ?? r.output) : undefined;
82
- const failedCount = results.filter(isFailed).length;
154
+ // Only successful runs feed the parsed JSON array (no error/skip strings).
155
+ const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
156
+ const failedCount = ran.filter(isFailed).length;
157
+ const attempts = results.reduce((sum, r) => sum + attemptsOf(r), 0);
158
+ const errors = ran.filter(isFailed).map((r) => `${r.agent}: ${r.errorMessage ?? r.stderr}`);
159
+ if (budgetSkips.length) errors.push(`${budgetSkips.length} item(s) skipped: budget exceeded`);
83
160
  return {
84
161
  id,
85
162
  status: anyFailed ? "failed" : "done",
86
163
  output: combinedText,
87
164
  json: jsonArray,
88
165
  usage,
89
- subProgress: { done: results.length, total: results.length, running: 0, failed: failedCount },
90
- error: anyFailed ? results.filter(isFailed).map((r) => `${r.agent}: ${r.errorMessage ?? r.stderr}`).join("; ") : undefined,
166
+ attempts: attempts > results.length ? attempts : undefined,
167
+ budgetTruncated: budgetSkips.length > 0 || undefined,
168
+ subProgress: { done: ran.length, total: results.length, running: 0, failed: failedCount },
169
+ error: errors.length ? errors.join("; ") : undefined,
91
170
  inputHash,
92
171
  endedAt: Date.now(),
93
172
  };
94
173
  }
95
174
 
175
/**
 * Build the live-update callback for one phase: every streamed update from the
 * subagent is copied onto that phase's row in `state.phases` (text, usage and
 * model), after which `emitProgress` is invoked. When no row exists for
 * `phaseId` the update is dropped, but progress is still emitted.
 */
function liveSink(state: RunState, phaseId: string, emitProgress: () => void): (l: LiveUpdate) => void {
  return function onLiveUpdate(update: LiveUpdate): void {
    // Look the row up on every update rather than once, so a row that is
    // replaced while the subagent is streaming still receives the data.
    const row = state.phases[phaseId];
    if (row) {
      Object.assign(row, { liveText: update.text, usage: update.usage, model: update.model });
    }
    emitProgress();
  };
}
190
+
96
191
  async function executePhase(
97
192
  phase: Phase,
98
193
  state: RunState,
@@ -105,7 +200,7 @@ async function executePhase(
105
200
  const previousOutput = lastCompletedOutput(state, phase);
106
201
  const run = deps.runTask ?? runAgentTask;
107
202
 
108
- const runOne = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
203
+ const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
109
204
  run(
110
205
  deps.cwd,
111
206
  deps.agents,
@@ -122,6 +217,44 @@ async function executePhase(
122
217
  deps.globalThinking,
123
218
  );
124
219
 
220
+ // Wrap each subagent call in the phase's retry policy. Usage is summed across
221
+ // attempts; the attempt count rides along on the result for the TUI.
222
+ const retry = phase.retry;
223
+ const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
224
+ const maxAttempts = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
225
+ const usages: UsageStats[] = [];
226
+ let last: RunResult | undefined;
227
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
228
+ if (deps.signal?.aborted) break;
229
+ last = await baseRun(agentName, task, onLive);
230
+ usages.push(last.usage);
231
+ if (!isFailed(last)) break;
232
+ // Stop retrying on abort or once the run is over budget.
233
+ if (deps.signal?.aborted || overBudget(state).over) break;
234
+ if (attempt < maxAttempts - 1) {
235
+ const wait = Math.min(60000, Math.round((retry?.backoffMs ?? 0) * (retry?.factor ?? 1) ** attempt));
236
+ await delay(wait, deps.signal);
237
+ }
238
+ }
239
+ // Aborted before any attempt ran → return a clean aborted result (no crash).
240
+ if (!last) {
241
+ return {
242
+ agent: agentName,
243
+ task,
244
+ exitCode: 1,
245
+ output: "",
246
+ stderr: "Aborted before execution",
247
+ usage: emptyUsage(),
248
+ stopReason: "aborted",
249
+ errorMessage: "Aborted before execution",
250
+ attempts: 0,
251
+ };
252
+ }
253
+ if (usages.length > 1) last.usage = aggregateUsage(usages);
254
+ last.attempts = usages.length;
255
+ return last;
256
+ };
257
+
125
258
  const parseJson = phase.output === "json";
126
259
 
127
260
  // Runs a list of sub-tasks with live fan-out progress + aggregate live usage/activity.
@@ -145,6 +278,20 @@ async function executePhase(
145
278
  };
146
279
  refresh();
147
280
  return mapWithConcurrencyLimit(items, concurrency, async (it, idx) => {
281
+ // Budget guard: stop spawning new fan-out items once the run is over budget.
282
+ if (overBudget(state).over) {
283
+ done++;
284
+ refresh();
285
+ return {
286
+ agent: it.agent,
287
+ task: it.task,
288
+ exitCode: 0,
289
+ output: "(skipped: budget exceeded)",
290
+ stderr: "",
291
+ usage: emptyUsage(),
292
+ stopReason: "budget-skipped",
293
+ } satisfies RunResult;
294
+ }
148
295
  running++;
149
296
  refresh();
150
297
  const r = await runOne(it.agent, it.task, (l) => {
@@ -162,22 +309,17 @@ async function executePhase(
162
309
  });
163
310
  };
164
311
 
165
- if (type === "agent" || type === "gate") {
312
+ // Single-agent phases: agent, gate, and reduce all run one subagent on an
313
+ // interpolated task. gate additionally parses a verdict; reduce simply pulls
314
+ // its inputs from `from` phases (already exposed via interpolation).
315
+ if (type === "agent" || type === "gate" || type === "reduce") {
166
316
  const ctx = buildInterpolationContext(state, previousOutput);
167
317
  const { text } = interpolate(phase.task ?? "", ctx);
168
318
  const inputHash = hashInput(phase.id, phase.agent ?? "", text);
169
319
  const cached = cachedPhase(prior, inputHash);
170
320
  if (cached) return cached;
171
321
 
172
- const live = state.phases[phase.id];
173
- const r = await runOne(phase.agent ?? defaultAgent(deps), text, (l) => {
174
- if (live) {
175
- live.liveText = l.text;
176
- live.usage = l.usage;
177
- live.model = l.model;
178
- }
179
- emitProgress();
180
- });
322
+ const r = await runOne(phase.agent ?? defaultAgent(deps), text, liveSink(state, phase.id, emitProgress));
181
323
  const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
182
324
  if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
183
325
  return ps;
@@ -228,24 +370,117 @@ async function executePhase(
228
370
  return mergePhaseState(phase.id, results, inputHash, parseJson);
229
371
  }
230
372
 
231
- if (type === "reduce") {
373
+ if (type === "approval") {
232
374
  const ctx = buildInterpolationContext(state, previousOutput);
233
- // Inputs for reduce come from `from` phases; interpolation already exposes them.
234
- const { text } = interpolate(phase.task ?? "", ctx);
235
- const inputHash = hashInput(phase.id, text);
375
+ const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
376
+ const inputHash = hashInput(phase.id, "approval", message);
377
+ const cached = cachedPhase(prior, inputHash);
378
+ if (cached) return cached;
379
+
380
+ // Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
381
+ if (!deps.requestApproval) {
382
+ return {
383
+ id: phase.id,
384
+ status: "done",
385
+ output: "(auto-approved: no interactive approver available)",
386
+ approval: { decision: "approve", auto: true },
387
+ usage: emptyUsage(),
388
+ inputHash,
389
+ endedAt: Date.now(),
390
+ };
391
+ }
392
+ const decision = await deps.requestApproval({ phaseId: phase.id, message, upstream: previousOutput });
393
+ const note = decision.note?.trim();
394
+ const ps: PhaseState = {
395
+ id: phase.id,
396
+ status: "done",
397
+ output: note || `(${decision.decision})`,
398
+ approval: { decision: decision.decision, note },
399
+ usage: emptyUsage(),
400
+ inputHash,
401
+ endedAt: Date.now(),
402
+ };
403
+ // A rejection halts the flow via the same mechanism as a blocking gate.
404
+ if (decision.decision === "reject") {
405
+ ps.gate = { verdict: "block", reason: note || "Rejected by user" };
406
+ }
407
+ return ps;
408
+ }
409
+
410
+ if (type === "flow") {
411
+ const ctx = buildInterpolationContext(state, previousOutput);
412
+ const name = phase.use;
413
+ if (!name) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use'`);
414
+ if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
415
+ const subDef = deps.loadFlow(name);
416
+ if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${name}'`);
417
+ const stack = deps._stack ?? [];
418
+ if (name === state.flowName || stack.includes(name)) {
419
+ return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, name].join(" -> ")}`);
420
+ }
421
+ // Resolve sub-flow args (interpolate string values), then apply declared defaults.
422
+ const provided: Record<string, unknown> = {};
423
+ for (const [k, v] of Object.entries(phase.with ?? {})) {
424
+ provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
425
+ }
426
+ const subArgs = resolveArgs(subDef, provided);
427
+ const inputHash = hashInput(phase.id, `flow:${name}`, JSON.stringify(subArgs));
236
428
  const cached = cachedPhase(prior, inputHash);
237
429
  if (cached) return cached;
238
430
 
239
431
  const live = state.phases[phase.id];
240
- const r = await runOne(phase.agent ?? defaultAgent(deps), text, (l) => {
241
- if (live) {
242
- live.liveText = l.text;
243
- live.usage = l.usage;
244
- live.model = l.model;
245
- }
246
- emitProgress();
432
+ // Sub-flows enforce their own budget; if they declare none, inherit the
433
+ // parent cap as a soft per-flow ceiling (best-effort — spend does not cross
434
+ // flow boundaries, so the parent's already-spent total is not subtracted).
435
+ const subDefEffective = subDef.budget || !state.def.budget ? subDef : { ...subDef, budget: state.def.budget };
436
+ const subState: RunState = {
437
+ runId: newRunId(subDef.name),
438
+ flowName: subDef.name,
439
+ def: subDefEffective,
440
+ args: subArgs,
441
+ status: "running",
442
+ phases: {},
443
+ createdAt: Date.now(),
444
+ updatedAt: Date.now(),
445
+ cwd: deps.cwd,
446
+ };
447
+ const subResult = await executeTaskflow(subState, {
448
+ ...deps,
449
+ _stack: [...stack, state.flowName],
450
+ persist: undefined,
451
+ onProgress: () => {
452
+ if (live) {
453
+ const ph = Object.values(subState.phases);
454
+ live.subProgress = {
455
+ done: ph.filter((p) => p.status === "done").length,
456
+ total: subDef.phases.length,
457
+ running: ph.filter((p) => p.status === "running").length,
458
+ failed: ph.filter((p) => p.status === "failed").length,
459
+ };
460
+ const cur = ph.find((p) => p.status === "running");
461
+ if (cur) live.liveText = `↳ ${cur.id}${cur.liveText ? `: ${cur.liveText}` : ""}`;
462
+ live.usage = aggregateUsage(ph.map((p) => p.usage ?? emptyUsage()));
463
+ }
464
+ emitProgress();
465
+ },
247
466
  });
248
- return resultToPhaseState(phase.id, r, inputHash, parseJson);
467
+ const sp = Object.values(subState.phases);
468
+ return {
469
+ id: phase.id,
470
+ status: subResult.ok ? "done" : "failed",
471
+ output: subResult.finalOutput,
472
+ json: parseJson ? safeParse(subResult.finalOutput) : undefined,
473
+ usage: subResult.totalUsage,
474
+ subProgress: {
475
+ done: sp.filter((p) => p.status === "done").length,
476
+ total: subDef.phases.length,
477
+ running: 0,
478
+ failed: sp.filter((p) => p.status === "failed").length,
479
+ },
480
+ error: subResult.ok ? undefined : `sub-flow '${name}' ${subResult.state.status}`,
481
+ inputHash,
482
+ endedAt: Date.now(),
483
+ };
249
484
  }
250
485
 
251
486
  return {
@@ -329,6 +564,29 @@ function asReason(v: unknown): string | undefined {
329
564
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
330
565
  */
331
566
export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
  const flow: Taskflow = state.def;
  try {
    return await runTaskflowLayers(state, deps);
  } catch (thrown) {
    // Anything thrown while running the layers is turned into a failed result
    // instead of propagating: a run left in "running" cannot be resumed.
    const reason = thrown instanceof Error ? thrown.message : String(thrown);

    // Close out every phase that was still in flight, keeping an error it
    // may already carry.
    const inFlight = Object.values(state.phases).filter((phase) => phase.status === "running");
    inFlight.forEach((phase) => {
      phase.status = "failed";
      phase.error = phase.error ?? reason;
      phase.endedAt = Date.now();
    });

    state.status = "failed";
    deps.persist?.(state);
    deps.onProgress?.(state);

    // Usage is summed after the hooks ran, over whatever phases are recorded.
    const usagePerPhase = Object.values(state.phases).map((phase) => phase.usage ?? emptyUsage());
    const totalUsage = aggregateUsage(usagePerPhase);
    const finalOutput = `Taskflow '${flow.name}' crashed: ${reason}`;
    return { state, finalOutput, ok: false, totalUsage };
  }
}
588
+
589
+ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
332
590
  const def: Taskflow = state.def;
333
591
  const layers = topoLayers(def.phases);
334
592
 
@@ -340,6 +598,14 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
340
598
  let gateBlocked = false;
341
599
  let gateReason = "";
342
600
  let gateOutput = "";
601
+ // `budgetBlocked` gates the skipping of remaining phases once the cap is hit.
602
+ // `budgetSkipped` records that a phase was *actually* skipped/truncated for
603
+ // budget — only then is the run terminal-status "blocked" (a cap crossed by the
604
+ // very last phase, with nothing left to skip, must NOT mark a good run failed).
605
+ let budgetBlocked = false;
606
+ let budgetSkipped = false;
607
+ let budgetReason = "";
608
+ const byId = new Map(def.phases.map((p) => [p.id, p]));
343
609
 
344
610
  for (const layer of layers) {
345
611
  if (deps.signal?.aborted) {
@@ -351,13 +617,36 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
351
617
  await mapWithConcurrencyLimit(layer, layerConcurrency, async (phase) => {
352
618
  // Snapshot prior state BEFORE marking running, so resume cache checks work.
353
619
  const prior = state.phases[phase.id];
354
- // Skip if a dependency failed, or an upstream gate blocked the flow.
355
- const failedDep = dependenciesOf(phase).some((d) => state.phases[d]?.status === "failed");
356
- if (gateBlocked || failedDep) {
620
+
621
+ // Determine whether this phase should run, or be skipped (and why).
622
+ const deps_ = dependenciesOf(phase);
623
+ const join = phase.join ?? "all";
624
+ // An `optional` dependency that failed still counts as satisfied.
625
+ const depOk = (d: string): boolean => {
626
+ const s = state.phases[d]?.status;
627
+ if (s === "done") return true;
628
+ if (s === "failed" && byId.get(d)?.optional) return true;
629
+ return false;
630
+ };
631
+ const depsSatisfied =
632
+ deps_.length === 0 ? true : join === "any" ? deps_.some(depOk) : deps_.every(depOk);
633
+
634
+ let skipReason: string | undefined;
635
+ if (gateBlocked) skipReason = `Gate blocked${gateReason ? `: ${gateReason}` : ""}`;
636
+ else if (budgetBlocked) skipReason = `Budget exceeded${budgetReason ? `: ${budgetReason}` : ""}`;
637
+ else if (!depsSatisfied)
638
+ skipReason = join === "any" ? "All dependencies failed or were skipped" : "Upstream dependency not satisfied";
639
+ else if (phase.when !== undefined) {
640
+ const condCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
641
+ if (!evaluateCondition(phase.when, condCtx)) skipReason = `Condition not met: ${phase.when}`;
642
+ }
643
+
644
+ if (skipReason) {
645
+ if (skipReason.startsWith("Budget exceeded")) budgetSkipped = true;
357
646
  state.phases[phase.id] = {
358
647
  id: phase.id,
359
648
  status: "skipped",
360
- error: gateBlocked ? `Gate blocked${gateReason ? `: ${gateReason}` : ""}` : "Upstream dependency failed",
649
+ error: skipReason,
361
650
  endedAt: Date.now(),
362
651
  usage: emptyUsage(),
363
652
  };
@@ -366,37 +655,71 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
366
655
  return;
367
656
  }
368
657
 
658
+ const startedAt = Date.now();
369
659
  state.phases[phase.id] = {
370
660
  ...(state.phases[phase.id] ?? { id: phase.id }),
371
661
  id: phase.id,
372
662
  status: "running",
373
- startedAt: Date.now(),
663
+ startedAt,
374
664
  };
375
665
  deps.onProgress?.(state);
376
666
 
377
667
  const ps = await executePhase(phase, state, deps, prior, () => deps.onProgress?.(state));
378
- state.phases[phase.id] = ps;
379
- if ((phase.type ?? "agent") === "gate" && ps.gate?.verdict === "block") {
668
+ // Preserve the phase start time: executePhase returns a fresh PhaseState
669
+ // that omits startedAt (cached/resumed results carry their own).
670
+ state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
671
+ // A blocking verdict (gate phase OR a rejected approval) halts the flow.
672
+ const ptype = phase.type ?? "agent";
673
+ if (ps.gate?.verdict === "block" && (ptype === "gate" || ptype === "approval")) {
380
674
  gateBlocked = true;
381
675
  gateReason = ps.gate.reason ?? "";
382
676
  gateOutput = ps.output ?? "";
383
677
  }
678
+ // A fan-out cut short by the cap is itself a budget skip.
679
+ if (ps.budgetTruncated) {
680
+ budgetBlocked = true;
681
+ budgetSkipped = true;
682
+ if (!budgetReason) budgetReason = "fan-out truncated by budget";
683
+ }
684
+ // Budget ceiling: once exceeded, remaining phases are skipped.
685
+ const ob = overBudget(state);
686
+ if (ob.over && !budgetBlocked) {
687
+ budgetBlocked = true;
688
+ budgetReason = ob.reason;
689
+ }
384
690
  deps.persist?.(state);
385
691
  deps.onProgress?.(state);
386
692
  });
387
693
  }
388
694
 
389
695
  const fp = finalPhase(def.phases);
390
- const finalState = state.phases[fp.id];
391
- const anyFailed = Object.values(state.phases).some((p) => p.status === "failed");
392
-
393
- state.status = aborted ? "paused" : gateBlocked ? "blocked" : anyFailed ? "failed" : "completed";
696
+ let finalState = state.phases[fp.id];
697
+ // If the designated final phase produced no output (skipped/blocked), fall
698
+ // back to the last phase (in definition order) that actually completed.
699
+ if (!finalState || finalState.status !== "done") {
700
+ const doneInOrder = def.phases.map((p) => state.phases[p.id]).filter((p) => p?.status === "done");
701
+ if (doneInOrder.length) finalState = doneInOrder[doneInOrder.length - 1];
702
+ }
703
+ // A failed non-optional phase fails the run; optional failures are tolerated.
704
+ const anyFailed = Object.entries(state.phases).some(
705
+ ([id, p]) => p.status === "failed" && !byId.get(id)?.optional,
706
+ );
707
+
708
+ state.status = aborted
709
+ ? "paused"
710
+ : gateBlocked || budgetSkipped
711
+ ? "blocked"
712
+ : anyFailed
713
+ ? "failed"
714
+ : "completed";
394
715
  deps.persist?.(state);
395
716
  deps.onProgress?.(state);
396
717
 
397
718
  let finalOutput = finalState?.output ?? "(no output)";
398
- if (gateBlocked && (!finalState || finalState.status === "skipped")) {
719
+ if (gateBlocked) {
399
720
  finalOutput = `Gate blocked the workflow.${gateReason ? `\nReason: ${gateReason}` : ""}${gateOutput ? `\n\n${gateOutput}` : ""}`;
721
+ } else if (budgetSkipped) {
722
+ finalOutput = `Budget exceeded — run halted.${budgetReason ? `\nReason: ${budgetReason}` : ""}${finalState?.output ? `\n\n${finalState.output}` : ""}`;
400
723
  }
401
724
 
402
725
  const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));