pi-taskflow 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DESIGN.md +15 -1
- package/README.md +115 -16
- package/examples/conditional-research.json +56 -0
- package/examples/guarded-refactor.json +50 -0
- package/extensions/agents.ts +8 -1
- package/extensions/index.ts +42 -18
- package/extensions/interpolate.ts +232 -1
- package/extensions/render.ts +47 -35
- package/extensions/runner.ts +127 -80
- package/extensions/runtime.ts +480 -54
- package/extensions/schema.ts +218 -6
- package/extensions/store.ts +76 -4
- package/extensions/usage.ts +42 -0
- package/package.json +2 -2
- package/skills/taskflow/SKILL.md +146 -2
- package/skills/taskflow/configuration.md +0 -2
package/extensions/runtime.ts
CHANGED
|
@@ -10,11 +10,29 @@
|
|
|
10
10
|
* result are skipped.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
+
import * as path from "node:path";
|
|
14
|
+
import * as fs from "node:fs";
|
|
13
15
|
import type { AgentConfig } from "./agents.ts";
|
|
14
|
-
import { coerceArray, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
16
|
+
import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
|
|
17
|
+
import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
|
|
18
|
+
import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
|
|
19
|
+
import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
|
|
20
|
+
import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
|
|
21
|
+
|
|
22
|
+
/**
 * A human-in-the-loop approval request raised by an `approval` phase.
 *
 * The runtime builds one of these and hands it to `RuntimeDeps.requestApproval`
 * when that callback is provided.
 */
export interface ApprovalRequest {
	/** Id of the `approval` phase that is asking for a decision. */
	phaseId: string;
	/** Interpolated prompt shown to the human. */
	message: string;
	/** Output of the immediately-upstream phase, for context. */
	upstream?: string;
}
|
|
30
|
+
|
|
31
|
+
/**
 * The human's decision. `edit` carries guidance passed downstream as the phase output.
 *
 * Returned by `RuntimeDeps.requestApproval` in response to an `ApprovalRequest`.
 */
export interface ApprovalDecision {
	/** Chosen outcome; `reject` halts the flow the same way a blocking gate does. */
	decision: "approve" | "reject" | "edit";
	/** Optional free-text note; when non-blank after trimming it is used as the phase output. */
	note?: string;
}
|
|
18
36
|
|
|
19
37
|
export interface RuntimeDeps {
|
|
20
38
|
cwd: string;
|
|
@@ -27,6 +45,12 @@ export interface RuntimeDeps {
|
|
|
27
45
|
onProgress?: (state: RunState) => void;
|
|
28
46
|
/** Injectable task runner (defaults to spawning a real subagent). Enables testing. */
|
|
29
47
|
runTask?: typeof runAgentTask;
|
|
48
|
+
/** Resolve an `approval` phase. Omit for non-interactive runs (auto-approve). */
|
|
49
|
+
requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
|
|
50
|
+
/** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
|
|
51
|
+
loadFlow?: (name: string) => Taskflow | undefined;
|
|
52
|
+
/** Internal: sub-flow call stack, for recursion detection. */
|
|
53
|
+
_stack?: string[];
|
|
30
54
|
}
|
|
31
55
|
|
|
32
56
|
export interface RuntimeResult {
|
|
@@ -52,6 +76,7 @@ function buildInterpolationContext(
|
|
|
52
76
|
|
|
53
77
|
function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
|
|
54
78
|
const failed = isFailed(r);
|
|
79
|
+
const attempts = attemptsOf(r);
|
|
55
80
|
return {
|
|
56
81
|
id,
|
|
57
82
|
status: failed ? "failed" : "done",
|
|
@@ -59,12 +84,60 @@ function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJs
|
|
|
59
84
|
json: parseJson && !failed ? safeParse(r.output) : undefined,
|
|
60
85
|
usage: r.usage,
|
|
61
86
|
model: r.model,
|
|
87
|
+
attempts: attempts > 1 ? attempts : undefined,
|
|
62
88
|
error: failed ? r.errorMessage || r.stderr || r.output : undefined,
|
|
63
89
|
inputHash,
|
|
64
90
|
endedAt: Date.now(),
|
|
65
91
|
};
|
|
66
92
|
}
|
|
67
93
|
|
|
94
|
+
/**
 * Number of attempts the retry wrapper recorded on a result.
 *
 * Falls back to 1 when `attempts` is absent, not a number, or not strictly
 * positive (this includes 0, negatives and NaN).
 */
function attemptsOf(r: RunResult): number {
	const { attempts } = r;
	if (typeof attempts !== "number") return 1;
	return attempts > 0 ? attempts : 1;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Cancellable pause used between retry attempts.
 *
 * Resolves (never rejects) after `ms` milliseconds, or as soon as `signal`
 * aborts — whichever happens first. Resolves immediately when `ms <= 0` or
 * when the signal is already aborted.
 */
function delay(ms: number, signal?: AbortSignal): Promise<void> {
	if (ms <= 0) return Promise.resolve();
	if (signal?.aborted) return Promise.resolve();

	return new Promise<void>((resolve) => {
		// Single exit path for both the timer firing and the abort event:
		// tear down whichever side is still pending, then settle.
		const finish = (): void => {
			clearTimeout(timer);
			signal?.removeEventListener("abort", finish);
			resolve();
		};
		const timer = setTimeout(finish, ms);
		signal?.addEventListener("abort", finish, { once: true });
	});
}
|
|
122
|
+
|
|
123
|
+
/**
 * Build a `failed` PhaseState for a phase that could not be executed at all.
 * The input hash is derived from the phase id and the error text, and usage is empty.
 */
function failPhase(id: string, error: string): PhaseState {
	const failed: PhaseState = {
		id,
		status: "failed",
		error,
		inputHash: hashInput(id, error),
		endedAt: Date.now(),
		usage: emptyUsage(),
	};
	return failed;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Sum the usage recorded on every phase so far and compare it with the flow's budget.
 *
 * Returns `{ over: false, reason: "" }` when the flow declares no budget or the
 * totals are within it. The cost cap is checked before the token cap; a cap is
 * only exceeded when the total is strictly greater than it.
 */
function overBudget(state: RunState): { over: boolean; reason: string } {
	const caps: Budget | undefined = state.def.budget;
	if (!caps) return { over: false, reason: "" };

	const perPhase = Object.values(state.phases).map((p) => p.usage ?? emptyUsage());
	const spent = aggregateUsage(perPhase);
	const { maxUSD, maxTokens } = caps;

	if (maxUSD !== undefined && spent.cost > maxUSD) {
		return { over: true, reason: `cost $${spent.cost.toFixed(3)} exceeded cap $${maxUSD}` };
	}

	// Tokens counted against the cap are input + output only.
	const tokens = spent.input + spent.output;
	if (maxTokens !== undefined && tokens > maxTokens) {
		return { over: true, reason: `tokens ${tokens} exceeded cap ${maxTokens}` };
	}

	return { over: false, reason: "" };
}
|
|
140
|
+
|
|
68
141
|
/** Merge several sub-results into a single PhaseState (for map/parallel). */
|
|
69
142
|
function mergePhaseState(
|
|
70
143
|
id: string,
|
|
@@ -72,27 +145,138 @@ function mergePhaseState(
|
|
|
72
145
|
inputHash: string,
|
|
73
146
|
parseJson: boolean,
|
|
74
147
|
): PhaseState {
|
|
75
|
-
const
|
|
148
|
+
const budgetSkips = results.filter((r) => r.stopReason === "budget-skipped");
|
|
149
|
+
const ran = results.filter((r) => r.stopReason !== "budget-skipped");
|
|
150
|
+
const anyFailed = ran.some(isFailed);
|
|
76
151
|
const usage = aggregateUsage(results.map((r) => r.usage));
|
|
152
|
+
// B12: surface the model(s) used in the fan-out so consumers can show
|
|
153
|
+
// which model produced the merged output.
|
|
154
|
+
const model = ran.find((r) => r.model !== undefined)?.model;
|
|
77
155
|
// Combine outputs as a labelled list; also expose a JSON array of outputs.
|
|
78
|
-
const combinedText =
|
|
79
|
-
.map((r, i) => `### [${i + 1}/${
|
|
156
|
+
const combinedText = ran
|
|
157
|
+
.map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
|
|
80
158
|
.join("\n\n---\n\n");
|
|
81
|
-
|
|
82
|
-
const
|
|
159
|
+
// Only successful runs feed the parsed JSON array (no error/skip strings).
|
|
160
|
+
const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
|
|
161
|
+
const failedCount = ran.filter(isFailed).length;
|
|
162
|
+
const attempts = results.reduce((sum, r) => sum + attemptsOf(r), 0);
|
|
163
|
+
const errors = ran.filter(isFailed).map((r) => `${r.agent}: ${r.errorMessage ?? r.stderr}`);
|
|
164
|
+
if (budgetSkips.length) errors.push(`${budgetSkips.length} item(s) skipped: budget exceeded`);
|
|
83
165
|
return {
|
|
84
166
|
id,
|
|
85
167
|
status: anyFailed ? "failed" : "done",
|
|
86
168
|
output: combinedText,
|
|
87
169
|
json: jsonArray,
|
|
88
170
|
usage,
|
|
89
|
-
|
|
90
|
-
|
|
171
|
+
model,
|
|
172
|
+
attempts: attempts > results.length ? attempts : undefined,
|
|
173
|
+
budgetTruncated: budgetSkips.length > 0 || undefined,
|
|
174
|
+
subProgress: { done: ran.length, total: results.length, running: 0, failed: failedCount },
|
|
175
|
+
error: errors.length ? errors.join("; ") : undefined,
|
|
91
176
|
inputHash,
|
|
92
177
|
endedAt: Date.now(),
|
|
93
178
|
};
|
|
94
179
|
}
|
|
95
180
|
|
|
181
|
+
/**
 * Create a live-update callback bound to one phase.
 *
 * Each update copies the subagent's streaming text, usage and model onto that
 * phase's state row (looked up afresh on every call, and left alone if the row
 * does not exist), then always calls `emitProgress` so the TUI can redraw.
 * Shared by all single-agent phases.
 */
function liveSink(state: RunState, phaseId: string, emitProgress: () => void): (l: LiveUpdate) => void {
	return (update: LiveUpdate): void => {
		const row = state.phases[phaseId];
		if (row) {
			Object.assign(row, { liveText: update.text, usage: update.usage, model: update.model });
		}
		emitProgress();
	};
}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
/** Largest file size in bytes (as reported by `fs.statSync`) that will be pre-read; bigger files are skipped. */
const CONTEXT_MAX_FILE_BYTES = 10 * 1024 * 1024; // 10 MB
/** Cap, in characters, on the combined pre-read text produced for a single phase. */
const MAX_TOTAL_CONTEXT_CHARS = 200_000;

/**
 * Pre-read files listed in a phase's `context` field and return them as
 * markdown code blocks. Handles:
 * - literal paths
 * - interpolation refs (e.g. `{steps.scout.json}` resolving to `["a.ts"]`)
 * - per-file truncation via `contextLimit` (default 8000 characters)
 *
 * The result is a single string that should be prepended to the phase task so
 * the subagent never needs to spend turns on file exploration.
 *
 * Paths are de-duplicated and resolved with `path.resolve`, i.e. relative
 * paths are taken against the process working directory. Entries that are not
 * regular files, exceed `CONTEXT_MAX_FILE_BYTES`, or cannot be read are skipped
 * with a console warning rather than failing the phase.
 *
 * @param phase Phase whose `context` / `contextLimit` fields are consulted.
 * @param ctx Interpolation context used to expand placeholders in the entries.
 * @returns The rendered blocks followed by a blank-line separator, or `""`
 *   when the phase lists no context or none of the listed files could be
 *   included (so nothing is prepended to the task).
 */
async function resolvePhaseContext(phase: Phase, ctx: InterpolationContext): Promise<string> {
	const entries = phase.context;
	if (!entries || entries.length === 0) return "";
	const limit = phase.contextLimit ?? 8000;

	// Expand every entry into zero or more candidate paths.
	const paths: string[] = [];
	for (const entry of entries) {
		const r = interpolate(entry, ctx);
		if (r.text === entry) {
			// Unchanged — literal path
			paths.push(entry);
			continue;
		}
		// Resolved — may be a JSON array from {steps.X.json}
		const parsed = safeParse(r.text);
		if (Array.isArray(parsed)) {
			for (const item of parsed) {
				if (typeof item === "string" && item.trim()) paths.push(item.trim());
			}
		} else if (typeof r.text === "string" && r.text.trim()) {
			paths.push(r.text.trim());
		}
	}

	// Diagnose JSON blobs masquerading as file paths — common when a context
	// entry like {steps.discover.output} resolves to {"files":[...]} instead
	// of a flat path or JSON array. The author should use {steps.discover.json.files}.
	const candidates: string[] = [];
	for (const p of new Set(paths)) {
		if (p.startsWith("{")) {
			console.warn(
				`[taskflow] Context entry "${p.slice(0, 80)}…" looks like a JSON object, not a file path. ` +
					`Use {steps.<id>.json.<field>} to extract a specific field.`,
			);
			continue;
		}
		candidates.push(p);
	}

	const blocks: string[] = [];
	for (const p of candidates) {
		try {
			const abs = path.resolve(p);
			const stat = fs.statSync(abs);
			if (!stat.isFile()) {
				console.warn(`[taskflow] Skipped context entry that is not a regular file: ${p}`);
				continue;
			}
			if (stat.size > CONTEXT_MAX_FILE_BYTES) {
				console.warn(`[taskflow] Skipped context file larger than ${CONTEXT_MAX_FILE_BYTES} bytes: ${p}`);
				continue;
			}
			const content = fs.readFileSync(abs, "utf-8");
			const truncated =
				content.length > limit
					? content.slice(0, limit) + `\n... [truncated ${content.length - limit} chars]`
					: content;
			// The fence language tag is the file extension; extension-less files use "txt".
			const ext = path.extname(p).slice(1) || "txt";
			blocks.push(`## File: ${p}\n\n\`\`\`${ext}\n${truncated}\n\`\`\``);
		} catch {
			console.warn(`[taskflow] Skipped unreadable context file: ${p}`);
		}
	}

	// Nothing usable: return "" so callers do not prepend a stray blank-line
	// prefix to the task (which would also leak into the phase's input hash).
	if (blocks.length === 0) return "";

	// Safety cap: truncate total context when too many files are listed.
	const joined = blocks.join("\n\n") + "\n\n";
	if (joined.length <= MAX_TOTAL_CONTEXT_CHARS) return joined;
	const dropped = joined.length - MAX_TOTAL_CONTEXT_CHARS;
	// Keep a trailing separator so the task text that follows is not glued to the marker.
	return joined.slice(0, MAX_TOTAL_CONTEXT_CHARS) + `\n\n... [truncated ${dropped} total chars]\n\n`;
}
|
|
278
|
+
|
|
279
|
+
|
|
96
280
|
async function executePhase(
|
|
97
281
|
phase: Phase,
|
|
98
282
|
state: RunState,
|
|
@@ -105,7 +289,13 @@ async function executePhase(
|
|
|
105
289
|
const previousOutput = lastCompletedOutput(state, phase);
|
|
106
290
|
const run = deps.runTask ?? runAgentTask;
|
|
107
291
|
|
|
108
|
-
|
|
292
|
+
// Resolve context pre-read files once, before any type branching.
|
|
293
|
+
// The content is prepended to every task so the subagent never spends
|
|
294
|
+
// turns on file exploration for files the flow author already knows.
|
|
295
|
+
const ctx = buildInterpolationContext(state, previousOutput);
|
|
296
|
+
const preRead = await resolvePhaseContext(phase, ctx);
|
|
297
|
+
|
|
298
|
+
const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
|
|
109
299
|
run(
|
|
110
300
|
deps.cwd,
|
|
111
301
|
deps.agents,
|
|
@@ -122,6 +312,48 @@ async function executePhase(
|
|
|
122
312
|
deps.globalThinking,
|
|
123
313
|
);
|
|
124
314
|
|
|
315
|
+
// Wrap each subagent call in the phase's retry policy. Usage is summed across
|
|
316
|
+
// attempts; the attempt count rides along on the result for the TUI.
|
|
317
|
+
const retry = phase.retry;
|
|
318
|
+
const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
|
|
319
|
+
const maxAttempts = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
|
|
320
|
+
const usages: UsageStats[] = [];
|
|
321
|
+
let last: RunResult | undefined;
|
|
322
|
+
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
323
|
+
if (deps.signal?.aborted) break;
|
|
324
|
+
last = await baseRun(agentName, task, onLive);
|
|
325
|
+
usages.push(last.usage);
|
|
326
|
+
// B6: aggregate and surface cumulative usage before the retry decision,
|
|
327
|
+
// so the TUI / budget guard see the in-flight spend on every attempt.
|
|
328
|
+
const liveRetry = state.phases[phase.id];
|
|
329
|
+
if (liveRetry) liveRetry.usage = aggregateUsage(usages);
|
|
330
|
+
if (!isFailed(last)) break;
|
|
331
|
+
// Stop retrying on abort or once the run is over budget.
|
|
332
|
+
if (deps.signal?.aborted || overBudget(state).over) break;
|
|
333
|
+
if (attempt < maxAttempts - 1) {
|
|
334
|
+
const wait = Math.min(60000, Math.round((retry?.backoffMs ?? 0) * (retry?.factor ?? 1) ** attempt));
|
|
335
|
+
await delay(wait, deps.signal);
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
// Aborted before any attempt ran → return a clean aborted result (no crash).
|
|
339
|
+
if (!last) {
|
|
340
|
+
return {
|
|
341
|
+
agent: agentName,
|
|
342
|
+
task,
|
|
343
|
+
exitCode: 1,
|
|
344
|
+
output: "",
|
|
345
|
+
stderr: "Aborted before execution",
|
|
346
|
+
usage: emptyUsage(),
|
|
347
|
+
stopReason: "aborted",
|
|
348
|
+
errorMessage: "Aborted before execution",
|
|
349
|
+
attempts: 0,
|
|
350
|
+
};
|
|
351
|
+
}
|
|
352
|
+
if (usages.length > 1) last.usage = aggregateUsage(usages);
|
|
353
|
+
last.attempts = usages.length;
|
|
354
|
+
return last;
|
|
355
|
+
};
|
|
356
|
+
|
|
125
357
|
const parseJson = phase.output === "json";
|
|
126
358
|
|
|
127
359
|
// Runs a list of sub-tasks with live fan-out progress + aggregate live usage/activity.
|
|
@@ -145,6 +377,20 @@ async function executePhase(
|
|
|
145
377
|
};
|
|
146
378
|
refresh();
|
|
147
379
|
return mapWithConcurrencyLimit(items, concurrency, async (it, idx) => {
|
|
380
|
+
// Budget guard: stop spawning new fan-out items once the run is over budget.
|
|
381
|
+
if (overBudget(state).over) {
|
|
382
|
+
done++;
|
|
383
|
+
refresh();
|
|
384
|
+
return {
|
|
385
|
+
agent: it.agent,
|
|
386
|
+
task: it.task,
|
|
387
|
+
exitCode: 0,
|
|
388
|
+
output: "(skipped: budget exceeded)",
|
|
389
|
+
stderr: "",
|
|
390
|
+
usage: emptyUsage(),
|
|
391
|
+
stopReason: "budget-skipped",
|
|
392
|
+
} satisfies RunResult;
|
|
393
|
+
}
|
|
148
394
|
running++;
|
|
149
395
|
refresh();
|
|
150
396
|
const r = await runOne(it.agent, it.task, (l) => {
|
|
@@ -162,33 +408,30 @@ async function executePhase(
|
|
|
162
408
|
});
|
|
163
409
|
};
|
|
164
410
|
|
|
165
|
-
|
|
166
|
-
|
|
411
|
+
// Single-agent phases: agent, gate, and reduce all run one subagent on an
|
|
412
|
+
// interpolated task. gate additionally parses a verdict; reduce simply pulls
|
|
413
|
+
// its inputs from `from` phases (already exposed via interpolation).
|
|
414
|
+
if (type === "agent" || type === "gate" || type === "reduce") {
|
|
167
415
|
const { text } = interpolate(phase.task ?? "", ctx);
|
|
168
|
-
const
|
|
416
|
+
const fullTask = preRead + text;
|
|
417
|
+
const inputHash = hashInput(phase.id, phase.agent ?? "", fullTask);
|
|
169
418
|
const cached = cachedPhase(prior, inputHash);
|
|
170
419
|
if (cached) return cached;
|
|
171
420
|
|
|
172
|
-
const
|
|
173
|
-
const r = await runOne(phase.agent ?? defaultAgent(deps), text, (l) => {
|
|
174
|
-
if (live) {
|
|
175
|
-
live.liveText = l.text;
|
|
176
|
-
live.usage = l.usage;
|
|
177
|
-
live.model = l.model;
|
|
178
|
-
}
|
|
179
|
-
emitProgress();
|
|
180
|
-
});
|
|
421
|
+
const r = await runOne(phase.agent ?? defaultAgent(deps), fullTask, liveSink(state, phase.id, emitProgress));
|
|
181
422
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
182
423
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
183
424
|
return ps;
|
|
184
425
|
}
|
|
185
426
|
|
|
186
427
|
if (type === "parallel") {
|
|
187
|
-
const
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
428
|
+
const branches = (phase.branches ?? []).map((b) => {
|
|
429
|
+
const r = interpolate(b.task, ctx);
|
|
430
|
+
return {
|
|
431
|
+
agent: b.agent ?? phase.agent ?? defaultAgent(deps),
|
|
432
|
+
task: preRead + r.text,
|
|
433
|
+
};
|
|
434
|
+
});
|
|
192
435
|
const inputHash = hashInput(phase.id, JSON.stringify(branches));
|
|
193
436
|
const cached = cachedPhase(prior, inputHash);
|
|
194
437
|
if (cached) return cached;
|
|
@@ -198,7 +441,6 @@ async function executePhase(
|
|
|
198
441
|
}
|
|
199
442
|
|
|
200
443
|
if (type === "map") {
|
|
201
|
-
const ctx = buildInterpolationContext(state, previousOutput);
|
|
202
444
|
const overResolved = interpolate(phase.over ?? "", ctx).text;
|
|
203
445
|
// `over` may itself be a placeholder that resolved to a JSON string.
|
|
204
446
|
const arr = coerceArray(safeParse(overResolved)) ?? coerceArray(directRef(phase.over ?? "", state));
|
|
@@ -217,7 +459,7 @@ async function executePhase(
|
|
|
217
459
|
const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
|
|
218
460
|
return {
|
|
219
461
|
agent: phase.agent ?? defaultAgent(deps),
|
|
220
|
-
task: interpolate(phase.task ?? "", localCtx).text,
|
|
462
|
+
task: preRead + interpolate(phase.task ?? "", localCtx).text,
|
|
221
463
|
};
|
|
222
464
|
});
|
|
223
465
|
const inputHash = hashInput(phase.id, JSON.stringify(tasks));
|
|
@@ -228,24 +470,123 @@ async function executePhase(
|
|
|
228
470
|
return mergePhaseState(phase.id, results, inputHash, parseJson);
|
|
229
471
|
}
|
|
230
472
|
|
|
231
|
-
if (type === "
|
|
473
|
+
if (type === "approval") {
|
|
232
474
|
const ctx = buildInterpolationContext(state, previousOutput);
|
|
233
|
-
|
|
234
|
-
const
|
|
235
|
-
const
|
|
475
|
+
const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
|
|
476
|
+
const inputHash = hashInput(phase.id, "approval", message);
|
|
477
|
+
const cached = cachedPhase(prior, inputHash);
|
|
478
|
+
if (cached) return cached;
|
|
479
|
+
|
|
480
|
+
// Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
|
|
481
|
+
if (!deps.requestApproval) {
|
|
482
|
+
return {
|
|
483
|
+
id: phase.id,
|
|
484
|
+
status: "done",
|
|
485
|
+
output: "(auto-approved: no interactive approver available)",
|
|
486
|
+
approval: { decision: "approve", auto: true },
|
|
487
|
+
usage: emptyUsage(),
|
|
488
|
+
inputHash,
|
|
489
|
+
endedAt: Date.now(),
|
|
490
|
+
};
|
|
491
|
+
}
|
|
492
|
+
const decision = await deps.requestApproval({ phaseId: phase.id, message, upstream: previousOutput });
|
|
493
|
+
const note = decision.note?.trim();
|
|
494
|
+
const ps: PhaseState = {
|
|
495
|
+
id: phase.id,
|
|
496
|
+
status: "done",
|
|
497
|
+
output: note || `(${decision.decision})`,
|
|
498
|
+
approval: { decision: decision.decision, note },
|
|
499
|
+
usage: emptyUsage(),
|
|
500
|
+
inputHash,
|
|
501
|
+
endedAt: Date.now(),
|
|
502
|
+
};
|
|
503
|
+
// A rejection halts the flow via the same mechanism as a blocking gate.
|
|
504
|
+
if (decision.decision === "reject") {
|
|
505
|
+
ps.gate = { verdict: "block", reason: note || "Rejected by user" };
|
|
506
|
+
}
|
|
507
|
+
return ps;
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
if (type === "flow") {
|
|
511
|
+
const ctx = buildInterpolationContext(state, previousOutput);
|
|
512
|
+
const name = phase.use;
|
|
513
|
+
if (!name) return failPhase(phase.id, `flow phase '${phase.id}' requires 'use'`);
|
|
514
|
+
if (!deps.loadFlow) return failPhase(phase.id, `flow phase '${phase.id}': no sub-flow loader available`);
|
|
515
|
+
const subDef = deps.loadFlow(name);
|
|
516
|
+
if (!subDef) return failPhase(phase.id, `flow phase '${phase.id}': saved flow not found: '${name}'`);
|
|
517
|
+
const stack = deps._stack ?? [];
|
|
518
|
+
if (name === state.flowName || stack.includes(name)) {
|
|
519
|
+
return failPhase(phase.id, `flow phase '${phase.id}': recursive sub-flow ${[...stack, state.flowName, name].join(" -> ")}`);
|
|
520
|
+
}
|
|
521
|
+
// Resolve sub-flow args (interpolate string values), then apply declared defaults.
|
|
522
|
+
const provided: Record<string, unknown> = {};
|
|
523
|
+
for (const [k, v] of Object.entries(phase.with ?? {})) {
|
|
524
|
+
provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
|
|
525
|
+
}
|
|
526
|
+
const subArgs = resolveArgs(subDef, provided);
|
|
527
|
+
const inputHash = hashInput(phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs));
|
|
236
528
|
const cached = cachedPhase(prior, inputHash);
|
|
237
529
|
if (cached) return cached;
|
|
238
530
|
|
|
239
531
|
const live = state.phases[phase.id];
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
532
|
+
// Sub-flows enforce their own budget; if they declare none, inherit the
|
|
533
|
+
// parent cap as a soft per-flow ceiling (best-effort — spend does not cross
|
|
534
|
+
// flow boundaries, so the parent's already-spent total is not subtracted).
|
|
535
|
+
const subDefEffective = subDef.budget || !state.def.budget ? subDef : { ...subDef, budget: state.def.budget };
|
|
536
|
+
const subState: RunState = {
|
|
537
|
+
runId: newRunId(subDef.name),
|
|
538
|
+
flowName: subDef.name,
|
|
539
|
+
def: subDefEffective,
|
|
540
|
+
args: subArgs,
|
|
541
|
+
status: "running",
|
|
542
|
+
phases: {},
|
|
543
|
+
createdAt: Date.now(),
|
|
544
|
+
updatedAt: Date.now(),
|
|
545
|
+
cwd: phase.cwd ?? deps.cwd,
|
|
546
|
+
};
|
|
547
|
+
// B8: pass this flow phase's preRead content to every sub-flow phase by
|
|
548
|
+
// wrapping runTask — sub-phase preRead still gets prepended on top of it.
|
|
549
|
+
const baseRunTask = deps.runTask ?? runAgentTask;
|
|
550
|
+
const subRunTask: typeof runAgentTask = (cwd, agents, agentName, subTask, opts, globalThinking) =>
|
|
551
|
+
baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
|
|
552
|
+
const subResult = await executeTaskflow(subState, {
|
|
553
|
+
...deps,
|
|
554
|
+
runTask: subRunTask,
|
|
555
|
+
_stack: [...stack, state.flowName],
|
|
556
|
+
persist: undefined,
|
|
557
|
+
onProgress: () => {
|
|
558
|
+
if (live) {
|
|
559
|
+
const ph = Object.values(subState.phases);
|
|
560
|
+
live.subProgress = {
|
|
561
|
+
done: ph.filter((p) => p.status === "done").length,
|
|
562
|
+
total: subDef.phases.length,
|
|
563
|
+
running: ph.filter((p) => p.status === "running").length,
|
|
564
|
+
failed: ph.filter((p) => p.status === "failed").length,
|
|
565
|
+
};
|
|
566
|
+
const cur = ph.find((p) => p.status === "running");
|
|
567
|
+
if (cur) live.liveText = `↳ ${cur.id}${cur.liveText ? `: ${cur.liveText}` : ""}`;
|
|
568
|
+
live.usage = aggregateUsage(ph.map((p) => p.usage ?? emptyUsage()));
|
|
569
|
+
}
|
|
570
|
+
emitProgress();
|
|
571
|
+
},
|
|
247
572
|
});
|
|
248
|
-
|
|
573
|
+
const sp = Object.values(subState.phases);
|
|
574
|
+
return {
|
|
575
|
+
id: phase.id,
|
|
576
|
+
status: subResult.ok ? "done" : "failed",
|
|
577
|
+
output: subResult.finalOutput,
|
|
578
|
+
json: parseJson ? safeParse(subResult.finalOutput) : undefined,
|
|
579
|
+
usage: subResult.totalUsage,
|
|
580
|
+
subProgress: {
|
|
581
|
+
done: sp.filter((p) => p.status === "done").length,
|
|
582
|
+
total: subDef.phases.length,
|
|
583
|
+
running: 0,
|
|
584
|
+
failed: sp.filter((p) => p.status === "failed").length,
|
|
585
|
+
},
|
|
586
|
+
error: subResult.ok ? undefined : `sub-flow '${name}' ${subResult.state.status}`,
|
|
587
|
+
inputHash,
|
|
588
|
+
endedAt: Date.now(),
|
|
589
|
+
};
|
|
249
590
|
}
|
|
250
591
|
|
|
251
592
|
return {
|
|
@@ -259,7 +600,7 @@ async function executePhase(
|
|
|
259
600
|
|
|
260
601
|
/** Resolve a `{steps.x.json}`-style ref directly to its parsed value (bypassing stringify). */
|
|
261
602
|
function directRef(over: string, state: RunState): unknown {
|
|
262
|
-
const m = over.match(/^\{steps\.([a-zA-Z0-9_]+)\.(output|json)(?:\.([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*))?\}$/);
|
|
603
|
+
const m = over.match(/^\{steps\.([a-zA-Z0-9_-]+)\.(output|json)(?:\.([a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*))?\}$/);
|
|
263
604
|
if (!m) return undefined;
|
|
264
605
|
const step = state.phases[m[1]];
|
|
265
606
|
if (!step || step.status !== "done") return undefined;
|
|
@@ -329,6 +670,29 @@ function asReason(v: unknown): string | undefined {
|
|
|
329
670
|
* Execute a full taskflow. Mutates and persists `state` as it progresses.
|
|
330
671
|
*/
|
|
331
672
|
export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
	const def: Taskflow = state.def;
	try {
		const result = await runTaskflowLayers(state, deps);
		return result;
	} catch (e) {
		// Crash containment: a phase that throws must not leave the run stuck in
		// "running" (that would break resume). Fail whatever was still in flight,
		// fail the run, persist + notify, and hand back a non-ok result instead of
		// propagating the exception.
		const message = e instanceof Error ? e.message : String(e);

		const inFlight = Object.values(state.phases).filter((p) => p.status === "running");
		for (const p of inFlight) {
			p.status = "failed";
			// Keep an error the phase already recorded; otherwise use the crash message.
			p.error = p.error ?? message;
			p.endedAt = Date.now();
		}

		state.status = "failed";
		deps.persist?.(state);
		deps.onProgress?.(state);

		const usages = Object.values(state.phases).map((p) => p.usage ?? emptyUsage());
		const totalUsage = aggregateUsage(usages);
		return {
			state,
			finalOutput: `Taskflow '${def.name}' crashed: ${message}`,
			ok: false,
			totalUsage,
		};
	}
}
|
|
694
|
+
|
|
695
|
+
async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
|
|
332
696
|
const def: Taskflow = state.def;
|
|
333
697
|
const layers = topoLayers(def.phases);
|
|
334
698
|
|
|
@@ -340,6 +704,14 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
|
|
|
340
704
|
let gateBlocked = false;
|
|
341
705
|
let gateReason = "";
|
|
342
706
|
let gateOutput = "";
|
|
707
|
+
// `budgetBlocked` gates the skipping of remaining phases once the cap is hit.
|
|
708
|
+
// `budgetSkipped` records that a phase was *actually* skipped/truncated for
|
|
709
|
+
// budget — only then is the run terminal-status "blocked" (a cap crossed by the
|
|
710
|
+
// very last phase, with nothing left to skip, must NOT mark a good run failed).
|
|
711
|
+
let budgetBlocked = false;
|
|
712
|
+
let budgetSkipped = false;
|
|
713
|
+
let budgetReason = "";
|
|
714
|
+
const byId = new Map(def.phases.map((p) => [p.id, p]));
|
|
343
715
|
|
|
344
716
|
for (const layer of layers) {
|
|
345
717
|
if (deps.signal?.aborted) {
|
|
@@ -351,13 +723,36 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
|
|
|
351
723
|
await mapWithConcurrencyLimit(layer, layerConcurrency, async (phase) => {
|
|
352
724
|
// Snapshot prior state BEFORE marking running, so resume cache checks work.
|
|
353
725
|
const prior = state.phases[phase.id];
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
726
|
+
|
|
727
|
+
// Determine whether this phase should run, or be skipped (and why).
|
|
728
|
+
const deps_ = dependenciesOf(phase);
|
|
729
|
+
const join = phase.join ?? "all";
|
|
730
|
+
// An `optional` dependency that failed still counts as satisfied.
|
|
731
|
+
const depOk = (d: string): boolean => {
|
|
732
|
+
const s = state.phases[d]?.status;
|
|
733
|
+
if (s === "done") return true;
|
|
734
|
+
if (s === "failed" && byId.get(d)?.optional) return true;
|
|
735
|
+
return false;
|
|
736
|
+
};
|
|
737
|
+
const depsSatisfied =
|
|
738
|
+
deps_.length === 0 ? true : join === "any" ? deps_.some(depOk) : deps_.every(depOk);
|
|
739
|
+
|
|
740
|
+
let skipReason: string | undefined;
|
|
741
|
+
if (gateBlocked) skipReason = `Gate blocked${gateReason ? `: ${gateReason}` : ""}`;
|
|
742
|
+
else if (budgetBlocked) skipReason = `Budget exceeded${budgetReason ? `: ${budgetReason}` : ""}`;
|
|
743
|
+
else if (!depsSatisfied)
|
|
744
|
+
skipReason = join === "any" ? "All dependencies failed or were skipped" : "Upstream dependency not satisfied";
|
|
745
|
+
else if (phase.when !== undefined) {
|
|
746
|
+
const condCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
|
|
747
|
+
if (!evaluateCondition(phase.when, condCtx)) skipReason = `Condition not met: ${phase.when}`;
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
if (skipReason) {
|
|
751
|
+
if (skipReason.startsWith("Budget exceeded")) budgetSkipped = true;
|
|
357
752
|
state.phases[phase.id] = {
|
|
358
753
|
id: phase.id,
|
|
359
754
|
status: "skipped",
|
|
360
|
-
error:
|
|
755
|
+
error: skipReason,
|
|
361
756
|
endedAt: Date.now(),
|
|
362
757
|
usage: emptyUsage(),
|
|
363
758
|
};
|
|
@@ -379,27 +774,58 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
|
|
|
379
774
|
// Preserve the phase start time: executePhase returns a fresh PhaseState
|
|
380
775
|
// that omits startedAt (cached/resumed results carry their own).
|
|
381
776
|
state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
|
|
382
|
-
|
|
777
|
+
// A blocking verdict (gate phase OR a rejected approval) halts the flow.
|
|
778
|
+
const ptype = phase.type ?? "agent";
|
|
779
|
+
if (ps.gate?.verdict === "block" && (ptype === "gate" || ptype === "approval")) {
|
|
383
780
|
gateBlocked = true;
|
|
384
781
|
gateReason = ps.gate.reason ?? "";
|
|
385
782
|
gateOutput = ps.output ?? "";
|
|
386
783
|
}
|
|
784
|
+
// A fan-out cut short by the cap is itself a budget skip.
|
|
785
|
+
if (ps.budgetTruncated) {
|
|
786
|
+
budgetBlocked = true;
|
|
787
|
+
budgetSkipped = true;
|
|
788
|
+
if (!budgetReason) budgetReason = "fan-out truncated by budget";
|
|
789
|
+
}
|
|
790
|
+
// Budget ceiling: once exceeded, remaining phases are skipped.
|
|
791
|
+
const ob = overBudget(state);
|
|
792
|
+
if (ob.over && !budgetBlocked) {
|
|
793
|
+
budgetBlocked = true;
|
|
794
|
+
budgetReason = ob.reason;
|
|
795
|
+
}
|
|
387
796
|
deps.persist?.(state);
|
|
388
797
|
deps.onProgress?.(state);
|
|
389
798
|
});
|
|
390
799
|
}
|
|
391
800
|
|
|
392
801
|
const fp = finalPhase(def.phases);
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
802
|
+
let finalState = state.phases[fp.id];
|
|
803
|
+
// If the designated final phase produced no output (skipped/blocked), fall
|
|
804
|
+
// back to the last phase (in definition order) that actually completed.
|
|
805
|
+
if (!finalState || finalState.status !== "done") {
|
|
806
|
+
const doneInOrder = def.phases.map((p) => state.phases[p.id]).filter((p) => p?.status === "done");
|
|
807
|
+
if (doneInOrder.length) finalState = doneInOrder[doneInOrder.length - 1];
|
|
808
|
+
}
|
|
809
|
+
// A failed non-optional phase fails the run; optional failures are tolerated.
|
|
810
|
+
const anyFailed = Object.entries(state.phases).some(
|
|
811
|
+
([id, p]) => p.status === "failed" && !byId.get(id)?.optional,
|
|
812
|
+
);
|
|
813
|
+
|
|
814
|
+
state.status = aborted
|
|
815
|
+
? "paused"
|
|
816
|
+
: gateBlocked || budgetSkipped
|
|
817
|
+
? "blocked"
|
|
818
|
+
: anyFailed
|
|
819
|
+
? "failed"
|
|
820
|
+
: "completed";
|
|
397
821
|
deps.persist?.(state);
|
|
398
822
|
deps.onProgress?.(state);
|
|
399
823
|
|
|
400
824
|
let finalOutput = finalState?.output ?? "(no output)";
|
|
401
|
-
if (gateBlocked
|
|
825
|
+
if (gateBlocked) {
|
|
402
826
|
finalOutput = `Gate blocked the workflow.${gateReason ? `\nReason: ${gateReason}` : ""}${gateOutput ? `\n\n${gateOutput}` : ""}`;
|
|
827
|
+
} else if (budgetSkipped) {
|
|
828
|
+
finalOutput = `Budget exceeded — run halted.${budgetReason ? `\nReason: ${budgetReason}` : ""}${finalState?.output ? `\n\n${finalState.output}` : ""}`;
|
|
403
829
|
}
|
|
404
830
|
|
|
405
831
|
const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
|