pi-taskflow 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/examples/conditional-research.json +1 -1
- package/examples/guarded-refactor.json +2 -2
- package/extensions/agents.ts +54 -34
- package/extensions/index.ts +7 -4
- package/extensions/interpolate.ts +24 -3
- package/extensions/render.ts +7 -3
- package/extensions/runner.ts +35 -17
- package/extensions/runs-view.ts +3 -0
- package/extensions/runtime.ts +97 -15
- package/extensions/schema.ts +22 -5
- package/extensions/store.ts +30 -6
- package/package.json +1 -1
- package/skills/taskflow/SKILL.md +42 -10
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"id": "report",
|
|
48
48
|
"type": "reduce",
|
|
49
49
|
"from": ["review"],
|
|
50
|
-
"dependsOn": ["review"],
|
|
50
|
+
"dependsOn": ["review", "deep", "quick"],
|
|
51
51
|
"agent": "doc-writer",
|
|
52
52
|
"task": "Write a clean markdown brief on \"{args.topic}\" from the validated research:\n\n{steps.deep.output}{steps.quick.output}",
|
|
53
53
|
"final": true
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"id": "implement",
|
|
27
27
|
"type": "agent",
|
|
28
28
|
"agent": "executor_code",
|
|
29
|
-
"dependsOn": ["approve"],
|
|
29
|
+
"dependsOn": ["approve", "plan"],
|
|
30
30
|
"task": "Implement the approved plan for {args.target}.\nPlan:\n{steps.plan.output}\nExtra human guidance (if any):\n{steps.approve.output}",
|
|
31
31
|
"retry": { "max": 1, "backoffMs": 1000 }
|
|
32
32
|
},
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
"id": "summary",
|
|
42
42
|
"type": "reduce",
|
|
43
43
|
"from": ["review"],
|
|
44
|
-
"dependsOn": ["review"],
|
|
44
|
+
"dependsOn": ["review", "implement"],
|
|
45
45
|
"agent": "doc-writer",
|
|
46
46
|
"task": "Write a short changelog entry summarizing what was done:\n\n{steps.implement.output}",
|
|
47
47
|
"final": true
|
package/extensions/agents.ts
CHANGED
|
@@ -44,42 +44,56 @@ function loadAgentsFromDir(dir: string, source: "user" | "project"): AgentConfig
|
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
for (const entry of entries) {
|
|
47
|
-
if (!entry.name.endsWith(".md")) continue;
|
|
48
|
-
if (!entry.isFile() && !entry.isSymbolicLink()) continue;
|
|
49
|
-
|
|
50
|
-
const filePath = path.join(dir, entry.name);
|
|
51
|
-
let content: string;
|
|
52
47
|
try {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
continue;
|
|
56
|
-
}
|
|
48
|
+
if (!entry.name.endsWith(".md")) continue;
|
|
49
|
+
if (!entry.isFile() && !entry.isSymbolicLink()) continue;
|
|
57
50
|
|
|
58
|
-
|
|
51
|
+
const filePath = path.join(dir, entry.name);
|
|
52
|
+
let content: string;
|
|
59
53
|
try {
|
|
60
|
-
|
|
54
|
+
content = fs.readFileSync(filePath, "utf-8");
|
|
61
55
|
} catch {
|
|
62
|
-
|
|
63
|
-
return { frontmatter: {} as Record<string, string>, body: "" };
|
|
56
|
+
continue;
|
|
64
57
|
}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
name
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
58
|
+
|
|
59
|
+
const { frontmatter, body } = (() => {
|
|
60
|
+
try {
|
|
61
|
+
return parseFrontmatter<Record<string, unknown>>(content);
|
|
62
|
+
} catch {
|
|
63
|
+
// A single malformed agent file must not break discovery for every flow.
|
|
64
|
+
return { frontmatter: {} as Record<string, unknown>, body: "" };
|
|
65
|
+
}
|
|
66
|
+
})();
|
|
67
|
+
if (!frontmatter.name || !frontmatter.description) continue;
|
|
68
|
+
|
|
69
|
+
// frontmatter is YAML-parsed: tools may be a comma-separated string ("a, b")
|
|
70
|
+
// OR a YAML sequence ([a, b]). Handle both forms.
|
|
71
|
+
const rawTools = frontmatter.tools;
|
|
72
|
+
const tools: string[] | undefined = Array.isArray(rawTools)
|
|
73
|
+
? rawTools.map((t) => String(t).trim()).filter(Boolean)
|
|
74
|
+
: rawTools !== undefined && rawTools !== null
|
|
75
|
+
? String(rawTools)
|
|
76
|
+
.split(",")
|
|
77
|
+
.map((t) => t.trim())
|
|
78
|
+
.filter(Boolean)
|
|
79
|
+
: undefined;
|
|
80
|
+
|
|
81
|
+
agents.push({
|
|
82
|
+
name: String(frontmatter.name),
|
|
83
|
+
description: String(frontmatter.description),
|
|
84
|
+
tools: tools && tools.length > 0 ? tools : undefined,
|
|
85
|
+
model: frontmatter.model === undefined ? undefined : String(frontmatter.model),
|
|
86
|
+
thinking: frontmatter.thinking === undefined ? undefined : String(frontmatter.thinking),
|
|
87
|
+
systemPrompt: body,
|
|
88
|
+
source,
|
|
89
|
+
filePath,
|
|
90
|
+
});
|
|
91
|
+
} catch {
|
|
92
|
+
// Defense-in-depth: a single bad agent file must not break discovery
|
|
93
|
+
// for the entire flow (e.g. exotic YAML shapes, runtime errors in
|
|
94
|
+
// field access, symlink races, etc.).
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
83
97
|
}
|
|
84
98
|
return agents;
|
|
85
99
|
}
|
|
@@ -128,9 +142,15 @@ export function discoverAgents(
|
|
|
128
142
|
for (const [name, override] of Object.entries(overrides)) {
|
|
129
143
|
const agent = agentMap.get(name);
|
|
130
144
|
if (agent) {
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
145
|
+
// Clone before mutating: agentMap owns the original AgentConfig
|
|
146
|
+
// (loaded from disk in loadAgentsFromDir). Mutating it in place
|
|
147
|
+
// would cause cross-contamination for any caller that retains a
|
|
148
|
+
// reference and invokes discoverAgents again with different overrides.
|
|
149
|
+
const mutated: AgentConfig = { ...agent };
|
|
150
|
+
if (override.model !== undefined) mutated.model = override.model;
|
|
151
|
+
if (override.thinking !== undefined) mutated.thinking = override.thinking;
|
|
152
|
+
if (override.tools !== undefined) mutated.tools = override.tools;
|
|
153
|
+
agentMap.set(name, mutated);
|
|
134
154
|
}
|
|
135
155
|
}
|
|
136
156
|
}
|
package/extensions/index.ts
CHANGED
|
@@ -108,10 +108,6 @@ async function runFlow(
|
|
|
108
108
|
onUpdate: ((p: AgentToolResult<TaskflowDetails>) => void) | undefined,
|
|
109
109
|
existing?: RunState,
|
|
110
110
|
): Promise<RuntimeResult> {
|
|
111
|
-
const settings = readSubagentSettings();
|
|
112
|
-
const scope: AgentScope = def.agentScope ?? "user";
|
|
113
|
-
const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
|
|
114
|
-
|
|
115
111
|
const state = existing ?? makeRunState(def, args, ctx.cwd);
|
|
116
112
|
|
|
117
113
|
const emit = (s: RunState, finalOutput?: string) => {
|
|
@@ -166,6 +162,13 @@ async function runFlow(
|
|
|
166
162
|
: undefined;
|
|
167
163
|
|
|
168
164
|
try {
|
|
165
|
+
// Discover settings/agents inside try so a YAML/IO crash in
|
|
166
|
+
// discoverAgents or readSubagentSettings (F-001) is caught and
|
|
167
|
+
// the heartbeat timer is cleared by the finally block below.
|
|
168
|
+
const settings = readSubagentSettings();
|
|
169
|
+
const scope: AgentScope = def.agentScope ?? "user";
|
|
170
|
+
const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
|
|
171
|
+
|
|
169
172
|
const result = await executeTaskflow(state, {
|
|
170
173
|
cwd: ctx.cwd,
|
|
171
174
|
agents,
|
|
@@ -27,10 +27,13 @@ export interface InterpolationResult {
|
|
|
27
27
|
missing: string[];
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
export function interpolate(
|
|
30
|
+
export function interpolate(
|
|
31
|
+
template: string | null | undefined,
|
|
32
|
+
ctx: InterpolationContext,
|
|
33
|
+
): InterpolationResult {
|
|
31
34
|
const missing: string[] = [];
|
|
32
35
|
|
|
33
|
-
const text = template.replace(PLACEHOLDER, (whole, path: string) => {
|
|
36
|
+
const text = String(template ?? "").replace(PLACEHOLDER, (whole, path: string) => {
|
|
34
37
|
const value = resolvePath(path, ctx);
|
|
35
38
|
if (value === undefined) {
|
|
36
39
|
missing.push(path);
|
|
@@ -134,6 +137,24 @@ export function safeParse(text: string): unknown {
|
|
|
134
137
|
}
|
|
135
138
|
}
|
|
136
139
|
}
|
|
140
|
+
// Anti-pattern detection (v0.0.8.1): array followed by a stray top-level
|
|
141
|
+
// "key": value. A common LLM mistake — the model appends
|
|
142
|
+
// `"deferred": [...]` after a JSON array, producing a non-JSON hybrid that
|
|
143
|
+
// none of the above strategies can recover. We surface a diagnostic hint
|
|
144
|
+
// so flow authors can spot the bug fast.
|
|
145
|
+
//
|
|
146
|
+
// We check the original (trimmed) input rather than the slice tail,
|
|
147
|
+
// because `lastIndexOf(close)` lands on the *last* bracket — for the
|
|
148
|
+
// anti-pattern the stray key is between the array's `]` and the trailing
|
|
149
|
+
// `]`, not after the last one.
|
|
150
|
+
if (/]\s*[\},]?\s*"[^"\n]+"\s*:/.test(trimmed)) {
|
|
151
|
+
console.warn(
|
|
152
|
+
"[pi-taskflow safeParse] input looks like a JSON array followed by a stray top-level key " +
|
|
153
|
+
`(pattern: [{...}], "key": ...). This is not valid JSON. ` +
|
|
154
|
+
`Hint: put extra data as array members (e.g. {"id":"D-001","status":"deferred",...}) ` +
|
|
155
|
+
`or split into a separate phase.`,
|
|
156
|
+
);
|
|
157
|
+
}
|
|
137
158
|
return undefined;
|
|
138
159
|
}
|
|
139
160
|
|
|
@@ -142,7 +163,7 @@ export function coerceArray(value: unknown): unknown[] | null {
|
|
|
142
163
|
if (Array.isArray(value)) return value;
|
|
143
164
|
if (value && typeof value === "object") {
|
|
144
165
|
// {items: [...]} or {results: [...]} convenience
|
|
145
|
-
for (const key of ["items", "results", "list", "data"]) {
|
|
166
|
+
for (const key of ["items", "results", "list", "data", "findings"]) {
|
|
146
167
|
const v = (value as Record<string, unknown>)[key];
|
|
147
168
|
if (Array.isArray(v)) return v;
|
|
148
169
|
}
|
package/extensions/render.ts
CHANGED
|
@@ -71,7 +71,10 @@ function agentRole(phase: Phase, ps: PhaseState | undefined, theme: Theme): stri
|
|
|
71
71
|
|
|
72
72
|
/**
 * Format a phase's cost as a muted dollar string.
 *
 * Returns "" when there is no usage or the cost is falsy (0, NaN, undefined).
 * Costs of one cent or more are shown with 2 decimals; smaller costs get 4
 * decimals so sub-cent amounts do not collapse to "$0.00".
 */
function costStr(usage: UsageStats | undefined, theme: Theme): string {
  const amount = usage?.cost;
  if (!amount) return "";
  const digits = amount >= 0.01 ? 2 : 4;
  return theme.fg("muted", `$${amount.toFixed(digits)}`);
}
|
|
76
79
|
|
|
77
80
|
function aggregateCost(state: RunState): number {
|
|
@@ -174,6 +177,7 @@ function phaseDetail(phase: Phase, ps: PhaseState | undefined, theme: Theme): st
|
|
|
174
177
|
const color = d === "reject" ? "error" : d === "edit" ? "warning" : "success";
|
|
175
178
|
let a = theme.fg("warning", "⚠") + " " + theme.fg(color as Parameters<typeof theme.fg>[0], theme.bold(d.toUpperCase()));
|
|
176
179
|
if (ps.approval.auto) a += theme.fg("dim", " auto");
|
|
180
|
+
if (cost) a += ` ${cost}`;
|
|
177
181
|
if (time) a += ` ${time}`;
|
|
178
182
|
if (ps.warnings?.length) a += theme.fg("warning", ` ⚠${ps.warnings.length}`);
|
|
179
183
|
return a;
|
|
@@ -228,8 +232,8 @@ function headerLine(state: RunState, theme: Theme): string {
|
|
|
228
232
|
if (state.status === "blocked") line += theme.fg("error", " · blocked");
|
|
229
233
|
const cost = aggregateCost(state);
|
|
230
234
|
const budget = state.def.budget;
|
|
231
|
-
if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost.toFixed(
|
|
232
|
-
else if (cost) line += theme.fg("muted", ` · $${cost.toFixed(
|
|
235
|
+
if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}/$${budget.maxUSD}`);
|
|
236
|
+
else if (cost) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}`);
|
|
233
237
|
const el = runElapsed(state);
|
|
234
238
|
if (el) line += theme.fg("dim", ` · ${elapsed(el)}`);
|
|
235
239
|
return line;
|
package/extensions/runner.ts
CHANGED
|
@@ -203,14 +203,10 @@ function summarizeToolCall(name: string, args: Record<string, unknown>): string
|
|
|
203
203
|
}
|
|
204
204
|
}
|
|
205
205
|
|
|
206
|
-
async function writePromptToTempFile(
|
|
207
|
-
const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
|
|
208
|
-
const safeName = agentName.replace(/[^\w.-]+/g, "_");
|
|
209
|
-
const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
|
|
206
|
+
/**
 * Write `prompt` to `filePath` (UTF-8, mode 0o600), routed through
 * `withFileMutationQueue` for that path.
 *
 * The caller chooses the path; this function does not create the directory
 * and returns nothing.
 */
async function writePromptToTempFile(filePath: string, prompt: string): Promise<void> {
  const writeOptions = { encoding: "utf-8", mode: 0o600 } as const;
  const writePrompt = async (): Promise<void> => {
    await fs.promises.writeFile(filePath, prompt, writeOptions);
  };
  await withFileMutationQueue(filePath, writePrompt);
}
|
|
215
211
|
|
|
216
212
|
function getPiInvocation(args: string[]): { command: string; args: string[] } {
|
|
@@ -284,9 +280,13 @@ export async function runAgentTask(
|
|
|
284
280
|
|
|
285
281
|
try {
|
|
286
282
|
if (agent.systemPrompt.trim()) {
|
|
287
|
-
|
|
288
|
-
tmpPromptDir
|
|
289
|
-
|
|
283
|
+
// Allocate the temp dir + path BEFORE any fallible I/O so that if
|
|
284
|
+
// writeFile throws, tmpPromptDir/tmpPromptPath are already set and
|
|
285
|
+
// the finally block can clean up the directory (F-004).
|
|
286
|
+
tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
|
|
287
|
+
const safeName = agent.name.replace(/[^\w.-]+/g, "_");
|
|
288
|
+
tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
|
|
289
|
+
await writePromptToTempFile(tmpPromptPath, agent.systemPrompt);
|
|
290
290
|
args.push("--append-system-prompt", tmpPromptPath);
|
|
291
291
|
}
|
|
292
292
|
args.push(`Task: ${task}`);
|
|
@@ -319,15 +319,25 @@ export async function runAgentTask(
|
|
|
319
319
|
if (buffer.trim()) processLine(buffer);
|
|
320
320
|
resolve(code ?? 0);
|
|
321
321
|
});
|
|
322
|
-
proc.on("error", () =>
|
|
322
|
+
proc.on("error", (err) => {
|
|
323
|
+
if (!result.stderr) result.stderr = err.message;
|
|
324
|
+
if (!result.errorMessage) result.errorMessage = err.message;
|
|
325
|
+
resolve(1);
|
|
326
|
+
});
|
|
323
327
|
|
|
324
328
|
if (opts.signal) {
|
|
325
329
|
const kill = () => {
|
|
326
330
|
wasAborted = true;
|
|
327
331
|
proc.kill("SIGTERM");
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
332
|
+
// Force-kill fallback. proc.kill("SIGKILL") is idempotent if
|
|
333
|
+
// the process already exited, and `proc.killed` is set true
|
|
334
|
+
// synchronously by the SIGTERM above — so the previous
|
|
335
|
+
// `if (!proc.killed)` guard would skip SIGKILL entirely,
|
|
336
|
+
// hanging forever on a child that ignores SIGTERM.
|
|
337
|
+
// .unref() keeps the timer from holding the event loop open
|
|
338
|
+
// after the process is gone.
|
|
339
|
+
const forceKill = setTimeout(() => proc.kill("SIGKILL"), 5000);
|
|
340
|
+
forceKill.unref();
|
|
331
341
|
};
|
|
332
342
|
if (opts.signal.aborted) kill();
|
|
333
343
|
else opts.signal.addEventListener("abort", kill, { once: true });
|
|
@@ -349,12 +359,20 @@ export async function runAgentTask(
|
|
|
349
359
|
// `output`: upstream providers (e.g. a Cloudflare challenge page) can
|
|
350
360
|
// surface huge HTML/JSON in errorMessage, and that garbage would
|
|
351
361
|
// otherwise flow into downstream phase interpolations.
|
|
352
|
-
if
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
362
|
+
// Sanitization must run whenever the run failed, even if some output
|
|
363
|
+
// was already emitted (e.g. crash mid-stream with a partial result):
|
|
364
|
+
// an unsanitized errorMessage would still leak into PhaseState and
|
|
365
|
+
// downstream interpolation contexts. (F-013)
|
|
366
|
+
if (isFailed(result)) {
|
|
367
|
+
if (!result.output) {
|
|
368
|
+
result.output = TRANSPORT_ERROR_PLACEHOLDER;
|
|
369
|
+
if (!result.errorMessage) {
|
|
370
|
+
result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
if (result.errorMessage) {
|
|
374
|
+
result.errorMessage = sanitizeErrorMessage(result.errorMessage);
|
|
356
375
|
}
|
|
357
|
-
result.errorMessage = sanitizeErrorMessage(result.errorMessage);
|
|
358
376
|
}
|
|
359
377
|
return result;
|
|
360
378
|
} finally {
|
package/extensions/runs-view.ts
CHANGED
|
@@ -50,6 +50,9 @@ export class RunHistoryComponent {
|
|
|
50
50
|
private cachedLines?: string[];
|
|
51
51
|
|
|
52
52
|
constructor(runs: RunState[], theme: Theme, onDone: (result?: RunHistoryResult) => void) {
|
|
53
|
+
if (!runs.length) {
|
|
54
|
+
throw new Error("RunHistoryComponent requires at least one run");
|
|
55
|
+
}
|
|
53
56
|
this.runs = runs;
|
|
54
57
|
this.theme = theme;
|
|
55
58
|
this.onDone = onDone;
|
package/extensions/runtime.ts
CHANGED
|
@@ -551,14 +551,20 @@ async function executePhase(
|
|
|
551
551
|
baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
|
|
552
552
|
const subResult = await executeTaskflow(subState, {
|
|
553
553
|
...deps,
|
|
554
|
+
// Override deps.cwd with the flow phase's own cwd so that sub-flow
|
|
555
|
+
// phases without an explicit cwd derive their subagents from the
|
|
556
|
+
// flow's cwd (not the caller's cwd).
|
|
557
|
+
cwd: phase.cwd ?? deps.cwd,
|
|
554
558
|
runTask: subRunTask,
|
|
555
559
|
_stack: [...stack, state.flowName],
|
|
556
560
|
persist: undefined,
|
|
557
561
|
onProgress: () => {
|
|
558
562
|
if (live) {
|
|
559
563
|
const ph = Object.values(subState.phases);
|
|
564
|
+
// B-F015: `done` must include both success and failure so the
|
|
565
|
+
// renderer's `done - failed` shows the true success count.
|
|
560
566
|
live.subProgress = {
|
|
561
|
-
done: ph.filter((p) => p.status === "done").length,
|
|
567
|
+
done: ph.filter((p) => p.status === "done" || p.status === "failed").length,
|
|
562
568
|
total: subDef.phases.length,
|
|
563
569
|
running: ph.filter((p) => p.status === "running").length,
|
|
564
570
|
failed: ph.filter((p) => p.status === "failed").length,
|
|
@@ -577,8 +583,11 @@ async function executePhase(
|
|
|
577
583
|
output: subResult.finalOutput,
|
|
578
584
|
json: parseJson ? safeParse(subResult.finalOutput) : undefined,
|
|
579
585
|
usage: subResult.totalUsage,
|
|
586
|
+
// B-F015: include failed in `done` so the renderer's
|
|
587
|
+
// `done - failed` formula gives the success count (matches the
|
|
588
|
+
// map/parallel runner's overlapping-counter convention).
|
|
580
589
|
subProgress: {
|
|
581
|
-
done: sp.filter((p) => p.status === "done").length,
|
|
590
|
+
done: sp.filter((p) => p.status === "done" || p.status === "failed").length,
|
|
582
591
|
total: subDef.phases.length,
|
|
583
592
|
running: 0,
|
|
584
593
|
failed: sp.filter((p) => p.status === "failed").length,
|
|
@@ -649,7 +658,10 @@ export function parseGateVerdict(output: string): { verdict: "pass" | "block"; r
|
|
|
649
658
|
if (typeof o.continue === "boolean") return { verdict: o.continue ? "pass" : "block", reason: asReason(o.reason) };
|
|
650
659
|
if (typeof o.pass === "boolean") return { verdict: o.pass ? "pass" : "block", reason: asReason(o.reason) };
|
|
651
660
|
if (typeof o.verdict === "string") {
|
|
652
|
-
|
|
661
|
+
// Note: do NOT include standalone "no" — natural-language verdicts like
|
|
662
|
+
// "No issues found" / "no errors" would otherwise be false-positive BLOCK.
|
|
663
|
+
// Fail-open covers any ambiguous text.
|
|
664
|
+
const block = /block|fail|stop|reject|halt/i.test(o.verdict);
|
|
653
665
|
return { verdict: block ? "block" : "pass", reason: asReason(o.reason) };
|
|
654
666
|
}
|
|
655
667
|
}
|
|
@@ -666,11 +678,86 @@ function asReason(v: unknown): string | undefined {
|
|
|
666
678
|
return typeof v === "string" && v.trim() ? v.trim() : undefined;
|
|
667
679
|
}
|
|
668
680
|
|
|
681
|
+
/**
 * Best-effort invocation of the host-supplied `persist` and `onProgress`
 * callbacks, in that order.
 *
 * A failure in a host callback must never replace the runtime's outcome:
 * neither the crash message built in `executeTaskflow`'s catch block nor the
 * final output of a successful run. The callbacks are observability hooks, so
 * the run survives their failure. Each callback is guarded independently: a
 * failing `persist` does not prevent `onProgress` from running.
 *
 * Used at checkpoint call sites (run start, phase completion, terminal state).
 * For high-frequency live updates inside a phase, use `safeProgress`.
 *
 * @param deps  Runtime dependencies; `persist` and `onProgress` are optional.
 * @param state Run state handed to both callbacks unchanged.
 */
function safeEmit(deps: RuntimeDeps, state: RunState): void {
  invokeCallbackSafely(() => deps.persist?.(state));
  invokeCallbackSafely(() => deps.onProgress?.(state));
}

/**
 * Like `safeEmit`, but for the live-update channel only.
 *
 * Skips `persist` and swallows any failure from the host-supplied
 * `onProgress`, so a misbehaving progress sink cannot disrupt an in-flight
 * phase.
 *
 * @param deps  Runtime dependencies; `onProgress` is optional.
 * @param state Run state handed to `onProgress` unchanged.
 */
function safeProgress(deps: RuntimeDeps, state: RunState): void {
  invokeCallbackSafely(() => deps.onProgress?.(state));
}

/**
 * Run `call` and discard any failure it produces.
 *
 * Covers both failure modes of a host callback: a synchronous throw, and a
 * returned promise/thenable that later rejects. Without the rejection handler
 * an async callback would surface as an unhandled rejection, outside the
 * protection the try/catch appears to give.
 */
function invokeCallbackSafely(call: () => unknown): void {
  try {
    const outcome = call();
    const then = (outcome as { then?: unknown } | null | undefined)?.then;
    if (typeof then === "function") {
      // Attach a no-op rejection handler; the resolved value is not needed.
      then.call(outcome, undefined, () => {
        // host callback rejected: must not break the run
      });
    }
  } catch {
    // host callback threw: must not break the run
  }
}
|
|
718
|
+
|
|
669
719
|
/**
 * Append an automatic reviewer gate to `def` when the flow has no gate of its own.
 *
 * Mutates `def` in place:
 *  - does nothing when `implicitGate` is explicitly `false`, when `phases` is
 *    missing or empty, or when any phase is a `gate`, an `approval`, or already
 *    carries the implicit gate's id (which makes repeated calls a no-op);
 *  - otherwise marks the last phase `final` if no phase is marked yet, so the
 *    injected gate does not become the flow's final output;
 *  - then pushes a `gate` phase that depends on every existing phase.
 */
function ensureImplicitGate(def: Taskflow): void {
  const IMPLICIT_GATE_ID = "_implicit-gate";

  // Respect explicit opt-out. The cast is narrow on purpose: it reads one
  // optional boolean without switching off type checking for the whole object.
  if ((def as Taskflow & { implicitGate?: boolean }).implicitGate === false) return;

  // A malformed definition is left untouched here so the failure is reported
  // by the normal execution path instead of being thrown from this helper.
  const phases = def.phases;
  if (!Array.isArray(phases) || phases.length === 0) return;

  const hasGate = phases.some(
    (p) => p.type === "gate" || p.type === "approval" || p.id === IMPLICIT_GATE_ID,
  );
  if (hasGate) return;

  // The last existing phase is the effective "final" phase: pin it so the
  // injected gate doesn't become the finalOutput.
  const lastPhase = phases[phases.length - 1];
  if (!phases.some((p) => p.final)) {
    lastPhase.final = true;
  }

  // Snapshot the ids before pushing so the gate never depends on itself.
  const allIds = phases.map((p) => p.id);
  phases.push({
    id: IMPLICIT_GATE_ID,
    type: "gate",
    dependsOn: allIds,
    agent: "reviewer",
    task: `Review all phase outputs from this taskflow for accuracy and consistency.

For each upstream phase, scan its output for:
1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
2. **Internal contradictions**: Do any phases contradict each other?
3. **Completeness**: Is any output truncated, empty, or anomalously short?
4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.

Output:
- If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
- If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
  });
}

/**
 * Execute a full taskflow. Mutates and persists `state` as it progresses.
 */
|
|
757
|
+
|
|
672
758
|
export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
|
|
673
759
|
const def: Taskflow = state.def;
|
|
760
|
+
ensureImplicitGate(def);
|
|
674
761
|
try {
|
|
675
762
|
return await runTaskflowLayers(state, deps);
|
|
676
763
|
} catch (e) {
|
|
@@ -685,8 +772,7 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
|
|
|
685
772
|
}
|
|
686
773
|
}
|
|
687
774
|
state.status = "failed";
|
|
688
|
-
deps
|
|
689
|
-
deps.onProgress?.(state);
|
|
775
|
+
safeEmit(deps, state);
|
|
690
776
|
const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
|
|
691
777
|
return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
|
|
692
778
|
}
|
|
@@ -697,8 +783,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
697
783
|
const layers = topoLayers(def.phases);
|
|
698
784
|
|
|
699
785
|
state.status = "running";
|
|
700
|
-
deps
|
|
701
|
-
deps.onProgress?.(state);
|
|
786
|
+
safeEmit(deps, state);
|
|
702
787
|
|
|
703
788
|
let aborted = false;
|
|
704
789
|
let gateBlocked = false;
|
|
@@ -756,8 +841,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
756
841
|
endedAt: Date.now(),
|
|
757
842
|
usage: emptyUsage(),
|
|
758
843
|
};
|
|
759
|
-
deps
|
|
760
|
-
deps.onProgress?.(state);
|
|
844
|
+
safeEmit(deps, state);
|
|
761
845
|
return;
|
|
762
846
|
}
|
|
763
847
|
|
|
@@ -768,9 +852,9 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
768
852
|
status: "running",
|
|
769
853
|
startedAt,
|
|
770
854
|
};
|
|
771
|
-
deps
|
|
855
|
+
safeProgress(deps, state);
|
|
772
856
|
|
|
773
|
-
const ps = await executePhase(phase, state, deps, prior, () => deps
|
|
857
|
+
const ps = await executePhase(phase, state, deps, prior, () => safeProgress(deps, state));
|
|
774
858
|
// Preserve the phase start time: executePhase returns a fresh PhaseState
|
|
775
859
|
// that omits startedAt (cached/resumed results carry their own).
|
|
776
860
|
state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
|
|
@@ -793,8 +877,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
793
877
|
budgetBlocked = true;
|
|
794
878
|
budgetReason = ob.reason;
|
|
795
879
|
}
|
|
796
|
-
deps
|
|
797
|
-
deps.onProgress?.(state);
|
|
880
|
+
safeEmit(deps, state);
|
|
798
881
|
});
|
|
799
882
|
}
|
|
800
883
|
|
|
@@ -818,8 +901,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
818
901
|
: anyFailed
|
|
819
902
|
? "failed"
|
|
820
903
|
: "completed";
|
|
821
|
-
deps
|
|
822
|
-
deps.onProgress?.(state);
|
|
904
|
+
safeEmit(deps, state);
|
|
823
905
|
|
|
824
906
|
let finalOutput = finalState?.output ?? "(no output)";
|
|
825
907
|
if (gateBlocked) {
|
package/extensions/schema.ts
CHANGED
|
@@ -147,6 +147,12 @@ export const TaskflowSchema = Type.Object(
|
|
|
147
147
|
}),
|
|
148
148
|
),
|
|
149
149
|
phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
|
|
150
|
+
implicitGate: Type.Optional(
|
|
151
|
+
Type.Boolean({
|
|
152
|
+
description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
|
|
153
|
+
default: true,
|
|
154
|
+
}),
|
|
155
|
+
),
|
|
150
156
|
},
|
|
151
157
|
{ additionalProperties: false },
|
|
152
158
|
);
|
|
@@ -184,7 +190,11 @@ export function isShorthand(def: unknown): boolean {
|
|
|
184
190
|
if (typeof def !== "object" || def === null) return false;
|
|
185
191
|
const d = def as Record<string, unknown>;
|
|
186
192
|
if (Array.isArray(d.phases)) return false;
|
|
187
|
-
return
|
|
193
|
+
return (
|
|
194
|
+
(Array.isArray(d.chain) && d.chain.length > 0) ||
|
|
195
|
+
(Array.isArray(d.tasks) && d.tasks.length > 0) ||
|
|
196
|
+
typeof d.task === "string"
|
|
197
|
+
);
|
|
188
198
|
}
|
|
189
199
|
|
|
190
200
|
function readStep(s: unknown): ShorthandStep {
|
|
@@ -355,20 +365,27 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
355
365
|
const finals = (flow.phases as Phase[]).filter((p) => p?.final);
|
|
356
366
|
if (finals.length > 1) errors.push(`Only one phase may be marked 'final' (found ${finals.length})`);
|
|
357
367
|
|
|
358
|
-
// ---
|
|
368
|
+
// --- Hard errors: {steps.X.*} references that aren't declared deps ------
|
|
359
369
|
// Catches the most common authoring mistake: the task talks about
|
|
360
370
|
// `{steps.review.output}` but `dependsOn: ["review"]` is missing, so the
|
|
361
371
|
// phase runs in parallel with `review` and the model sees the literal
|
|
362
|
-
// placeholder string. The runtime can't infer the intent
|
|
372
|
+
// placeholder string. The runtime can't infer the intent — fail fast at
|
|
373
|
+
// validation time so the mistake is caught before the run starts.
|
|
374
|
+
//
|
|
375
|
+
// Phases with `join: "any"` are exempt: by design they only need ONE of
|
|
376
|
+
// their declared deps to complete, and may reference other phases as
|
|
377
|
+
// informational context (not as true dependencies).
|
|
363
378
|
if (errors.length === 0) {
|
|
364
379
|
const idToPhase = new Map((flow.phases as Phase[]).map((p) => [p.id, p]));
|
|
365
380
|
for (const p of flow.phases as Phase[]) {
|
|
366
381
|
if (!p?.id) continue;
|
|
382
|
+
const isJoinAny = p.join === "any";
|
|
383
|
+
if (isJoinAny) continue;
|
|
367
384
|
const deps = new Set(dependenciesOf(p));
|
|
368
385
|
const refs = collectRefs(p);
|
|
369
386
|
for (const ref of refs.steps) {
|
|
370
387
|
if (ref === p.id) {
|
|
371
|
-
|
|
388
|
+
errors.push(`Phase '${p.id}': references its own output via {steps.${ref}.*}; this is almost always a bug.`);
|
|
372
389
|
continue;
|
|
373
390
|
}
|
|
374
391
|
if (!idToPhase.has(ref)) {
|
|
@@ -378,7 +395,7 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
378
395
|
continue;
|
|
379
396
|
}
|
|
380
397
|
if (!deps.has(ref)) {
|
|
381
|
-
|
|
398
|
+
errors.push(
|
|
382
399
|
`Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not in dependsOn. ` +
|
|
383
400
|
`The phase will run in parallel with '${ref}' and see the literal placeholder. ` +
|
|
384
401
|
`Add "dependsOn": ["${ref}"] (or include '${ref}' transitively).`,
|
package/extensions/store.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
import * as crypto from "node:crypto";
|
|
10
10
|
import * as fs from "node:fs";
|
|
11
|
+
import * as os from "node:os";
|
|
11
12
|
import * as path from "node:path";
|
|
12
13
|
import { getAgentDir } from "@earendil-works/pi-coding-agent";
|
|
13
14
|
import type { Taskflow } from "./schema.ts";
|
|
@@ -69,12 +70,20 @@ function userFlowsDir(): string {
|
|
|
69
70
|
return path.join(getAgentDir(), "taskflows");
|
|
70
71
|
}
|
|
71
72
|
|
|
72
|
-
function
|
|
73
|
+
function findProjectFlowsDirInternal(cwd: string, create = false): string | null {
|
|
73
74
|
// Prefer an existing .pi dir up the tree; else use cwd/.pi when creating.
|
|
75
|
+
// **Never treat `~/.pi/` as a project flow dir** — the home directory is
|
|
76
|
+
// the user-scope boundary, and the user's `~/.pi/` is the agent dir, not a
|
|
77
|
+
// project. We skip the home entry entirely during the walk-up, so even a
|
|
78
|
+
// deeply nested cwd under home will return null (create=false) when no
|
|
79
|
+
// project `.pi` exists on the path.
|
|
80
|
+
const home = os.homedir();
|
|
74
81
|
let dir = cwd;
|
|
75
82
|
while (true) {
|
|
76
|
-
|
|
77
|
-
|
|
83
|
+
if (dir !== home) {
|
|
84
|
+
const candidate = path.join(dir, ".pi");
|
|
85
|
+
if (fs.existsSync(candidate)) return path.join(candidate, "taskflows");
|
|
86
|
+
}
|
|
78
87
|
const parent = path.dirname(dir);
|
|
79
88
|
if (parent === dir) break;
|
|
80
89
|
dir = parent;
|
|
@@ -94,6 +103,11 @@ function readFlowFile(filePath: string, scope: "user" | "project"): SavedFlow |
|
|
|
94
103
|
}
|
|
95
104
|
|
|
96
105
|
/**
 * Exported for tests: thin wrapper around the internal walk-up `.pi` finder.
 *
 * Arguments are forwarded to `findProjectFlowsDirInternal` unchanged. That
 * walk checks each directory from `cwd` upwards for a `.pi` entry and returns
 * `<dir>/.pi/taskflows` for the first hit. The user's home directory is
 * skipped, not treated as a stop point: its `.pi` is never accepted as a
 * project dir, but directories above home are still examined.
 *
 * @param cwd    Directory to start the upward search from.
 * @param create Forwarded as-is; the internal comment says `cwd/.pi` is used
 *               when creating and no existing `.pi` was found.
 * @returns The project taskflows directory, or `null` if none applies.
 */
export function findProjectFlowsDir(cwd: string, create = false): string | null {
  return findProjectFlowsDirInternal(cwd, create);
}

/** List all saved flows (project overrides user on name collision). */
|
|
110
|
+
|
|
97
111
|
export function listFlows(cwd: string): SavedFlow[] {
|
|
98
112
|
const map = new Map<string, SavedFlow>();
|
|
99
113
|
const dirs: Array<{ dir: string; scope: "user" | "project" }> = [{ dir: userFlowsDir(), scope: "user" }];
|
|
@@ -149,8 +163,11 @@ export function newRunId(flowName: string): string {
|
|
|
149
163
|
export function saveRun(state: RunState): void {
|
|
150
164
|
const dir = runsDir(state.cwd);
|
|
151
165
|
fs.mkdirSync(dir, { recursive: true });
|
|
152
|
-
|
|
153
|
-
|
|
166
|
+
// Clone before stamping updatedAt so the caller's RunState reference is not
|
|
167
|
+
// mutated as a hidden side effect (v0.0.6 audit, F-009). Shallow clone is
|
|
168
|
+
// sufficient: saveRun only serializes; it does not mutate nested objects.
|
|
169
|
+
const toSave = { ...state, updatedAt: Date.now() };
|
|
170
|
+
writeFileAtomic(path.join(dir, `${state.runId}.json`), JSON.stringify(toSave, null, 2));
|
|
154
171
|
}
|
|
155
172
|
|
|
156
173
|
export function loadRun(cwd: string, runId: string): RunState | null {
|
|
@@ -219,7 +236,14 @@ export function listRuns(cwd: string, limit = 20): RunState[] {
|
|
|
219
236
|
/* ignore */
|
|
220
237
|
}
|
|
221
238
|
}
|
|
222
|
-
|
|
239
|
+
// Guard against records missing/with non-numeric `updatedAt` — a bare
|
|
240
|
+
// `JSON.parse` may yield an object without it, and `undefined - undefined`
|
|
241
|
+
// is NaN, which makes `Array.prototype.sort` produce implementation-defined
|
|
242
|
+
// order. Drop those before sorting. (v0.0.8 audit, F-010.)
|
|
243
|
+
return runs
|
|
244
|
+
.filter((r) => typeof r.updatedAt === "number" && !Number.isNaN(r.updatedAt))
|
|
245
|
+
.sort((a, b) => b.updatedAt - a.updatedAt)
|
|
246
|
+
.slice(0, limit);
|
|
223
247
|
}
|
|
224
248
|
|
|
225
249
|
/** Stable hash of a phase's resolved task + inputs, for resume caching. */
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-taskflow",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.8",
|
|
4
4
|
"description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
package/skills/taskflow/SKILL.md
CHANGED
|
@@ -172,6 +172,36 @@ Review the audit results below. If any endpoint is missing auth, end with
|
|
|
172
172
|
{steps.audit.output}
|
|
173
173
|
```
|
|
174
174
|
|
|
175
|
+
### Structured-verify phases (v0.0.8.1)
|
|
176
|
+
|
|
177
|
+
A "verify" phase typically runs `npx tsc --noEmit && npm test && git diff --stat`
|
|
178
|
+
and reports whether everything is green. **Don't** delegate this to a generic
|
|
179
|
+
verifier subagent that summarizes the output in prose — LLMs commonly misread
|
|
180
|
+
shell output (e.g., 234 tests reported as 230, 745 insertions as 599, "1 type
|
|
181
|
+
error" reported as "clean"). Instead, **use a dedicated agent whose task is a
|
|
182
|
+
structured shell pipeline** that echoes structured key/value lines the next
|
|
183
|
+
phase can parse directly. Recommended pattern:
|
|
184
|
+
|
|
185
|
+
```jsonc
|
|
186
|
+
{
|
|
187
|
+
"id": "verify",
|
|
188
|
+
"type": "agent",
|
|
189
|
+
"agent": "verifier",
|
|
190
|
+
"dependsOn": ["apply-fixes"],
|
|
191
|
+
"task": "Run the verification pipeline and report structured results.\n\nExecute:\n```bash\ncd $REPO && npx tsc --noEmit 2>&1 | tee /tmp/tsc.log\ncd $REPO && npm test 2>&1 | tee /tmp/test.log | tail -10\ncd $REPO && git diff --shortstat HEAD | tee /tmp/diff.log\n```\n\nReport EXACTLY in this format (one key=value pair per line, no prose):\ntypecheck=PASS|FAIL\ntests_total=N\ntests_pass=N\ntests_fail=N\ninsertions=N\ndeletions=N\nfiles_changed=N\n\nIf any field is missing, you failed the task — re-run the command and re-read the output.",
|
|
192
|
+
"tools": ["read", "edit", "write", "bash"]
|
|
193
|
+
}
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
The key insight: **LLMs are bad at summarizing shell output, good at copying
|
|
197
|
+
structured data**. Asking for `key=value` pairs with explicit fields and "if
|
|
198
|
+
missing, you failed" forces the agent to read each field carefully. Downstream
|
|
199
|
+
phases that consume `{steps.verify.output}` can then `safeParse` it into a
|
|
200
|
+
JSON object and assert against expected values.
|
|
201
|
+
|
|
202
|
+
For audits where the upstream is LLM-generated prose (not shell output), use a
|
|
203
|
+
plain `gate` phase with `VERDICT:` instead.
|
|
204
|
+
|
|
175
205
|
### Interpolation
|
|
176
206
|
|
|
177
207
|
- `{args.X}` — invocation argument
|
|
@@ -188,12 +218,11 @@ Review the audit results below. If any endpoint is missing auth, end with
|
|
|
188
218
|
3. Reference upstream results explicitly with `{steps.ID...}` and set `dependsOn`.
|
|
189
219
|
4. Mark the result-bearing phase with `"final": true` (else the last phase wins).
|
|
190
220
|
|
|
191
|
-
## Common mistakes (the runtime will
|
|
221
|
+
## Common mistakes (the runtime will reject these at validation time)
|
|
192
222
|
|
|
193
|
-
The runtime validates your flow at startup
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
on the flow.
|
|
223
|
+
The runtime validates your flow at startup. As of v0.0.8.1, the two most
|
|
224
|
+
common authoring mistakes below are **hard validation errors** (the flow
|
|
225
|
+
refuses to start). Fix the flow before running it.
|
|
197
226
|
|
|
198
227
|
### 1. Referencing `{steps.X}` without `dependsOn: ["X"]`
|
|
199
228
|
|
|
@@ -209,10 +238,9 @@ on the flow.
|
|
|
209
238
|
}
|
|
210
239
|
```
|
|
211
240
|
|
|
212
|
-
|
|
213
|
-
{steps.code-review-1.*} but 'code-review-1' is not in dependsOn
|
|
214
|
-
|
|
215
|
-
The TUI shows a `⚠N` badge. **Always declare the chain:**
|
|
241
|
+
Validation now rejects this with: `Phase 'fix-issues': task references
|
|
242
|
+
{steps.code-review-1.*} but 'code-review-1' is not in dependsOn. ...`
|
|
243
|
+
**Always declare the chain:**
|
|
216
244
|
|
|
217
245
|
```jsonc
|
|
218
246
|
// ✅ RIGHT
|
|
@@ -233,7 +261,11 @@ The TUI shows a `⚠N` badge. **Always declare the chain:**
|
|
|
233
261
|
|
|
234
262
|
Tip: write the `task` first (it tells you what each phase needs), then scan for
|
|
235
263
|
`{steps.*}` references and add the matching `dependsOn`. If a phase truly does
|
|
236
|
-
not depend on anything in its task, you can
|
|
264
|
+
not depend on anything in its task, you can omit the reference.
|
|
265
|
+
|
|
266
|
+
Exception: phases with `join: "any"` are exempt from this check, since they
|
|
267
|
+
deliberately wait for only one of their declared deps to complete and may
|
|
268
|
+
reference others as informational context.
|
|
237
269
|
|
|
238
270
|
### 2. Assuming the runtime knows "this is a chain"
|
|
239
271
|
|