pi-taskflow 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/examples/conditional-research.json +1 -1
- package/examples/guarded-refactor.json +2 -2
- package/extensions/agents.ts +54 -34
- package/extensions/index.ts +25 -7
- package/extensions/interpolate.ts +24 -3
- package/extensions/render.ts +7 -3
- package/extensions/runner.ts +35 -17
- package/extensions/runs-view.ts +3 -0
- package/extensions/runtime.ts +123 -19
- package/extensions/schema.ts +22 -5
- package/extensions/store.ts +30 -6
- package/package.json +1 -1
- package/skills/taskflow/SKILL.md +42 -10
|
@@ -47,7 +47,7 @@
|
|
|
47
47
|
"id": "report",
|
|
48
48
|
"type": "reduce",
|
|
49
49
|
"from": ["review"],
|
|
50
|
-
"dependsOn": ["review"],
|
|
50
|
+
"dependsOn": ["review", "deep", "quick"],
|
|
51
51
|
"agent": "doc-writer",
|
|
52
52
|
"task": "Write a clean markdown brief on \"{args.topic}\" from the validated research:\n\n{steps.deep.output}{steps.quick.output}",
|
|
53
53
|
"final": true
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"id": "implement",
|
|
27
27
|
"type": "agent",
|
|
28
28
|
"agent": "executor_code",
|
|
29
|
-
"dependsOn": ["approve"],
|
|
29
|
+
"dependsOn": ["approve", "plan"],
|
|
30
30
|
"task": "Implement the approved plan for {args.target}.\nPlan:\n{steps.plan.output}\nExtra human guidance (if any):\n{steps.approve.output}",
|
|
31
31
|
"retry": { "max": 1, "backoffMs": 1000 }
|
|
32
32
|
},
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
"id": "summary",
|
|
42
42
|
"type": "reduce",
|
|
43
43
|
"from": ["review"],
|
|
44
|
-
"dependsOn": ["review"],
|
|
44
|
+
"dependsOn": ["review", "implement"],
|
|
45
45
|
"agent": "doc-writer",
|
|
46
46
|
"task": "Write a short changelog entry summarizing what was done:\n\n{steps.implement.output}",
|
|
47
47
|
"final": true
|
package/extensions/agents.ts
CHANGED
|
@@ -44,42 +44,56 @@ function loadAgentsFromDir(dir: string, source: "user" | "project"): AgentConfig
|
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
for (const entry of entries) {
|
|
47
|
-
if (!entry.name.endsWith(".md")) continue;
|
|
48
|
-
if (!entry.isFile() && !entry.isSymbolicLink()) continue;
|
|
49
|
-
|
|
50
|
-
const filePath = path.join(dir, entry.name);
|
|
51
|
-
let content: string;
|
|
52
47
|
try {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
continue;
|
|
56
|
-
}
|
|
48
|
+
if (!entry.name.endsWith(".md")) continue;
|
|
49
|
+
if (!entry.isFile() && !entry.isSymbolicLink()) continue;
|
|
57
50
|
|
|
58
|
-
|
|
51
|
+
const filePath = path.join(dir, entry.name);
|
|
52
|
+
let content: string;
|
|
59
53
|
try {
|
|
60
|
-
|
|
54
|
+
content = fs.readFileSync(filePath, "utf-8");
|
|
61
55
|
} catch {
|
|
62
|
-
|
|
63
|
-
return { frontmatter: {} as Record<string, string>, body: "" };
|
|
56
|
+
continue;
|
|
64
57
|
}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
name
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
58
|
+
|
|
59
|
+
const { frontmatter, body } = (() => {
|
|
60
|
+
try {
|
|
61
|
+
return parseFrontmatter<Record<string, unknown>>(content);
|
|
62
|
+
} catch {
|
|
63
|
+
// A single malformed agent file must not break discovery for every flow.
|
|
64
|
+
return { frontmatter: {} as Record<string, unknown>, body: "" };
|
|
65
|
+
}
|
|
66
|
+
})();
|
|
67
|
+
if (!frontmatter.name || !frontmatter.description) continue;
|
|
68
|
+
|
|
69
|
+
// frontmatter is YAML-parsed: tools may be a comma-separated string ("a, b")
|
|
70
|
+
// OR a YAML sequence ([a, b]). Handle both forms.
|
|
71
|
+
const rawTools = frontmatter.tools;
|
|
72
|
+
const tools: string[] | undefined = Array.isArray(rawTools)
|
|
73
|
+
? rawTools.map((t) => String(t).trim()).filter(Boolean)
|
|
74
|
+
: rawTools !== undefined && rawTools !== null
|
|
75
|
+
? String(rawTools)
|
|
76
|
+
.split(",")
|
|
77
|
+
.map((t) => t.trim())
|
|
78
|
+
.filter(Boolean)
|
|
79
|
+
: undefined;
|
|
80
|
+
|
|
81
|
+
agents.push({
|
|
82
|
+
name: String(frontmatter.name),
|
|
83
|
+
description: String(frontmatter.description),
|
|
84
|
+
tools: tools && tools.length > 0 ? tools : undefined,
|
|
85
|
+
model: frontmatter.model === undefined ? undefined : String(frontmatter.model),
|
|
86
|
+
thinking: frontmatter.thinking === undefined ? undefined : String(frontmatter.thinking),
|
|
87
|
+
systemPrompt: body,
|
|
88
|
+
source,
|
|
89
|
+
filePath,
|
|
90
|
+
});
|
|
91
|
+
} catch {
|
|
92
|
+
// Defense-in-depth: a single bad agent file must not break discovery
|
|
93
|
+
// for the entire flow (e.g. exotic YAML shapes, runtime errors in
|
|
94
|
+
// field access, symlink races, etc.).
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
83
97
|
}
|
|
84
98
|
return agents;
|
|
85
99
|
}
|
|
@@ -128,9 +142,15 @@ export function discoverAgents(
|
|
|
128
142
|
for (const [name, override] of Object.entries(overrides)) {
|
|
129
143
|
const agent = agentMap.get(name);
|
|
130
144
|
if (agent) {
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
145
|
+
// Clone before mutating: agentMap owns the original AgentConfig
|
|
146
|
+
// (loaded from disk in loadAgentsFromDir). Mutating it in place
|
|
147
|
+
// would cause cross-contamination for any caller that retains a
|
|
148
|
+
// reference and invokes discoverAgents again with different overrides.
|
|
149
|
+
const mutated: AgentConfig = { ...agent };
|
|
150
|
+
if (override.model !== undefined) mutated.model = override.model;
|
|
151
|
+
if (override.thinking !== undefined) mutated.thinking = override.thinking;
|
|
152
|
+
if (override.tools !== undefined) mutated.tools = override.tools;
|
|
153
|
+
agentMap.set(name, mutated);
|
|
134
154
|
}
|
|
135
155
|
}
|
|
136
156
|
}
|
package/extensions/index.ts
CHANGED
|
@@ -50,8 +50,8 @@ const ShorthandStep = Type.Object(
|
|
|
50
50
|
);
|
|
51
51
|
|
|
52
52
|
const TaskflowParams = Type.Object({
|
|
53
|
-
action: StringEnum(["run", "save", "resume", "list"] as const, {
|
|
54
|
-
description: "What to do: run a flow, save a definition, resume a paused run, or list
|
|
53
|
+
action: StringEnum(["run", "save", "resume", "list", "agents"] as const, {
|
|
54
|
+
description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, or list available agents you can use in phases",
|
|
55
55
|
default: "run",
|
|
56
56
|
}),
|
|
57
57
|
name: Type.Optional(Type.String({ description: "Name of a saved flow (for run/save without inline define)" })),
|
|
@@ -108,10 +108,6 @@ async function runFlow(
|
|
|
108
108
|
onUpdate: ((p: AgentToolResult<TaskflowDetails>) => void) | undefined,
|
|
109
109
|
existing?: RunState,
|
|
110
110
|
): Promise<RuntimeResult> {
|
|
111
|
-
const settings = readSubagentSettings();
|
|
112
|
-
const scope: AgentScope = def.agentScope ?? "user";
|
|
113
|
-
const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
|
|
114
|
-
|
|
115
111
|
const state = existing ?? makeRunState(def, args, ctx.cwd);
|
|
116
112
|
|
|
117
113
|
const emit = (s: RunState, finalOutput?: string) => {
|
|
@@ -166,6 +162,13 @@ async function runFlow(
|
|
|
166
162
|
: undefined;
|
|
167
163
|
|
|
168
164
|
try {
|
|
165
|
+
// Discover settings/agents inside try so a YAML/IO crash in
|
|
166
|
+
// discoverAgents or readSubagentSettings (F-001) is caught and
|
|
167
|
+
// the heartbeat timer is cleared by the finally block below.
|
|
168
|
+
const settings = readSubagentSettings();
|
|
169
|
+
const scope: AgentScope = def.agentScope ?? "user";
|
|
170
|
+
const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
|
|
171
|
+
|
|
169
172
|
const result = await executeTaskflow(state, {
|
|
170
173
|
cwd: ctx.cwd,
|
|
171
174
|
agents,
|
|
@@ -216,7 +219,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
216
219
|
"Phases (agent, parallel, map, gate, reduce, approval, flow) form a DAG; intermediate outputs stay out of your context — only the final phase output is returned.",
|
|
217
220
|
"Use action=run with an inline `define` (you write the DSL) or a saved `name`.",
|
|
218
221
|
"For simple non-DAG delegations (like the subagent tool) skip the DSL: pass `task` (+optional `agent`) for one task, `tasks:[{task,agent?}]` to run in parallel, or `chain:[{task,agent?}]` to run sequentially (reference the prior step with {previous.output}).",
|
|
219
|
-
"Use action=save to persist a definition as a reusable /tf:<name> command. action=resume continues a paused run. action=list shows saved flows.",
|
|
222
|
+
"Use action=save to persist a definition as a reusable /tf:<name> command. action=resume continues a paused run. action=list shows saved flows. Use action=agents to list available agents — do NOT invent agent names; either use an agent from that list or omit the 'agent' field to auto-select the default agent.",
|
|
220
223
|
"DSL: {name, args?, concurrency?, budget?:{maxUSD,maxTokens}, phases:[{id, type, agent, task, dependsOn?, join?:'all'|'any', when?, retry?:{max,backoffMs,factor}, over?(map), as?(map), branches?(parallel), from?(reduce), use?(flow), with?(flow), output?:'json', final?}]}.",
|
|
221
224
|
"Phase types: agent (one subagent), parallel (static branches), map (dynamic fan-out over an array), gate (VERDICT: PASS/BLOCK quality gate), reduce (aggregate from N phases), approval (human-in-the-loop pause), flow (run a saved sub-flow). join:'any' is an OR-join; when is a conditional guard; retry adds backoff; budget caps run cost.",
|
|
222
225
|
"Interpolation: {args.X}, {steps.ID.output}, {steps.ID.json}, {item} (map), {previous.output}.",
|
|
@@ -232,6 +235,21 @@ export default function (pi: ExtensionAPI) {
|
|
|
232
235
|
async execute(_id, params, signal, onUpdate, ctx) {
|
|
233
236
|
const action = params.action ?? "run";
|
|
234
237
|
|
|
238
|
+
// agents — list available agents the LLM can use in phase definitions
|
|
239
|
+
if (action === "agents") {
|
|
240
|
+
const scope = params.scope ?? "both";
|
|
241
|
+
const { agents } = discoverAgents(ctx.cwd, scope as AgentScope, undefined);
|
|
242
|
+
const text = agents.length
|
|
243
|
+
? agents
|
|
244
|
+
.map(
|
|
245
|
+
(a) =>
|
|
246
|
+
`- ${a.name} (${a.source}): ${a.description}${a.model ? ` [model: ${a.model}]` : ""}${a.tools?.length ? ` [tools: ${a.tools.join(", ")}]` : ""}`,
|
|
247
|
+
)
|
|
248
|
+
.join("\n")
|
|
249
|
+
: "No agents found. Use the default agent by omitting the 'agent' field in phases.";
|
|
250
|
+
return { content: [{ type: "text", text }], details: { action } satisfies TaskflowDetails };
|
|
251
|
+
}
|
|
252
|
+
|
|
235
253
|
// list
|
|
236
254
|
if (action === "list") {
|
|
237
255
|
const flows = listFlows(ctx.cwd);
|
|
@@ -27,10 +27,13 @@ export interface InterpolationResult {
|
|
|
27
27
|
missing: string[];
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
export function interpolate(
|
|
30
|
+
export function interpolate(
|
|
31
|
+
template: string | null | undefined,
|
|
32
|
+
ctx: InterpolationContext,
|
|
33
|
+
): InterpolationResult {
|
|
31
34
|
const missing: string[] = [];
|
|
32
35
|
|
|
33
|
-
const text = template.replace(PLACEHOLDER, (whole, path: string) => {
|
|
36
|
+
const text = String(template ?? "").replace(PLACEHOLDER, (whole, path: string) => {
|
|
34
37
|
const value = resolvePath(path, ctx);
|
|
35
38
|
if (value === undefined) {
|
|
36
39
|
missing.push(path);
|
|
@@ -134,6 +137,24 @@ export function safeParse(text: string): unknown {
|
|
|
134
137
|
}
|
|
135
138
|
}
|
|
136
139
|
}
|
|
140
|
+
// Anti-pattern detection (v0.0.8.1): array followed by a stray top-level
|
|
141
|
+
// "key": value. A common LLM mistake — the model appends
|
|
142
|
+
// `"deferred": [...]` after a JSON array, producing a non-JSON hybrid that
|
|
143
|
+
// none of the above strategies can recover. We surface a diagnostic hint
|
|
144
|
+
// so flow authors can spot the bug fast.
|
|
145
|
+
//
|
|
146
|
+
// We check the original (trimmed) input rather than the slice tail,
|
|
147
|
+
// because `lastIndexOf(close)` lands on the *last* bracket — for the
|
|
148
|
+
// anti-pattern the stray key is between the array's `]` and the trailing
|
|
149
|
+
// `]`, not after the last one.
|
|
150
|
+
if (/]\s*[\},]?\s*"[^"\n]+"\s*:/.test(trimmed)) {
|
|
151
|
+
console.warn(
|
|
152
|
+
"[pi-taskflow safeParse] input looks like a JSON array followed by a stray top-level key " +
|
|
153
|
+
`(pattern: [{...}], "key": ...). This is not valid JSON. ` +
|
|
154
|
+
`Hint: put extra data as array members (e.g. {"id":"D-001","status":"deferred",...}) ` +
|
|
155
|
+
`or split into a separate phase.`,
|
|
156
|
+
);
|
|
157
|
+
}
|
|
137
158
|
return undefined;
|
|
138
159
|
}
|
|
139
160
|
|
|
@@ -142,7 +163,7 @@ export function coerceArray(value: unknown): unknown[] | null {
|
|
|
142
163
|
if (Array.isArray(value)) return value;
|
|
143
164
|
if (value && typeof value === "object") {
|
|
144
165
|
// {items: [...]} or {results: [...]} convenience
|
|
145
|
-
for (const key of ["items", "results", "list", "data"]) {
|
|
166
|
+
for (const key of ["items", "results", "list", "data", "findings"]) {
|
|
146
167
|
const v = (value as Record<string, unknown>)[key];
|
|
147
168
|
if (Array.isArray(v)) return v;
|
|
148
169
|
}
|
package/extensions/render.ts
CHANGED
|
@@ -71,7 +71,10 @@ function agentRole(phase: Phase, ps: PhaseState | undefined, theme: Theme): stri
|
|
|
71
71
|
|
|
72
72
|
function costStr(usage: UsageStats | undefined, theme: Theme): string {
|
|
73
73
|
if (!usage?.cost) return "";
|
|
74
|
-
|
|
74
|
+
const c = usage.cost;
|
|
75
|
+
return c >= 0.01
|
|
76
|
+
? theme.fg("muted", `$${c.toFixed(2)}`)
|
|
77
|
+
: theme.fg("muted", `$${c.toFixed(4)}`);
|
|
75
78
|
}
|
|
76
79
|
|
|
77
80
|
function aggregateCost(state: RunState): number {
|
|
@@ -174,6 +177,7 @@ function phaseDetail(phase: Phase, ps: PhaseState | undefined, theme: Theme): st
|
|
|
174
177
|
const color = d === "reject" ? "error" : d === "edit" ? "warning" : "success";
|
|
175
178
|
let a = theme.fg("warning", "⚠") + " " + theme.fg(color as Parameters<typeof theme.fg>[0], theme.bold(d.toUpperCase()));
|
|
176
179
|
if (ps.approval.auto) a += theme.fg("dim", " auto");
|
|
180
|
+
if (cost) a += ` ${cost}`;
|
|
177
181
|
if (time) a += ` ${time}`;
|
|
178
182
|
if (ps.warnings?.length) a += theme.fg("warning", ` ⚠${ps.warnings.length}`);
|
|
179
183
|
return a;
|
|
@@ -228,8 +232,8 @@ function headerLine(state: RunState, theme: Theme): string {
|
|
|
228
232
|
if (state.status === "blocked") line += theme.fg("error", " · blocked");
|
|
229
233
|
const cost = aggregateCost(state);
|
|
230
234
|
const budget = state.def.budget;
|
|
231
|
-
if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost.toFixed(
|
|
232
|
-
else if (cost) line += theme.fg("muted", ` · $${cost.toFixed(
|
|
235
|
+
if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}/$${budget.maxUSD}`);
|
|
236
|
+
else if (cost) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}`);
|
|
233
237
|
const el = runElapsed(state);
|
|
234
238
|
if (el) line += theme.fg("dim", ` · ${elapsed(el)}`);
|
|
235
239
|
return line;
|
package/extensions/runner.ts
CHANGED
|
@@ -203,14 +203,10 @@ function summarizeToolCall(name: string, args: Record<string, unknown>): string
|
|
|
203
203
|
}
|
|
204
204
|
}
|
|
205
205
|
|
|
206
|
-
async function writePromptToTempFile(
|
|
207
|
-
const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
|
|
208
|
-
const safeName = agentName.replace(/[^\w.-]+/g, "_");
|
|
209
|
-
const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
|
|
206
|
+
async function writePromptToTempFile(filePath: string, prompt: string): Promise<void> {
|
|
210
207
|
await withFileMutationQueue(filePath, async () => {
|
|
211
208
|
await fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 });
|
|
212
209
|
});
|
|
213
|
-
return { dir: tmpDir, filePath };
|
|
214
210
|
}
|
|
215
211
|
|
|
216
212
|
function getPiInvocation(args: string[]): { command: string; args: string[] } {
|
|
@@ -284,9 +280,13 @@ export async function runAgentTask(
|
|
|
284
280
|
|
|
285
281
|
try {
|
|
286
282
|
if (agent.systemPrompt.trim()) {
|
|
287
|
-
|
|
288
|
-
tmpPromptDir
|
|
289
|
-
|
|
283
|
+
// Allocate the temp dir + path BEFORE any fallible I/O so that if
|
|
284
|
+
// writeFile throws, tmpPromptDir/tmpPromptPath are already set and
|
|
285
|
+
// the finally block can clean up the directory (F-004).
|
|
286
|
+
tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
|
|
287
|
+
const safeName = agent.name.replace(/[^\w.-]+/g, "_");
|
|
288
|
+
tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
|
|
289
|
+
await writePromptToTempFile(tmpPromptPath, agent.systemPrompt);
|
|
290
290
|
args.push("--append-system-prompt", tmpPromptPath);
|
|
291
291
|
}
|
|
292
292
|
args.push(`Task: ${task}`);
|
|
@@ -319,15 +319,25 @@ export async function runAgentTask(
|
|
|
319
319
|
if (buffer.trim()) processLine(buffer);
|
|
320
320
|
resolve(code ?? 0);
|
|
321
321
|
});
|
|
322
|
-
proc.on("error", () =>
|
|
322
|
+
proc.on("error", (err) => {
|
|
323
|
+
if (!result.stderr) result.stderr = err.message;
|
|
324
|
+
if (!result.errorMessage) result.errorMessage = err.message;
|
|
325
|
+
resolve(1);
|
|
326
|
+
});
|
|
323
327
|
|
|
324
328
|
if (opts.signal) {
|
|
325
329
|
const kill = () => {
|
|
326
330
|
wasAborted = true;
|
|
327
331
|
proc.kill("SIGTERM");
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
332
|
+
// Force-kill fallback. proc.kill("SIGKILL") is idempotent if
|
|
333
|
+
// the process already exited, and `proc.killed` is set true
|
|
334
|
+
// synchronously by the SIGTERM above — so the previous
|
|
335
|
+
// `if (!proc.killed)` guard would skip SIGKILL entirely,
|
|
336
|
+
// hanging forever on a child that ignores SIGTERM.
|
|
337
|
+
// .unref() keeps the timer from holding the event loop open
|
|
338
|
+
// after the process is gone.
|
|
339
|
+
const forceKill = setTimeout(() => proc.kill("SIGKILL"), 5000);
|
|
340
|
+
forceKill.unref();
|
|
331
341
|
};
|
|
332
342
|
if (opts.signal.aborted) kill();
|
|
333
343
|
else opts.signal.addEventListener("abort", kill, { once: true });
|
|
@@ -349,12 +359,20 @@ export async function runAgentTask(
|
|
|
349
359
|
// `output`: upstream providers (e.g. a Cloudflare challenge page) can
|
|
350
360
|
// surface huge HTML/JSON in errorMessage, and that garbage would
|
|
351
361
|
// otherwise flow into downstream phase interpolations.
|
|
352
|
-
if
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
362
|
+
// Sanitization must run whenever the run failed, even if some output
|
|
363
|
+
// was already emitted (e.g. crash mid-stream with a partial result):
|
|
364
|
+
// an unsanitized errorMessage would still leak into PhaseState and
|
|
365
|
+
// downstream interpolation contexts. (F-013)
|
|
366
|
+
if (isFailed(result)) {
|
|
367
|
+
if (!result.output) {
|
|
368
|
+
result.output = TRANSPORT_ERROR_PLACEHOLDER;
|
|
369
|
+
if (!result.errorMessage) {
|
|
370
|
+
result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
if (result.errorMessage) {
|
|
374
|
+
result.errorMessage = sanitizeErrorMessage(result.errorMessage);
|
|
356
375
|
}
|
|
357
|
-
result.errorMessage = sanitizeErrorMessage(result.errorMessage);
|
|
358
376
|
}
|
|
359
377
|
return result;
|
|
360
378
|
} finally {
|
package/extensions/runs-view.ts
CHANGED
|
@@ -50,6 +50,9 @@ export class RunHistoryComponent {
|
|
|
50
50
|
private cachedLines?: string[];
|
|
51
51
|
|
|
52
52
|
constructor(runs: RunState[], theme: Theme, onDone: (result?: RunHistoryResult) => void) {
|
|
53
|
+
if (!runs.length) {
|
|
54
|
+
throw new Error("RunHistoryComponent requires at least one run");
|
|
55
|
+
}
|
|
53
56
|
this.runs = runs;
|
|
54
57
|
this.theme = theme;
|
|
55
58
|
this.onDone = onDone;
|
package/extensions/runtime.ts
CHANGED
|
@@ -414,11 +414,12 @@ async function executePhase(
|
|
|
414
414
|
if (type === "agent" || type === "gate" || type === "reduce") {
|
|
415
415
|
const { text } = interpolate(phase.task ?? "", ctx);
|
|
416
416
|
const fullTask = preRead + text;
|
|
417
|
-
const
|
|
417
|
+
const agentName = resolveAgent(phase.agent, deps, state);
|
|
418
|
+
const inputHash = hashInput(phase.id, agentName, fullTask);
|
|
418
419
|
const cached = cachedPhase(prior, inputHash);
|
|
419
420
|
if (cached) return cached;
|
|
420
421
|
|
|
421
|
-
const r = await runOne(
|
|
422
|
+
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
|
|
422
423
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
423
424
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
424
425
|
return ps;
|
|
@@ -428,7 +429,7 @@ async function executePhase(
|
|
|
428
429
|
const branches = (phase.branches ?? []).map((b) => {
|
|
429
430
|
const r = interpolate(b.task, ctx);
|
|
430
431
|
return {
|
|
431
|
-
agent: b.agent ?? phase.agent
|
|
432
|
+
agent: resolveAgent(b.agent ?? phase.agent, deps, state),
|
|
432
433
|
task: preRead + r.text,
|
|
433
434
|
};
|
|
434
435
|
});
|
|
@@ -458,7 +459,7 @@ async function executePhase(
|
|
|
458
459
|
const tasks = arr.map((item) => {
|
|
459
460
|
const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
|
|
460
461
|
return {
|
|
461
|
-
agent: phase.agent
|
|
462
|
+
agent: resolveAgent(phase.agent, deps, state),
|
|
462
463
|
task: preRead + interpolate(phase.task ?? "", localCtx).text,
|
|
463
464
|
};
|
|
464
465
|
});
|
|
@@ -551,14 +552,20 @@ async function executePhase(
|
|
|
551
552
|
baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
|
|
552
553
|
const subResult = await executeTaskflow(subState, {
|
|
553
554
|
...deps,
|
|
555
|
+
// Override deps.cwd with the flow phase's own cwd so that sub-flow
|
|
556
|
+
// phases without an explicit cwd derive their subagents from the
|
|
557
|
+
// flow's cwd (not the caller's cwd).
|
|
558
|
+
cwd: phase.cwd ?? deps.cwd,
|
|
554
559
|
runTask: subRunTask,
|
|
555
560
|
_stack: [...stack, state.flowName],
|
|
556
561
|
persist: undefined,
|
|
557
562
|
onProgress: () => {
|
|
558
563
|
if (live) {
|
|
559
564
|
const ph = Object.values(subState.phases);
|
|
565
|
+
// B-F015: `done` must include both success and failure so the
|
|
566
|
+
// renderer's `done - failed` shows the true success count.
|
|
560
567
|
live.subProgress = {
|
|
561
|
-
done: ph.filter((p) => p.status === "done").length,
|
|
568
|
+
done: ph.filter((p) => p.status === "done" || p.status === "failed").length,
|
|
562
569
|
total: subDef.phases.length,
|
|
563
570
|
running: ph.filter((p) => p.status === "running").length,
|
|
564
571
|
failed: ph.filter((p) => p.status === "failed").length,
|
|
@@ -577,8 +584,11 @@ async function executePhase(
|
|
|
577
584
|
output: subResult.finalOutput,
|
|
578
585
|
json: parseJson ? safeParse(subResult.finalOutput) : undefined,
|
|
579
586
|
usage: subResult.totalUsage,
|
|
587
|
+
// B-F015: include failed in `done` so the renderer's
|
|
588
|
+
// `done - failed` formula gives the success count (matches the
|
|
589
|
+
// map/parallel runner's overlapping-counter convention).
|
|
580
590
|
subProgress: {
|
|
581
|
-
done: sp.filter((p) => p.status === "done").length,
|
|
591
|
+
done: sp.filter((p) => p.status === "done" || p.status === "failed").length,
|
|
582
592
|
total: subDef.phases.length,
|
|
583
593
|
running: 0,
|
|
584
594
|
failed: sp.filter((p) => p.status === "failed").length,
|
|
@@ -632,6 +642,27 @@ function cachedPhase(prior: PhaseState | undefined, inputHash: string): PhaseSta
|
|
|
632
642
|
return null;
|
|
633
643
|
}
|
|
634
644
|
|
|
645
|
+
/**
|
|
646
|
+
* Resolve an agent name against available agents. Falls back to the default
|
|
647
|
+
* agent if the requested agent isn't found, logging a warning via safeEmit.
|
|
648
|
+
*/
|
|
649
|
+
function resolveAgent(name: string | undefined, deps: RuntimeDeps, state: RunState): string {
|
|
650
|
+
const resolved = name ?? defaultAgent(deps);
|
|
651
|
+
if (name && !deps.agents.some((a) => a.name === name)) {
|
|
652
|
+
const fallback = defaultAgent(deps);
|
|
653
|
+
// Log only once per run to avoid noise.
|
|
654
|
+
if (!(state as any).__unknownAgentWarned) {
|
|
655
|
+
(state as any).__unknownAgentWarned = new Set<string>();
|
|
656
|
+
}
|
|
657
|
+
if (!(state as any).__unknownAgentWarned.has(name)) {
|
|
658
|
+
(state as any).__unknownAgentWarned.add(name);
|
|
659
|
+
console.warn(`[taskflow] Unknown agent "${name}", falling back to "${fallback}". Use action=agents to list available agents.`);
|
|
660
|
+
}
|
|
661
|
+
return fallback;
|
|
662
|
+
}
|
|
663
|
+
return resolved;
|
|
664
|
+
}
|
|
665
|
+
|
|
635
666
|
function defaultAgent(deps: RuntimeDeps): string {
|
|
636
667
|
return deps.agents[0]?.name ?? "default";
|
|
637
668
|
}
|
|
@@ -649,7 +680,10 @@ export function parseGateVerdict(output: string): { verdict: "pass" | "block"; r
|
|
|
649
680
|
if (typeof o.continue === "boolean") return { verdict: o.continue ? "pass" : "block", reason: asReason(o.reason) };
|
|
650
681
|
if (typeof o.pass === "boolean") return { verdict: o.pass ? "pass" : "block", reason: asReason(o.reason) };
|
|
651
682
|
if (typeof o.verdict === "string") {
|
|
652
|
-
|
|
683
|
+
// Note: do NOT include standalone "no" — natural-language verdicts like
|
|
684
|
+
// "No issues found" / "no errors" would otherwise be false-positive BLOCK.
|
|
685
|
+
// Fail-open covers any ambiguous text.
|
|
686
|
+
const block = /block|fail|stop|reject|halt/i.test(o.verdict);
|
|
653
687
|
return { verdict: block ? "block" : "pass", reason: asReason(o.reason) };
|
|
654
688
|
}
|
|
655
689
|
}
|
|
@@ -666,11 +700,86 @@ function asReason(v: unknown): string | undefined {
|
|
|
666
700
|
return typeof v === "string" && v.trim() ? v.trim() : undefined;
|
|
667
701
|
}
|
|
668
702
|
|
|
703
|
+
/**
|
|
704
|
+
* Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
|
|
705
|
+
*
|
|
706
|
+
* A throw from a host-supplied callback must NEVER replace the runtime's
|
|
707
|
+
* outcome — neither the original crash message in `executeTaskflow`'s catch
|
|
708
|
+
* block, nor the final output of a successful run. Callbacks are observability
|
|
709
|
+
* hooks; the run survives their failure.
|
|
710
|
+
*
|
|
711
|
+
* Used at every "checkpoint" call site (phase start, phase end, terminal state).
|
|
712
|
+
* For high-frequency live updates inside a phase, see `safeProgress` below.
|
|
713
|
+
*/
|
|
714
|
+
function safeEmit(deps: RuntimeDeps, state: RunState): void {
|
|
715
|
+
try {
|
|
716
|
+
deps.persist?.(state);
|
|
717
|
+
} catch {
|
|
718
|
+
// user callback — must not break the run
|
|
719
|
+
}
|
|
720
|
+
try {
|
|
721
|
+
deps.onProgress?.(state);
|
|
722
|
+
} catch {
|
|
723
|
+
// user callback — must not break the run
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
/**
|
|
728
|
+
* Like `safeEmit` but for the high-frequency live-update channel only.
|
|
729
|
+
* Skips `persist` (which is intentionally checkpoint-only) and swallows any
|
|
730
|
+
* throw from the user-supplied `onProgress` so a misbehaving TUI sink cannot
|
|
731
|
+
* disrupt an in-flight phase.
|
|
732
|
+
*/
|
|
733
|
+
function safeProgress(deps: RuntimeDeps, state: RunState): void {
|
|
734
|
+
try {
|
|
735
|
+
deps.onProgress?.(state);
|
|
736
|
+
} catch {
|
|
737
|
+
// user callback — must not break the run
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
|
|
669
741
|
/**
|
|
670
742
|
* Execute a full taskflow. Mutates and persists `state` as it progresses.
|
|
671
743
|
*/
|
|
744
|
+
function ensureImplicitGate(def: Taskflow): void {
|
|
745
|
+
// Respect explicit opt-out
|
|
746
|
+
if ((def as any).implicitGate === false) return;
|
|
747
|
+
|
|
748
|
+
const hasGate = def.phases.some(
|
|
749
|
+
(p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
|
|
750
|
+
);
|
|
751
|
+
if (hasGate || def.phases.length === 0) return;
|
|
752
|
+
|
|
753
|
+
// The last existing phase is the effective "final" phase — pin it so the
|
|
754
|
+
// injected gate doesn't become the finalOutput.
|
|
755
|
+
const lastPhase = def.phases[def.phases.length - 1];
|
|
756
|
+
if (!lastPhase.final && !def.phases.some((p) => p.final)) {
|
|
757
|
+
lastPhase.final = true;
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
const allIds = def.phases.map((p) => p.id);
|
|
761
|
+
def.phases.push({
|
|
762
|
+
id: "_implicit-gate",
|
|
763
|
+
type: "gate",
|
|
764
|
+
dependsOn: allIds,
|
|
765
|
+
agent: "reviewer",
|
|
766
|
+
task: `Review all phase outputs from this taskflow for accuracy and consistency.
|
|
767
|
+
|
|
768
|
+
For each upstream phase, scan its output for:
|
|
769
|
+
1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
|
|
770
|
+
2. **Internal contradictions**: Do any phases contradict each other?
|
|
771
|
+
3. **Completeness**: Is any output truncated, empty, or anomalously short?
|
|
772
|
+
4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
|
|
773
|
+
|
|
774
|
+
Output:
|
|
775
|
+
- If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
|
|
776
|
+
- If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
|
|
777
|
+
});
|
|
778
|
+
}
|
|
779
|
+
|
|
672
780
|
export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
|
|
673
781
|
const def: Taskflow = state.def;
|
|
782
|
+
ensureImplicitGate(def);
|
|
674
783
|
try {
|
|
675
784
|
return await runTaskflowLayers(state, deps);
|
|
676
785
|
} catch (e) {
|
|
@@ -685,8 +794,7 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
|
|
|
685
794
|
}
|
|
686
795
|
}
|
|
687
796
|
state.status = "failed";
|
|
688
|
-
deps
|
|
689
|
-
deps.onProgress?.(state);
|
|
797
|
+
safeEmit(deps, state);
|
|
690
798
|
const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
|
|
691
799
|
return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
|
|
692
800
|
}
|
|
@@ -697,8 +805,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
697
805
|
const layers = topoLayers(def.phases);
|
|
698
806
|
|
|
699
807
|
state.status = "running";
|
|
700
|
-
deps
|
|
701
|
-
deps.onProgress?.(state);
|
|
808
|
+
safeEmit(deps, state);
|
|
702
809
|
|
|
703
810
|
let aborted = false;
|
|
704
811
|
let gateBlocked = false;
|
|
@@ -756,8 +863,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
756
863
|
endedAt: Date.now(),
|
|
757
864
|
usage: emptyUsage(),
|
|
758
865
|
};
|
|
759
|
-
deps
|
|
760
|
-
deps.onProgress?.(state);
|
|
866
|
+
safeEmit(deps, state);
|
|
761
867
|
return;
|
|
762
868
|
}
|
|
763
869
|
|
|
@@ -768,9 +874,9 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
768
874
|
status: "running",
|
|
769
875
|
startedAt,
|
|
770
876
|
};
|
|
771
|
-
deps
|
|
877
|
+
safeProgress(deps, state);
|
|
772
878
|
|
|
773
|
-
const ps = await executePhase(phase, state, deps, prior, () => deps
|
|
879
|
+
const ps = await executePhase(phase, state, deps, prior, () => safeProgress(deps, state));
|
|
774
880
|
// Preserve the phase start time: executePhase returns a fresh PhaseState
|
|
775
881
|
// that omits startedAt (cached/resumed results carry their own).
|
|
776
882
|
state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
|
|
@@ -793,8 +899,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
793
899
|
budgetBlocked = true;
|
|
794
900
|
budgetReason = ob.reason;
|
|
795
901
|
}
|
|
796
|
-
deps
|
|
797
|
-
deps.onProgress?.(state);
|
|
902
|
+
safeEmit(deps, state);
|
|
798
903
|
});
|
|
799
904
|
}
|
|
800
905
|
|
|
@@ -818,8 +923,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
|
|
|
818
923
|
: anyFailed
|
|
819
924
|
? "failed"
|
|
820
925
|
: "completed";
|
|
821
|
-
deps
|
|
822
|
-
deps.onProgress?.(state);
|
|
926
|
+
safeEmit(deps, state);
|
|
823
927
|
|
|
824
928
|
let finalOutput = finalState?.output ?? "(no output)";
|
|
825
929
|
if (gateBlocked) {
|
package/extensions/schema.ts
CHANGED
|
@@ -147,6 +147,12 @@ export const TaskflowSchema = Type.Object(
|
|
|
147
147
|
}),
|
|
148
148
|
),
|
|
149
149
|
phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
|
|
150
|
+
implicitGate: Type.Optional(
|
|
151
|
+
Type.Boolean({
|
|
152
|
+
description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
|
|
153
|
+
default: true,
|
|
154
|
+
}),
|
|
155
|
+
),
|
|
150
156
|
},
|
|
151
157
|
{ additionalProperties: false },
|
|
152
158
|
);
|
|
@@ -184,7 +190,11 @@ export function isShorthand(def: unknown): boolean {
|
|
|
184
190
|
if (typeof def !== "object" || def === null) return false;
|
|
185
191
|
const d = def as Record<string, unknown>;
|
|
186
192
|
if (Array.isArray(d.phases)) return false;
|
|
187
|
-
return
|
|
193
|
+
return (
|
|
194
|
+
(Array.isArray(d.chain) && d.chain.length > 0) ||
|
|
195
|
+
(Array.isArray(d.tasks) && d.tasks.length > 0) ||
|
|
196
|
+
typeof d.task === "string"
|
|
197
|
+
);
|
|
188
198
|
}
|
|
189
199
|
|
|
190
200
|
function readStep(s: unknown): ShorthandStep {
|
|
@@ -355,20 +365,27 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
355
365
|
const finals = (flow.phases as Phase[]).filter((p) => p?.final);
|
|
356
366
|
if (finals.length > 1) errors.push(`Only one phase may be marked 'final' (found ${finals.length})`);
|
|
357
367
|
|
|
358
|
-
// ---
|
|
368
|
+
// --- Hard errors: {steps.X.*} references that aren't declared deps ------
|
|
359
369
|
// Catches the most common authoring mistake: the task talks about
|
|
360
370
|
// `{steps.review.output}` but `dependsOn: ["review"]` is missing, so the
|
|
361
371
|
// phase runs in parallel with `review` and the model sees the literal
|
|
362
|
-
// placeholder string. The runtime can't infer the intent
|
|
372
|
+
// placeholder string. The runtime can't infer the intent — fail fast at
|
|
373
|
+
// validation time so the mistake is caught before the run starts.
|
|
374
|
+
//
|
|
375
|
+
// Phases with `join: "any"` are exempt: by design they only need ONE of
|
|
376
|
+
// their declared deps to complete, and may reference other phases as
|
|
377
|
+
// informational context (not as true dependencies).
|
|
363
378
|
if (errors.length === 0) {
|
|
364
379
|
const idToPhase = new Map((flow.phases as Phase[]).map((p) => [p.id, p]));
|
|
365
380
|
for (const p of flow.phases as Phase[]) {
|
|
366
381
|
if (!p?.id) continue;
|
|
382
|
+
const isJoinAny = p.join === "any";
|
|
383
|
+
if (isJoinAny) continue;
|
|
367
384
|
const deps = new Set(dependenciesOf(p));
|
|
368
385
|
const refs = collectRefs(p);
|
|
369
386
|
for (const ref of refs.steps) {
|
|
370
387
|
if (ref === p.id) {
|
|
371
|
-
|
|
388
|
+
errors.push(`Phase '${p.id}': references its own output via {steps.${ref}.*}; this is almost always a bug.`);
|
|
372
389
|
continue;
|
|
373
390
|
}
|
|
374
391
|
if (!idToPhase.has(ref)) {
|
|
@@ -378,7 +395,7 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
378
395
|
continue;
|
|
379
396
|
}
|
|
380
397
|
if (!deps.has(ref)) {
|
|
381
|
-
|
|
398
|
+
errors.push(
|
|
382
399
|
`Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not in dependsOn. ` +
|
|
383
400
|
`The phase will run in parallel with '${ref}' and see the literal placeholder. ` +
|
|
384
401
|
`Add "dependsOn": ["${ref}"] (or include '${ref}' transitively).`,
|
package/extensions/store.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
import * as crypto from "node:crypto";
|
|
10
10
|
import * as fs from "node:fs";
|
|
11
|
+
import * as os from "node:os";
|
|
11
12
|
import * as path from "node:path";
|
|
12
13
|
import { getAgentDir } from "@earendil-works/pi-coding-agent";
|
|
13
14
|
import type { Taskflow } from "./schema.ts";
|
|
@@ -69,12 +70,20 @@ function userFlowsDir(): string {
|
|
|
69
70
|
return path.join(getAgentDir(), "taskflows");
|
|
70
71
|
}
|
|
71
72
|
|
|
72
|
-
function
|
|
73
|
+
function findProjectFlowsDirInternal(cwd: string, create = false): string | null {
|
|
73
74
|
// Prefer an existing .pi dir up the tree; else use cwd/.pi when creating.
|
|
75
|
+
// **Never treat `~/.pi/` as a project flow dir** — the home directory is
|
|
76
|
+
// the user-scope boundary, and the user's `~/.pi/` is the agent dir, not a
|
|
77
|
+
// project. We skip the home entry entirely during the walk-up, so even a
|
|
78
|
+
// deeply nested cwd under home will return null (create=false) when no
|
|
79
|
+
// project `.pi` exists on the path.
|
|
80
|
+
const home = os.homedir();
|
|
74
81
|
let dir = cwd;
|
|
75
82
|
while (true) {
|
|
76
|
-
|
|
77
|
-
|
|
83
|
+
if (dir !== home) {
|
|
84
|
+
const candidate = path.join(dir, ".pi");
|
|
85
|
+
if (fs.existsSync(candidate)) return path.join(candidate, "taskflows");
|
|
86
|
+
}
|
|
78
87
|
const parent = path.dirname(dir);
|
|
79
88
|
if (parent === dir) break;
|
|
80
89
|
dir = parent;
|
|
@@ -94,6 +103,11 @@ function readFlowFile(filePath: string, scope: "user" | "project"): SavedFlow |
|
|
|
94
103
|
}
|
|
95
104
|
|
|
96
105
|
/** List all saved flows (project overrides user on name collision). */
|
|
106
|
+
/** Internal-but-exported for tests: walk-up `.pi` finder that skips the home directory's own `.pi`. */
|
|
107
|
+
export function findProjectFlowsDir(cwd: string, create = false): string | null {
|
|
108
|
+
return findProjectFlowsDirInternal(cwd, create);
|
|
109
|
+
}
|
|
110
|
+
|
|
97
111
|
export function listFlows(cwd: string): SavedFlow[] {
|
|
98
112
|
const map = new Map<string, SavedFlow>();
|
|
99
113
|
const dirs: Array<{ dir: string; scope: "user" | "project" }> = [{ dir: userFlowsDir(), scope: "user" }];
|
|
@@ -149,8 +163,11 @@ export function newRunId(flowName: string): string {
|
|
|
149
163
|
export function saveRun(state: RunState): void {
|
|
150
164
|
const dir = runsDir(state.cwd);
|
|
151
165
|
fs.mkdirSync(dir, { recursive: true });
|
|
152
|
-
|
|
153
|
-
|
|
166
|
+
// Clone before stamping updatedAt so the caller's RunState reference is not
|
|
167
|
+
// mutated as a hidden side effect (v0.0.6 audit, F-009). Shallow clone is
|
|
168
|
+
// sufficient: saveRun only serializes; it does not mutate nested objects.
|
|
169
|
+
const toSave = { ...state, updatedAt: Date.now() };
|
|
170
|
+
writeFileAtomic(path.join(dir, `${state.runId}.json`), JSON.stringify(toSave, null, 2));
|
|
154
171
|
}
|
|
155
172
|
|
|
156
173
|
export function loadRun(cwd: string, runId: string): RunState | null {
|
|
@@ -219,7 +236,14 @@ export function listRuns(cwd: string, limit = 20): RunState[] {
|
|
|
219
236
|
/* ignore */
|
|
220
237
|
}
|
|
221
238
|
}
|
|
222
|
-
|
|
239
|
+
// Guard against records missing/with non-numeric `updatedAt` — a bare
|
|
240
|
+
// `JSON.parse` may yield an object without it, and `undefined - undefined`
|
|
241
|
+
// is NaN, which makes `Array.prototype.sort` produce implementation-defined
|
|
242
|
+
// order. Drop those before sorting. (v0.0.8 audit, F-010.)
|
|
243
|
+
return runs
|
|
244
|
+
.filter((r) => typeof r.updatedAt === "number" && !Number.isNaN(r.updatedAt))
|
|
245
|
+
.sort((a, b) => b.updatedAt - a.updatedAt)
|
|
246
|
+
.slice(0, limit);
|
|
223
247
|
}
|
|
224
248
|
|
|
225
249
|
/** Stable hash of a phase's resolved task + inputs, for resume caching. */
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-taskflow",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.9",
|
|
4
4
|
"description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
package/skills/taskflow/SKILL.md
CHANGED
|
@@ -172,6 +172,36 @@ Review the audit results below. If any endpoint is missing auth, end with
|
|
|
172
172
|
{steps.audit.output}
|
|
173
173
|
```
|
|
174
174
|
|
|
175
|
+
### Structured-verify phases (v0.0.8.1)
|
|
176
|
+
|
|
177
|
+
A "verify" phase typically runs `npx tsc --noEmit && npm test && git diff --stat`
|
|
178
|
+
and reports whether everything is green. **Don't** delegate this to a generic
|
|
179
|
+
verifier subagent that summarizes the output in prose — LLMs commonly misread
|
|
180
|
+
shell output (e.g., 234 tests reported as 230, 745 insertions as 599, "1 type
|
|
181
|
+
error" reported as "clean"). Instead, **use a dedicated agent whose task is a
|
|
182
|
+
structured shell pipeline** that echoes structured key/value lines the next
|
|
183
|
+
phase can parse directly. Recommended pattern:
|
|
184
|
+
|
|
185
|
+
```jsonc
|
|
186
|
+
{
|
|
187
|
+
"id": "verify",
|
|
188
|
+
"type": "agent",
|
|
189
|
+
"agent": "verifier",
|
|
190
|
+
"dependsOn": ["apply-fixes"],
|
|
191
|
+
"task": "Run the verification pipeline and report structured results.\n\nExecute:\n```bash\ncd $REPO && npx tsc --noEmit 2>&1 | tee /tmp/tsc.log\ncd $REPO && npm test 2>&1 | tee /tmp/test.log | tail -10\ncd $REPO && git diff --shortstat HEAD | tee /tmp/diff.log\n```\n\nReport EXACTLY in this format (one key=value pair per line, no prose):\ntypecheck=PASS|FAIL\ntests_total=N\ntests_pass=N\ntests_fail=N\ninsertions=N\ndeletions=N\nfiles_changed=N\n\nIf any field is missing, you failed the task — re-run the command and re-read the output.",
|
|
192
|
+
"tools": ["read", "edit", "write", "bash"]
|
|
193
|
+
}
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
The key insight: **LLMs are bad at summarizing shell output, good at copying
|
|
197
|
+
structured data**. Asking for `key=value` pairs with explicit fields and "if
|
|
198
|
+
missing, you failed" forces the agent to read each field carefully. Downstream
|
|
199
|
+
phases that consume `{steps.verify.output}` can then `safeParse` it into a
|
|
200
|
+
JSON object and assert against expected values.
|
|
201
|
+
|
|
202
|
+
For audits where the upstream is LLM-generated prose (not shell output), use a
|
|
203
|
+
plain `gate` phase with `VERDICT:` instead.
|
|
204
|
+
|
|
175
205
|
### Interpolation
|
|
176
206
|
|
|
177
207
|
- `{args.X}` — invocation argument
|
|
@@ -188,12 +218,11 @@ Review the audit results below. If any endpoint is missing auth, end with
|
|
|
188
218
|
3. Reference upstream results explicitly with `{steps.ID...}` and set `dependsOn`.
|
|
189
219
|
4. Mark the result-bearing phase with `"final": true` (else the last phase wins).
|
|
190
220
|
|
|
191
|
-
## Common mistakes (the runtime will
|
|
221
|
+
## Common mistakes (the runtime will reject these at validation time)
|
|
192
222
|
|
|
193
|
-
The runtime validates your flow at startup
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
on the flow.
|
|
223
|
+
The runtime validates your flow at startup. As of v0.0.8.1, the two most
|
|
224
|
+
common authoring mistakes below are **hard validation errors** (the flow
|
|
225
|
+
refuses to start). Fix the flow before running it.
|
|
197
226
|
|
|
198
227
|
### 1. Referencing `{steps.X}` without `dependsOn: ["X"]`
|
|
199
228
|
|
|
@@ -209,10 +238,9 @@ on the flow.
|
|
|
209
238
|
}
|
|
210
239
|
```
|
|
211
240
|
|
|
212
|
-
|
|
213
|
-
{steps.code-review-1.*} but 'code-review-1' is not in dependsOn
|
|
214
|
-
|
|
215
|
-
The TUI shows a `⚠N` badge. **Always declare the chain:**
|
|
241
|
+
Validation now rejects this with: `Phase 'fix-issues': task references
|
|
242
|
+
{steps.code-review-1.*} but 'code-review-1' is not in dependsOn. ...`
|
|
243
|
+
**Always declare the chain:**
|
|
216
244
|
|
|
217
245
|
```jsonc
|
|
218
246
|
// ✅ RIGHT
|
|
@@ -233,7 +261,11 @@ The TUI shows a `⚠N` badge. **Always declare the chain:**
|
|
|
233
261
|
|
|
234
262
|
Tip: write the `task` first (it tells you what each phase needs), then scan for
|
|
235
263
|
`{steps.*}` references and add the matching `dependsOn`. If a phase truly does
|
|
236
|
-
not depend on anything in its task, you can
|
|
264
|
+
not depend on anything in its task, you can omit the reference.
|
|
265
|
+
|
|
266
|
+
Exception: phases with `join: "any"` are exempt from this check, since they
|
|
267
|
+
deliberately wait for only one of their declared deps to complete and may
|
|
268
|
+
reference others as informational context.
|
|
237
269
|
|
|
238
270
|
### 2. Assuming the runtime knows "this is a chain"
|
|
239
271
|
|