pi-taskflow 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,7 +47,7 @@
47
47
  "id": "report",
48
48
  "type": "reduce",
49
49
  "from": ["review"],
50
- "dependsOn": ["review"],
50
+ "dependsOn": ["review", "deep", "quick"],
51
51
  "agent": "doc-writer",
52
52
  "task": "Write a clean markdown brief on \"{args.topic}\" from the validated research:\n\n{steps.deep.output}{steps.quick.output}",
53
53
  "final": true
@@ -26,7 +26,7 @@
26
26
  "id": "implement",
27
27
  "type": "agent",
28
28
  "agent": "executor_code",
29
- "dependsOn": ["approve"],
29
+ "dependsOn": ["approve", "plan"],
30
30
  "task": "Implement the approved plan for {args.target}.\nPlan:\n{steps.plan.output}\nExtra human guidance (if any):\n{steps.approve.output}",
31
31
  "retry": { "max": 1, "backoffMs": 1000 }
32
32
  },
@@ -41,7 +41,7 @@
41
41
  "id": "summary",
42
42
  "type": "reduce",
43
43
  "from": ["review"],
44
- "dependsOn": ["review"],
44
+ "dependsOn": ["review", "implement"],
45
45
  "agent": "doc-writer",
46
46
  "task": "Write a short changelog entry summarizing what was done:\n\n{steps.implement.output}",
47
47
  "final": true
@@ -44,42 +44,56 @@ function loadAgentsFromDir(dir: string, source: "user" | "project"): AgentConfig
44
44
  }
45
45
 
46
46
  for (const entry of entries) {
47
- if (!entry.name.endsWith(".md")) continue;
48
- if (!entry.isFile() && !entry.isSymbolicLink()) continue;
49
-
50
- const filePath = path.join(dir, entry.name);
51
- let content: string;
52
47
  try {
53
- content = fs.readFileSync(filePath, "utf-8");
54
- } catch {
55
- continue;
56
- }
48
+ if (!entry.name.endsWith(".md")) continue;
49
+ if (!entry.isFile() && !entry.isSymbolicLink()) continue;
57
50
 
58
- const { frontmatter, body } = (() => {
51
+ const filePath = path.join(dir, entry.name);
52
+ let content: string;
59
53
  try {
60
- return parseFrontmatter<Record<string, string>>(content);
54
+ content = fs.readFileSync(filePath, "utf-8");
61
55
  } catch {
62
- // A single malformed agent file must not break discovery for every flow.
63
- return { frontmatter: {} as Record<string, string>, body: "" };
56
+ continue;
64
57
  }
65
- })();
66
- if (!frontmatter.name || !frontmatter.description) continue;
67
-
68
- const tools = frontmatter.tools
69
- ?.split(",")
70
- .map((t) => t.trim())
71
- .filter(Boolean);
72
-
73
- agents.push({
74
- name: frontmatter.name,
75
- description: frontmatter.description,
76
- tools: tools && tools.length > 0 ? tools : undefined,
77
- model: frontmatter.model,
78
- thinking: frontmatter.thinking,
79
- systemPrompt: body,
80
- source,
81
- filePath,
82
- });
58
+
59
+ const { frontmatter, body } = (() => {
60
+ try {
61
+ return parseFrontmatter<Record<string, unknown>>(content);
62
+ } catch {
63
+ // A single malformed agent file must not break discovery for every flow.
64
+ return { frontmatter: {} as Record<string, unknown>, body: "" };
65
+ }
66
+ })();
67
+ if (!frontmatter.name || !frontmatter.description) continue;
68
+
69
+ // frontmatter is YAML-parsed: tools may be a comma-separated string ("a, b")
70
+ // OR a YAML sequence ([a, b]). Handle both forms.
71
+ const rawTools = frontmatter.tools;
72
+ const tools: string[] | undefined = Array.isArray(rawTools)
73
+ ? rawTools.map((t) => String(t).trim()).filter(Boolean)
74
+ : rawTools !== undefined && rawTools !== null
75
+ ? String(rawTools)
76
+ .split(",")
77
+ .map((t) => t.trim())
78
+ .filter(Boolean)
79
+ : undefined;
80
+
81
+ agents.push({
82
+ name: String(frontmatter.name),
83
+ description: String(frontmatter.description),
84
+ tools: tools && tools.length > 0 ? tools : undefined,
85
+ model: frontmatter.model === undefined ? undefined : String(frontmatter.model),
86
+ thinking: frontmatter.thinking === undefined ? undefined : String(frontmatter.thinking),
87
+ systemPrompt: body,
88
+ source,
89
+ filePath,
90
+ });
91
+ } catch {
92
+ // Defense-in-depth: a single bad agent file must not break discovery
93
+ // for the entire flow (e.g. exotic YAML shapes, runtime errors in
94
+ // field access, symlink races, etc.).
95
+ continue;
96
+ }
83
97
  }
84
98
  return agents;
85
99
  }
@@ -128,9 +142,15 @@ export function discoverAgents(
128
142
  for (const [name, override] of Object.entries(overrides)) {
129
143
  const agent = agentMap.get(name);
130
144
  if (agent) {
131
- if (override.model !== undefined) agent.model = override.model;
132
- if (override.thinking !== undefined) agent.thinking = override.thinking;
133
- if (override.tools !== undefined) agent.tools = override.tools;
145
+ // Clone before mutating: agentMap owns the original AgentConfig
146
+ // (loaded from disk in loadAgentsFromDir). Mutating it in place
147
+ // would cause cross-contamination for any caller that retains a
148
+ // reference and invokes discoverAgents again with different overrides.
149
+ const mutated: AgentConfig = { ...agent };
150
+ if (override.model !== undefined) mutated.model = override.model;
151
+ if (override.thinking !== undefined) mutated.thinking = override.thinking;
152
+ if (override.tools !== undefined) mutated.tools = override.tools;
153
+ agentMap.set(name, mutated);
134
154
  }
135
155
  }
136
156
  }
@@ -50,8 +50,8 @@ const ShorthandStep = Type.Object(
50
50
  );
51
51
 
52
52
  const TaskflowParams = Type.Object({
53
- action: StringEnum(["run", "save", "resume", "list"] as const, {
54
- description: "What to do: run a flow, save a definition, resume a paused run, or list saved flows",
53
+ action: StringEnum(["run", "save", "resume", "list", "agents"] as const, {
54
+ description: "What to do: run a flow, save a definition, resume a paused run, list saved flows, or list available agents you can use in phases",
55
55
  default: "run",
56
56
  }),
57
57
  name: Type.Optional(Type.String({ description: "Name of a saved flow (for run/save without inline define)" })),
@@ -108,10 +108,6 @@ async function runFlow(
108
108
  onUpdate: ((p: AgentToolResult<TaskflowDetails>) => void) | undefined,
109
109
  existing?: RunState,
110
110
  ): Promise<RuntimeResult> {
111
- const settings = readSubagentSettings();
112
- const scope: AgentScope = def.agentScope ?? "user";
113
- const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
114
-
115
111
  const state = existing ?? makeRunState(def, args, ctx.cwd);
116
112
 
117
113
  const emit = (s: RunState, finalOutput?: string) => {
@@ -166,6 +162,13 @@ async function runFlow(
166
162
  : undefined;
167
163
 
168
164
  try {
165
+ // Discover settings/agents inside try so a YAML/IO crash in
166
+ // discoverAgents or readSubagentSettings (F-001) is caught and
167
+ // the heartbeat timer is cleared by the finally block below.
168
+ const settings = readSubagentSettings();
169
+ const scope: AgentScope = def.agentScope ?? "user";
170
+ const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
171
+
169
172
  const result = await executeTaskflow(state, {
170
173
  cwd: ctx.cwd,
171
174
  agents,
@@ -216,7 +219,7 @@ export default function (pi: ExtensionAPI) {
216
219
  "Phases (agent, parallel, map, gate, reduce, approval, flow) form a DAG; intermediate outputs stay out of your context — only the final phase output is returned.",
217
220
  "Use action=run with an inline `define` (you write the DSL) or a saved `name`.",
218
221
  "For simple non-DAG delegations (like the subagent tool) skip the DSL: pass `task` (+optional `agent`) for one task, `tasks:[{task,agent?}]` to run in parallel, or `chain:[{task,agent?}]` to run sequentially (reference the prior step with {previous.output}).",
219
- "Use action=save to persist a definition as a reusable /tf:<name> command. action=resume continues a paused run. action=list shows saved flows.",
222
+ "Use action=save to persist a definition as a reusable /tf:<name> command. action=resume continues a paused run. action=list shows saved flows. Use action=agents to list available agents — do NOT invent agent names; either use an agent from that list or omit the 'agent' field to auto-select the default agent.",
220
223
  "DSL: {name, args?, concurrency?, budget?:{maxUSD,maxTokens}, phases:[{id, type, agent, task, dependsOn?, join?:'all'|'any', when?, retry?:{max,backoffMs,factor}, over?(map), as?(map), branches?(parallel), from?(reduce), use?(flow), with?(flow), output?:'json', final?}]}.",
221
224
  "Phase types: agent (one subagent), parallel (static branches), map (dynamic fan-out over an array), gate (VERDICT: PASS/BLOCK quality gate), reduce (aggregate from N phases), approval (human-in-the-loop pause), flow (run a saved sub-flow). join:'any' is an OR-join; when is a conditional guard; retry adds backoff; budget caps run cost.",
222
225
  "Interpolation: {args.X}, {steps.ID.output}, {steps.ID.json}, {item} (map), {previous.output}.",
@@ -232,6 +235,21 @@ export default function (pi: ExtensionAPI) {
232
235
  async execute(_id, params, signal, onUpdate, ctx) {
233
236
  const action = params.action ?? "run";
234
237
 
238
+ // agents — list available agents the LLM can use in phase definitions
239
+ if (action === "agents") {
240
+ const scope = params.scope ?? "both";
241
+ const { agents } = discoverAgents(ctx.cwd, scope as AgentScope, undefined);
242
+ const text = agents.length
243
+ ? agents
244
+ .map(
245
+ (a) =>
246
+ `- ${a.name} (${a.source}): ${a.description}${a.model ? ` [model: ${a.model}]` : ""}${a.tools?.length ? ` [tools: ${a.tools.join(", ")}]` : ""}`,
247
+ )
248
+ .join("\n")
249
+ : "No agents found. Use the default agent by omitting the 'agent' field in phases.";
250
+ return { content: [{ type: "text", text }], details: { action } satisfies TaskflowDetails };
251
+ }
252
+
235
253
  // list
236
254
  if (action === "list") {
237
255
  const flows = listFlows(ctx.cwd);
@@ -27,10 +27,13 @@ export interface InterpolationResult {
27
27
  missing: string[];
28
28
  }
29
29
 
30
- export function interpolate(template: string, ctx: InterpolationContext): InterpolationResult {
30
+ export function interpolate(
31
+ template: string | null | undefined,
32
+ ctx: InterpolationContext,
33
+ ): InterpolationResult {
31
34
  const missing: string[] = [];
32
35
 
33
- const text = template.replace(PLACEHOLDER, (whole, path: string) => {
36
+ const text = String(template ?? "").replace(PLACEHOLDER, (whole, path: string) => {
34
37
  const value = resolvePath(path, ctx);
35
38
  if (value === undefined) {
36
39
  missing.push(path);
@@ -134,6 +137,24 @@ export function safeParse(text: string): unknown {
134
137
  }
135
138
  }
136
139
  }
140
+ // Anti-pattern detection (v0.0.8.1): array followed by a stray top-level
141
+ // "key": value. A common LLM mistake — the model appends
142
+ // `"deferred": [...]` after a JSON array, producing a non-JSON hybrid that
143
+ // none of the above strategies can recover. We surface a diagnostic hint
144
+ // so flow authors can spot the bug fast.
145
+ //
146
+ // We check the original (trimmed) input rather than the slice tail,
147
+ // because `lastIndexOf(close)` lands on the *last* bracket — for the
148
+ // anti-pattern the stray key is between the array's `]` and the trailing
149
+ // `]`, not after the last one.
150
+ if (/]\s*[\},]?\s*"[^"\n]+"\s*:/.test(trimmed)) {
151
+ console.warn(
152
+ "[pi-taskflow safeParse] input looks like a JSON array followed by a stray top-level key " +
153
+ `(pattern: [{...}], "key": ...). This is not valid JSON. ` +
154
+ `Hint: put extra data as array members (e.g. {"id":"D-001","status":"deferred",...}) ` +
155
+ `or split into a separate phase.`,
156
+ );
157
+ }
137
158
  return undefined;
138
159
  }
139
160
 
@@ -142,7 +163,7 @@ export function coerceArray(value: unknown): unknown[] | null {
142
163
  if (Array.isArray(value)) return value;
143
164
  if (value && typeof value === "object") {
144
165
  // {items: [...]} or {results: [...]} convenience
145
- for (const key of ["items", "results", "list", "data"]) {
166
+ for (const key of ["items", "results", "list", "data", "findings"]) {
146
167
  const v = (value as Record<string, unknown>)[key];
147
168
  if (Array.isArray(v)) return v;
148
169
  }
@@ -71,7 +71,10 @@ function agentRole(phase: Phase, ps: PhaseState | undefined, theme: Theme): stri
71
71
 
72
72
  function costStr(usage: UsageStats | undefined, theme: Theme): string {
73
73
  if (!usage?.cost) return "";
74
- return theme.fg("muted", `$${usage.cost.toFixed(3)}`);
74
+ const c = usage.cost;
75
+ return c >= 0.01
76
+ ? theme.fg("muted", `$${c.toFixed(2)}`)
77
+ : theme.fg("muted", `$${c.toFixed(4)}`);
75
78
  }
76
79
 
77
80
  function aggregateCost(state: RunState): number {
@@ -174,6 +177,7 @@ function phaseDetail(phase: Phase, ps: PhaseState | undefined, theme: Theme): st
174
177
  const color = d === "reject" ? "error" : d === "edit" ? "warning" : "success";
175
178
  let a = theme.fg("warning", "⚠") + " " + theme.fg(color as Parameters<typeof theme.fg>[0], theme.bold(d.toUpperCase()));
176
179
  if (ps.approval.auto) a += theme.fg("dim", " auto");
180
+ if (cost) a += ` ${cost}`;
177
181
  if (time) a += ` ${time}`;
178
182
  if (ps.warnings?.length) a += theme.fg("warning", ` ⚠${ps.warnings.length}`);
179
183
  return a;
@@ -228,8 +232,8 @@ function headerLine(state: RunState, theme: Theme): string {
228
232
  if (state.status === "blocked") line += theme.fg("error", " · blocked");
229
233
  const cost = aggregateCost(state);
230
234
  const budget = state.def.budget;
231
- if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost.toFixed(3)}/$${budget.maxUSD}`);
232
- else if (cost) line += theme.fg("muted", ` · $${cost.toFixed(3)}`);
235
+ if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}/$${budget.maxUSD}`);
236
+ else if (cost) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}`);
233
237
  const el = runElapsed(state);
234
238
  if (el) line += theme.fg("dim", ` · ${elapsed(el)}`);
235
239
  return line;
@@ -203,14 +203,10 @@ function summarizeToolCall(name: string, args: Record<string, unknown>): string
203
203
  }
204
204
  }
205
205
 
206
- async function writePromptToTempFile(agentName: string, prompt: string): Promise<{ dir: string; filePath: string }> {
207
- const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
208
- const safeName = agentName.replace(/[^\w.-]+/g, "_");
209
- const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
206
+ async function writePromptToTempFile(filePath: string, prompt: string): Promise<void> {
210
207
  await withFileMutationQueue(filePath, async () => {
211
208
  await fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 });
212
209
  });
213
- return { dir: tmpDir, filePath };
214
210
  }
215
211
 
216
212
  function getPiInvocation(args: string[]): { command: string; args: string[] } {
@@ -284,9 +280,13 @@ export async function runAgentTask(
284
280
 
285
281
  try {
286
282
  if (agent.systemPrompt.trim()) {
287
- const tmp = await writePromptToTempFile(agent.name, agent.systemPrompt);
288
- tmpPromptDir = tmp.dir;
289
- tmpPromptPath = tmp.filePath;
283
+ // Allocate the temp dir + path BEFORE any fallible I/O so that if
284
+ // writeFile throws, tmpPromptDir/tmpPromptPath are already set and
285
+ // the finally block can clean up the directory (F-004).
286
+ tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
287
+ const safeName = agent.name.replace(/[^\w.-]+/g, "_");
288
+ tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
289
+ await writePromptToTempFile(tmpPromptPath, agent.systemPrompt);
290
290
  args.push("--append-system-prompt", tmpPromptPath);
291
291
  }
292
292
  args.push(`Task: ${task}`);
@@ -319,15 +319,25 @@ export async function runAgentTask(
319
319
  if (buffer.trim()) processLine(buffer);
320
320
  resolve(code ?? 0);
321
321
  });
322
- proc.on("error", () => resolve(1));
322
+ proc.on("error", (err) => {
323
+ if (!result.stderr) result.stderr = err.message;
324
+ if (!result.errorMessage) result.errorMessage = err.message;
325
+ resolve(1);
326
+ });
323
327
 
324
328
  if (opts.signal) {
325
329
  const kill = () => {
326
330
  wasAborted = true;
327
331
  proc.kill("SIGTERM");
328
- setTimeout(() => {
329
- if (!proc.killed) proc.kill("SIGKILL");
330
- }, 5000);
332
+ // Force-kill fallback. proc.kill("SIGKILL") is idempotent if
333
+ // the process already exited, and `proc.killed` is set true
334
+ // synchronously by the SIGTERM above — so the previous
335
+ // `if (!proc.killed)` guard would skip SIGKILL entirely,
336
+ // hanging forever on a child that ignores SIGTERM.
337
+ // .unref() keeps the timer from holding the event loop open
338
+ // after the process is gone.
339
+ const forceKill = setTimeout(() => proc.kill("SIGKILL"), 5000);
340
+ forceKill.unref();
331
341
  };
332
342
  if (opts.signal.aborted) kill();
333
343
  else opts.signal.addEventListener("abort", kill, { once: true });
@@ -349,12 +359,20 @@ export async function runAgentTask(
349
359
  // `output`: upstream providers (e.g. a Cloudflare challenge page) can
350
360
  // surface huge HTML/JSON in errorMessage, and that garbage would
351
361
  // otherwise flow into downstream phase interpolations.
352
- if (isFailed(result) && !result.output) {
353
- result.output = TRANSPORT_ERROR_PLACEHOLDER;
354
- if (!result.errorMessage) {
355
- result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
362
+ // Sanitization must run whenever the run failed, even if some output
363
+ // was already emitted (e.g. crash mid-stream with a partial result):
364
+ // an unsanitized errorMessage would still leak into PhaseState and
365
+ // downstream interpolation contexts. (F-013)
366
+ if (isFailed(result)) {
367
+ if (!result.output) {
368
+ result.output = TRANSPORT_ERROR_PLACEHOLDER;
369
+ if (!result.errorMessage) {
370
+ result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
371
+ }
372
+ }
373
+ if (result.errorMessage) {
374
+ result.errorMessage = sanitizeErrorMessage(result.errorMessage);
356
375
  }
357
- result.errorMessage = sanitizeErrorMessage(result.errorMessage);
358
376
  }
359
377
  return result;
360
378
  } finally {
@@ -50,6 +50,9 @@ export class RunHistoryComponent {
50
50
  private cachedLines?: string[];
51
51
 
52
52
  constructor(runs: RunState[], theme: Theme, onDone: (result?: RunHistoryResult) => void) {
53
+ if (!runs.length) {
54
+ throw new Error("RunHistoryComponent requires at least one run");
55
+ }
53
56
  this.runs = runs;
54
57
  this.theme = theme;
55
58
  this.onDone = onDone;
@@ -414,11 +414,12 @@ async function executePhase(
414
414
  if (type === "agent" || type === "gate" || type === "reduce") {
415
415
  const { text } = interpolate(phase.task ?? "", ctx);
416
416
  const fullTask = preRead + text;
417
- const inputHash = hashInput(phase.id, phase.agent ?? "", fullTask);
417
+ const agentName = resolveAgent(phase.agent, deps, state);
418
+ const inputHash = hashInput(phase.id, agentName, fullTask);
418
419
  const cached = cachedPhase(prior, inputHash);
419
420
  if (cached) return cached;
420
421
 
421
- const r = await runOne(phase.agent ?? defaultAgent(deps), fullTask, liveSink(state, phase.id, emitProgress));
422
+ const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
422
423
  const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
423
424
  if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
424
425
  return ps;
@@ -428,7 +429,7 @@ async function executePhase(
428
429
  const branches = (phase.branches ?? []).map((b) => {
429
430
  const r = interpolate(b.task, ctx);
430
431
  return {
431
- agent: b.agent ?? phase.agent ?? defaultAgent(deps),
432
+ agent: resolveAgent(b.agent ?? phase.agent, deps, state),
432
433
  task: preRead + r.text,
433
434
  };
434
435
  });
@@ -458,7 +459,7 @@ async function executePhase(
458
459
  const tasks = arr.map((item) => {
459
460
  const localCtx = buildInterpolationContext(state, previousOutput, { [loopVar]: item });
460
461
  return {
461
- agent: phase.agent ?? defaultAgent(deps),
462
+ agent: resolveAgent(phase.agent, deps, state),
462
463
  task: preRead + interpolate(phase.task ?? "", localCtx).text,
463
464
  };
464
465
  });
@@ -551,14 +552,20 @@ async function executePhase(
551
552
  baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
552
553
  const subResult = await executeTaskflow(subState, {
553
554
  ...deps,
555
+ // Override deps.cwd with the flow phase's own cwd so that sub-flow
556
+ // phases without an explicit cwd derive their subagents from the
557
+ // flow's cwd (not the caller's cwd).
558
+ cwd: phase.cwd ?? deps.cwd,
554
559
  runTask: subRunTask,
555
560
  _stack: [...stack, state.flowName],
556
561
  persist: undefined,
557
562
  onProgress: () => {
558
563
  if (live) {
559
564
  const ph = Object.values(subState.phases);
565
+ // B-F015: `done` must include both success and failure so the
566
+ // renderer's `done - failed` shows the true success count.
560
567
  live.subProgress = {
561
- done: ph.filter((p) => p.status === "done").length,
568
+ done: ph.filter((p) => p.status === "done" || p.status === "failed").length,
562
569
  total: subDef.phases.length,
563
570
  running: ph.filter((p) => p.status === "running").length,
564
571
  failed: ph.filter((p) => p.status === "failed").length,
@@ -577,8 +584,11 @@ async function executePhase(
577
584
  output: subResult.finalOutput,
578
585
  json: parseJson ? safeParse(subResult.finalOutput) : undefined,
579
586
  usage: subResult.totalUsage,
587
+ // B-F015: include failed in `done` so the renderer's
588
+ // `done - failed` formula gives the success count (matches the
589
+ // map/parallel runner's overlapping-counter convention).
580
590
  subProgress: {
581
- done: sp.filter((p) => p.status === "done").length,
591
+ done: sp.filter((p) => p.status === "done" || p.status === "failed").length,
582
592
  total: subDef.phases.length,
583
593
  running: 0,
584
594
  failed: sp.filter((p) => p.status === "failed").length,
@@ -632,6 +642,27 @@ function cachedPhase(prior: PhaseState | undefined, inputHash: string): PhaseSta
632
642
  return null;
633
643
  }
634
644
 
645
+ /**
646
+ * Resolve an agent name against available agents. Falls back to the default
647
+ * agent if the requested agent isn't found, logging a warning via safeEmit.
648
+ */
649
+ function resolveAgent(name: string | undefined, deps: RuntimeDeps, state: RunState): string {
650
+ const resolved = name ?? defaultAgent(deps);
651
+ if (name && !deps.agents.some((a) => a.name === name)) {
652
+ const fallback = defaultAgent(deps);
653
+ // Log only once per run to avoid noise.
654
+ if (!(state as any).__unknownAgentWarned) {
655
+ (state as any).__unknownAgentWarned = new Set<string>();
656
+ }
657
+ if (!(state as any).__unknownAgentWarned.has(name)) {
658
+ (state as any).__unknownAgentWarned.add(name);
659
+ console.warn(`[taskflow] Unknown agent "${name}", falling back to "${fallback}". Use action=agents to list available agents.`);
660
+ }
661
+ return fallback;
662
+ }
663
+ return resolved;
664
+ }
665
+
635
666
  function defaultAgent(deps: RuntimeDeps): string {
636
667
  return deps.agents[0]?.name ?? "default";
637
668
  }
@@ -649,7 +680,10 @@ export function parseGateVerdict(output: string): { verdict: "pass" | "block"; r
649
680
  if (typeof o.continue === "boolean") return { verdict: o.continue ? "pass" : "block", reason: asReason(o.reason) };
650
681
  if (typeof o.pass === "boolean") return { verdict: o.pass ? "pass" : "block", reason: asReason(o.reason) };
651
682
  if (typeof o.verdict === "string") {
652
- const block = /block|fail|stop|reject|halt|\bno\b/i.test(o.verdict);
683
+ // Note: do NOT include standalone "no" — natural-language verdicts like
684
+ // "No issues found" / "no errors" would otherwise be false-positive BLOCK.
685
+ // Fail-open covers any ambiguous text.
686
+ const block = /block|fail|stop|reject|halt/i.test(o.verdict);
653
687
  return { verdict: block ? "block" : "pass", reason: asReason(o.reason) };
654
688
  }
655
689
  }
@@ -666,11 +700,86 @@ function asReason(v: unknown): string | undefined {
666
700
  return typeof v === "string" && v.trim() ? v.trim() : undefined;
667
701
  }
668
702
 
703
+ /**
704
+ * Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
705
+ *
706
+ * A throw from a host-supplied callback must NEVER replace the runtime's
707
+ * outcome — neither the original crash message in `executeTaskflow`'s catch
708
+ * block, nor the final output of a successful run. Callbacks are observability
709
+ * hooks; the run survives their failure.
710
+ *
711
+ * Used at every "checkpoint" call site (phase start, phase end, terminal state).
712
+ * For high-frequency live updates inside a phase, see `safeProgress` below.
713
+ */
714
+ function safeEmit(deps: RuntimeDeps, state: RunState): void {
715
+ try {
716
+ deps.persist?.(state);
717
+ } catch {
718
+ // user callback — must not break the run
719
+ }
720
+ try {
721
+ deps.onProgress?.(state);
722
+ } catch {
723
+ // user callback — must not break the run
724
+ }
725
+ }
726
+
727
+ /**
728
+ * Like `safeEmit` but for the high-frequency live-update channel only.
729
+ * Skips `persist` (which is intentionally checkpoint-only) and swallows any
730
+ * throw from the user-supplied `onProgress` so a misbehaving TUI sink cannot
731
+ * disrupt an in-flight phase.
732
+ */
733
+ function safeProgress(deps: RuntimeDeps, state: RunState): void {
734
+ try {
735
+ deps.onProgress?.(state);
736
+ } catch {
737
+ // user callback — must not break the run
738
+ }
739
+ }
740
+
669
741
  /**
670
742
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
671
743
  */
744
+ function ensureImplicitGate(def: Taskflow): void {
745
+ // Respect explicit opt-out
746
+ if ((def as any).implicitGate === false) return;
747
+
748
+ const hasGate = def.phases.some(
749
+ (p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
750
+ );
751
+ if (hasGate || def.phases.length === 0) return;
752
+
753
+ // The last existing phase is the effective "final" phase — pin it so the
754
+ // injected gate doesn't become the finalOutput.
755
+ const lastPhase = def.phases[def.phases.length - 1];
756
+ if (!lastPhase.final && !def.phases.some((p) => p.final)) {
757
+ lastPhase.final = true;
758
+ }
759
+
760
+ const allIds = def.phases.map((p) => p.id);
761
+ def.phases.push({
762
+ id: "_implicit-gate",
763
+ type: "gate",
764
+ dependsOn: allIds,
765
+ agent: "reviewer",
766
+ task: `Review all phase outputs from this taskflow for accuracy and consistency.
767
+
768
+ For each upstream phase, scan its output for:
769
+ 1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
770
+ 2. **Internal contradictions**: Do any phases contradict each other?
771
+ 3. **Completeness**: Is any output truncated, empty, or anomalously short?
772
+ 4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
773
+
774
+ Output:
775
+ - If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
776
+ - If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
777
+ });
778
+ }
779
+
672
780
  export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
673
781
  const def: Taskflow = state.def;
782
+ ensureImplicitGate(def);
674
783
  try {
675
784
  return await runTaskflowLayers(state, deps);
676
785
  } catch (e) {
@@ -685,8 +794,7 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
685
794
  }
686
795
  }
687
796
  state.status = "failed";
688
- deps.persist?.(state);
689
- deps.onProgress?.(state);
797
+ safeEmit(deps, state);
690
798
  const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
691
799
  return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
692
800
  }
@@ -697,8 +805,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
697
805
  const layers = topoLayers(def.phases);
698
806
 
699
807
  state.status = "running";
700
- deps.persist?.(state);
701
- deps.onProgress?.(state);
808
+ safeEmit(deps, state);
702
809
 
703
810
  let aborted = false;
704
811
  let gateBlocked = false;
@@ -756,8 +863,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
756
863
  endedAt: Date.now(),
757
864
  usage: emptyUsage(),
758
865
  };
759
- deps.persist?.(state);
760
- deps.onProgress?.(state);
866
+ safeEmit(deps, state);
761
867
  return;
762
868
  }
763
869
 
@@ -768,9 +874,9 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
768
874
  status: "running",
769
875
  startedAt,
770
876
  };
771
- deps.onProgress?.(state);
877
+ safeProgress(deps, state);
772
878
 
773
- const ps = await executePhase(phase, state, deps, prior, () => deps.onProgress?.(state));
879
+ const ps = await executePhase(phase, state, deps, prior, () => safeProgress(deps, state));
774
880
  // Preserve the phase start time: executePhase returns a fresh PhaseState
775
881
  // that omits startedAt (cached/resumed results carry their own).
776
882
  state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
@@ -793,8 +899,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
793
899
  budgetBlocked = true;
794
900
  budgetReason = ob.reason;
795
901
  }
796
- deps.persist?.(state);
797
- deps.onProgress?.(state);
902
+ safeEmit(deps, state);
798
903
  });
799
904
  }
800
905
 
@@ -818,8 +923,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
818
923
  : anyFailed
819
924
  ? "failed"
820
925
  : "completed";
821
- deps.persist?.(state);
822
- deps.onProgress?.(state);
926
+ safeEmit(deps, state);
823
927
 
824
928
  let finalOutput = finalState?.output ?? "(no output)";
825
929
  if (gateBlocked) {
@@ -147,6 +147,12 @@ export const TaskflowSchema = Type.Object(
147
147
  }),
148
148
  ),
149
149
  phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
150
+ implicitGate: Type.Optional(
151
+ Type.Boolean({
152
+ description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
153
+ default: true,
154
+ }),
155
+ ),
150
156
  },
151
157
  { additionalProperties: false },
152
158
  );
@@ -184,7 +190,11 @@ export function isShorthand(def: unknown): boolean {
184
190
  if (typeof def !== "object" || def === null) return false;
185
191
  const d = def as Record<string, unknown>;
186
192
  if (Array.isArray(d.phases)) return false;
187
- return Array.isArray(d.chain) || Array.isArray(d.tasks) || typeof d.task === "string";
193
+ return (
194
+ (Array.isArray(d.chain) && d.chain.length > 0) ||
195
+ (Array.isArray(d.tasks) && d.tasks.length > 0) ||
196
+ typeof d.task === "string"
197
+ );
188
198
  }
189
199
 
190
200
  function readStep(s: unknown): ShorthandStep {
@@ -355,20 +365,27 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
355
365
  const finals = (flow.phases as Phase[]).filter((p) => p?.final);
356
366
  if (finals.length > 1) errors.push(`Only one phase may be marked 'final' (found ${finals.length})`);
357
367
 
358
- // --- Soft warnings: {steps.X.*} references that aren't declared deps -------
368
+ // --- Hard errors: {steps.X.*} references that aren't declared deps ------
359
369
  // Catches the most common authoring mistake: the task talks about
360
370
  // `{steps.review.output}` but `dependsOn: ["review"]` is missing, so the
361
371
  // phase runs in parallel with `review` and the model sees the literal
362
- // placeholder string. The runtime can't infer the intent.
372
+ // placeholder string. The runtime can't infer the intent — fail fast at
373
+ // validation time so the mistake is caught before the run starts.
374
+ //
375
+ // Phases with `join: "any"` are exempt: by design they only need ONE of
376
+ // their declared deps to complete, and may reference other phases as
377
+ // informational context (not as true dependencies).
363
378
  if (errors.length === 0) {
364
379
  const idToPhase = new Map((flow.phases as Phase[]).map((p) => [p.id, p]));
365
380
  for (const p of flow.phases as Phase[]) {
366
381
  if (!p?.id) continue;
382
+ const isJoinAny = p.join === "any";
383
+ if (isJoinAny) continue;
367
384
  const deps = new Set(dependenciesOf(p));
368
385
  const refs = collectRefs(p);
369
386
  for (const ref of refs.steps) {
370
387
  if (ref === p.id) {
371
- warnings.push(`Phase '${p.id}': references its own output via {steps.${ref}.*}; this is almost always a bug.`);
388
+ errors.push(`Phase '${p.id}': references its own output via {steps.${ref}.*}; this is almost always a bug.`);
372
389
  continue;
373
390
  }
374
391
  if (!idToPhase.has(ref)) {
@@ -378,7 +395,7 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
378
395
  continue;
379
396
  }
380
397
  if (!deps.has(ref)) {
381
- warnings.push(
398
+ errors.push(
382
399
  `Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not in dependsOn. ` +
383
400
  `The phase will run in parallel with '${ref}' and see the literal placeholder. ` +
384
401
  `Add "dependsOn": ["${ref}"] (or include '${ref}' transitively).`,
@@ -8,6 +8,7 @@
8
8
 
9
9
  import * as crypto from "node:crypto";
10
10
  import * as fs from "node:fs";
11
+ import * as os from "node:os";
11
12
  import * as path from "node:path";
12
13
  import { getAgentDir } from "@earendil-works/pi-coding-agent";
13
14
  import type { Taskflow } from "./schema.ts";
@@ -69,12 +70,20 @@ function userFlowsDir(): string {
69
70
  return path.join(getAgentDir(), "taskflows");
70
71
  }
71
72
 
72
- function findProjectFlowsDir(cwd: string, create = false): string | null {
73
+ function findProjectFlowsDirInternal(cwd: string, create = false): string | null {
73
74
  // Prefer an existing .pi dir up the tree; else use cwd/.pi when creating.
75
+ // **Never treat `~/.pi/` as a project flow dir** — the home directory is
76
+ // the user-scope boundary, and the user's `~/.pi/` is the agent dir, not a
77
+ // project. We skip the home entry entirely during the walk-up, so even a
78
+ // deeply nested cwd under home will return null (create=false) when no
79
+ // project `.pi` exists on the path.
80
+ const home = os.homedir();
74
81
  let dir = cwd;
75
82
  while (true) {
76
- const candidate = path.join(dir, ".pi");
77
- if (fs.existsSync(candidate)) return path.join(candidate, "taskflows");
83
+ if (dir !== home) {
84
+ const candidate = path.join(dir, ".pi");
85
+ if (fs.existsSync(candidate)) return path.join(candidate, "taskflows");
86
+ }
78
87
  const parent = path.dirname(dir);
79
88
  if (parent === dir) break;
80
89
  dir = parent;
@@ -94,6 +103,11 @@ function readFlowFile(filePath: string, scope: "user" | "project"): SavedFlow |
94
103
  }
95
104
 
96
105
  /** List all saved flows (project overrides user on name collision). */
106
+ /** Internal-but-exported for tests: walk-up `.pi` finder that skips the home directory. */
107
+ export function findProjectFlowsDir(cwd: string, create = false): string | null {
108
+ return findProjectFlowsDirInternal(cwd, create);
109
+ }
110
+
97
111
  export function listFlows(cwd: string): SavedFlow[] {
98
112
  const map = new Map<string, SavedFlow>();
99
113
  const dirs: Array<{ dir: string; scope: "user" | "project" }> = [{ dir: userFlowsDir(), scope: "user" }];
@@ -149,8 +163,11 @@ export function newRunId(flowName: string): string {
149
163
  export function saveRun(state: RunState): void {
150
164
  const dir = runsDir(state.cwd);
151
165
  fs.mkdirSync(dir, { recursive: true });
152
- state.updatedAt = Date.now();
153
- writeFileAtomic(path.join(dir, `${state.runId}.json`), JSON.stringify(state, null, 2));
166
+ // Clone before stamping updatedAt so the caller's RunState reference is not
167
+ // mutated as a hidden side effect (v0.0.6 audit, F-009). Shallow clone is
168
+ // sufficient: saveRun only serializes; it does not mutate nested objects.
169
+ const toSave = { ...state, updatedAt: Date.now() };
170
+ writeFileAtomic(path.join(dir, `${state.runId}.json`), JSON.stringify(toSave, null, 2));
154
171
  }
155
172
 
156
173
  export function loadRun(cwd: string, runId: string): RunState | null {
@@ -219,7 +236,14 @@ export function listRuns(cwd: string, limit = 20): RunState[] {
219
236
  /* ignore */
220
237
  }
221
238
  }
222
- return runs.sort((a, b) => b.updatedAt - a.updatedAt).slice(0, limit);
239
+ // Guard against records missing/with non-numeric `updatedAt` — a bare
240
+ // `JSON.parse` may yield an object without it, and `undefined - undefined`
241
+ // is NaN, which makes `Array.prototype.sort` produce implementation-defined
242
+ // order. Drop those before sorting. (v0.0.8 audit, F-010.)
243
+ return runs
244
+ .filter((r) => typeof r.updatedAt === "number" && !Number.isNaN(r.updatedAt))
245
+ .sort((a, b) => b.updatedAt - a.updatedAt)
246
+ .slice(0, limit);
223
247
  }
224
248
 
225
249
  /** Stable hash of a phase's resolved task + inputs, for resume caching. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-taskflow",
3
- "version": "0.0.7",
3
+ "version": "0.0.9",
4
4
  "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -172,6 +172,36 @@ Review the audit results below. If any endpoint is missing auth, end with
172
172
  {steps.audit.output}
173
173
  ```
174
174
 
175
+ ### Structured-verify phases (v0.0.8.1)
176
+
177
+ A "verify" phase typically runs `npx tsc --noEmit && npm test && git diff --stat`
178
+ and reports whether everything is green. **Don't** delegate this to a generic
179
+ verifier subagent that summarizes the output in prose — LLMs commonly misread
180
+ shell output (e.g., 234 tests reported as 230, 745 insertions as 599, "1 type
181
+ error" reported as "clean"). Instead, **use a dedicated agent whose task is a
182
+ structured shell pipeline** that echoes structured key/value lines the next
183
+ phase can parse directly. Recommended pattern:
184
+
185
+ ```jsonc
186
+ {
187
+ "id": "verify",
188
+ "type": "agent",
189
+ "agent": "verifier",
190
+ "dependsOn": ["apply-fixes"],
191
+ "task": "Run the verification pipeline and report structured results.\n\nExecute:\n```bash\ncd $REPO && npx tsc --noEmit 2>&1 | tee /tmp/tsc.log\ncd $REPO && npm test 2>&1 | tee /tmp/test.log | tail -10\ncd $REPO && git diff --shortstat HEAD | tee /tmp/diff.log\n```\n\nReport EXACTLY in this format (one key=value pair per line, no prose):\ntypecheck=PASS|FAIL\ntests_total=N\ntests_pass=N\ntests_fail=N\ninsertions=N\ndeletions=N\nfiles_changed=N\n\nIf any field is missing, you failed the task — re-run the command and re-read the output.",
192
+ "tools": ["read", "edit", "write", "bash"]
193
+ }
194
+ ```
195
+
196
+ The key insight: **LLMs are bad at summarizing shell output, good at copying
197
+ structured data**. Asking for `key=value` pairs with explicit fields and "if
198
+ missing, you failed" forces the agent to read each field carefully. Downstream
199
+ phases that consume `{steps.verify.output}` can then `safeParse` it into a
200
+ JSON object and assert against expected values.
201
+
202
+ For audits where the upstream is LLM-generated prose (not shell output), use a
203
+ plain `gate` phase with `VERDICT:` instead.
204
+
175
205
  ### Interpolation
176
206
 
177
207
  - `{args.X}` — invocation argument
@@ -188,12 +218,11 @@ Review the audit results below. If any endpoint is missing auth, end with
188
218
  3. Reference upstream results explicitly with `{steps.ID...}` and set `dependsOn`.
189
219
  4. Mark the result-bearing phase with `"final": true` (else the last phase wins).
190
220
 
191
- ## Common mistakes (the runtime will warn you, but don't trip them)
221
+ ## Common mistakes (the runtime will reject these at validation time)
192
222
 
193
- The runtime validates your flow at startup and at each phase's interpolation.
194
- Two patterns account for ~all the broken runs in the wild — avoid them. If you
195
- want warnings like these to become hard failures, set `"strictInterpolation": true`
196
- on the flow.
223
+ The runtime validates your flow at startup. As of v0.0.8.1, the two most
224
+ common authoring mistakes below are **hard validation errors** (the flow
225
+ refuses to start). Fix the flow before running it.
197
226
 
198
227
  ### 1. Referencing `{steps.X}` without `dependsOn: ["X"]`
199
228
 
@@ -209,10 +238,9 @@ on the flow.
209
238
  }
210
239
  ```
211
240
 
212
- The runtime logs a warning at run start (`Phase 'fix-issues': task references
213
- {steps.code-review-1.*} but 'code-review-1' is not in dependsOn`) and the phase
214
- itself gets a `warnings` field with a non-fatal `unresolved placeholders` line.
215
- The TUI shows a `⚠N` badge. **Always declare the chain:**
241
+ Validation now rejects this with: `Phase 'fix-issues': task references
242
+ {steps.code-review-1.*} but 'code-review-1' is not in dependsOn. ...`
243
+ **Always declare the chain:**
216
244
 
217
245
  ```jsonc
218
246
  // ✅ RIGHT
@@ -233,7 +261,11 @@ The TUI shows a `⚠N` badge. **Always declare the chain:**
233
261
 
234
262
  Tip: write the `task` first (it tells you what each phase needs), then scan for
235
263
  `{steps.*}` references and add the matching `dependsOn`. If a phase truly does
236
- not depend on anything in its task, you can ignore the warning.
264
+ not need another phase's output, remove the `{steps.*}` reference from its task.
265
+
266
+ Exception: phases with `join: "any"` are exempt from this check, since they
267
+ deliberately wait for only one of their declared deps to complete and may
268
+ reference others as informational context.
237
269
 
238
270
  ### 2. Assuming the runtime knows "this is a chain"
239
271