pi-taskflow 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,7 +47,7 @@
47
47
  "id": "report",
48
48
  "type": "reduce",
49
49
  "from": ["review"],
50
- "dependsOn": ["review"],
50
+ "dependsOn": ["review", "deep", "quick"],
51
51
  "agent": "doc-writer",
52
52
  "task": "Write a clean markdown brief on \"{args.topic}\" from the validated research:\n\n{steps.deep.output}{steps.quick.output}",
53
53
  "final": true
@@ -26,7 +26,7 @@
26
26
  "id": "implement",
27
27
  "type": "agent",
28
28
  "agent": "executor_code",
29
- "dependsOn": ["approve"],
29
+ "dependsOn": ["approve", "plan"],
30
30
  "task": "Implement the approved plan for {args.target}.\nPlan:\n{steps.plan.output}\nExtra human guidance (if any):\n{steps.approve.output}",
31
31
  "retry": { "max": 1, "backoffMs": 1000 }
32
32
  },
@@ -41,7 +41,7 @@
41
41
  "id": "summary",
42
42
  "type": "reduce",
43
43
  "from": ["review"],
44
- "dependsOn": ["review"],
44
+ "dependsOn": ["review", "implement"],
45
45
  "agent": "doc-writer",
46
46
  "task": "Write a short changelog entry summarizing what was done:\n\n{steps.implement.output}",
47
47
  "final": true
@@ -44,42 +44,56 @@ function loadAgentsFromDir(dir: string, source: "user" | "project"): AgentConfig
44
44
  }
45
45
 
46
46
  for (const entry of entries) {
47
- if (!entry.name.endsWith(".md")) continue;
48
- if (!entry.isFile() && !entry.isSymbolicLink()) continue;
49
-
50
- const filePath = path.join(dir, entry.name);
51
- let content: string;
52
47
  try {
53
- content = fs.readFileSync(filePath, "utf-8");
54
- } catch {
55
- continue;
56
- }
48
+ if (!entry.name.endsWith(".md")) continue;
49
+ if (!entry.isFile() && !entry.isSymbolicLink()) continue;
57
50
 
58
- const { frontmatter, body } = (() => {
51
+ const filePath = path.join(dir, entry.name);
52
+ let content: string;
59
53
  try {
60
- return parseFrontmatter<Record<string, string>>(content);
54
+ content = fs.readFileSync(filePath, "utf-8");
61
55
  } catch {
62
- // A single malformed agent file must not break discovery for every flow.
63
- return { frontmatter: {} as Record<string, string>, body: "" };
56
+ continue;
64
57
  }
65
- })();
66
- if (!frontmatter.name || !frontmatter.description) continue;
67
-
68
- const tools = frontmatter.tools
69
- ?.split(",")
70
- .map((t) => t.trim())
71
- .filter(Boolean);
72
-
73
- agents.push({
74
- name: frontmatter.name,
75
- description: frontmatter.description,
76
- tools: tools && tools.length > 0 ? tools : undefined,
77
- model: frontmatter.model,
78
- thinking: frontmatter.thinking,
79
- systemPrompt: body,
80
- source,
81
- filePath,
82
- });
58
+
59
+ const { frontmatter, body } = (() => {
60
+ try {
61
+ return parseFrontmatter<Record<string, unknown>>(content);
62
+ } catch {
63
+ // A single malformed agent file must not break discovery for every flow.
64
+ return { frontmatter: {} as Record<string, unknown>, body: "" };
65
+ }
66
+ })();
67
+ if (!frontmatter.name || !frontmatter.description) continue;
68
+
69
+ // frontmatter is YAML-parsed: tools may be a comma-separated string ("a, b")
70
+ // OR a YAML sequence ([a, b]). Handle both forms.
71
+ const rawTools = frontmatter.tools;
72
+ const tools: string[] | undefined = Array.isArray(rawTools)
73
+ ? rawTools.map((t) => String(t).trim()).filter(Boolean)
74
+ : rawTools !== undefined && rawTools !== null
75
+ ? String(rawTools)
76
+ .split(",")
77
+ .map((t) => t.trim())
78
+ .filter(Boolean)
79
+ : undefined;
80
+
81
+ agents.push({
82
+ name: String(frontmatter.name),
83
+ description: String(frontmatter.description),
84
+ tools: tools && tools.length > 0 ? tools : undefined,
85
+ model: frontmatter.model === undefined ? undefined : String(frontmatter.model),
86
+ thinking: frontmatter.thinking === undefined ? undefined : String(frontmatter.thinking),
87
+ systemPrompt: body,
88
+ source,
89
+ filePath,
90
+ });
91
+ } catch {
92
+ // Defense-in-depth: a single bad agent file must not break discovery
93
+ // for the entire flow (e.g. exotic YAML shapes, runtime errors in
94
+ // field access, symlink races, etc.).
95
+ continue;
96
+ }
83
97
  }
84
98
  return agents;
85
99
  }
@@ -128,9 +142,15 @@ export function discoverAgents(
128
142
  for (const [name, override] of Object.entries(overrides)) {
129
143
  const agent = agentMap.get(name);
130
144
  if (agent) {
131
- if (override.model !== undefined) agent.model = override.model;
132
- if (override.thinking !== undefined) agent.thinking = override.thinking;
133
- if (override.tools !== undefined) agent.tools = override.tools;
145
+ // Clone before mutating: agentMap owns the original AgentConfig
146
+ // (loaded from disk in loadAgentsFromDir). Mutating it in place
147
+ // would cause cross-contamination for any caller that retains a
148
+ // reference and invokes discoverAgents again with different overrides.
149
+ const mutated: AgentConfig = { ...agent };
150
+ if (override.model !== undefined) mutated.model = override.model;
151
+ if (override.thinking !== undefined) mutated.thinking = override.thinking;
152
+ if (override.tools !== undefined) mutated.tools = override.tools;
153
+ agentMap.set(name, mutated);
134
154
  }
135
155
  }
136
156
  }
@@ -108,10 +108,6 @@ async function runFlow(
108
108
  onUpdate: ((p: AgentToolResult<TaskflowDetails>) => void) | undefined,
109
109
  existing?: RunState,
110
110
  ): Promise<RuntimeResult> {
111
- const settings = readSubagentSettings();
112
- const scope: AgentScope = def.agentScope ?? "user";
113
- const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
114
-
115
111
  const state = existing ?? makeRunState(def, args, ctx.cwd);
116
112
 
117
113
  const emit = (s: RunState, finalOutput?: string) => {
@@ -166,6 +162,13 @@ async function runFlow(
166
162
  : undefined;
167
163
 
168
164
  try {
165
+ // Discover settings/agents inside try so a YAML/IO crash in
166
+ // discoverAgents or readSubagentSettings (F-001) is caught and
167
+ // the heartbeat timer is cleared by the finally block below.
168
+ const settings = readSubagentSettings();
169
+ const scope: AgentScope = def.agentScope ?? "user";
170
+ const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides);
171
+
169
172
  const result = await executeTaskflow(state, {
170
173
  cwd: ctx.cwd,
171
174
  agents,
@@ -27,10 +27,13 @@ export interface InterpolationResult {
27
27
  missing: string[];
28
28
  }
29
29
 
30
- export function interpolate(template: string, ctx: InterpolationContext): InterpolationResult {
30
+ export function interpolate(
31
+ template: string | null | undefined,
32
+ ctx: InterpolationContext,
33
+ ): InterpolationResult {
31
34
  const missing: string[] = [];
32
35
 
33
- const text = template.replace(PLACEHOLDER, (whole, path: string) => {
36
+ const text = String(template ?? "").replace(PLACEHOLDER, (whole, path: string) => {
34
37
  const value = resolvePath(path, ctx);
35
38
  if (value === undefined) {
36
39
  missing.push(path);
@@ -134,6 +137,24 @@ export function safeParse(text: string): unknown {
134
137
  }
135
138
  }
136
139
  }
140
+ // Anti-pattern detection (v0.0.8.1): array followed by a stray top-level
141
+ // "key": value. A common LLM mistake — the model appends
142
+ // `"deferred": [...]` after a JSON array, producing a non-JSON hybrid that
143
+ // none of the above strategies can recover. We surface a diagnostic hint
144
+ // so flow authors can spot the bug fast.
145
+ //
146
+ // We check the original (trimmed) input rather than the slice tail,
147
+ // because `lastIndexOf(close)` lands on the *last* bracket — for the
148
+ // anti-pattern the stray key is between the array's `]` and the trailing
149
+ // `]`, not after the last one.
150
+ if (/]\s*[\},]?\s*"[^"\n]+"\s*:/.test(trimmed)) {
151
+ console.warn(
152
+ "[pi-taskflow safeParse] input looks like a JSON array followed by a stray top-level key " +
153
+ `(pattern: [{...}], "key": ...). This is not valid JSON. ` +
154
+ `Hint: put extra data as array members (e.g. {"id":"D-001","status":"deferred",...}) ` +
155
+ `or split into a separate phase.`,
156
+ );
157
+ }
137
158
  return undefined;
138
159
  }
139
160
 
@@ -142,7 +163,7 @@ export function coerceArray(value: unknown): unknown[] | null {
142
163
  if (Array.isArray(value)) return value;
143
164
  if (value && typeof value === "object") {
144
165
  // {items: [...]} or {results: [...]} convenience
145
- for (const key of ["items", "results", "list", "data"]) {
166
+ for (const key of ["items", "results", "list", "data", "findings"]) {
146
167
  const v = (value as Record<string, unknown>)[key];
147
168
  if (Array.isArray(v)) return v;
148
169
  }
@@ -71,7 +71,10 @@ function agentRole(phase: Phase, ps: PhaseState | undefined, theme: Theme): stri
71
71
 
72
72
  function costStr(usage: UsageStats | undefined, theme: Theme): string {
73
73
  if (!usage?.cost) return "";
74
- return theme.fg("muted", `$${usage.cost.toFixed(3)}`);
74
+ const c = usage.cost;
75
+ return c >= 0.01
76
+ ? theme.fg("muted", `$${c.toFixed(2)}`)
77
+ : theme.fg("muted", `$${c.toFixed(4)}`);
75
78
  }
76
79
 
77
80
  function aggregateCost(state: RunState): number {
@@ -174,6 +177,7 @@ function phaseDetail(phase: Phase, ps: PhaseState | undefined, theme: Theme): st
174
177
  const color = d === "reject" ? "error" : d === "edit" ? "warning" : "success";
175
178
  let a = theme.fg("warning", "⚠") + " " + theme.fg(color as Parameters<typeof theme.fg>[0], theme.bold(d.toUpperCase()));
176
179
  if (ps.approval.auto) a += theme.fg("dim", " auto");
180
+ if (cost) a += ` ${cost}`;
177
181
  if (time) a += ` ${time}`;
178
182
  if (ps.warnings?.length) a += theme.fg("warning", ` ⚠${ps.warnings.length}`);
179
183
  return a;
@@ -228,8 +232,8 @@ function headerLine(state: RunState, theme: Theme): string {
228
232
  if (state.status === "blocked") line += theme.fg("error", " · blocked");
229
233
  const cost = aggregateCost(state);
230
234
  const budget = state.def.budget;
231
- if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost.toFixed(3)}/$${budget.maxUSD}`);
232
- else if (cost) line += theme.fg("muted", ` · $${cost.toFixed(3)}`);
235
+ if (budget?.maxUSD !== undefined) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}/$${budget.maxUSD}`);
236
+ else if (cost) line += theme.fg("muted", ` · $${cost >= 0.01 ? cost.toFixed(2) : cost.toFixed(4)}`);
233
237
  const el = runElapsed(state);
234
238
  if (el) line += theme.fg("dim", ` · ${elapsed(el)}`);
235
239
  return line;
@@ -203,14 +203,10 @@ function summarizeToolCall(name: string, args: Record<string, unknown>): string
203
203
  }
204
204
  }
205
205
 
206
- async function writePromptToTempFile(agentName: string, prompt: string): Promise<{ dir: string; filePath: string }> {
207
- const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
208
- const safeName = agentName.replace(/[^\w.-]+/g, "_");
209
- const filePath = path.join(tmpDir, `prompt-${safeName}.md`);
206
+ async function writePromptToTempFile(filePath: string, prompt: string): Promise<void> {
210
207
  await withFileMutationQueue(filePath, async () => {
211
208
  await fs.promises.writeFile(filePath, prompt, { encoding: "utf-8", mode: 0o600 });
212
209
  });
213
- return { dir: tmpDir, filePath };
214
210
  }
215
211
 
216
212
  function getPiInvocation(args: string[]): { command: string; args: string[] } {
@@ -284,9 +280,13 @@ export async function runAgentTask(
284
280
 
285
281
  try {
286
282
  if (agent.systemPrompt.trim()) {
287
- const tmp = await writePromptToTempFile(agent.name, agent.systemPrompt);
288
- tmpPromptDir = tmp.dir;
289
- tmpPromptPath = tmp.filePath;
283
+ // Allocate the temp dir + path BEFORE any fallible I/O so that if
284
+ // writeFile throws, tmpPromptDir/tmpPromptPath are already set and
285
+ // the finally block can clean up the directory (F-004).
286
+ tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
287
+ const safeName = agent.name.replace(/[^\w.-]+/g, "_");
288
+ tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
289
+ await writePromptToTempFile(tmpPromptPath, agent.systemPrompt);
290
290
  args.push("--append-system-prompt", tmpPromptPath);
291
291
  }
292
292
  args.push(`Task: ${task}`);
@@ -319,15 +319,25 @@ export async function runAgentTask(
319
319
  if (buffer.trim()) processLine(buffer);
320
320
  resolve(code ?? 0);
321
321
  });
322
- proc.on("error", () => resolve(1));
322
+ proc.on("error", (err) => {
323
+ if (!result.stderr) result.stderr = err.message;
324
+ if (!result.errorMessage) result.errorMessage = err.message;
325
+ resolve(1);
326
+ });
323
327
 
324
328
  if (opts.signal) {
325
329
  const kill = () => {
326
330
  wasAborted = true;
327
331
  proc.kill("SIGTERM");
328
- setTimeout(() => {
329
- if (!proc.killed) proc.kill("SIGKILL");
330
- }, 5000);
332
+ // Force-kill fallback. proc.kill("SIGKILL") is idempotent if
333
+ // the process already exited, and `proc.killed` is set true
334
+ // synchronously by the SIGTERM above — so the previous
335
+ // `if (!proc.killed)` guard would skip SIGKILL entirely,
336
+ // hanging forever on a child that ignores SIGTERM.
337
+ // .unref() keeps the timer from holding the event loop open
338
+ // after the process is gone.
339
+ const forceKill = setTimeout(() => proc.kill("SIGKILL"), 5000);
340
+ forceKill.unref();
331
341
  };
332
342
  if (opts.signal.aborted) kill();
333
343
  else opts.signal.addEventListener("abort", kill, { once: true });
@@ -349,12 +359,20 @@ export async function runAgentTask(
349
359
  // `output`: upstream providers (e.g. a Cloudflare challenge page) can
350
360
  // surface huge HTML/JSON in errorMessage, and that garbage would
351
361
  // otherwise flow into downstream phase interpolations.
352
- if (isFailed(result) && !result.output) {
353
- result.output = TRANSPORT_ERROR_PLACEHOLDER;
354
- if (!result.errorMessage) {
355
- result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
362
+ // Sanitization must run whenever the run failed, even if some output
363
+ // was already emitted (e.g. crash mid-stream with a partial result):
364
+ // an unsanitized errorMessage would still leak into PhaseState and
365
+ // downstream interpolation contexts. (F-013)
366
+ if (isFailed(result)) {
367
+ if (!result.output) {
368
+ result.output = TRANSPORT_ERROR_PLACEHOLDER;
369
+ if (!result.errorMessage) {
370
+ result.errorMessage = result.stderr || `Subagent exited with code ${result.exitCode} (stopReason: ${result.stopReason ?? "unknown"})`;
371
+ }
372
+ }
373
+ if (result.errorMessage) {
374
+ result.errorMessage = sanitizeErrorMessage(result.errorMessage);
356
375
  }
357
- result.errorMessage = sanitizeErrorMessage(result.errorMessage);
358
376
  }
359
377
  return result;
360
378
  } finally {
@@ -50,6 +50,9 @@ export class RunHistoryComponent {
50
50
  private cachedLines?: string[];
51
51
 
52
52
  constructor(runs: RunState[], theme: Theme, onDone: (result?: RunHistoryResult) => void) {
53
+ if (!runs.length) {
54
+ throw new Error("RunHistoryComponent requires at least one run");
55
+ }
53
56
  this.runs = runs;
54
57
  this.theme = theme;
55
58
  this.onDone = onDone;
@@ -551,14 +551,20 @@ async function executePhase(
551
551
  baseRunTask(cwd, agents, agentName, preRead + subTask, opts, globalThinking);
552
552
  const subResult = await executeTaskflow(subState, {
553
553
  ...deps,
554
+ // Override deps.cwd with the flow phase's own cwd so that sub-flow
555
+ // phases without an explicit cwd derive their subagents from the
556
+ // flow's cwd (not the caller's cwd).
557
+ cwd: phase.cwd ?? deps.cwd,
554
558
  runTask: subRunTask,
555
559
  _stack: [...stack, state.flowName],
556
560
  persist: undefined,
557
561
  onProgress: () => {
558
562
  if (live) {
559
563
  const ph = Object.values(subState.phases);
564
+ // B-F015: `done` must include both success and failure so the
565
+ // renderer's `done - failed` shows the true success count.
560
566
  live.subProgress = {
561
- done: ph.filter((p) => p.status === "done").length,
567
+ done: ph.filter((p) => p.status === "done" || p.status === "failed").length,
562
568
  total: subDef.phases.length,
563
569
  running: ph.filter((p) => p.status === "running").length,
564
570
  failed: ph.filter((p) => p.status === "failed").length,
@@ -577,8 +583,11 @@ async function executePhase(
577
583
  output: subResult.finalOutput,
578
584
  json: parseJson ? safeParse(subResult.finalOutput) : undefined,
579
585
  usage: subResult.totalUsage,
586
+ // B-F015: include failed in `done` so the renderer's
587
+ // `done - failed` formula gives the success count (matches the
588
+ // map/parallel runner's overlapping-counter convention).
580
589
  subProgress: {
581
- done: sp.filter((p) => p.status === "done").length,
590
+ done: sp.filter((p) => p.status === "done" || p.status === "failed").length,
582
591
  total: subDef.phases.length,
583
592
  running: 0,
584
593
  failed: sp.filter((p) => p.status === "failed").length,
@@ -649,7 +658,10 @@ export function parseGateVerdict(output: string): { verdict: "pass" | "block"; r
649
658
  if (typeof o.continue === "boolean") return { verdict: o.continue ? "pass" : "block", reason: asReason(o.reason) };
650
659
  if (typeof o.pass === "boolean") return { verdict: o.pass ? "pass" : "block", reason: asReason(o.reason) };
651
660
  if (typeof o.verdict === "string") {
652
- const block = /block|fail|stop|reject|halt|\bno\b/i.test(o.verdict);
661
+ // Note: do NOT include standalone "no" — natural-language verdicts like
662
+ // "No issues found" / "no errors" would otherwise be false-positive BLOCK.
663
+ // Fail-open covers any ambiguous text.
664
+ const block = /block|fail|stop|reject|halt/i.test(o.verdict);
653
665
  return { verdict: block ? "block" : "pass", reason: asReason(o.reason) };
654
666
  }
655
667
  }
@@ -666,11 +678,86 @@ function asReason(v: unknown): string | undefined {
666
678
  return typeof v === "string" && v.trim() ? v.trim() : undefined;
667
679
  }
668
680
 
681
+ /**
682
+ * Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
683
+ *
684
+ * A throw from a host-supplied callback must NEVER replace the runtime's
685
+ * outcome — neither the original crash message in `executeTaskflow`'s catch
686
+ * block, nor the final output of a successful run. Callbacks are observability
687
+ * hooks; the run survives their failure.
688
+ *
689
+ * Used at every "checkpoint" call site (phase start, phase end, terminal state).
690
+ * For high-frequency live updates inside a phase, see `safeProgress` below.
691
+ */
692
+ function safeEmit(deps: RuntimeDeps, state: RunState): void {
693
+ try {
694
+ deps.persist?.(state);
695
+ } catch {
696
+ // user callback — must not break the run
697
+ }
698
+ try {
699
+ deps.onProgress?.(state);
700
+ } catch {
701
+ // user callback — must not break the run
702
+ }
703
+ }
704
+
705
+ /**
706
+ * Like `safeEmit` but for the high-frequency live-update channel only.
707
+ * Skips `persist` (which is intentionally checkpoint-only) and swallows any
708
+ * throw from the user-supplied `onProgress` so a misbehaving TUI sink cannot
709
+ * disrupt an in-flight phase.
710
+ */
711
+ function safeProgress(deps: RuntimeDeps, state: RunState): void {
712
+ try {
713
+ deps.onProgress?.(state);
714
+ } catch {
715
+ // user callback — must not break the run
716
+ }
717
+ }
718
+
669
719
  /**
670
720
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
671
721
  */
722
+ function ensureImplicitGate(def: Taskflow): void {
723
+ // Respect explicit opt-out
724
+ if ((def as any).implicitGate === false) return;
725
+
726
+ const hasGate = def.phases.some(
727
+ (p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
728
+ );
729
+ if (hasGate || def.phases.length === 0) return;
730
+
731
+ // The last existing phase is the effective "final" phase — pin it so the
732
+ // injected gate doesn't become the finalOutput.
733
+ const lastPhase = def.phases[def.phases.length - 1];
734
+ if (!lastPhase.final && !def.phases.some((p) => p.final)) {
735
+ lastPhase.final = true;
736
+ }
737
+
738
+ const allIds = def.phases.map((p) => p.id);
739
+ def.phases.push({
740
+ id: "_implicit-gate",
741
+ type: "gate",
742
+ dependsOn: allIds,
743
+ agent: "reviewer",
744
+ task: `Review all phase outputs from this taskflow for accuracy and consistency.
745
+
746
+ For each upstream phase, scan its output for:
747
+ 1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
748
+ 2. **Internal contradictions**: Do any phases contradict each other?
749
+ 3. **Completeness**: Is any output truncated, empty, or anomalously short?
750
+ 4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
751
+
752
+ Output:
753
+ - If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
754
+ - If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
755
+ });
756
+ }
757
+
672
758
  export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
673
759
  const def: Taskflow = state.def;
760
+ ensureImplicitGate(def);
674
761
  try {
675
762
  return await runTaskflowLayers(state, deps);
676
763
  } catch (e) {
@@ -685,8 +772,7 @@ export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promi
685
772
  }
686
773
  }
687
774
  state.status = "failed";
688
- deps.persist?.(state);
689
- deps.onProgress?.(state);
775
+ safeEmit(deps, state);
690
776
  const totalUsage = aggregateUsage(Object.values(state.phases).map((p) => p.usage ?? emptyUsage()));
691
777
  return { state, finalOutput: `Taskflow '${def.name}' crashed: ${message}`, ok: false, totalUsage };
692
778
  }
@@ -697,8 +783,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
697
783
  const layers = topoLayers(def.phases);
698
784
 
699
785
  state.status = "running";
700
- deps.persist?.(state);
701
- deps.onProgress?.(state);
786
+ safeEmit(deps, state);
702
787
 
703
788
  let aborted = false;
704
789
  let gateBlocked = false;
@@ -756,8 +841,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
756
841
  endedAt: Date.now(),
757
842
  usage: emptyUsage(),
758
843
  };
759
- deps.persist?.(state);
760
- deps.onProgress?.(state);
844
+ safeEmit(deps, state);
761
845
  return;
762
846
  }
763
847
 
@@ -768,9 +852,9 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
768
852
  status: "running",
769
853
  startedAt,
770
854
  };
771
- deps.onProgress?.(state);
855
+ safeProgress(deps, state);
772
856
 
773
- const ps = await executePhase(phase, state, deps, prior, () => deps.onProgress?.(state));
857
+ const ps = await executePhase(phase, state, deps, prior, () => safeProgress(deps, state));
774
858
  // Preserve the phase start time: executePhase returns a fresh PhaseState
775
859
  // that omits startedAt (cached/resumed results carry their own).
776
860
  state.phases[phase.id] = ps.startedAt ? ps : { ...ps, startedAt };
@@ -793,8 +877,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
793
877
  budgetBlocked = true;
794
878
  budgetReason = ob.reason;
795
879
  }
796
- deps.persist?.(state);
797
- deps.onProgress?.(state);
880
+ safeEmit(deps, state);
798
881
  });
799
882
  }
800
883
 
@@ -818,8 +901,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
818
901
  : anyFailed
819
902
  ? "failed"
820
903
  : "completed";
821
- deps.persist?.(state);
822
- deps.onProgress?.(state);
904
+ safeEmit(deps, state);
823
905
 
824
906
  let finalOutput = finalState?.output ?? "(no output)";
825
907
  if (gateBlocked) {
@@ -147,6 +147,12 @@ export const TaskflowSchema = Type.Object(
147
147
  }),
148
148
  ),
149
149
  phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
150
+ implicitGate: Type.Optional(
151
+ Type.Boolean({
152
+ description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
153
+ default: true,
154
+ }),
155
+ ),
150
156
  },
151
157
  { additionalProperties: false },
152
158
  );
@@ -184,7 +190,11 @@ export function isShorthand(def: unknown): boolean {
184
190
  if (typeof def !== "object" || def === null) return false;
185
191
  const d = def as Record<string, unknown>;
186
192
  if (Array.isArray(d.phases)) return false;
187
- return Array.isArray(d.chain) || Array.isArray(d.tasks) || typeof d.task === "string";
193
+ return (
194
+ (Array.isArray(d.chain) && d.chain.length > 0) ||
195
+ (Array.isArray(d.tasks) && d.tasks.length > 0) ||
196
+ typeof d.task === "string"
197
+ );
188
198
  }
189
199
 
190
200
  function readStep(s: unknown): ShorthandStep {
@@ -355,20 +365,27 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
355
365
  const finals = (flow.phases as Phase[]).filter((p) => p?.final);
356
366
  if (finals.length > 1) errors.push(`Only one phase may be marked 'final' (found ${finals.length})`);
357
367
 
358
- // --- Soft warnings: {steps.X.*} references that aren't declared deps -------
368
+ // --- Hard errors: {steps.X.*} references that aren't declared deps ------
359
369
  // Catches the most common authoring mistake: the task talks about
360
370
  // `{steps.review.output}` but `dependsOn: ["review"]` is missing, so the
361
371
  // phase runs in parallel with `review` and the model sees the literal
362
- // placeholder string. The runtime can't infer the intent.
372
+ // placeholder string. The runtime can't infer the intent — fail fast at
373
+ // validation time so the mistake is caught before the run starts.
374
+ //
375
+ // Phases with `join: "any"` are exempt: by design they only need ONE of
376
+ // their declared deps to complete, and may reference other phases as
377
+ // informational context (not as true dependencies).
363
378
  if (errors.length === 0) {
364
379
  const idToPhase = new Map((flow.phases as Phase[]).map((p) => [p.id, p]));
365
380
  for (const p of flow.phases as Phase[]) {
366
381
  if (!p?.id) continue;
382
+ const isJoinAny = p.join === "any";
383
+ if (isJoinAny) continue;
367
384
  const deps = new Set(dependenciesOf(p));
368
385
  const refs = collectRefs(p);
369
386
  for (const ref of refs.steps) {
370
387
  if (ref === p.id) {
371
- warnings.push(`Phase '${p.id}': references its own output via {steps.${ref}.*}; this is almost always a bug.`);
388
+ errors.push(`Phase '${p.id}': references its own output via {steps.${ref}.*}; this is almost always a bug.`);
372
389
  continue;
373
390
  }
374
391
  if (!idToPhase.has(ref)) {
@@ -378,7 +395,7 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
378
395
  continue;
379
396
  }
380
397
  if (!deps.has(ref)) {
381
- warnings.push(
398
+ errors.push(
382
399
  `Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not in dependsOn. ` +
383
400
  `The phase will run in parallel with '${ref}' and see the literal placeholder. ` +
384
401
  `Add "dependsOn": ["${ref}"] (or include '${ref}' transitively).`,
@@ -8,6 +8,7 @@
8
8
 
9
9
  import * as crypto from "node:crypto";
10
10
  import * as fs from "node:fs";
11
+ import * as os from "node:os";
11
12
  import * as path from "node:path";
12
13
  import { getAgentDir } from "@earendil-works/pi-coding-agent";
13
14
  import type { Taskflow } from "./schema.ts";
@@ -69,12 +70,20 @@ function userFlowsDir(): string {
69
70
  return path.join(getAgentDir(), "taskflows");
70
71
  }
71
72
 
72
- function findProjectFlowsDir(cwd: string, create = false): string | null {
73
+ function findProjectFlowsDirInternal(cwd: string, create = false): string | null {
73
74
  // Prefer an existing .pi dir up the tree; else use cwd/.pi when creating.
75
+ // **Never treat `~/.pi/` as a project flow dir** — the home directory is
76
+ // the user-scope boundary, and the user's `~/.pi/` is the agent dir, not a
77
+ // project. We skip the home entry entirely during the walk-up, so even a
78
+ // deeply nested cwd under home will return null (create=false) when no
79
+ // project `.pi` exists on the path.
80
+ const home = os.homedir();
74
81
  let dir = cwd;
75
82
  while (true) {
76
- const candidate = path.join(dir, ".pi");
77
- if (fs.existsSync(candidate)) return path.join(candidate, "taskflows");
83
+ if (dir !== home) {
84
+ const candidate = path.join(dir, ".pi");
85
+ if (fs.existsSync(candidate)) return path.join(candidate, "taskflows");
86
+ }
78
87
  const parent = path.dirname(dir);
79
88
  if (parent === dir) break;
80
89
  dir = parent;
@@ -94,6 +103,11 @@ function readFlowFile(filePath: string, scope: "user" | "project"): SavedFlow |
94
103
  }
95
104
 
96
105
  /** List all saved flows (project overrides user on name collision). */
106
+ /** Internal-but-exported for tests: walk-up `.pi` finder with home-dir stop. */
107
+ export function findProjectFlowsDir(cwd: string, create = false): string | null {
108
+ return findProjectFlowsDirInternal(cwd, create);
109
+ }
110
+
97
111
  export function listFlows(cwd: string): SavedFlow[] {
98
112
  const map = new Map<string, SavedFlow>();
99
113
  const dirs: Array<{ dir: string; scope: "user" | "project" }> = [{ dir: userFlowsDir(), scope: "user" }];
@@ -149,8 +163,11 @@ export function newRunId(flowName: string): string {
149
163
  export function saveRun(state: RunState): void {
150
164
  const dir = runsDir(state.cwd);
151
165
  fs.mkdirSync(dir, { recursive: true });
152
- state.updatedAt = Date.now();
153
- writeFileAtomic(path.join(dir, `${state.runId}.json`), JSON.stringify(state, null, 2));
166
+ // Clone before stamping updatedAt so the caller's RunState reference is not
167
+ // mutated as a hidden side effect (v0.0.6 audit, F-009). Shallow clone is
168
+ // sufficient: saveRun only serializes; it does not mutate nested objects.
169
+ const toSave = { ...state, updatedAt: Date.now() };
170
+ writeFileAtomic(path.join(dir, `${state.runId}.json`), JSON.stringify(toSave, null, 2));
154
171
  }
155
172
 
156
173
  export function loadRun(cwd: string, runId: string): RunState | null {
@@ -219,7 +236,14 @@ export function listRuns(cwd: string, limit = 20): RunState[] {
219
236
  /* ignore */
220
237
  }
221
238
  }
222
- return runs.sort((a, b) => b.updatedAt - a.updatedAt).slice(0, limit);
239
+ // Guard against records missing/with non-numeric `updatedAt` — a bare
240
+ // `JSON.parse` may yield an object without it, and `undefined - undefined`
241
+ // is NaN, which makes `Array.prototype.sort` produce implementation-defined
242
+ // order. Drop those before sorting. (v0.0.8 audit, F-010.)
243
+ return runs
244
+ .filter((r) => typeof r.updatedAt === "number" && !Number.isNaN(r.updatedAt))
245
+ .sort((a, b) => b.updatedAt - a.updatedAt)
246
+ .slice(0, limit);
223
247
  }
224
248
 
225
249
  /** Stable hash of a phase's resolved task + inputs, for resume caching. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-taskflow",
3
- "version": "0.0.7",
3
+ "version": "0.0.8",
4
4
  "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -172,6 +172,36 @@ Review the audit results below. If any endpoint is missing auth, end with
172
172
  {steps.audit.output}
173
173
  ```
174
174
 
175
+ ### Structured-verify phases (v0.0.8.1)
176
+
177
+ A "verify" phase typically runs `npx tsc --noEmit && npm test && git diff --stat`
178
+ and reports whether everything is green. **Don't** delegate this to a generic
179
+ verifier subagent that summarizes the output in prose — LLMs commonly misread
180
+ shell output (e.g., 234 tests reported as 230, 745 insertions as 599, "1 type
181
+ error" reported as "clean"). Instead, **use a dedicated agent whose task is a
182
+ structured shell pipeline** that echoes structured key/value lines the next
183
+ phase can parse directly. Recommended pattern:
184
+
185
+ ```jsonc
186
+ {
187
+ "id": "verify",
188
+ "type": "agent",
189
+ "agent": "verifier",
190
+ "dependsOn": ["apply-fixes"],
191
+ "task": "Run the verification pipeline and report structured results.\n\nExecute:\n```bash\ncd $REPO && npx tsc --noEmit 2>&1 | tee /tmp/tsc.log\ncd $REPO && npm test 2>&1 | tee /tmp/test.log | tail -10\ncd $REPO && git diff --shortstat HEAD | tee /tmp/diff.log\n```\n\nReport EXACTLY in this format (one key=value pair per line, no prose):\ntypecheck=PASS|FAIL\ntests_total=N\ntests_pass=N\ntests_fail=N\ninsertions=N\ndeletions=N\nfiles_changed=N\n\nIf any field is missing, you failed the task — re-run the command and re-read the output.",
192
+ "tools": ["read", "edit", "write", "bash"]
193
+ }
194
+ ```
195
+
196
+ The key insight: **LLMs are bad at summarizing shell output, good at copying
197
+ structured data**. Asking for `key=value` pairs with explicit fields and "if
198
+ missing, you failed" forces the agent to read each field carefully. Downstream
199
+ phases that consume `{steps.verify.output}` can then `safeParse` it into a
200
+ JSON object and assert against expected values.
201
+
202
+ For audits where the upstream is LLM-generated prose (not shell output), use a
203
+ plain `gate` phase with `VERDICT:` instead.
204
+
175
205
  ### Interpolation
176
206
 
177
207
  - `{args.X}` — invocation argument
@@ -188,12 +218,11 @@ Review the audit results below. If any endpoint is missing auth, end with
188
218
  3. Reference upstream results explicitly with `{steps.ID...}` and set `dependsOn`.
189
219
  4. Mark the result-bearing phase with `"final": true` (else the last phase wins).
190
220
 
191
- ## Common mistakes (the runtime will warn you, but don't trip them)
221
+ ## Common mistakes (the runtime will reject these at validation time)
192
222
 
193
- The runtime validates your flow at startup and at each phase's interpolation.
194
- Two patterns account for ~all the broken runs in the wild — avoid them. If you
195
- want warnings like these to become hard failures, set `"strictInterpolation": true`
196
- on the flow.
223
+ The runtime validates your flow at startup. As of v0.0.8.1, the two most
224
+ common authoring mistakes below are **hard validation errors** (the flow
225
+ refuses to start). Fix the flow before running it.
197
226
 
198
227
  ### 1. Referencing `{steps.X}` without `dependsOn: ["X"]`
199
228
 
@@ -209,10 +238,9 @@ on the flow.
209
238
  }
210
239
  ```
211
240
 
212
- The runtime logs a warning at run start (`Phase 'fix-issues': task references
213
- {steps.code-review-1.*} but 'code-review-1' is not in dependsOn`) and the phase
214
- itself gets a `warnings` field with a non-fatal `unresolved placeholders` line.
215
- The TUI shows a `⚠N` badge. **Always declare the chain:**
241
+ Validation now rejects this with: `Phase 'fix-issues': task references
242
+ {steps.code-review-1.*} but 'code-review-1' is not in dependsOn. ...`
243
+ **Always declare the chain:**
216
244
 
217
245
  ```jsonc
218
246
  // ✅ RIGHT
@@ -233,7 +261,11 @@ The TUI shows a `⚠N` badge. **Always declare the chain:**
233
261
 
234
262
  Tip: write the `task` first (it tells you what each phase needs), then scan for
235
263
  `{steps.*}` references and add the matching `dependsOn`. If a phase truly does
236
- not depend on anything in its task, you can ignore the warning.
264
+ not depend on anything in its task, you can omit `dependsOn` for it.
265
+
266
+ Exception: phases with `join: "any"` are exempt from this check, since they
267
+ deliberately wait for only one of their declared deps to complete and may
268
+ reference others as informational context.
237
269
 
238
270
  ### 2. Assuming the runtime knows "this is a chain"
239
271