pi-taskflow 0.0.16 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -522,19 +522,9 @@ Your choices are written to `~/.pi/agent/settings.json`:
522
522
  }
523
523
  ```
524
524
 
525
- Edit the values manually any time, or just re-run `/tf init`. You can also override individual agents via `subagents.agentOverrides` in the same file:
525
+ Edit the values manually any time, or just re-run `/tf init`.
526
526
 
527
- ```json
528
- {
529
- "modelRoles": { ... },
530
- "subagents": {
531
- "agentOverrides": {
532
- "executor": { "model": "anthropic/claude-sonnet-4-20250514" },
533
- "reviewer": { "thinking": "xhigh" }
534
- }
535
- }
536
- }
537
- ```
527
+ To customize a specific agent's model or thinking without changing `modelRoles`, create an agent file at `~/.pi/agent/agents/<name>.md` with the desired overrides in the YAML frontmatter.
538
528
 
539
529
  ### Tool path (`action="init"`)
540
530
 
package/README.zh-CN.md CHANGED
@@ -524,19 +524,9 @@ Taskflow 自带 **18 个内置代理**——每个代理是一个 `.md` 文件
524
524
  }
525
525
  ```
526
526
 
527
- 随时手动编辑这些值,或重新运行 `/tf init`。你也可以通过同级文件中的 `subagents.agentOverrides` 覆盖单个代理:
527
+ 随时手动编辑这些值,或重新运行 `/tf init`。
528
528
 
529
- ```json
530
- {
531
- "modelRoles": { ... },
532
- "subagents": {
533
- "agentOverrides": {
534
- "executor": { "model": "anthropic/claude-sonnet-4-20250514" },
535
- "reviewer": { "thinking": "xhigh" }
536
- }
537
- }
538
- }
539
- ```
529
+ 若需自定义特定代理的模型或 thinking 而不修改 `modelRoles`,可在 `~/.pi/agent/agents/<name>.md` 创建代理文件,在 YAML frontmatter 中覆盖。
540
530
 
541
531
  ### 工具路径(`action="init"`)
542
532
 
@@ -21,7 +21,7 @@ export interface TaskflowSettings {
21
21
  maxRunAgeDays: number;
22
22
  }
23
23
 
24
- import { DEFAULT_KEPT_RUNS, DEFAULT_RUN_AGE_DAYS } from "./store.ts";
24
+ import { DEFAULT_KEPT_RUNS, DEFAULT_RUN_AGE_DAYS, writeFileAtomic } from "./store.ts";
25
25
 
26
26
  export const DEFAULT_TASKFLOW_SETTINGS: TaskflowSettings = {
27
27
  builtInAgents: true,
@@ -63,12 +63,6 @@ export function shouldSyncBuiltinAgentsToProject(settings: TaskflowSettings = DE
63
63
  return settings.builtInAgents && settings.syncBuiltinAgentsToProject;
64
64
  }
65
65
 
66
- export interface AgentOverride {
67
- model?: string;
68
- thinking?: string;
69
- tools?: string[];
70
- }
71
-
72
66
  export interface AgentConfig {
73
67
  name: string;
74
68
  description: string;
@@ -120,16 +114,18 @@ function loadAgentsFromDir(dir: string, source: "user" | "project" | "built-in")
120
114
  if (!frontmatter.name || !frontmatter.description) continue;
121
115
 
122
116
  // frontmatter is YAML-parsed: tools may be a comma-separated string ("a, b")
123
- // OR a YAML sequence ([a, b]). Handle both forms.
117
+ // OR a YAML sequence ([a, b]). Handle both forms; reject other types to
118
+ // prevent garbage output from malformed YAML (e.g. boolean, number).
124
119
  const rawTools = frontmatter.tools;
125
- const tools: string[] | undefined = Array.isArray(rawTools)
126
- ? rawTools.map((t) => String(t).trim()).filter(Boolean)
127
- : rawTools !== undefined && rawTools !== null
128
- ? String(rawTools)
129
- .split(",")
130
- .map((t) => t.trim())
131
- .filter(Boolean)
132
- : undefined;
120
+ let tools: string[] | undefined;
121
+ if (Array.isArray(rawTools)) {
122
+ tools = rawTools.map((t) => String(t).trim()).filter(Boolean);
123
+ } else if (typeof rawTools === "string") {
124
+ tools = rawTools.split(",").map((t) => t.trim()).filter(Boolean);
125
+ } else if (rawTools !== undefined && rawTools !== null) {
126
+ console.warn(`[taskflow] Agent '${String(frontmatter.name)}': 'tools' must be a string or array, got ${typeof rawTools}. Ignoring.`);
127
+ tools = undefined;
128
+ }
133
129
 
134
130
  agents.push({
135
131
  name: String(frontmatter.name),
@@ -173,7 +169,6 @@ function findNearestProjectAgentsDir(cwd: string): string | null {
173
169
  export function discoverAgents(
174
170
  cwd: string,
175
171
  scope: AgentScope,
176
- overrides?: Record<string, AgentOverride>,
177
172
  modelRoles?: Record<string, string>,
178
173
  taskflowSettings: TaskflowSettings = DEFAULT_TASKFLOW_SETTINGS,
179
174
  ): AgentDiscoveryResult {
@@ -202,23 +197,6 @@ export function discoverAgents(
202
197
  for (const a of projectAgents) agentMap.set(a.name, a);
203
198
  }
204
199
 
205
- if (overrides) {
206
- for (const [name, override] of Object.entries(overrides)) {
207
- const agent = agentMap.get(name);
208
- if (agent) {
209
- // Clone before mutating: agentMap owns the original AgentConfig
210
- // (loaded from disk in loadAgentsFromDir). Mutating it in place
211
- // would cause cross-contamination for any caller that retains a
212
- // reference and invokes discoverAgents again with different overrides.
213
- const mutated: AgentConfig = { ...agent };
214
- if (override.model !== undefined) mutated.model = override.model;
215
- if (override.thinking !== undefined) mutated.thinking = override.thinking;
216
- if (override.tools !== undefined) mutated.tools = override.tools;
217
- agentMap.set(name, mutated);
218
- }
219
- }
220
- }
221
-
222
200
  // Resolve {{role}} model references (e.g. {{fast}} → openrouter/deepseek/v4-flash)
223
201
  // Clone before mutating so the AgentConfig objects loaded from disk are never modified in place.
224
202
  if (modelRoles) {
@@ -236,7 +214,6 @@ export function discoverAgents(
236
214
  }
237
215
 
238
216
  export interface SubagentSettings {
239
- agentOverrides?: Record<string, AgentOverride>;
240
217
  globalThinking?: string;
241
218
  modelRoles?: Record<string, string>;
242
219
  taskflow: TaskflowSettings;
@@ -261,7 +238,6 @@ export function readSubagentSettings(): SubagentSettings {
261
238
  if (!fs.existsSync(settingsPath)) return { taskflow: { ...DEFAULT_TASKFLOW_SETTINGS } };
262
239
  const raw = JSON.parse(fs.readFileSync(settingsPath, "utf-8"));
263
240
  return {
264
- agentOverrides: raw.subagents?.agentOverrides,
265
241
  globalThinking: raw.subagents?.globalThinking ?? raw.defaultThinkingLevel,
266
242
  modelRoles: raw.modelRoles,
267
243
  taskflow: normalizeTaskflowSettings(raw.taskflow),
@@ -311,7 +287,7 @@ export function syncBuiltinAgentsToProject(cwd: string): void {
311
287
 
312
288
  try {
313
289
  const content = fs.readFileSync(src, "utf-8");
314
- fs.writeFileSync(dst, content, "utf-8");
290
+ writeFileAtomic(dst, content);
315
291
  } catch {
316
292
  // Best-effort: a locked file must not block the sync.
317
293
  }
@@ -47,9 +47,13 @@ function resolveOne(entry: string, cwd: string): string {
47
47
  cwd,
48
48
  encoding: "utf-8",
49
49
  stdio: ["ignore", "pipe", "ignore"],
50
+ timeout: 30_000,
50
51
  }).trim();
51
52
  return `git:${ref}=${sha}`;
52
- } catch {
53
+ } catch (e: unknown) {
54
+ if ((e as NodeJS.ErrnoException).code === "ETIMEDOUT") {
55
+ return `git:${ref}=<timeout>`;
56
+ }
53
57
  return `git:${ref}=<no-git>`;
54
58
  }
55
59
  }
@@ -42,6 +42,7 @@ import {
42
42
  DEFAULT_RUN_AGE_DAYS,
43
43
  } from "./store.ts";
44
44
  import { CacheStore } from "./cache.ts";
45
+ import { safeParse } from "./interpolate.ts";
45
46
 
46
47
  interface TaskflowDetails {
47
48
  state?: RunState;
@@ -195,7 +196,7 @@ async function runFlow(
195
196
  cleanupConfig.maxKeep = settings.taskflow.maxKeptRuns;
196
197
  cleanupConfig.maxAgeDays = settings.taskflow.maxRunAgeDays;
197
198
  const scope: AgentScope = def.agentScope ?? "user";
198
- const { agents } = discoverAgents(ctx.cwd, scope, settings.agentOverrides, settings.modelRoles, settings.taskflow);
199
+ const { agents } = discoverAgents(ctx.cwd, scope, settings.modelRoles, settings.taskflow);
199
200
 
200
201
  // Hint: if any agent still has unresolved {{role}} references, suggest configuring modelRoles
201
202
  const unresolvedRoles = agents
@@ -416,7 +417,7 @@ export default function (pi: ExtensionAPI) {
416
417
  if (action === "agents") {
417
418
  const scope = params.scope ?? "both";
418
419
  const settings2 = readSubagentSettings();
419
- const { agents } = discoverAgents(ctx.cwd, scope as AgentScope, undefined, settings2.modelRoles, settings2.taskflow);
420
+ const { agents } = discoverAgents(ctx.cwd, scope as AgentScope, settings2.modelRoles, settings2.taskflow);
420
421
  const text = agents.length
421
422
  ? agents
422
423
  .map(
@@ -441,13 +442,18 @@ export default function (pi: ExtensionAPI) {
441
442
  const { verifyTaskflow } = await import("./verify.ts");
442
443
  // Load definition: inline define takes priority, then saved name
443
444
  let def: Taskflow | undefined;
444
- if (params.define) {
445
- const d = params.define as Record<string, unknown>;
445
+ let resolvedDefine: unknown = params.define;
446
+ if (typeof resolvedDefine === "string") {
447
+ const parsed = safeParse(resolvedDefine);
448
+ if (parsed && typeof parsed === "object") resolvedDefine = parsed;
449
+ }
450
+ if (resolvedDefine) {
451
+ const d = resolvedDefine as Record<string, unknown>;
446
452
  if (typeof d === "object" && d !== null && Array.isArray(d.phases)) {
447
453
  def = d as unknown as Taskflow;
448
- } else if (isShorthand(params.define)) {
449
- const r = validateTaskflow(params.define);
450
- if (r.ok) def = params.define as unknown as Taskflow;
454
+ } else if (isShorthand(resolvedDefine)) {
455
+ const r = validateTaskflow(resolvedDefine);
456
+ if (r.ok) def = resolvedDefine as unknown as Taskflow;
451
457
  }
452
458
  } else if (params.name) {
453
459
  const saved = getFlow(ctx.cwd, params.name);
@@ -505,9 +511,25 @@ export default function (pi: ExtensionAPI) {
505
511
  // resolve the definition: inline `define` / shorthand (single|parallel|chain), else saved `name`.
506
512
  let def: Taskflow | undefined;
507
513
 
514
+ // Auto-parse string `define` — LLMs sometimes pass a JSON string
515
+ // instead of a parsed object. safeParse handles markdown fences too.
516
+ let resolvedDefine: unknown = params.define;
517
+ if (typeof resolvedDefine === "string") {
518
+ const parsed = safeParse(resolvedDefine);
519
+ if (parsed && typeof parsed === "object") {
520
+ resolvedDefine = parsed;
521
+ } else {
522
+ return errorResult(
523
+ action,
524
+ `'define' was passed as a string, not a JSON object. Pass it as a proper object, e.g.:\n` +
525
+ `define: {"name":"my-flow","phases":[{"id":"step1","task":"do something"}]}`,
526
+ );
527
+ }
528
+ }
529
+
508
530
  // A shorthand spec can come from `define` (no phases) or top-level params.
509
531
  const shorthandSpec: unknown =
510
- params.define ??
532
+ resolvedDefine ??
511
533
  (params.chain
512
534
  ? { chain: params.chain, name: params.name }
513
535
  : params.tasks
@@ -530,11 +552,25 @@ export default function (pi: ExtensionAPI) {
530
552
  def = candidate as Taskflow;
531
553
  } else if (params.name) {
532
554
  const saved = getFlow(ctx.cwd, params.name);
533
- if (!saved) return errorResult(action, `Saved flow not found: ${params.name}`);
555
+ if (!saved) {
556
+ const available = listFlows(ctx.cwd);
557
+ const hint = available.length
558
+ ? ` Available flows: ${available.map((f) => f.name).join(", ")}.`
559
+ : " No saved flows found. Use action=save to create one, or pass 'define' for an inline flow.";
560
+ return errorResult(action, `Saved flow '${params.name}' not found.${hint}`);
561
+ }
534
562
  def = saved.def;
535
563
  }
536
564
  if (!def)
537
- return errorResult(action, "Provide 'define' (DSL), shorthand 'task'/'tasks'/'chain', or 'name' (saved).");
565
+ return errorResult(
566
+ action,
567
+ `No taskflow definition provided. Use one of:\n` +
568
+ `- define: {"name":"...","phases":[...]} (inline DSL object)\n` +
569
+ `- task: "..." (shorthand single agent)\n` +
570
+ `- tasks: [{"task":"..."},...] (shorthand parallel)\n` +
571
+ `- chain: [{"task":"..."},...] (shorthand sequential)\n` +
572
+ `- name: "saved-flow-name" (run a previously saved flow)`,
573
+ );
538
574
 
539
575
  // save
540
576
  if (action === "save") {
@@ -562,7 +598,17 @@ export default function (pi: ExtensionAPI) {
562
598
  }
563
599
 
564
600
  // run
565
- const args = resolveArgs(def, params.args);
601
+ // Auto-parse string args — LLMs sometimes pass a JSON string.
602
+ let resolvedArgs: Record<string, unknown> | undefined;
603
+ if (typeof params.args === "string") {
604
+ const parsed = safeParse(params.args);
605
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
606
+ resolvedArgs = parsed as Record<string, unknown>;
607
+ }
608
+ } else if (params.args && typeof params.args === "object") {
609
+ resolvedArgs = params.args as Record<string, unknown>;
610
+ }
611
+ const args = resolveArgs(def, resolvedArgs);
566
612
  const v = validateTaskflow(def, { args, cwd: ctx.cwd });
567
613
  if (!v.ok) return errorResult(action, `Invalid taskflow:\n- ${v.errors.join("\n- ")}`);
568
614
  for (const w of v.warnings) {
@@ -579,7 +625,14 @@ export default function (pi: ExtensionAPI) {
579
625
 
580
626
  renderCall(args, theme) {
581
627
  const action = args.action ?? "run";
582
- let label = args.name || (args.define as { name?: string } | undefined)?.name;
628
+ let label = args.name;
629
+ if (!label) {
630
+ let define = args.define;
631
+ if (typeof define === "string") {
632
+ try { define = JSON.parse(define); } catch { /* not JSON */ }
633
+ }
634
+ label = (define as { name?: string } | undefined)?.name;
635
+ }
583
636
  let suffix = "";
584
637
  const phases = (args.define as Taskflow | undefined)?.phases;
585
638
  if (phases) suffix = ` (${phases.length} phases)`;
@@ -613,7 +666,7 @@ export default function (pi: ExtensionAPI) {
613
666
  pi.registerCommand("tf", {
614
667
  description: "Taskflow: list | run <name> | show <name> | runs | init",
615
668
  getArgumentCompletions: (prefix) => {
616
- const subs = ["list", "run", "show", "runs", "resume", "init"];
669
+ const subs = ["list", "run", "show", "runs", "resume", "init", "save", "verify"];
617
670
  const items = subs.map((s) => ({ value: s, label: s }));
618
671
  const filtered = items.filter((i) => i.value.startsWith(prefix));
619
672
  return filtered.length > 0 ? filtered : null;
@@ -797,13 +850,13 @@ function parseArgsString(input: string, def: Taskflow): Record<string, unknown>
797
850
  }
798
851
  // key=value pairs
799
852
  const out: Record<string, unknown> = {};
800
- const pairs = trimmed.match(/(\w+)=("[^"]*"|\S+)/g);
853
+ const pairs = trimmed.match(/(\w+)=("(?:[^"\\]|\\.)*"|\S+)/g);
801
854
  if (pairs) {
802
855
  for (const p of pairs) {
803
856
  const idx = p.indexOf("=");
804
857
  const k = p.slice(0, idx);
805
858
  let v: string = p.slice(idx + 1);
806
- if (v.startsWith('"') && v.endsWith('"')) v = v.slice(1, -1);
859
+ if (v.startsWith('"') && v.endsWith('"')) v = v.slice(1, -1).replace(/\\"/g, '"');
807
860
  out[k] = v;
808
861
  }
809
862
  return out;
@@ -66,7 +66,13 @@ function resolvePath(path: string, ctx: InterpolationContext): unknown {
66
66
  const step = stepId ? ctx.steps[stepId] : undefined;
67
67
  if (!step) return undefined;
68
68
  const field = parts[2];
69
- if (field === "output") return step.output;
69
+ if (field === "output") {
70
+ // Guard: {steps.X.output.trailing} — trailing segments after output are
71
+ // likely author errors (output is a string, not an object). Return
72
+ // undefined so the placeholder is left intact with a missing warning.
73
+ if (parts.length > 3) return undefined;
74
+ return step.output;
75
+ }
70
76
  if (field === "json") {
71
77
  const json = step.json ?? safeParse(step.output);
72
78
  return dig(json, parts.slice(3));
@@ -82,6 +88,12 @@ function resolvePath(path: string, ctx: InterpolationContext): unknown {
82
88
  return undefined;
83
89
  }
84
90
 
91
+ /**
92
+ * Traverse an object by a sequence of property keys. Returns `undefined`
93
+ * when any segment is missing or the current value is not an object —
94
+ * never throws, so extra path segments like {steps.X.json.a.b} where the
95
+ * data is shallower resolve gracefully to undefined (M-8).
96
+ */
85
97
  function dig(obj: unknown, parts: string[]): unknown {
86
98
  let cur: unknown = obj;
87
99
  for (const part of parts) {
@@ -219,10 +231,25 @@ function tokenize(input: string): Tok[] {
219
231
  }
220
232
  // quoted string
221
233
  if (c === '"' || c === "'") {
222
- const end = input.indexOf(c, i + 1);
223
- if (end === -1) throw new Error("unterminated string");
224
- toks.push({ t: "str", v: input.slice(i + 1, end) });
225
- i = end + 1;
234
+ // Handle escaped quotes. Note: ALL \X sequences are interpreted as literal X
235
+ // (including \n → n, \t → t). This differs from JSON/JS escaping but is
236
+ // correct for condition strings which only need quote escaping.
237
+ let j = i + 1;
238
+ let val = "";
239
+ while (j < n) {
240
+ if (input[j] === "\\" && j + 1 < n) {
241
+ val += input[j + 1];
242
+ j += 2;
243
+ } else if (input[j] === c) {
244
+ break;
245
+ } else {
246
+ val += input[j];
247
+ j++;
248
+ }
249
+ }
250
+ if (j >= n) throw new Error("unterminated string");
251
+ toks.push({ t: "str", v: val });
252
+ i = j + 1;
226
253
  continue;
227
254
  }
228
255
  // multi/single char operators
@@ -104,7 +104,7 @@ export function summarizeRun(state: RunState): string {
104
104
  const done = phases.filter((p) => p.status === "done").length;
105
105
  const failed = phases.filter((p) => p.status === "failed").length;
106
106
  const running = phases.filter((p) => p.status === "running").length;
107
- const total = state.def.phases.length;
107
+ const total = Object.keys(state.phases).length;
108
108
  const bits = [`${done}/${total} done`];
109
109
  if (running) bits.push(`${running} running`);
110
110
  if (failed) bits.push(`${failed} failed`);
@@ -254,7 +254,7 @@ function headerLine(state: RunState, theme: Theme): string {
254
254
  const done = phases.filter((p) => p.status === "done").length;
255
255
  const failed = phases.filter((p) => p.status === "failed").length;
256
256
  const running = phases.filter((p) => p.status === "running").length;
257
- const total = state.def.phases.length;
257
+ const total = Object.keys(state.phases).length;
258
258
 
259
259
  const head =
260
260
  state.status === "completed"
@@ -25,6 +25,8 @@ export interface RunResult {
25
25
  errorMessage?: string;
26
26
  /** Total subagent attempts incl. retries (set by the runtime's retry wrapper). */
27
27
  attempts?: number;
28
+ /** Set when the subagent was killed by the idle watchdog (not a user abort). */
29
+ idleTimeout?: boolean;
28
30
  }
29
31
 
30
32
  export interface LiveUpdate {
@@ -74,6 +76,8 @@ const TRANSIENT_ERROR_RE =
74
76
  /rate[_\s-]?limit|too\s+many\s+requests|overloaded|\b429\b|\b503\b|\b502\b|\b504\b|service\s+unavailable|temporarily\s+unavailable|timeout|timed?\s+out|econnreset|etimedout|socket\s+hang\s*up/i;
75
77
  export function isTransientError(r: RunResult): boolean {
76
78
  if (r.stopReason === "aborted") return false;
79
+ // Idle timeout is a deterministic stall — retrying won't help.
80
+ if (r.stopReason === "error" && r.idleTimeout) return false;
77
81
  const hay = `${r.errorMessage ?? ""} ${r.stderr ?? ""} ${r.output ?? ""}`;
78
82
  return TRANSIENT_ERROR_RE.test(hay);
79
83
  }
@@ -153,6 +157,8 @@ export interface EventAccumulator {
153
157
  stopReason?: string;
154
158
  errorMessage?: string;
155
159
  lastActivity: string;
160
+ /** Set when message cap was hit — output gets a truncation notice. */
161
+ truncated?: boolean;
156
162
  }
157
163
 
158
164
  export function newAccumulator(model?: string): EventAccumulator {
@@ -175,7 +181,15 @@ export function foldEventLine(acc: EventAccumulator, line: string): LiveUpdate |
175
181
  }
176
182
  if (event.type !== "message_end" || !event.message) return null;
177
183
  const msg = event.message as Message;
178
- acc.messages.push(msg);
184
+ // Cap prevents OOM from misconfigured loops. 500 messages is generous for
185
+ // normal subagent tasks (50 turns × 10 messages each). Messages beyond the
186
+ // cap are still parsed for usage/model/stopReason extraction.
187
+ const MAX_MESSAGES = 500;
188
+ if (acc.messages.length < MAX_MESSAGES) {
189
+ acc.messages.push(msg);
190
+ } else {
191
+ acc.truncated = true;
192
+ }
179
193
  if (msg.role !== "assistant") return null;
180
194
  acc.usage.turns++;
181
195
  const u = (msg as any).usage;
@@ -323,6 +337,7 @@ export async function runAgentTask(
323
337
 
324
338
  let wasAborted = false;
325
339
  let idleTimedOut = false;
340
+ let killedBySignal: string | undefined;
326
341
  const exitCode = await new Promise<number>((resolve) => {
327
342
  const invocation = getPiInvocation(args);
328
343
  const proc = spawn(invocation.command, invocation.args, {
@@ -371,12 +386,19 @@ export async function runAgentTask(
371
386
  buffer = lines.pop() || "";
372
387
  for (const line of lines) processLine(line);
373
388
  });
389
+ // Cap prevents OOM from verbose tool output (e.g., npm install). 64 KB is
390
+ // generous for error diagnosis while preventing memory exhaustion.
391
+ const STDERR_MAX_LEN = 64 * 1024;
374
392
  proc.stderr.on("data", (data) => {
375
393
  result.stderr += data.toString();
394
+ if (result.stderr.length >= STDERR_MAX_LEN) {
395
+ result.stderr = result.stderr.slice(0, STDERR_MAX_LEN) + "\n[...stderr truncated at 64KB]";
396
+ }
376
397
  });
377
- proc.on("close", (code) => {
398
+ proc.on("close", (code, signal) => {
378
399
  clearTimers();
379
400
  if (buffer.trim()) processLine(buffer);
401
+ if (code === null && signal) killedBySignal = signal;
380
402
  resolve(code ?? 0);
381
403
  });
382
404
  proc.on("error", (err) => {
@@ -411,11 +433,25 @@ export async function runAgentTask(
411
433
  result.stopReason = acc.stopReason;
412
434
  result.errorMessage = acc.errorMessage;
413
435
  result.output = getFinalOutput(acc.messages);
436
+ // M-6: surface truncation when the message cap was hit so downstream
437
+ // phases and the user know output was cut short.
438
+ if (acc.truncated) {
439
+ result.output += "\n\n[...output truncated after 500 messages]";
440
+ }
441
+ // Signal kill detection: a child killed by a signal (e.g. OOM killer, cgroup
442
+ // limit) reports a null exit code, which the close handler maps to 0. Treat it
443
+ // as a failure so the runtime's retry/fail handling doesn't silently accept a truncated result.
444
+ if (exitCode === 0 && killedBySignal && !idleTimedOut && !wasAborted) {
445
+ result.exitCode = 1;
446
+ result.stopReason = "error";
447
+ result.errorMessage = `Subagent killed by signal ${killedBySignal}`;
448
+ }
414
449
  if (idleTimedOut) {
415
450
  // Distinct, actionable signal: the child was killed for being idle, not
416
451
  // a user abort. stopReason "error" keeps it in the failed bucket so the
417
452
  // runtime's retry/fail handling treats it as a real failure.
418
453
  result.stopReason = "error";
454
+ result.idleTimeout = true;
419
455
  result.errorMessage = `Subagent stalled: no output for ${Math.round((opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS) / 1000)}s (idle timeout) — killed`;
420
456
  } else if (wasAborted) {
421
457
  result.stopReason = "aborted";
@@ -29,7 +29,7 @@ function statusBadge(status: RunState["status"], theme: Theme): string {
29
29
  }
30
30
 
31
31
  function timeAgo(ts: number): string {
32
- const s = Math.floor((Date.now() - ts) / 1000);
32
+ const s = Math.max(0, Math.floor((Date.now() - ts) / 1000));
33
33
  if (s < 60) return `${s}s ago`;
34
34
  if (s < 3600) return `${Math.floor(s / 60)}m ago`;
35
35
  if (s < 86400) return `${Math.floor(s / 3600)}h ago`;
@@ -37,7 +37,7 @@ function timeAgo(ts: number): string {
37
37
  }
38
38
 
39
39
  function isResumable(r: RunState): boolean {
40
- return r.status === "paused" || r.status === "failed" || r.status === "blocked";
40
+ return r.status === "paused" || r.status === "failed";
41
41
  }
42
42
 
43
43
  export class RunHistoryComponent {
@@ -70,8 +70,17 @@ function buildInterpolationContext(
70
70
  ): InterpolationContext {
71
71
  const steps: Record<string, { output: string; json?: unknown }> = {};
72
72
  for (const [id, ps] of Object.entries(state.phases)) {
73
- if (ps.status === "done" && ps.output !== undefined) {
74
- steps[id] = { output: ps.output, json: ps.json };
73
+ // Include both done AND failed phases so downstream phases can see
74
+ // error info. Skipped phases (upstream failure cascade) are excluded.
75
+ if (ps.status === "done" || ps.status === "failed") {
76
+ if (ps.output !== undefined) {
77
+ steps[id] = { output: ps.output, json: ps.json };
78
+ } else if (ps.status === "failed") {
79
+ // M-3: Failed phases without output get a placeholder so
80
+ // downstream references like {steps.X.output} resolve to a
81
+ // sensible value instead of leaving the raw placeholder intact.
82
+ steps[id] = { output: "[previous phase failed]", json: undefined };
83
+ }
75
84
  }
76
85
  }
77
86
  return { args: state.args, steps, previousOutput, locals };
@@ -80,10 +89,16 @@ function buildInterpolationContext(
80
89
  function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
81
90
  const failed = isFailed(r);
82
91
  const attempts = attemptsOf(r);
92
+ // For failed phases, embed the error info in the output so downstream
93
+ // phases (and the user) can see what went wrong. The raw r.output is
94
+ // often a useless placeholder like "(upstream error: subagent failed)".
95
+ const output = failed
96
+ ? r.errorMessage || r.stderr || r.output
97
+ : r.output;
83
98
  return {
84
99
  id,
85
100
  status: failed ? "failed" : "done",
86
- output: r.output,
101
+ output,
87
102
  json: parseJson && !failed ? safeParse(r.output) : undefined,
88
103
  usage: r.usage,
89
104
  model: r.model,
@@ -156,8 +171,13 @@ function mergePhaseState(
156
171
  // which model produced the merged output.
157
172
  const model = ran.find((r) => r.model !== undefined)?.model;
158
173
  // Combine outputs as a labelled list; also expose a JSON array of outputs.
174
+ // For failed items, use the error message instead of the useless placeholder.
159
175
  const combinedText = ran
160
- .map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
176
+ .map((r, i) => {
177
+ const label = `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}`;
178
+ const content = isFailed(r) ? (r.errorMessage || r.stderr || r.output) : r.output;
179
+ return `${label}\n\n${content}`;
180
+ })
161
181
  .join("\n\n---\n\n");
162
182
  // Only successful runs feed the parsed JSON array (no error/skip strings).
163
183
  const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
@@ -373,7 +393,14 @@ async function executePhase(
373
393
  // Backoff: prefer the explicit policy's curve when the phase defines one
374
394
  // (covers transient retries too, and keeps tests fast with backoffMs:0),
375
395
  // otherwise use the transient defaults.
376
- const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
396
+ const baseMs = retry?.backoffMs != null ? retry.backoffMs : DEFAULT_TRANSIENT_BACKOFF_MS;
397
+ // Factor asymmetry is intentional:
398
+ // - Explicit retry: backoffMs * (factor ?? 1) ^ attempt — user's
399
+ // curve, defaults to flat (factor=1 → constant backoff).
400
+ // - Transient fallback: backoffMs * 2 ^ attempt — exponential.
401
+ // This lets users opt into flat retry with retry: {max:3} without
402
+ // specifying factor, while transient errors get proper exponential
403
+ // backoff.
377
404
  const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
378
405
  const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
379
406
  if (wait > 0) await delay(wait, deps.signal);
@@ -742,7 +769,7 @@ async function executePhase(
742
769
 
743
770
  for (let i = 1; i <= maxIters; i++) {
744
771
  if (deps.signal?.aborted) {
745
- stop = "failed";
772
+ stop = "aborted";
746
773
  break;
747
774
  }
748
775
  iterations = i;
@@ -788,14 +815,14 @@ async function executePhase(
788
815
  }
789
816
 
790
817
  const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
791
- if (failedResult) {
818
+ if (failedResult || stop === "failed" || stop === "aborted") {
792
819
  return {
793
820
  id: phase.id,
794
821
  status: "failed",
795
822
  output: lastOutput || undefined,
796
823
  usage: aggUsage,
797
- error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
798
- loop: { iterations, stop: "failed" },
824
+ error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
825
+ loop: { iterations, stop },
799
826
  warnings: loopWarnings.length ? loopWarnings : undefined,
800
827
  inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
801
828
  endedAt: Date.now(),
@@ -868,6 +895,22 @@ async function executePhase(
868
895
  };
869
896
  }
870
897
 
898
+ // Guard: skip the judge if the run is over budget or aborted.
899
+ if (deps.signal?.aborted || overBudget(state).over) {
900
+ return {
901
+ id: phase.id,
902
+ status: "done",
903
+ output: ok[0].output,
904
+ json: parseJson ? safeParse(ok[0].output) : undefined,
905
+ usage: variantUsage,
906
+ model: ok[0].model,
907
+ warnings: ["judge skipped: run aborted or budget exceeded"],
908
+ tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
909
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
910
+ endedAt: Date.now(),
911
+ };
912
+ }
913
+
871
914
  // Build the judge prompt: label every variant output, then the rubric.
872
915
  const labelled = ran
873
916
  .map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
@@ -1288,6 +1331,10 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1288
1331
  if (!budgetReason) budgetReason = "fan-out truncated by budget";
1289
1332
  }
1290
1333
  // Budget ceiling: once exceeded, remaining phases are skipped.
1334
+ // For concurrent same-layer phases, the check runs after each phase
1335
+ // completes, so at most (concurrency - 1) extra phases may run before
1336
+ // the budget is detected as exceeded. This bounded overshoot is
1337
+ // acceptable: budgetBlocked prevents cascading into subsequent layers.
1291
1338
  const ob = overBudget(state);
1292
1339
  if (ob.over && !budgetBlocked) {
1293
1340
  budgetBlocked = true;
@@ -235,7 +235,7 @@ const ArgSpecSchema = Type.Object(
235
235
 
236
236
  export const TaskflowSchema = Type.Object(
237
237
  {
238
- name: Type.String({ description: "Workflow name (becomes /tf:<name> command when saved)" }),
238
+ name: Type.String({ minLength: 1, description: "Workflow name (becomes /tf:<name> command when saved)" }),
239
239
  description: Type.Optional(Type.String()),
240
240
  version: Type.Optional(Type.Number({ default: 1 })),
241
241
  args: Type.Optional(Type.Record(Type.String(), ArgSpecSchema, { description: "Declared invocation arguments" })),
@@ -59,7 +59,7 @@ export interface PhaseState {
59
59
  /** Human-in-the-loop outcome (approval phases only). */
60
60
  approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
61
61
  /** Loop iteration accounting (loop phases only). */
62
- loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" };
62
+ loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
63
63
  /** Tournament outcome (tournament phases only). */
64
64
  tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
65
65
  /** Non-fatal diagnostic warnings accumulated during this phase (e.g.
@@ -128,6 +128,9 @@ export const DEFAULT_RUN_AGE_DAYS = DEFAULT_MAX_AGE_DAYS;
128
128
  /** Last cleanup timestamp — module-level so it persists across calls. */
129
129
  let lastCleanupAt = 0;
130
130
 
131
+ /** Shared buffer for Atomics.wait in acquireLock busy-wait (Finding 6). */
132
+ const LOCK_WAIT_BUF = new Int32Array(new SharedArrayBuffer(4));
133
+
131
134
  // ---------------------------------------------------------------------------
132
135
  // Internal helpers — path construction & sanitisation
133
136
  // ---------------------------------------------------------------------------
@@ -142,7 +145,7 @@ let lastCleanupAt = 0;
142
145
  * bare-dot / leading-dot components after the character substitution so the
143
146
  * write path can never escape runs/ (risk-reviewer v0.0.9 audit, H1).
144
147
  */
145
- function safeFlowDirName(flowName: string): string {
148
+ export function safeFlowDirName(flowName: string): string {
146
149
  let safe = flowName.replace(/[^\w.-]+/g, "_");
147
150
  // Collapse leading dots: blocks ".", "..", and hidden-dir names like ".git".
148
151
  safe = safe.replace(/^\.+/, "_");
@@ -245,7 +248,7 @@ function acquireLock(lockPath: string, timeoutMs: number = LOCK_TIMEOUT_MS): voi
245
248
  throw new Error(`Lock timeout after ${timeoutMs}ms waiting for ${path.basename(lockPath)}`);
246
249
  }
247
250
  // Sleep between lock polls with Atomics.wait (blocks the thread without spinning the CPU).
248
- Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, LOCK_POLL_MS);
251
+ Atomics.wait(LOCK_WAIT_BUF, 0, 0, LOCK_POLL_MS);
249
252
  }
250
253
  }
251
254
  }
@@ -392,11 +395,18 @@ function rebuildIndex(runsRoot: string): RunIndexEntry[] {
392
395
  } catch { /* skip corrupt */ }
393
396
  }
394
397
 
395
- const result = Array.from(entries.values());
396
- // Persist the rebuilt index under the index lock so it does not race a
397
- // concurrent updateIndexEntry / cleanup write (M1).
398
- withLock(indexLockPath(runsRoot), () => writeIndex(runsRoot, result));
399
- return result;
398
+ const scanned = Array.from(entries.values());
399
+ // Persist the rebuilt index under the index lock. Re-read the current
400
+ // index inside the lock and merge by runId so concurrent writes are not
401
+ // clobbered; scanned entries win on conflict (Finding 5).
402
+ withLock(indexLockPath(runsRoot), () => {
403
+ const currentIndex = readIndex(runsRoot);
404
+ const merged = new Map<string, RunIndexEntry>();
405
+ for (const e of currentIndex) merged.set(e.runId, e);
406
+ for (const e of scanned) merged.set(e.runId, e); // scanned wins
407
+ writeIndex(runsRoot, Array.from(merged.values()));
408
+ });
409
+ return scanned;
400
410
  }
401
411
 
402
412
  // ---------------------------------------------------------------------------
@@ -422,7 +432,8 @@ function cleanupTerminalRuns(
422
432
  maxKeep: number = DEFAULT_MAX_KEPT_TERMINAL,
423
433
  maxAgeDays: number = DEFAULT_MAX_AGE_DAYS,
424
434
  ): void {
425
- const now = Date.now();
435
+ const cleanupStarted = Date.now();
436
+ const now = cleanupStarted;
426
437
  if (now - lastCleanupAt < CLEANUP_INTERVAL_MS) return;
427
438
  lastCleanupAt = now;
428
439
 
@@ -473,6 +484,8 @@ function cleanupTerminalRuns(
473
484
  // Delete run files + lock files (outside the index lock).
474
485
  for (const e of toRemove) {
475
486
  const filePath = path.join(runsRoot, e.relPath);
487
+ // Race guard: skip files modified after cleanup started (Finding 2).
488
+ try { if (fs.statSync(filePath).mtimeMs > cleanupStarted) continue; } catch { continue; }
476
489
  try { fs.unlinkSync(filePath); } catch { /* already gone */ }
477
490
  // Also remove any orphaned lock file.
478
491
  try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
@@ -566,16 +579,19 @@ export function saveFlow(
566
579
  scope: "user" | "project" = "project",
567
580
  ): { filePath: string } {
568
581
  const dir = scope === "user" ? userFlowsDir() : (findProjectFlowsDir(cwd, true) ?? path.join(cwd, ".pi", "taskflows"));
582
+ if (!def.name || def.name.trim().length === 0) throw new Error("Flow name must not be empty");
569
583
  fs.mkdirSync(dir, { recursive: true });
570
- const safe = def.name.replace(/[^\w.-]+/g, "_");
584
+ const safe = safeFlowDirName(def.name);
571
585
  const filePath = path.join(dir, `${safe}.json`);
572
- writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`);
586
+ const fileLockPath = filePath + ".lock";
587
+ withLock(fileLockPath, () => { writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`); });
573
588
 
574
- // One-shot: let the user know we're creating a .pi/ directory on first save.
589
+ // One-shot: let the user know about .pi/ directory on first save (Finding 8).
575
590
  if (!_piCreationHinted) {
576
591
  _piCreationHinted = true;
592
+ const piExisted = fs.existsSync(path.join(dir, "..", ".."));
577
593
  console.warn(
578
- `[taskflow] Created .pi/taskflows/ for project-scoped flow storage. ` +
594
+ `[taskflow] ${piExisted ? "Using" : "Created"} .pi/taskflows/ for project-scoped flow storage. ` +
579
595
  `Add .pi/ to .gitignore if desired.`,
580
596
  );
581
597
  }
@@ -587,6 +603,8 @@ export function saveFlow(
587
603
  // --- Run state ---
588
604
 
589
605
  function runsDir(cwd: string): string {
606
+ // Safe non-null assertion: create=true guarantees a non-null return because
607
+ // findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
590
608
  const projDir = findProjectFlowsDir(cwd, true)!;
591
609
  return path.join(projDir, "runs");
592
610
  }
@@ -614,6 +632,9 @@ export function newRunId(flowName: string): string {
614
632
  * caller's reference.
615
633
  */
616
634
  export function saveRun(state: RunState, cleanup?: { maxKeep?: number; maxAgeDays?: number }): void {
635
+ // Reject unsafe runIds before any filesystem access (Finding 1).
636
+ if (!validateRunId(state.runId)) return;
637
+
617
638
  const root = runsDir(state.cwd);
618
639
  const flowDir = flowRunDir(root, state.flowName);
619
640
  fs.mkdirSync(flowDir, { recursive: true });
@@ -253,6 +253,7 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
253
253
  }
254
254
  }
255
255
 
256
+ const ESTIMATED_COST_PER_PHASE = 0.001; // $0.001 minimum per subagent call
256
257
  if (budget.maxTokens !== undefined && budget.maxTokens > 0 && minTokens > budget.maxTokens) {
257
258
  issues.push({
258
259
  message:
@@ -263,6 +264,16 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
263
264
  category: "budget-overflow",
264
265
  });
265
266
  }
267
+ if (budget.maxUSD !== undefined && budget.maxUSD > 0 && minTokens * ESTIMATED_COST_PER_PHASE > budget.maxUSD) {
268
+ issues.push({
269
+ message:
270
+ `Budget cap ($${budget.maxUSD}) is below the estimated minimum of ~$${(minTokens * ESTIMATED_COST_PER_PHASE).toFixed(3)} ` +
271
+ `for ${flow.phases.length} phase(s). The flow will likely be truncated before completion. ` +
272
+ `Increase maxUSD or reduce the number of phases.`,
273
+ severity: "warning",
274
+ category: "budget-overflow",
275
+ });
276
+ }
266
277
 
267
278
  return issues;
268
279
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-taskflow",
3
- "version": "0.0.16",
3
+ "version": "0.0.17",
4
4
  "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -310,7 +310,7 @@ Quick reference:
310
310
 
311
311
  - **Flow:** `name`, `description`, `concurrency` (default 8), `budget` (`maxUSD`/`maxTokens`), `agentScope` (user|project|both), `args`, `strictInterpolation`.
312
312
  - **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`with` (flow), `final`.
313
- - **Precedence (model/thinking/tools):** phase value → `settings.subagents.agentOverrides[agent]` agent frontmatter → global/default.
313
+ - **Precedence (model/thinking/tools):** phase value → agent frontmatter (resolved via `modelRoles`) → global/default.
314
314
  - **Concurrency:** same-layer phases use `flow.concurrency`; a `map`/`parallel` phase uses `phase.concurrency ?? flow.concurrency ?? 8`.
315
315
 
316
316
  ## Actions
@@ -11,7 +11,7 @@ Configuration lives in **five layers**, from most local to most global:
11
11
  | Phase | a phase object in the DSL | per-step model/thinking/tools/cwd/output/concurrency |
12
12
  | Flow | the top-level DSL object | name, args, default concurrency, agent scope |
13
13
  | Agent | `~/.pi/agent/agents/*.md`, `.pi/agents/*.md` frontmatter | per-agent default model/thinking/tools + system prompt |
14
- | Settings | `~/.pi/agent/settings.json` | `subagents.agentOverrides`, global thinking |
14
+ | Settings | `~/.pi/agent/settings.json` | `modelRoles`, global thinking |
15
15
  | Environment | shell env | `PI_TASKFLOW_PI_BIN` |
16
16
 
17
17
  ---
@@ -156,9 +156,9 @@ For any phase, the effective value is resolved in this **precedence order**
156
156
 
157
157
  | Setting | Precedence (high → low) |
158
158
  |---------|-------------------------|
159
- | **model** | `phase.model` → `settings.agentOverrides[agent].model` agent frontmatter `model` → pi default |
160
- | **thinking** | `phase.thinking` → `settings.agentOverrides[agent].thinking` → agent frontmatter `thinking` → `settings` global thinking → pi default |
161
- | **tools** | `phase.tools` → `settings.agentOverrides[agent].tools` → agent frontmatter `tools` → all tools |
159
+ | **model** | `phase.model` → agent frontmatter `model` (resolved via `modelRoles`) → pi default |
160
+ | **thinking** | `phase.thinking` → agent frontmatter `thinking` → `settings` global thinking → pi default |
161
+ | **tools** | `phase.tools` → agent frontmatter `tools` → all tools |
162
162
 
163
163
  Notes:
164
164
  - `tools` is a **whitelist** passed as `--tools a,b,c`. Omit it to allow all.
@@ -192,19 +192,18 @@ Taskflow shares the subagent settings file at `~/.pi/agent/settings.json`:
192
192
 
193
193
  ```jsonc
194
194
  {
195
+ "modelRoles": {
196
+ "fast": "openrouter/deepseek/deepseek-v4-flash",
197
+ "strong": "openrouter/xiaomi/mimo-v2.5-pro"
198
+ },
195
199
  "subagents": {
196
- "globalThinking": "medium", // fallback thinking for all subagents
197
- "agentOverrides": {
198
- "analyst": { "model": "claude-sonnet-4-5", "thinking": "high" },
199
- "scout": { "tools": ["read", "bash", "grep"] }
200
- }
200
+ "globalThinking": "medium" // fallback thinking for all subagents
201
201
  },
202
202
  "defaultThinkingLevel": "low" // used if subagents.globalThinking is absent
203
203
  }
204
204
  ```
205
205
 
206
- - `subagents.agentOverrides` — per-agent overrides applied at discovery; they beat
207
- agent frontmatter but lose to a phase-level value (see §5).
206
+ - `modelRoles` — maps `{{role}}` references in agent frontmatter to actual model identifiers.
208
207
  - `subagents.globalThinking` (or top-level `defaultThinkingLevel`) — global
209
208
  thinking fallback.
210
209
 
package/DESIGN.md DELETED
@@ -1,338 +0,0 @@
1
- # pi-taskflow — 设计与可行性方案
2
-
3
- > 轻量工作流编排框架 for [pi coding agent](https://pi.dev)
4
- > 灵感来自 Claude Code Dynamic Workflows(2026-05-28 发布),适配 pi extension 生态。
5
-
6
- ---
7
-
8
- ## 0. 一句话定位
9
-
10
- **让 LLM(或用户)用声明式 DSL 描述一个多阶段工作流,由确定性 runtime 编排 subagent 执行,中间结果不污染主 context,最终只回收结论;工作流可保存为命令、可复用、可恢复。**
11
-
12
- ---
13
-
14
- ## 1. 市场调研结论
15
-
16
- ### 1.1 命名
17
-
18
- | 名字 | 状态 | 说明 |
19
- |------|------|------|
20
- | `pi-workflow` | ❌ 已占 | VSCode GUI 扩展(聊天面板/侧栏),**非编排框架**,不冲突 |
21
- | **`pi-taskflow`** | ✅ 可用 | 本项目 |
22
-
23
- ### 1.2 竞品分析(pi 生态无同类)
24
-
25
- | 包 | 模式 | 与 pi-taskflow 差异 |
26
- |----|------|------|
27
- | `pi-pipeline` | SPEC→PLAN→TASKS→VERIFY 固定流水线 | 固定流程,非动态可定义 DSL |
28
- | `pi-agent-flow` | fork subagent 并行调用器(scout/audit…) | 一次性并行调用,无 DAG / 无保存 / 无恢复 |
29
- | `pi-crew` | 重型多 agent 编排 + worktree + 异步 | 太重,用户已弃用 |
30
- | `pi-loop` | planner-worker-judge 固定循环 | 固定架构 |
31
- | `pi-subagents`(官方) | single/parallel/chain 即时调用 | 无持久化工作流定义、无 fan-out scale、无恢复 |
32
-
33
- **结论:声明式、可保存、可恢复、支持动态 fan-out 的轻量编排框架在 pi 生态是空白。**
34
-
35
- ### 1.3 Claude Code Dynamic Workflows 借鉴要点
36
-
37
- | 特性 | Claude Code | pi-taskflow 对应 |
38
- |------|-------------|------------------|
39
- | 计划进代码 | Claude 写 JS 脚本 | LLM 产出 **声明式 JSON DSL**(更轻、可审、更安全) |
40
- | 中间结果隔离 | 脚本变量 | runtime 内存 Map,不进 context |
41
- | 规模 | 16 并发 / 1000 agent | 可配置并发上限 + `map` 动态 fan-out |
42
- | 可复用 | 保存为 `/command` | 保存到 `.pi/taskflows/`,注册为 `/tf:<name>` |
43
- | 可恢复 | 同 session 缓存 | run 状态落盘,**跨 session 可恢复**(超越 CC) |
44
- | 质量模式 | 对抗式 review | `gate` / `review` 阶段类型 |
45
-
46
- ---
47
-
48
- ## 2. 深度可行性验证(逐项对照 pi 真实 API)
49
-
50
- > 全部基于阅读 `@earendil-works/pi-coding-agent` 的 extensions.md / packages.md / json.md / skills.md / prompt-templates.md / development.md + 现有 `~/.pi/agent/extensions/subagent/` 源码。
51
-
52
- ### ✅ V1. 生成隔离上下文的 subagent,并拿到结构化输出
53
- - **机制**:`spawn("pi", ["--mode","json","-p","--no-session", ...])`,逐行解析 JSON 事件(`message_end` / `tool_result_end`)。
54
- - **证据**:现有 subagent extension 的 `runSingleAgent()` 已完整实现,含 usage 统计、stopReason、错误处理、abort 信号。
55
- - **结论**:**直接复用**,零风险。
56
-
57
- ### ✅ V2. 并发控制(matching CC 的 scale)
58
- - **机制**:`mapWithConcurrencyLimit(items, concurrency, fn)`。
59
- - **证据**:subagent extension 已有该函数(worker pool 实现)。
60
- - **结论**:复用 + 提高默认上限(CC=16),新增 `map` 阶段做动态 fan-out。
61
-
62
- ### ✅ V3. 中间结果不进 context window
63
- - **机制**:phase 结果存 runtime 内存 `Map<phaseName, PhaseResult>`;只有最终 phase 的 output 写进 tool `content`;完整轨迹放 `details`(默认不送 LLM,仅 TUI 渲染)。
64
- - **证据**:tool result 的 `content` vs `details` 分离(json.md / 现有 subagent)。
65
- - **结论**:可行,这是相对"裸 subagent 串联"的核心优势。
66
-
67
- ### ⚠️ V4. 后台执行(session 保持响应)—— 已知约束 + 取舍
68
- - **pi 现实**:工具调用在一个 agent turn 内是**同步阻塞**的;没有 CC 那种独立 workflow runtime 进程。
69
- - **可用手段**:
70
- - 工具 `onUpdate(partial)` 回调可**实时流式**推进度(subagent parallel 模式已验证)。
71
- - `ctx.ui.setStatus()` / `ctx.ui.setWidget()` footer/widget 进度。
72
- - **取舍**:
73
- - **v1(采用)**:工作流作为**单次长工具调用**执行,期间实时流式进度。session 在该 turn 内"忙",但有完整 phase 进度可视化 —— 与 subagent 现有体验一致,符合"轻量"。
74
- - **v2(路线图)**:detached 子进程 + 文件状态轮询 + `/tf status` 命令实现**真后台**。复杂度高,非首版。
75
- - **结论**:v1 可行,体验对标 subagent;真后台留作演进。诚实记录此约束。
76
-
77
- ### ✅ V5. 保存工作流 → 可复用命令
78
- - **三条可用路径**(均已读文档确认):
79
- 1. `pi.registerCommand()` —— 文档明确支持**运行时注册**(与 registerTool 同源刷新)。
80
- 2. `resources_discover` 事件 —— 动态贡献 prompt/skill 路径(dynamic-resources 示例验证)。
81
- 3. prompt templates(`.pi/prompts/*.md`)—— `/name` 展开为文本。
82
- - **采用方案**:
83
- - 工作流定义存 `.pi/taskflows/<name>.json`(项目级)/ `~/.pi/agent/taskflows/<name>.json`(用户级)。
84
- - `session_start` 时扫描目录,为每个工作流 `registerCommand("tf:<name>")`。
85
- - 始终提供通用 `taskflow` 工具(LLM 调用)+ `/tf run <name> [args]` 命令(用户调用)。
86
- - 保存新工作流后 `registerCommand` 立即生效(同 session 可用),无需 reload。
87
- - **结论**:可行,比 prompt-template 方案更强(命令直接驱动 runtime)。
88
-
89
- ### ✅ V6. 状态持久化 / 恢复
90
- - **机制**:
91
- - `pi.appendEntry(customType, data)` —— 会话内持久化(survive reload)。
92
- - run 状态额外落盘 `.pi/taskflows/runs/<runId>.json` —— **跨 session 恢复**。
93
- - 恢复逻辑:按 `phaseName + inputHash` 缓存结果;重跑跳过已完成 phase(与 CC "cached results" 一致)。
94
- - **证据**:todo.ts 示例(从 session entries 重建状态);appendEntry API(extensions.md)。
95
- - **结论**:可行,且跨 session 恢复**超越 CC**(CC 仅同 session)。
96
-
97
- ### ✅ V7. 进度可视化(TUI)
98
- - **机制**:复用 subagent 的 `renderCall` / `renderResult`;新增 phase 进度条 / DAG 状态。`ctx.ui.custom()` 做全屏 run 视图(todo.ts 模式)。
99
- - **结论**:可行,有现成范式。
100
-
101
- ### ✅ V8. 打包发布
102
- - **机制**:`package.json` + `pi` manifest + `pi-package` keyword;pi 核心走 `peerDependencies`;`extensions/` 约定目录。`pi install npm:pi-taskflow`。
103
- - **证据**:packages.md。
104
- - **结论**:可行。
105
-
106
- ### ✅ V9. Agent 复用
107
- - **机制**:复用 `discoverAgents(cwd, scope, overrides)`,从 `~/.pi/agent/agents/*.md` + `.pi/agents/*.md` 加载;工作流按 agent 名引用;支持 settings.json 的 `subagents.agentOverrides`。
108
- - **结论**:与现有 subagent 体系无缝衔接。
109
-
110
- ### 可行性总评
111
-
112
- | 项 | 结论 |
113
- |----|------|
114
- | 核心编排(spawn/并发/隔离) | ✅ 复用现成代码,零风险 |
115
- | 保存/命令/恢复 | ✅ API 齐全 |
116
- | 真·后台执行 | ⚠️ v1 用流式长调用替代,v2 演进 |
117
- | TUI/打包/agent | ✅ 有范式 |
118
-
119
- **整体:高度可行。唯一妥协是"真后台"留 v2,v1 用流式长工具调用,体验对标现有 subagent。**
120
-
121
- ---
122
-
123
- ## 3. 架构设计
124
-
125
- ### 3.1 包结构
126
-
127
- ```
128
- pi-taskflow/
129
- ├── package.json # pi manifest + peerDeps + pi-package keyword
130
- ├── tsconfig.json
131
- ├── README.md
132
- ├── DESIGN.md # 本文件
133
- ├── extensions/
134
- │ ├── index.ts # 入口:注册 tool + commands + 事件
135
- │ ├── runtime.ts # 编排引擎(DAG 解析 + 调度 + 恢复)
136
- │ ├── runner.ts # subagent spawn(复用/移植 runSingleAgent)
137
- │ ├── agents.ts # agent discovery(移植自 subagent/agents.ts)
138
- │ ├── schema.ts # Taskflow DSL typebox schema + 校验
139
- │ ├── store.ts # 工作流定义/run 状态读写(.pi/taskflows/)
140
- │ ├── interpolate.ts # 模板插值 {steps.x.output} / {args.y}
141
- │ └── render.ts # TUI renderCall/renderResult + 进度视图
142
- ├── skills/
143
- │ └── taskflow/
144
- │ └── SKILL.md # 教 LLM 何时/如何写 taskflow 定义
145
- └── examples/
146
- ├── audit-endpoints.json
147
- ├── deep-research.json
148
- └── migrate-files.json
149
- ```
150
-
151
- ### 3.2 DSL(声明式工作流定义)
152
-
153
- ```jsonc
154
- {
155
- "name": "audit-endpoints",
156
- "description": "审计 src/routes/ 下所有 API 端点的认证检查",
157
- "version": 1,
158
- "args": { // 调用时传入,{args.dir}
159
- "dir": { "default": "src/routes" }
160
- },
161
- "concurrency": 8, // 默认并发上限
162
- "phases": [
163
- {
164
- "id": "discover",
165
- "type": "agent", // 单 agent
166
- "agent": "analyst",
167
- "task": "列出 {args.dir} 下所有 API 端点,输出 JSON 数组 [{file, route}]",
168
- "output": "json" // 解析为结构化数据供 map 用
169
- },
170
- {
171
- "id": "audit",
172
- "type": "map", // ★ 动态 fan-out(scale 核心)
173
- "over": "{steps.discover.output}", // 对数组每项起一个 agent
174
- "as": "item",
175
- "agent": "analyst",
176
- "task": "审计端点 {item.route}(文件 {item.file})的认证检查,列出风险",
177
- "dependsOn": ["discover"]
178
- },
179
- {
180
- "id": "review",
181
- "type": "gate", // ★ 对抗式质量门
182
- "agent": "reviewer",
183
- "task": "复核以下审计结果,剔除误报,标注置信度:\n{steps.audit.output}",
184
- "dependsOn": ["audit"]
185
- },
186
- {
187
- "id": "report",
188
- "type": "agent",
189
- "agent": "planner",
190
- "task": "汇总成最终报告:\n{steps.review.output}",
191
- "dependsOn": ["review"],
192
- "final": true // 该 phase 输出回收到主 session
193
- }
194
- ]
195
- }
196
- ```
197
-
198
- ### 3.3 Phase 类型
199
-
200
- | type | 语义 | 并发 |
201
- |------|------|------|
202
- | `agent` | 单 subagent 调用 | 1 |
203
- | `parallel` | 静态多任务并行(固定 task 列表) | ≤concurrency |
204
- | `map` | 对上游数组**动态 fan-out**,每项一个 agent | ≤concurrency |
205
- | `gate` | 质量门 / 对抗 review(可决定是否继续) | 1+ |
206
- | `reduce` | 把多结果聚合为一(synthesize) | 1 |
207
- | `approval` | **人在环**:暂停等待 approve / reject / edit | 1 |
208
- | `flow` | 把一个**已保存的 taskflow** 当作单个 phase 运行(组合复用) | 子流程并发 |
209
-
210
- ### 3.3b 控制流 / 可靠性字段(任意 phase)
211
-
212
- | 字段 | 语义 |
213
- |------|------|
214
- | `when` | 条件守卫:表达式为假则 skip 该 phase。支持 `{refs}`、`== != < > <= >=`、`&& \|\| !`、括号、字符串/数字字面量。解析失败 fail-open(仍运行) |
215
- | `join` | 依赖 join:`all`(默认,等全部 dep)/ `any`(OR-join,任一 dep 完成即运行) |
216
- | `retry` | `{max, backoffMs, factor}`:失败重试,延迟 = `backoffMs * factor^attempt` |
217
- | `use` / `with` | `flow` 子流程的名字与入参(入参字符串值会插值) |
218
-
219
- 顶层 `budget: {maxUSD, maxTokens}`:累计成本/token 超限即停(剩余 phase skip,运行态 `blocked`)。
220
-
221
- ### 3.4 模板插值
222
-
223
- | 占位符 | 含义 |
224
- |--------|------|
225
- | `{args.X}` | 调用参数 |
226
- | `{steps.ID.output}` | 某 phase 的最终输出(字符串) |
227
- | `{steps.ID.json}` | 某 phase 输出解析为 JSON |
228
- | `{item}` / `{item.field}` | map 阶段当前项 |
229
- | `{previous.output}` | 上一 phase 输出(链式简写) |
230
-
231
- ### 3.5 执行引擎(runtime.ts)
232
-
233
- ```
234
- 1. 校验 DSL(schema.ts)
235
- 2. 拓扑排序 phases(dependsOn 建 DAG,检测环)
236
- 3. 按层调度:
237
- - 同层无依赖 phase 并行
238
- - map 阶段展开为 N 子任务,受 concurrency 限流
239
- 4. 每个 phase:
240
- - 插值 task
241
- - 命中缓存(phaseName+inputHash 在 run 状态里)→ 跳过
242
- - 否则 spawn subagent(runner.ts),流式 onUpdate
243
- - 存结果到内存 Map + 落盘 run 状态
244
- 5. gate 阶段可返回 {continue:false} 中止
245
- 6. final phase(或最后一个)输出 → tool content 回主 session
246
- 7. 全程 details 累积完整轨迹供 TUI
247
- ```
248
-
249
- ### 3.6 对外接口
250
-
251
- **(a) LLM 工具:`taskflow`**
252
- ```jsonc
253
- // 内联定义直接跑(LLM 动态生成工作流 —— 对标 CC "Claude 写脚本")
254
- { "define": { /* 完整 DSL */ }, "args": { "dir": "src/api" } }
255
-
256
- // 跑已保存的工作流
257
- { "run": "audit-endpoints", "args": { "dir": "src/api" } }
258
-
259
- // 保存定义为可复用命令
260
- { "save": "audit-endpoints", "define": { /* DSL */ } }
261
-
262
- // 从中断处恢复
263
- { "resume": "<runId>" }
264
- ```
265
-
266
- **(b) 用户命令**
267
- | 命令 | 作用 |
268
- |------|------|
269
- | `/tf list` | 列出已保存工作流 + 最近 run |
270
- | `/tf run <name> [args]` | 运行 |
271
- | `/tf:<name> [args]` | 每个保存的工作流自动注册的快捷命令 |
272
- | `/tf resume <runId>` | 恢复中断的 run |
273
- | `/tf show <name>` | 查看定义 |
274
- | `/tf runs` | 全屏 run 历史/状态视图(ctx.ui.custom) |
275
-
276
- **(c) 编程接口(供其他 extension)**
277
- ```ts
278
- export async function runTaskflow(def, args, ctx): Promise<TaskflowResult>
279
- ```
280
-
281
- ### 3.7 存储布局
282
-
283
- ```
284
- .pi/taskflows/ # 项目级定义(可入库共享)
285
- audit-endpoints.json
286
- ~/.pi/agent/taskflows/ # 用户级定义
287
- deep-research.json
288
- .pi/taskflows/runs/ # run 状态(恢复用,gitignore)
289
- <runId>.json # {def, args, phases:{id:{status,output,usage,hash}}}
290
- ```
291
-
292
- ---
293
-
294
- ## 4. 与现有 subagent 的关系
295
-
296
- - **不替代,是上层编排**。subagent = 即时调用;taskflow = 可定义/保存/恢复的编排。
297
- - 复用其 spawn / 并发 / usage / TUI 代码(移植进 `runner.ts`,避免硬依赖一个非 npm 的本地扩展)。
298
- - 共享 agent 体系(`~/.pi/agent/agents/*.md` + settings `subagents.agentOverrides`)。
299
-
300
- ---
301
-
302
- ## 5. 路线图
303
-
304
- | 版本 | 范围 | 状态 |
305
- |------|------|------|
306
- | **v0.1** | DSL + schema + runtime(agent/parallel/map/reduce)+ `taskflow` 工具 + `/tf run` + 内存隔离 + 流式进度 | ✅ 已发布 (npm 0.0.1) |
307
- | **v0.2** | 保存/动态命令注册 + 跨 session 恢复 + `gate` 真门控 + run 历史交互 TUI | ✅ 已完成 (npm 0.0.3) |
308
- | **v0.3** | examples + SKILL.md(教 LLM 写定义)+ YAML 支持 + 发布 npm | 🚧 examples/SKILL/npm 已做;YAML 待办 |
309
- | **v0.6** | 控制流 & 可靠性:`when` 条件分支 + `join:any` OR-join + 声明式 `retry` + `approval` 人在环 + `flow` 子流程组合 + `budget` 成本上限 | ✅ 已完成 |
310
- | **v0.7+** | 真·后台执行(detached + 轮询)+ 事件/cron 触发 + 成本**预估** + mermaid DAG 导出 + 内置 `deep-research` 工作流 | ⏳ 待办 |
311
-
312
- ---
313
-
314
- ## 6. 风险与缓解
315
-
316
- | 风险 | 缓解 |
317
- |------|------|
318
- | 真后台执行 v1 缺失 | 流式长调用 + 明确文档;v4 补 |
319
- | map 依赖上游输出结构化 JSON | `output:"json"` + 容错解析 + schema 提示 agent |
320
- | spawn pi 路径解析(bun/node/standalone) | 移植 subagent 的 `getPiInvocation()`(已处理三种运行时) |
321
- | 并发过高耗 token/限流 | concurrency 上限 + 成本预估(v4) |
322
- | 运行时命令注册兼容性 | session_start 扫描注册兜底;保存即注册为增强 |
323
- | DSL 过度复杂 | 保持声明式、5 种 phase 封顶;JS 逃生舱不做(保持"轻量") |
324
-
325
- ---
326
-
327
- ## 7. 下一步
328
-
329
- 1. 创建 `package.json` + `tsconfig.json` + 骨架目录
330
- 2. 实现 `schema.ts`(DSL 校验)+ `interpolate.ts`
331
- 3. 移植 `runner.ts` / `agents.ts`(自 subagent)
332
- 4. 实现 `runtime.ts`(DAG 调度 + map fan-out)
333
- 5. `index.ts` 接线 tool + `/tf` 命令
334
- 6. 本地 `pi -e ./extensions/index.ts` 联调
335
- 7. examples + SKILL.md + README
336
- 8. 发布 `npm publish` → `pi install npm:pi-taskflow`
337
- </content>
338
- </invoke>