pi-taskflow 0.0.15 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -66,7 +66,13 @@ function resolvePath(path: string, ctx: InterpolationContext): unknown {
66
66
  const step = stepId ? ctx.steps[stepId] : undefined;
67
67
  if (!step) return undefined;
68
68
  const field = parts[2];
69
- if (field === "output") return step.output;
69
+ if (field === "output") {
70
+ // Guard: {steps.X.output.trailing} — trailing segments after output are
71
+ // likely author errors (output is a string, not an object). Return
72
+ // undefined so the placeholder is left intact with a "missing" warning.
73
+ if (parts.length > 3) return undefined;
74
+ return step.output;
75
+ }
70
76
  if (field === "json") {
71
77
  const json = step.json ?? safeParse(step.output);
72
78
  return dig(json, parts.slice(3));
@@ -82,6 +88,12 @@ function resolvePath(path: string, ctx: InterpolationContext): unknown {
82
88
  return undefined;
83
89
  }
84
90
 
91
+ /**
92
+ * Traverse an object by a sequence of property keys. Returns `undefined`
93
+ * when any segment is missing or the current value is not an object —
94
+ * never throws, so extra path segments like {steps.X.json.a.b} where the
95
+ * data is shallower resolve gracefully to undefined (M-8).
96
+ */
85
97
  function dig(obj: unknown, parts: string[]): unknown {
86
98
  let cur: unknown = obj;
87
99
  for (const part of parts) {
@@ -219,10 +231,25 @@ function tokenize(input: string): Tok[] {
219
231
  }
220
232
  // quoted string
221
233
  if (c === '"' || c === "'") {
222
- const end = input.indexOf(c, i + 1);
223
- if (end === -1) throw new Error("unterminated string");
224
- toks.push({ t: "str", v: input.slice(i + 1, end) });
225
- i = end + 1;
234
+ // Handle escaped quotes. Note: ALL \X sequences are interpreted as literal X
235
+ // (including \n → n, \t → t). This differs from JSON/JS escaping but is
236
+ // correct for condition strings which only need quote escaping.
237
+ let j = i + 1;
238
+ let val = "";
239
+ while (j < n) {
240
+ if (input[j] === "\\" && j + 1 < n) {
241
+ val += input[j + 1];
242
+ j += 2;
243
+ } else if (input[j] === c) {
244
+ break;
245
+ } else {
246
+ val += input[j];
247
+ j++;
248
+ }
249
+ }
250
+ if (j >= n) throw new Error("unterminated string");
251
+ toks.push({ t: "str", v: val });
252
+ i = j + 1;
226
253
  continue;
227
254
  }
228
255
  // multi/single char operators
@@ -104,7 +104,7 @@ export function summarizeRun(state: RunState): string {
104
104
  const done = phases.filter((p) => p.status === "done").length;
105
105
  const failed = phases.filter((p) => p.status === "failed").length;
106
106
  const running = phases.filter((p) => p.status === "running").length;
107
- const total = state.def.phases.length;
107
+ const total = Object.keys(state.phases).length;
108
108
  const bits = [`${done}/${total} done`];
109
109
  if (running) bits.push(`${running} running`);
110
110
  if (failed) bits.push(`${failed} failed`);
@@ -254,7 +254,7 @@ function headerLine(state: RunState, theme: Theme): string {
254
254
  const done = phases.filter((p) => p.status === "done").length;
255
255
  const failed = phases.filter((p) => p.status === "failed").length;
256
256
  const running = phases.filter((p) => p.status === "running").length;
257
- const total = state.def.phases.length;
257
+ const total = Object.keys(state.phases).length;
258
258
 
259
259
  const head =
260
260
  state.status === "completed"
@@ -25,6 +25,8 @@ export interface RunResult {
25
25
  errorMessage?: string;
26
26
  /** Total subagent attempts incl. retries (set by the runtime's retry wrapper). */
27
27
  attempts?: number;
28
+ /** Set when the subagent was killed by the idle watchdog (not a user abort). */
29
+ idleTimeout?: boolean;
28
30
  }
29
31
 
30
32
  export interface LiveUpdate {
@@ -74,6 +76,8 @@ const TRANSIENT_ERROR_RE =
74
76
  /rate[_\s-]?limit|too\s+many\s+requests|overloaded|\b429\b|\b503\b|\b502\b|\b504\b|service\s+unavailable|temporarily\s+unavailable|timeout|timed?\s+out|econnreset|etimedout|socket\s+hang\s*up/i;
75
77
  export function isTransientError(r: RunResult): boolean {
76
78
  if (r.stopReason === "aborted") return false;
79
+ // Idle timeout is a deterministic stall — retrying won't help.
80
+ if (r.stopReason === "error" && r.idleTimeout) return false;
77
81
  const hay = `${r.errorMessage ?? ""} ${r.stderr ?? ""} ${r.output ?? ""}`;
78
82
  return TRANSIENT_ERROR_RE.test(hay);
79
83
  }
@@ -153,6 +157,8 @@ export interface EventAccumulator {
153
157
  stopReason?: string;
154
158
  errorMessage?: string;
155
159
  lastActivity: string;
160
+ /** Set when message cap was hit — output gets a truncation notice. */
161
+ truncated?: boolean;
156
162
  }
157
163
 
158
164
  export function newAccumulator(model?: string): EventAccumulator {
@@ -175,7 +181,15 @@ export function foldEventLine(acc: EventAccumulator, line: string): LiveUpdate |
175
181
  }
176
182
  if (event.type !== "message_end" || !event.message) return null;
177
183
  const msg = event.message as Message;
178
- acc.messages.push(msg);
184
+ // Cap prevents OOM from misconfigured loops. 500 messages is generous for
185
+ // normal subagent tasks (50 turns × 10 messages each). Messages beyond the
186
+ // cap are still parsed for usage/model/stopReason extraction.
187
+ const MAX_MESSAGES = 500;
188
+ if (acc.messages.length < MAX_MESSAGES) {
189
+ acc.messages.push(msg);
190
+ } else {
191
+ acc.truncated = true;
192
+ }
179
193
  if (msg.role !== "assistant") return null;
180
194
  acc.usage.turns++;
181
195
  const u = (msg as any).usage;
@@ -323,6 +337,7 @@ export async function runAgentTask(
323
337
 
324
338
  let wasAborted = false;
325
339
  let idleTimedOut = false;
340
+ let killedBySignal: string | undefined;
326
341
  const exitCode = await new Promise<number>((resolve) => {
327
342
  const invocation = getPiInvocation(args);
328
343
  const proc = spawn(invocation.command, invocation.args, {
@@ -371,12 +386,19 @@ export async function runAgentTask(
371
386
  buffer = lines.pop() || "";
372
387
  for (const line of lines) processLine(line);
373
388
  });
389
+ // Cap prevents OOM from verbose tool output (e.g., npm install). 64 KB is
390
+ // generous for error diagnosis while preventing memory exhaustion.
391
+ const STDERR_MAX_LEN = 64 * 1024;
374
392
  proc.stderr.on("data", (data) => {
375
393
  result.stderr += data.toString();
394
+ if (result.stderr.length >= STDERR_MAX_LEN) {
395
+ result.stderr = result.stderr.slice(0, STDERR_MAX_LEN) + "\n[...stderr truncated at 64KB]";
396
+ }
376
397
  });
377
- proc.on("close", (code) => {
398
+ proc.on("close", (code, signal) => {
378
399
  clearTimers();
379
400
  if (buffer.trim()) processLine(buffer);
401
+ if (code === null && signal) killedBySignal = signal;
380
402
  resolve(code ?? 0);
381
403
  });
382
404
  proc.on("error", (err) => {
@@ -411,11 +433,25 @@ export async function runAgentTask(
411
433
  result.stopReason = acc.stopReason;
412
434
  result.errorMessage = acc.errorMessage;
413
435
  result.output = getFinalOutput(acc.messages);
436
+ // M-6: surface truncation when the message cap was hit so downstream
437
+ // phases and the user know output was cut short.
438
+ if (acc.truncated) {
439
+ result.output += "\n\n[...output truncated after 500 messages]";
440
+ }
441
+ // Signal kill detection: exit code was null (mapped to 0 above) because a signal killed the process
442
+ // (e.g. OOM killer, cgroup limit). Treat as failure so the runtime's
443
+ // retry/fail handling doesn't silently accept a truncated result.
444
+ if (exitCode === 0 && killedBySignal && !idleTimedOut && !wasAborted) {
445
+ result.exitCode = 1;
446
+ result.stopReason = "error";
447
+ result.errorMessage = `Subagent killed by signal ${killedBySignal}`;
448
+ }
414
449
  if (idleTimedOut) {
415
450
  // Distinct, actionable signal: the child was killed for being idle, not
416
451
  // a user abort. stopReason "error" keeps it in the failed bucket so the
417
452
  // runtime's retry/fail handling treats it as a real failure.
418
453
  result.stopReason = "error";
454
+ result.idleTimeout = true;
419
455
  result.errorMessage = `Subagent stalled: no output for ${Math.round((opts.idleTimeoutMs ?? DEFAULT_IDLE_TIMEOUT_MS) / 1000)}s (idle timeout) — killed`;
420
456
  } else if (wasAborted) {
421
457
  result.stopReason = "aborted";
@@ -29,7 +29,7 @@ function statusBadge(status: RunState["status"], theme: Theme): string {
29
29
  }
30
30
 
31
31
  function timeAgo(ts: number): string {
32
- const s = Math.floor((Date.now() - ts) / 1000);
32
+ const s = Math.max(0, Math.floor((Date.now() - ts) / 1000));
33
33
  if (s < 60) return `${s}s ago`;
34
34
  if (s < 3600) return `${Math.floor(s / 60)}m ago`;
35
35
  if (s < 86400) return `${Math.floor(s / 3600)}h ago`;
@@ -37,7 +37,7 @@ function timeAgo(ts: number): string {
37
37
  }
38
38
 
39
39
  function isResumable(r: RunState): boolean {
40
- return r.status === "paused" || r.status === "failed" || r.status === "blocked";
40
+ return r.status === "paused" || r.status === "failed";
41
41
  }
42
42
 
43
43
  export class RunHistoryComponent {
@@ -70,8 +70,17 @@ function buildInterpolationContext(
70
70
  ): InterpolationContext {
71
71
  const steps: Record<string, { output: string; json?: unknown }> = {};
72
72
  for (const [id, ps] of Object.entries(state.phases)) {
73
- if (ps.status === "done" && ps.output !== undefined) {
74
- steps[id] = { output: ps.output, json: ps.json };
73
+ // Include both done AND failed phases so downstream phases can see
74
+ // error info. Skipped phases (upstream failure cascade) are excluded.
75
+ if (ps.status === "done" || ps.status === "failed") {
76
+ if (ps.output !== undefined) {
77
+ steps[id] = { output: ps.output, json: ps.json };
78
+ } else if (ps.status === "failed") {
79
+ // M-3: Failed phases without output get a placeholder so
80
+ // downstream references like {steps.X.output} resolve to a
81
+ // sensible value instead of leaving the raw placeholder intact.
82
+ steps[id] = { output: "[previous phase failed]", json: undefined };
83
+ }
75
84
  }
76
85
  }
77
86
  return { args: state.args, steps, previousOutput, locals };
@@ -80,10 +89,16 @@ function buildInterpolationContext(
80
89
  function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
81
90
  const failed = isFailed(r);
82
91
  const attempts = attemptsOf(r);
92
+ // For failed phases, embed the error info in the output so downstream
93
+ // phases (and the user) can see what went wrong. The raw r.output is
94
+ // often a useless placeholder like "(upstream error: subagent failed)".
95
+ const output = failed
96
+ ? r.errorMessage || r.stderr || r.output
97
+ : r.output;
83
98
  return {
84
99
  id,
85
100
  status: failed ? "failed" : "done",
86
- output: r.output,
101
+ output,
87
102
  json: parseJson && !failed ? safeParse(r.output) : undefined,
88
103
  usage: r.usage,
89
104
  model: r.model,
@@ -156,8 +171,13 @@ function mergePhaseState(
156
171
  // which model produced the merged output.
157
172
  const model = ran.find((r) => r.model !== undefined)?.model;
158
173
  // Combine outputs as a labelled list; also expose a JSON array of outputs.
174
+ // For failed items, use the error message instead of the useless placeholder.
159
175
  const combinedText = ran
160
- .map((r, i) => `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}\n\n${r.output}`)
176
+ .map((r, i) => {
177
+ const label = `### [${i + 1}/${ran.length}] ${r.agent}${isFailed(r) ? " (failed)" : ""}`;
178
+ const content = isFailed(r) ? (r.errorMessage || r.stderr || r.output) : r.output;
179
+ return `${label}\n\n${content}`;
180
+ })
161
181
  .join("\n\n---\n\n");
162
182
  // Only successful runs feed the parsed JSON array (no error/skip strings).
163
183
  const jsonArray = parseJson ? ran.filter((r) => !isFailed(r)).map((r) => safeParse(r.output) ?? r.output) : undefined;
@@ -373,7 +393,14 @@ async function executePhase(
373
393
  // Backoff: prefer the explicit policy's curve when the phase defines one
374
394
  // (covers transient retries too, and keeps tests fast with backoffMs:0),
375
395
  // otherwise use the transient defaults.
376
- const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
396
+ const baseMs = retry?.backoffMs != null ? retry.backoffMs : DEFAULT_TRANSIENT_BACKOFF_MS;
397
+ // Factor asymmetry is intentional:
398
+ // - Explicit retry: backoffMs * (factor ?? 1) ^ attempt — user's
399
+ // curve, defaults to flat (factor=1 → constant backoff).
400
+ // - Transient fallback: backoffMs * 2 ^ attempt — exponential.
401
+ // This lets users opt into flat retry with retry: {max:3} without
402
+ // specifying factor, while transient errors get proper exponential
403
+ // backoff.
377
404
  const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
378
405
  const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
379
406
  if (wait > 0) await delay(wait, deps.signal);
@@ -742,7 +769,7 @@ async function executePhase(
742
769
 
743
770
  for (let i = 1; i <= maxIters; i++) {
744
771
  if (deps.signal?.aborted) {
745
- stop = "failed";
772
+ stop = "aborted";
746
773
  break;
747
774
  }
748
775
  iterations = i;
@@ -788,14 +815,14 @@ async function executePhase(
788
815
  }
789
816
 
790
817
  const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
791
- if (failedResult) {
818
+ if (failedResult || stop === "failed" || stop === "aborted") {
792
819
  return {
793
820
  id: phase.id,
794
821
  status: "failed",
795
822
  output: lastOutput || undefined,
796
823
  usage: aggUsage,
797
- error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
798
- loop: { iterations, stop: "failed" },
824
+ error: failedResult?.errorMessage || failedResult?.stderr || (stop === "aborted" ? "Aborted" : `loop '${phase.id}' iteration ${iterations} failed`),
825
+ loop: { iterations, stop },
799
826
  warnings: loopWarnings.length ? loopWarnings : undefined,
800
827
  inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
801
828
  endedAt: Date.now(),
@@ -868,6 +895,22 @@ async function executePhase(
868
895
  };
869
896
  }
870
897
 
898
+ // Guard: skip the judge if the run is over budget or aborted.
899
+ if (deps.signal?.aborted || overBudget(state).over) {
900
+ return {
901
+ id: phase.id,
902
+ status: "done",
903
+ output: ok[0].output,
904
+ json: parseJson ? safeParse(ok[0].output) : undefined,
905
+ usage: variantUsage,
906
+ model: ok[0].model,
907
+ warnings: ["judge skipped: run aborted or budget exceeded"],
908
+ tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
909
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
910
+ endedAt: Date.now(),
911
+ };
912
+ }
913
+
871
914
  // Build the judge prompt: label every variant output, then the rubric.
872
915
  const labelled = ran
873
916
  .map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
@@ -1288,6 +1331,10 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1288
1331
  if (!budgetReason) budgetReason = "fan-out truncated by budget";
1289
1332
  }
1290
1333
  // Budget ceiling: once exceeded, remaining phases are skipped.
1334
+ // For concurrent same-layer phases, the check runs after each phase
1335
+ // completes, so at most (concurrency - 1) extra phases may run before
1336
+ // the budget is detected as exceeded. This bounded overshoot is
1337
+ // acceptable: budgetBlocked prevents cascading into subsequent layers.
1291
1338
  const ob = overBudget(state);
1292
1339
  if (ob.over && !budgetBlocked) {
1293
1340
  budgetBlocked = true;
@@ -235,7 +235,7 @@ const ArgSpecSchema = Type.Object(
235
235
 
236
236
  export const TaskflowSchema = Type.Object(
237
237
  {
238
- name: Type.String({ description: "Workflow name (becomes /tf:<name> command when saved)" }),
238
+ name: Type.String({ minLength: 1, description: "Workflow name (becomes /tf:<name> command when saved)" }),
239
239
  description: Type.Optional(Type.String()),
240
240
  version: Type.Optional(Type.Number({ default: 1 })),
241
241
  args: Type.Optional(Type.Record(Type.String(), ArgSpecSchema, { description: "Declared invocation arguments" })),
@@ -59,7 +59,7 @@ export interface PhaseState {
59
59
  /** Human-in-the-loop outcome (approval phases only). */
60
60
  approval?: { decision: "approve" | "reject" | "edit"; note?: string; auto?: boolean };
61
61
  /** Loop iteration accounting (loop phases only). */
62
- loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" };
62
+ loop?: { iterations: number; stop: "until" | "converged" | "maxIterations" | "failed" | "aborted" };
63
63
  /** Tournament outcome (tournament phases only). */
64
64
  tournament?: { variants: number; winner: number; mode: "best" | "aggregate"; reason?: string };
65
65
  /** Non-fatal diagnostic warnings accumulated during this phase (e.g.
@@ -121,9 +121,16 @@ const DEFAULT_MAX_KEPT_TERMINAL = 100;
121
121
  /** Remove terminal runs older than this (days). */
122
122
  const DEFAULT_MAX_AGE_DAYS = 30;
123
123
 
124
+ // Re-exported for use in TaskflowSettings defaults (agents.ts).
125
+ export const DEFAULT_KEPT_RUNS = DEFAULT_MAX_KEPT_TERMINAL;
126
+ export const DEFAULT_RUN_AGE_DAYS = DEFAULT_MAX_AGE_DAYS;
127
+
124
128
  /** Last cleanup timestamp — module-level so it persists across calls. */
125
129
  let lastCleanupAt = 0;
126
130
 
131
+ /** Shared buffer for the Atomics.wait sleep in acquireLock's polling loop (Finding 6). */
132
+ const LOCK_WAIT_BUF = new Int32Array(new SharedArrayBuffer(4));
133
+
127
134
  // ---------------------------------------------------------------------------
128
135
  // Internal helpers — path construction & sanitisation
129
136
  // ---------------------------------------------------------------------------
@@ -138,7 +145,7 @@ let lastCleanupAt = 0;
138
145
  * bare-dot / leading-dot components after the character substitution so the
139
146
  * write path can never escape runs/ (risk-reviewer v0.0.9 audit, H1).
140
147
  */
141
- function safeFlowDirName(flowName: string): string {
148
+ export function safeFlowDirName(flowName: string): string {
142
149
  let safe = flowName.replace(/[^\w.-]+/g, "_");
143
150
  // Collapse leading dots: blocks ".", "..", and hidden-dir names like ".git".
144
151
  safe = safe.replace(/^\.+/, "_");
@@ -241,7 +248,7 @@ function acquireLock(lockPath: string, timeoutMs: number = LOCK_TIMEOUT_MS): voi
241
248
  throw new Error(`Lock timeout after ${timeoutMs}ms waiting for ${path.basename(lockPath)}`);
242
249
  }
243
250
  // Poll: sleep LOCK_POLL_MS via Atomics.wait (blocks the thread without spinning).
244
- Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, LOCK_POLL_MS);
251
+ Atomics.wait(LOCK_WAIT_BUF, 0, 0, LOCK_POLL_MS);
245
252
  }
246
253
  }
247
254
  }
@@ -388,11 +395,18 @@ function rebuildIndex(runsRoot: string): RunIndexEntry[] {
388
395
  } catch { /* skip corrupt */ }
389
396
  }
390
397
 
391
- const result = Array.from(entries.values());
392
- // Persist the rebuilt index under the index lock so it does not race a
393
- // concurrent updateIndexEntry / cleanup write (M1).
394
- withLock(indexLockPath(runsRoot), () => writeIndex(runsRoot, result));
395
- return result;
398
+ const scanned = Array.from(entries.values());
399
+ // Persist the rebuilt index under the index lock. Re-read the current
400
+ // index inside the lock and merge by runId so concurrent writes are not
401
+ // clobbered — scanned entries win on conflict (Finding 5).
402
+ withLock(indexLockPath(runsRoot), () => {
403
+ const currentIndex = readIndex(runsRoot);
404
+ const merged = new Map<string, RunIndexEntry>();
405
+ for (const e of currentIndex) merged.set(e.runId, e);
406
+ for (const e of scanned) merged.set(e.runId, e); // scanned wins
407
+ writeIndex(runsRoot, Array.from(merged.values()));
408
+ });
409
+ return scanned;
396
410
  }
397
411
 
398
412
  // ---------------------------------------------------------------------------
@@ -418,7 +432,8 @@ function cleanupTerminalRuns(
418
432
  maxKeep: number = DEFAULT_MAX_KEPT_TERMINAL,
419
433
  maxAgeDays: number = DEFAULT_MAX_AGE_DAYS,
420
434
  ): void {
421
- const now = Date.now();
435
+ const cleanupStarted = Date.now();
436
+ const now = cleanupStarted;
422
437
  if (now - lastCleanupAt < CLEANUP_INTERVAL_MS) return;
423
438
  lastCleanupAt = now;
424
439
 
@@ -460,9 +475,17 @@ function cleanupTerminalRuns(
460
475
 
461
476
  if (toRemove.length === 0) return;
462
477
 
478
+ console.warn(
479
+ `[taskflow] Cleaning up ${toRemove.length} old run(s) ` +
480
+ `(max ${maxKeep} runs, ${maxAgeDays} day age limit). ` +
481
+ `Configure 'taskflow.maxKeptRuns' / 'taskflow.maxRunAgeDays' in settings.json (0 = keep all).`,
482
+ );
483
+
463
484
  // Delete run files + lock files (outside the index lock).
464
485
  for (const e of toRemove) {
465
486
  const filePath = path.join(runsRoot, e.relPath);
487
+ // Race guard: skip files modified after cleanup started (Finding 2).
488
+ try { if (fs.statSync(filePath).mtimeMs > cleanupStarted) continue; } catch { continue; }
466
489
  try { fs.unlinkSync(filePath); } catch { /* already gone */ }
467
490
  // Also remove any orphaned lock file.
468
491
  try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
@@ -548,22 +571,40 @@ export function getFlow(cwd: string, name: string): SavedFlow | null {
548
571
  return listFlows(cwd).find((f) => f.name === name) ?? null;
549
572
  }
550
573
 
574
+ let _piCreationHinted = false;
575
+
551
576
  export function saveFlow(
552
577
  cwd: string,
553
578
  def: Taskflow,
554
579
  scope: "user" | "project" = "project",
555
580
  ): { filePath: string } {
556
581
  const dir = scope === "user" ? userFlowsDir() : (findProjectFlowsDir(cwd, true) ?? path.join(cwd, ".pi", "taskflows"));
582
+ if (!def.name || def.name.trim().length === 0) throw new Error("Flow name must not be empty");
557
583
  fs.mkdirSync(dir, { recursive: true });
558
- const safe = def.name.replace(/[^\w.-]+/g, "_");
584
+ const safe = safeFlowDirName(def.name);
559
585
  const filePath = path.join(dir, `${safe}.json`);
560
- writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`);
586
+ const fileLockPath = filePath + ".lock";
587
+ withLock(fileLockPath, () => { writeFileAtomic(filePath, `${JSON.stringify(def, null, 2)}\n`); });
588
+
589
+ // One-shot: let the user know about .pi/ directory on first save (Finding 8).
590
+ if (!_piCreationHinted) {
591
+ _piCreationHinted = true;
592
+ const piExisted = fs.existsSync(path.join(dir, "..", ".."));
593
+ console.warn(
594
+ `[taskflow] ${piExisted ? "Using" : "Created"} .pi/taskflows/ for project-scoped flow storage. ` +
595
+ `Add .pi/ to .gitignore if desired.`,
596
+ );
597
+ }
598
+
561
599
  return { filePath };
562
600
  }
563
601
 
602
+
564
603
  // --- Run state ---
565
604
 
566
605
  function runsDir(cwd: string): string {
606
+ // Safe non-null assertion: create=true guarantees a non-null return because
607
+ // findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
567
608
  const projDir = findProjectFlowsDir(cwd, true)!;
568
609
  return path.join(projDir, "runs");
569
610
  }
@@ -590,7 +631,10 @@ export function newRunId(flowName: string): string {
590
631
  * F-009: shallow-clones state before stamping updatedAt to avoid mutating the
591
632
  * caller's reference.
592
633
  */
593
- export function saveRun(state: RunState): void {
634
+ export function saveRun(state: RunState, cleanup?: { maxKeep?: number; maxAgeDays?: number }): void {
635
+ // Reject unsafe runIds before any filesystem access (Finding 1).
636
+ if (!validateRunId(state.runId)) return;
637
+
594
638
  const root = runsDir(state.cwd);
595
639
  const flowDir = flowRunDir(root, state.flowName);
596
640
  fs.mkdirSync(flowDir, { recursive: true });
@@ -608,7 +652,11 @@ export function saveRun(state: RunState): void {
608
652
  });
609
653
 
610
654
  // Opportunistic cleanup — throttled to once per CLEANUP_INTERVAL_MS.
611
- cleanupTerminalRuns(root);
655
+ const maxKeep = cleanup?.maxKeep ?? DEFAULT_MAX_KEPT_TERMINAL;
656
+ const maxAgeDays = cleanup?.maxAgeDays ?? DEFAULT_MAX_AGE_DAYS;
657
+ if (maxKeep > 0 || maxAgeDays > 0) {
658
+ cleanupTerminalRuns(root, maxKeep, maxAgeDays);
659
+ }
612
660
  }
613
661
 
614
662
  /**
@@ -253,6 +253,7 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
253
253
  }
254
254
  }
255
255
 
256
+ const ESTIMATED_COST_PER_PHASE = 0.001; // $0.001 minimum per subagent call
256
257
  if (budget.maxTokens !== undefined && budget.maxTokens > 0 && minTokens > budget.maxTokens) {
257
258
  issues.push({
258
259
  message:
@@ -263,6 +264,16 @@ function detectBudgetOverflow(flow: VerifiableFlow): VerificationIssue[] {
263
264
  category: "budget-overflow",
264
265
  });
265
266
  }
267
+ if (budget.maxUSD !== undefined && budget.maxUSD > 0 && minTokens * ESTIMATED_COST_PER_PHASE > budget.maxUSD) {
268
+ issues.push({
269
+ message:
270
+ `Budget cap ($${budget.maxUSD}) is below the estimated minimum of ~$${(minTokens * ESTIMATED_COST_PER_PHASE).toFixed(3)} ` +
271
+ `for ${flow.phases.length} phase(s). The flow will likely be truncated before completion. ` +
272
+ `Increase maxUSD or reduce the number of phases.`,
273
+ severity: "warning",
274
+ category: "budget-overflow",
275
+ });
276
+ }
266
277
 
267
278
  return issues;
268
279
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-taskflow",
3
- "version": "0.0.15",
3
+ "version": "0.0.17",
4
4
  "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -31,12 +31,14 @@
31
31
  "skills",
32
32
  "examples",
33
33
  "README.md",
34
+ "README.zh-CN.md",
35
+ "CHANGELOG.md",
34
36
  "DESIGN.md",
35
37
  "LICENSE"
36
38
  ],
37
39
  "scripts": {
38
40
  "typecheck": "tsc --noEmit",
39
- "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts",
41
+ "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts",
40
42
  "test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
41
43
  "test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
42
44
  },
@@ -46,7 +48,8 @@
46
48
  ],
47
49
  "skills": [
48
50
  "./skills"
49
- ]
51
+ ],
52
+ "image": "https://raw.githubusercontent.com/heggria/pi-taskflow/main/assets/social-preview.png"
50
53
  },
51
54
  "peerDependencies": {
52
55
  "@earendil-works/pi-agent-core": "*",
@@ -310,7 +310,7 @@ Quick reference:
310
310
 
311
311
  - **Flow:** `name`, `description`, `concurrency` (default 8), `budget` (`maxUSD`/`maxTokens`), `agentScope` (user|project|both), `args`, `strictInterpolation`.
312
312
  - **Phase:** `model`, `thinking`, `tools` (whitelist), `cwd`, `output:"json"`, `concurrency` (map/parallel fan-out), `when`, `join` (all|any), `retry`, `use`/`with` (flow), `final`.
313
- - **Precedence (model/thinking/tools):** phase value → `settings.subagents.agentOverrides[agent]` agent frontmatter → global/default.
313
+ - **Precedence (model/thinking/tools):** phase value → agent frontmatter (resolved via `modelRoles`) → global/default.
314
314
  - **Concurrency:** same-layer phases use `flow.concurrency`; a `map`/`parallel` phase uses `phase.concurrency ?? flow.concurrency ?? 8`.
315
315
 
316
316
  ## Actions
@@ -11,7 +11,7 @@ Configuration lives in **five layers**, from most local to most global:
11
11
  | Phase | a phase object in the DSL | per-step model/thinking/tools/cwd/output/concurrency |
12
12
  | Flow | the top-level DSL object | name, args, default concurrency, agent scope |
13
13
  | Agent | `~/.pi/agent/agents/*.md`, `.pi/agents/*.md` frontmatter | per-agent default model/thinking/tools + system prompt |
14
- | Settings | `~/.pi/agent/settings.json` | `subagents.agentOverrides`, global thinking |
14
+ | Settings | `~/.pi/agent/settings.json` | `modelRoles`, global thinking |
15
15
  | Environment | shell env | `PI_TASKFLOW_PI_BIN` |
16
16
 
17
17
  ---
@@ -156,9 +156,9 @@ For any phase, the effective value is resolved in this **precedence order**
156
156
 
157
157
  | Setting | Precedence (high → low) |
158
158
  |---------|-------------------------|
159
- | **model** | `phase.model` → `settings.agentOverrides[agent].model` agent frontmatter `model` → pi default |
160
- | **thinking** | `phase.thinking` → `settings.agentOverrides[agent].thinking` → agent frontmatter `thinking` → `settings` global thinking → pi default |
161
- | **tools** | `phase.tools` → `settings.agentOverrides[agent].tools` → agent frontmatter `tools` → all tools |
159
+ | **model** | `phase.model` → agent frontmatter `model` (resolved via `modelRoles`) → pi default |
160
+ | **thinking** | `phase.thinking` → agent frontmatter `thinking` → `settings` global thinking → pi default |
161
+ | **tools** | `phase.tools` → agent frontmatter `tools` → all tools |
162
162
 
163
163
  Notes:
164
164
  - `tools` is a **whitelist** passed as `--tools a,b,c`. Omit it to allow all.
@@ -192,19 +192,18 @@ Taskflow shares the subagent settings file at `~/.pi/agent/settings.json`:
192
192
 
193
193
  ```jsonc
194
194
  {
195
+ "modelRoles": {
196
+ "fast": "openrouter/deepseek/deepseek-v4-flash",
197
+ "strong": "openrouter/xiaomi/mimo-v2.5-pro"
198
+ },
195
199
  "subagents": {
196
- "globalThinking": "medium", // fallback thinking for all subagents
197
- "agentOverrides": {
198
- "analyst": { "model": "claude-sonnet-4-5", "thinking": "high" },
199
- "scout": { "tools": ["read", "bash", "grep"] }
200
- }
200
+ "globalThinking": "medium" // fallback thinking for all subagents
201
201
  },
202
202
  "defaultThinkingLevel": "low" // used if subagents.globalThinking is absent
203
203
  }
204
204
  ```
205
205
 
206
- - `subagents.agentOverrides` — per-agent overrides applied at discovery; they beat
207
- agent frontmatter but lose to a phase-level value (see §5).
206
+ - `modelRoles` — maps `{{role}}` references in agent frontmatter to actual model identifiers.
208
207
  - `subagents.globalThinking` (or top-level `defaultThinkingLevel`) — global
209
208
  thinking fallback.
210
209