pi-taskflow 0.0.19 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,15 @@ import { withFileMutationQueue } from "@earendil-works/pi-coding-agent";
13
13
  import type { AgentConfig } from "./agents.ts";
14
14
  import { emptyUsage, type UsageStats } from "./usage.ts";
15
15
 
16
+ const activeChildren = new Set<number>();
17
+ const killAll = () => {
18
+ for (const pid of activeChildren) {
19
+ try { process.kill(pid, "SIGKILL"); } catch { /* already dead */ }
20
+ }
21
+ };
22
+ process.on("exit", killAll);
23
+ process.on("SIGTERM", () => { killAll(); process.exit(143); });
24
+
16
25
  export interface RunResult {
17
26
  agent: string;
18
27
  task: string;
@@ -60,7 +69,7 @@ export interface RunOptions {
60
69
  * 5 minutes is generous enough for slow reasoning/long tool calls while still
61
70
  * bounding a true hang.
62
71
  */
63
- export const DEFAULT_IDLE_TIMEOUT_MS = 5 * 60_000;
72
+ const DEFAULT_IDLE_TIMEOUT_MS = 5 * 60_000;
64
73
 
65
74
  export function isFailed(r: RunResult): boolean {
66
75
  return r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted";
@@ -345,6 +354,7 @@ export async function runAgentTask(
345
354
  shell: false,
346
355
  stdio: ["ignore", "pipe", "pipe"],
347
356
  });
357
+ if (proc.pid) activeChildren.add(proc.pid);
348
358
  let buffer = "";
349
359
 
350
360
  // Idle watchdog: a subagent that goes silent on stdout for too long is
@@ -389,13 +399,18 @@ export async function runAgentTask(
389
399
  // Cap prevents OOM from verbose tool output (e.g., npm install). 64 KB is
390
400
  // generous for error diagnosis while preventing memory exhaustion.
391
401
  const STDERR_MAX_LEN = 64 * 1024;
402
+ let stderrCapped = false;
392
403
  proc.stderr.on("data", (data) => {
393
- result.stderr += data.toString();
394
- if (result.stderr.length >= STDERR_MAX_LEN) {
395
- result.stderr = result.stderr.slice(0, STDERR_MAX_LEN) + "\n[...stderr truncated at 64KB]";
404
+ if (!stderrCapped) {
405
+ result.stderr += data.toString();
406
+ if (result.stderr.length >= STDERR_MAX_LEN) {
407
+ result.stderr = result.stderr.slice(0, STDERR_MAX_LEN) + "\n[...stderr truncated at 64KB]";
408
+ stderrCapped = true;
409
+ }
396
410
  }
397
411
  });
398
412
  proc.on("close", (code, signal) => {
413
+ if (proc.pid) activeChildren.delete(proc.pid);
399
414
  clearTimers();
400
415
  if (buffer.trim()) processLine(buffer);
401
416
  if (code === null && signal) killedBySignal = signal;
@@ -47,7 +47,7 @@ export interface RuntimeDeps {
47
47
  onProgress?: (state: RunState) => void;
48
48
  /** Injectable task runner (defaults to spawning a real subagent). Enables testing. */
49
49
  runTask?: typeof runAgentTask;
50
- /** Resolve an `approval` phase. Omit for non-interactive runs (auto-approve). */
50
+ /** Resolve an `approval` phase. Omit for non-interactive runs (auto-reject). */
51
51
  requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
52
52
  /** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
53
53
  loadFlow?: (name: string) => Taskflow | undefined;
@@ -392,6 +392,7 @@ async function executePhase(
392
392
  runId: state.runId,
393
393
  thinking: phase.thinking,
394
394
  tools: phase.tools,
395
+ preRead,
395
396
  };
396
397
 
397
398
  const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
@@ -700,13 +701,16 @@ async function executePhase(
700
701
  const cached = cachedPhase(cc, inputHash);
701
702
  if (cached) return cached;
702
703
 
703
- // Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
704
+ // Non-interactive (headless/CI/detached): auto-REJECT, fail-open, but record it.
705
+ // Approval gates are safety boundaries — bypassing them silently in CI would
706
+ // let unreviewed work ship. Detached/CI runs must not bypass approval gates.
704
707
  if (!deps.requestApproval) {
705
708
  return {
706
709
  id: phase.id,
707
710
  status: "done",
708
- output: "(auto-approved: no interactive approver available)",
709
- approval: { decision: "approve", auto: true },
711
+ output: "(auto-rejected: no interactive approver available)",
712
+ approval: { decision: "reject", auto: true },
713
+ gate: { verdict: "block", reason: "(auto-rejected: no interactive approver available)" },
710
714
  usage: emptyUsage(),
711
715
  inputHash,
712
716
  endedAt: Date.now(),
@@ -1025,6 +1029,7 @@ async function executePhase(
1025
1029
  // Using indexOf on the stable `ran` array is reference-based and correct even
1026
1030
  // when two variants produce byte-identical output.
1027
1031
  const ranIdx = (r: RunResult) => ran.indexOf(r) + 1;
1032
+ const budgetSkipCount = results.filter((r) => r.stopReason === "budget-skipped").length;
1028
1033
 
1029
1034
  // All competitors failed → the tournament fails (nothing to judge).
1030
1035
  if (ok.length === 0) {
@@ -1033,6 +1038,7 @@ async function executePhase(
1033
1038
  status: "failed",
1034
1039
  usage: variantUsage,
1035
1040
  error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
1041
+ budgetTruncated: budgetSkipCount > 0 || undefined,
1036
1042
  tournament: { variants: competitors.length, winner: 0, mode },
1037
1043
  inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
1038
1044
  endedAt: Date.now(),
@@ -1047,6 +1053,7 @@ async function executePhase(
1047
1053
  json: parseJson ? safeParse(ok[0].output) : undefined,
1048
1054
  usage: variantUsage,
1049
1055
  model: ok[0].model,
1056
+ budgetTruncated: budgetSkipCount > 0 || undefined,
1050
1057
  tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
1051
1058
  inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
1052
1059
  endedAt: Date.now(),
@@ -1062,6 +1069,7 @@ async function executePhase(
1062
1069
  json: parseJson ? safeParse(ok[0].output) : undefined,
1063
1070
  usage: variantUsage,
1064
1071
  model: ok[0].model,
1072
+ budgetTruncated: budgetSkipCount > 0 || undefined,
1065
1073
  warnings: ["judge skipped: run aborted or budget exceeded"],
1066
1074
  tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge skipped" },
1067
1075
  inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
@@ -1095,6 +1103,7 @@ async function executePhase(
1095
1103
  json: parseJson ? safeParse(ok[0].output) : undefined,
1096
1104
  usage: judgeUsage,
1097
1105
  model: ok[0].model,
1106
+ budgetTruncated: budgetSkipCount > 0 || undefined,
1098
1107
  warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
1099
1108
  tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
1100
1109
  inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
@@ -1117,6 +1126,7 @@ async function executePhase(
1117
1126
  json: parseJson ? safeParse(output) : undefined,
1118
1127
  usage: judgeUsage,
1119
1128
  model: mode === "aggregate" ? judgeRes.model : chosen.model,
1129
+ budgetTruncated: budgetSkipCount > 0 || undefined,
1120
1130
  warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
1121
1131
  tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
1122
1132
  inputHash: hashInput(phase.id, "tournament", String(competitors.length), mode),
@@ -1179,15 +1189,26 @@ interface PhaseCacheCtx {
1179
1189
  * silently serve a stale cross-run hit). */
1180
1190
  thinking?: string;
1181
1191
  tools?: string[];
1192
+ /** Resolved `context` pre-read content. Explicitly part of the cache identity
1193
+ * so a context-file change always invalidates the phase — independent of
1194
+ * whether a given branch happens to fold preRead into its task string
1195
+ * (previously this was only incidentally true via `fullTask`). */
1196
+ preRead?: string;
1182
1197
  }
1183
1198
 
1184
1199
  /** Fold the phase fingerprint into the base hash parts to form the final cache key. */
1185
1200
  function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
1186
1201
  // Fold the full cache identity into the hash: flow name (prevents collisions
1187
1202
  // across different flows that share a phase.id + task + model), the per-phase
1188
- // thinking/tools config (changing either changes the subagent's output), and
1189
- // the resolved world-state fingerprint.
1190
- const parts = [`flow:${cc.flowName}`, ...baseParts, `think:${cc.thinking ?? ""}`, `tools:${JSON.stringify(cc.tools ?? [])}`];
1203
+ // thinking/tools config (changing either changes the subagent's output), the
1204
+ // resolved context pre-read content, and the world-state fingerprint.
1205
+ const parts = [
1206
+ `flow:${cc.flowName}`,
1207
+ ...baseParts,
1208
+ `think:${cc.thinking ?? ""}`,
1209
+ `tools:${JSON.stringify(cc.tools ?? [])}`,
1210
+ `ctx:${cc.preRead ?? ""}`,
1211
+ ];
1191
1212
  return cc.fingerprint ? hashInput(...parts, cc.fingerprint) : hashInput(...parts);
1192
1213
  }
1193
1214
 
@@ -1398,12 +1419,10 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1398
1419
  let gateBlocked = false;
1399
1420
  let gateReason = "";
1400
1421
  let gateOutput = "";
1401
- // `budgetBlocked` gates the skipping of remaining phases once the cap is hit.
1402
- // `budgetSkipped` records that a phase was *actually* skipped/truncated for
1403
- // budget only then is the run terminal-status "blocked" (a cap crossed by the
1404
- // very last phase, with nothing left to skip, must NOT mark a good run failed).
1422
+ // `budgetBlocked` gates the skipping of remaining phases once the cap is hit
1423
+ // and also drives the terminal "blocked" status — a maxUSD ceiling must never
1424
+ // silently do nothing.
1405
1425
  let budgetBlocked = false;
1406
- let budgetSkipped = false;
1407
1426
  let budgetReason = "";
1408
1427
  const byId = new Map(def.phases.map((p) => [p.id, p]));
1409
1428
 
@@ -1442,7 +1461,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1442
1461
  }
1443
1462
 
1444
1463
  if (skipReason) {
1445
- if (skipReason.startsWith("Budget exceeded")) budgetSkipped = true;
1464
+ if (skipReason.startsWith("Budget exceeded")) budgetBlocked = true;
1446
1465
  state.phases[phase.id] = {
1447
1466
  id: phase.id,
1448
1467
  status: "skipped",
@@ -1485,7 +1504,6 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1485
1504
  // A fan-out cut short by the cap is itself a budget skip.
1486
1505
  if (ps.budgetTruncated) {
1487
1506
  budgetBlocked = true;
1488
- budgetSkipped = true;
1489
1507
  if (!budgetReason) budgetReason = "fan-out truncated by budget";
1490
1508
  }
1491
1509
  // Budget ceiling: once exceeded, remaining phases are skipped.
@@ -1494,7 +1512,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1494
1512
  // the budget is detected as exceeded. This bounded overshoot is
1495
1513
  // acceptable: budgetBlocked prevents cascading into subsequent layers.
1496
1514
  const ob = overBudget(state);
1497
- if (ob.over && !budgetBlocked) {
1515
+ if (ob.over) {
1498
1516
  budgetBlocked = true;
1499
1517
  budgetReason = ob.reason;
1500
1518
  }
@@ -1517,7 +1535,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1517
1535
 
1518
1536
  state.status = aborted
1519
1537
  ? "paused"
1520
- : gateBlocked || budgetSkipped
1538
+ : gateBlocked || budgetBlocked
1521
1539
  ? "blocked"
1522
1540
  : anyFailed
1523
1541
  ? "failed"
@@ -1527,7 +1545,7 @@ async function runTaskflowLayers(state: RunState, deps: RuntimeDeps): Promise<Ru
1527
1545
  let finalOutput = finalState?.output ?? "(no output)";
1528
1546
  if (gateBlocked) {
1529
1547
  finalOutput = `Gate blocked the workflow.${gateReason ? `\nReason: ${gateReason}` : ""}${gateOutput ? `\n\n${gateOutput}` : ""}`;
1530
- } else if (budgetSkipped) {
1548
+ } else if (budgetBlocked) {
1531
1549
  finalOutput = `Budget exceeded — run halted.${budgetReason ? `\nReason: ${budgetReason}` : ""}${finalState?.output ? `\n\n${finalState.output}` : ""}`;
1532
1550
  }
1533
1551
 
@@ -13,8 +13,8 @@ import { Type, type Static } from "typebox";
13
13
  // Phase types
14
14
  // ---------------------------------------------------------------------------
15
15
 
16
- export const PHASE_TYPES = ["agent", "parallel", "map", "gate", "reduce", "approval", "flow", "loop", "tournament"] as const;
17
- export type PhaseType = (typeof PHASE_TYPES)[number];
16
+ const PHASE_TYPES = ["agent", "parallel", "map", "gate", "reduce", "approval", "flow", "loop", "tournament"] as const;
17
+ type PhaseType = (typeof PHASE_TYPES)[number];
18
18
 
19
19
  /** Loop iteration bounds. Authors may lower the max; the hard cap is a runaway guard. */
20
20
  export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
@@ -36,17 +36,18 @@ export const MAX_DYNAMIC_CONCURRENCY = 16;
36
36
  /** Tournament competitor bounds. */
37
37
  export const TOURNAMENT_DEFAULT_VARIANTS = 3;
38
38
  export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
39
- export const TOURNAMENT_MODES = ["best", "aggregate"] as const;
39
+ const TOURNAMENT_MODES = ["best", "aggregate"] as const;
40
+ /** @internal */
40
41
  export type TournamentMode = (typeof TOURNAMENT_MODES)[number];
41
42
 
42
- export const OUTPUT_FORMATS = ["text", "json"] as const;
43
- export const JOIN_MODES = ["all", "any"] as const;
44
- export const CACHE_SCOPES = ["run-only", "cross-run", "off"] as const;
43
+ const OUTPUT_FORMATS = ["text", "json"] as const;
44
+ const JOIN_MODES = ["all", "any"] as const;
45
+ const CACHE_SCOPES = ["run-only", "cross-run", "off"] as const;
45
46
  export type CacheScope = (typeof CACHE_SCOPES)[number];
46
47
  /** Allowed fingerprint entry prefixes. `glob!:` = content-hash variant of `glob:`. */
47
- export const CACHE_FINGERPRINT_PREFIXES = ["git:", "glob:", "glob!:", "file:", "env:"] as const;
48
+ const CACHE_FINGERPRINT_PREFIXES = ["git:", "glob:", "glob!:", "file:", "env:"] as const;
48
49
  /** Phase types that must NOT be cached across runs (a fresh result is required each run). */
49
- export const CACHE_CROSS_RUN_BLOCKED_TYPES = ["gate", "approval", "loop", "tournament"] as const;
50
+ const CACHE_CROSS_RUN_BLOCKED_TYPES = ["gate", "approval", "loop", "tournament"] as const;
50
51
 
51
52
  const ParallelTaskSchema = Type.Object(
52
53
  {
@@ -282,7 +283,7 @@ export type ArgSpec = Static<typeof ArgSpecSchema>;
282
283
  export type RetryPolicy = Static<typeof RetrySchema>;
283
284
  export type Budget = Static<typeof BudgetSchema>;
284
285
  export type CachePolicy = Static<typeof CacheSchema>;
285
- export type JoinMode = (typeof JOIN_MODES)[number];
286
+ type JoinMode = (typeof JOIN_MODES)[number];
286
287
 
287
288
  // ---------------------------------------------------------------------------
288
289
  // Shorthand (non-DAG) specs — subagent-style ergonomics
@@ -302,6 +303,10 @@ export type JoinMode = (typeof JOIN_MODES)[number];
302
303
  export interface ShorthandStep {
303
304
  agent?: string;
304
305
  task: string;
306
+ /** Files to pre-read and inject before the task (pass-through to Phase.context). */
307
+ context?: string[];
308
+ /** Max characters per context file (pass-through to Phase.contextLimit). */
309
+ contextLimit?: number;
305
310
  }
306
311
 
307
312
  /** True when `def` is a shorthand spec (no `phases`, but a task/tasks/chain field). */
@@ -316,11 +321,22 @@ export function isShorthand(def: unknown): boolean {
316
321
  );
317
322
  }
318
323
 
324
+ /** Coerce an unknown value into a non-empty list of non-empty strings (or undefined). */
325
+ function readContextList(v: unknown): string[] | undefined {
326
+ if (!Array.isArray(v)) return undefined;
327
+ const list = v.filter((x): x is string => typeof x === "string" && x.trim().length > 0);
328
+ return list.length ? list : undefined;
329
+ }
330
+
319
331
  function readStep(s: unknown): ShorthandStep {
320
332
  if (typeof s === "string") return { task: s };
321
333
  if (s && typeof s === "object") {
322
334
  const o = s as Record<string, unknown>;
323
- return { agent: typeof o.agent === "string" ? o.agent : undefined, task: String(o.task ?? "") };
335
+ const step: ShorthandStep = { agent: typeof o.agent === "string" ? o.agent : undefined, task: String(o.task ?? "") };
336
+ const ctx = readContextList(o.context);
337
+ if (ctx) step.context = ctx;
338
+ if (typeof o.contextLimit === "number") step.contextLimit = o.contextLimit;
339
+ return step;
324
340
  }
325
341
  return { task: "" };
326
342
  }
@@ -345,10 +361,19 @@ export function desugar(def: unknown): Taskflow {
345
361
 
346
362
  // chain → sequential agent phases
347
363
  if (Array.isArray(d.chain) && d.chain.length > 0) {
364
+ // Spec-level context in chain mode would be a flow-level default (every
365
+ // step), which is deliberately NOT supported — declare it per step instead.
366
+ if (d.context !== undefined || d.contextLimit !== undefined) {
367
+ console.warn(
368
+ "[taskflow] Shorthand chain ignores top-level 'context'/'contextLimit' — put them on individual steps instead.",
369
+ );
370
+ }
348
371
  const steps = d.chain.map(readStep);
349
372
  const phases: Phase[] = steps.map((s, i) => {
350
373
  const phase: Phase = { id: `step${i + 1}`, type: "agent", task: s.task };
351
374
  if (s.agent) phase.agent = s.agent;
375
+ if (s.context) phase.context = s.context;
376
+ if (s.contextLimit !== undefined) phase.contextLimit = s.contextLimit;
352
377
  if (i > 0) phase.dependsOn = [`step${i}`];
353
378
  if (i === steps.length - 1) phase.final = true;
354
379
  return phase;
@@ -356,16 +381,30 @@ export function desugar(def: unknown): Taskflow {
356
381
  return { name: nameOf("chain"), ...meta, phases };
357
382
  }
358
383
 
359
- // tasks → one parallel phase (fan-out + merge), no extra aggregation agent
384
+ // tasks → one parallel phase (fan-out + merge), no extra aggregation agent.
385
+ // Context is SHARED across all branches (the runtime pre-reads per phase, not
386
+ // per branch): spec-level context plus the union of step-level contexts.
360
387
  if (Array.isArray(d.tasks) && d.tasks.length > 0) {
361
- const branches: ParallelTask[] = d.tasks.map(readStep).map((s) => (s.agent ? { task: s.task, agent: s.agent } : { task: s.task }));
362
- return { name: nameOf("parallel"), ...meta, phases: [{ id: "parallel", type: "parallel", branches, final: true }] };
388
+ const steps = d.tasks.map(readStep);
389
+ const branches: ParallelTask[] = steps.map((s) => (s.agent ? { task: s.task, agent: s.agent } : { task: s.task }));
390
+ const phase: Phase = { id: "parallel", type: "parallel", branches, final: true };
391
+ const shared = [...(readContextList(d.context) ?? []), ...steps.flatMap((s) => s.context ?? [])];
392
+ if (shared.length) phase.context = Array.from(new Set(shared));
393
+ const limits = [
394
+ typeof d.contextLimit === "number" ? d.contextLimit : undefined,
395
+ ...steps.map((s) => s.contextLimit),
396
+ ].filter((n): n is number => typeof n === "number");
397
+ if (limits.length) phase.contextLimit = Math.max(...limits);
398
+ return { name: nameOf("parallel"), ...meta, phases: [phase] };
363
399
  }
364
400
 
365
- // single task → one agent phase
401
+ // single task → one agent phase (the spec itself is the step)
366
402
  if (typeof d.task === "string") {
367
403
  const phase: Phase = { id: "main", type: "agent", task: d.task, final: true };
368
404
  if (typeof d.agent === "string") phase.agent = d.agent;
405
+ const ctx = readContextList(d.context);
406
+ if (ctx) phase.context = ctx;
407
+ if (typeof d.contextLimit === "number") phase.contextLimit = d.contextLimit;
369
408
  return { name: nameOf("task"), ...meta, phases: [phase] };
370
409
  }
371
410
 
@@ -376,6 +415,7 @@ export function desugar(def: unknown): Taskflow {
376
415
  // Validation (beyond schema: DAG integrity, phase-type requirements)
377
416
  // ---------------------------------------------------------------------------
378
417
 
418
+ /** @internal */
379
419
  export interface ValidationResult {
380
420
  ok: boolean;
381
421
  errors: string[];
@@ -618,16 +658,41 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
618
658
  // placeholder string. The runtime can't infer the intent — fail fast at
619
659
  // validation time so the mistake is caught before the run starts.
620
660
  //
661
+ // The check uses TRANSITIVE ancestors: if phase B depends on A, and C depends
662
+ // on B, then C may reference {steps.A.*} transitively. Only truly unreachable
663
+ // refs are errors.
664
+ //
621
665
  // Phases with `join: "any"` are exempt: by design they only need ONE of
622
666
  // their declared deps to complete, and may reference other phases as
623
667
  // informational context (not as true dependencies).
624
668
  if (errors.length === 0) {
625
669
  const idToPhase = new Map((flow.phases as Phase[]).map((p) => [p.id, p]));
670
+ // Precompute transitive ancestors for every phase via BFS over dependsOn.
671
+ const transitiveCache = new Map<string, Set<string>>();
672
+ const transitiveAncestors = (phaseId: string): Set<string> => {
673
+ const cached = transitiveCache.get(phaseId);
674
+ if (cached) return cached;
675
+ const result = new Set<string>();
676
+ const queue = [...(idToPhase.get(phaseId)?.dependsOn ?? []), ...(idToPhase.get(phaseId)?.from ?? [])];
677
+ while (queue.length) {
678
+ const id = queue.shift()!;
679
+ if (result.has(id)) continue;
680
+ result.add(id);
681
+ const dep = idToPhase.get(id);
682
+ if (dep) {
683
+ for (const d of [...(dep.dependsOn ?? []), ...(dep.from ?? [])]) {
684
+ if (!result.has(d)) queue.push(d);
685
+ }
686
+ }
687
+ }
688
+ transitiveCache.set(phaseId, result);
689
+ return result;
690
+ };
626
691
  for (const p of flow.phases as Phase[]) {
627
692
  if (!p?.id) continue;
628
693
  const isJoinAny = p.join === "any";
629
694
  if (isJoinAny) continue;
630
- const deps = new Set(dependenciesOf(p));
695
+ const transitive = transitiveAncestors(p.id);
631
696
  const refs = collectRefs(p);
632
697
  for (const ref of refs.steps) {
633
698
  if (ref === p.id) {
@@ -640,9 +705,9 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
640
705
  // double-warn — the dependsOn loop above already flags it.
641
706
  continue;
642
707
  }
643
- if (!deps.has(ref)) {
708
+ if (!transitive.has(ref)) {
644
709
  errors.push(
645
- `Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not in dependsOn. ` +
710
+ `Phase '${p.id}': task references {steps.${ref}.*} but '${ref}' is not reachable via dependsOn. ` +
646
711
  `The phase will run in parallel with '${ref}' and see the literal placeholder. ` +
647
712
  `Add "dependsOn": ["${ref}"] (or include '${ref}' transitively).`,
648
713
  );
@@ -29,6 +29,7 @@ export interface SavedFlow {
29
29
  def: Taskflow;
30
30
  }
31
31
 
32
+ /** @internal */
32
33
  export type PhaseStatus = "pending" | "running" | "done" | "failed" | "skipped";
33
34
 
34
35
  export interface PhaseState {
@@ -84,6 +85,10 @@ export interface RunState {
84
85
  createdAt: number;
85
86
  updatedAt: number;
86
87
  cwd: string;
88
+ /** OS PID of a detached runner process (set only for background runs). */
89
+ pid?: number;
90
+ /** True for runs spawned via `detach: true` (background execution). */
91
+ detached?: boolean;
87
92
  }
88
93
 
89
94
  // ---------------------------------------------------------------------------
@@ -458,10 +463,21 @@ function cleanupTerminalRuns(
458
463
  }
459
464
 
460
465
  // Sort terminal by updatedAt desc (newest first).
461
- terminal.sort((a, b) => b.updatedAt - a.updatedAt);
466
+ // Filter out entries with corrupt updatedAt (non-numeric/NaN) BEFORE sorting
467
+ // to prevent NaN from corrupting sort order. Corrupt entries cannot be
468
+ // reliably aged, so they are always moved to toRemove.
469
+ const cleanTerminal: RunIndexEntry[] = [];
470
+ for (const e of terminal) {
471
+ if (typeof e.updatedAt === "number" && !Number.isNaN(e.updatedAt)) {
472
+ cleanTerminal.push(e);
473
+ } else {
474
+ toRemove.push(e);
475
+ }
476
+ }
477
+ cleanTerminal.sort((a, b) => b.updatedAt - a.updatedAt);
462
478
 
463
- for (let i = 0; i < terminal.length; i++) {
464
- const e = terminal[i]!;
479
+ for (let i = 0; i < cleanTerminal.length; i++) {
480
+ const e = cleanTerminal[i]!;
465
481
  const expiredByAge = now - e.updatedAt > maxAgeMs;
466
482
  const excessByCount = i >= maxKeep;
467
483
  if (expiredByAge || excessByCount) {
@@ -473,7 +489,7 @@ function cleanupTerminalRuns(
473
489
 
474
490
  // Commit the pruned index while holding the lock so a concurrent
475
491
  // updateIndexEntry cannot interleave and lose entries.
476
- const remaining = terminal.filter((e) => !toRemove.includes(e));
492
+ const remaining = cleanTerminal.filter((e) => !toRemove.includes(e));
477
493
  writeIndex(runsRoot, [...active, ...remaining]);
478
494
  });
479
495
 
@@ -783,8 +799,12 @@ export function listRuns(cwd: string, limit = 20): RunState[] {
783
799
  }
784
800
 
785
801
  // Sort by updatedAt desc, slice to limit.
786
- entries.sort((a, b) => b.updatedAt - a.updatedAt);
787
- const sliced = entries.slice(0, limit);
802
+ // Filter out entries with non-numeric/NaN updatedAt BEFORE sorting to
803
+ // prevent NaN from corrupting V8's sort order (which can displace valid
804
+ // entries when a limit is applied).
805
+ const valid = entries.filter((e) => typeof e.updatedAt === "number" && !Number.isNaN(e.updatedAt));
806
+ valid.sort((a, b) => b.updatedAt - a.updatedAt);
807
+ const sliced = valid.slice(0, limit);
788
808
 
789
809
  // Read full RunState for each entry.
790
810
  const runs: RunState[] = [];
@@ -804,6 +824,20 @@ export function hashInput(...parts: string[]): string {
804
824
  return crypto.createHash("sha256").update(parts.join("\u0000")).digest("hex").slice(0, 16);
805
825
  }
806
826
 
827
/**
 * Check whether a process with the given PID is still alive.
 *
 * Probes with signal 0, which delivers nothing and only runs the existence and
 * permission checks:
 *   - no error → the process exists and we are allowed to signal it;
 *   - `EPERM`  → the process exists but is owned by someone else, so it is
 *                still alive (we just may not signal it);
 *   - anything else (typically `ESRCH`) → treated as "not alive".
 *
 * PIDs that are not positive integers are rejected up front. On POSIX, `kill`
 * interprets 0 and negative values as process-group targets rather than a
 * single process, so probing them would report a misleading "alive".
 *
 * @param pid OS process id to probe.
 * @returns `true` when a process with that PID exists, `false` otherwise.
 */
export function isProcessAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
    return true;
  } catch (err: unknown) {
    // EPERM proves existence: the permission check only fails for a real process.
    return typeof err === "object" && err !== null && (err as { code?: unknown }).code === "EPERM";
  }
}
840
+
807
841
  /**
808
842
  * Write a file atomically: write to a unique temp file in the same directory,
809
843
  * then rename over the target (rename is atomic on the same filesystem). Prevents
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-taskflow",
3
- "version": "0.0.19",
3
+ "version": "0.0.21",
4
4
  "description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -37,7 +37,7 @@
37
37
  ],
38
38
  "scripts": {
39
39
  "typecheck": "tsc --noEmit",
40
- "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts",
40
+ "test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test/interpolate.test.ts test/condition.test.ts test/schema.test.ts test/usage.test.ts test/runtime.test.ts test/features.test.ts test/runner.test.ts test/store.test.ts test/agents.test.ts test/init.test.ts test/render.test.ts test/approval-view.test.ts test/desugar.test.ts test/cache.test.ts test/loop.test.ts test/tournament.test.ts test/verify.test.ts test/gate-eval.test.ts test/transient-error.test.ts test/runtime-branches.test.ts test/interpolate-extended.test.ts test/store-extended.test.ts test/flow-def.test.ts test/detached.test.ts",
41
41
  "test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
42
42
  "test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
43
43
  },
@@ -43,10 +43,25 @@ proper flow, so you still get progress, persistence, resume, and `save`.
43
43
  ```
44
44
 
45
45
  - `agent` is optional (defaults to the first available agent).
46
+ - `context` (optional, per step or top-level in single mode): file paths to
47
+ pre-read and inject before the task — same as the full-DSL `Phase.context`
48
+ (per-file `contextLimit`, default 8000 chars). In **parallel `tasks` mode**
49
+ all branches SHARE the union of step contexts (the runtime pre-reads per
50
+ phase, not per branch). In **chain mode** declare `context` on individual
51
+ steps; a top-level `context` is ignored (with a warning).
46
52
  - Add `name` to label the run (and to `save` it as a `/tf:<name>` command).
47
53
  - Precedence if several are given: `chain` > `tasks` > `task`.
48
54
  - You can pass these as top-level tool params **or** inside `define`.
49
55
 
56
+ ```jsonc
57
+ // context pre-read in shorthand — the file content is injected before the task
58
+ { "chain": [
59
+ { "task": "Map the public API of src/lib", "agent": "scout" },
60
+ { "task": "Write docs for:\n{previous.output}", "agent": "doc-writer",
61
+ "context": ["AGENTS.md", "docs/style-guide.md"] }
62
+ ] }
63
+ ```
64
+
50
65
  ## How to author a taskflow
51
66
 
52
67
  Call the `taskflow` tool. To run a brand-new flow you write inline, pass
@@ -128,7 +143,8 @@ deciding. The (interpolated) `task` is the prompt shown.
128
143
  - **Reject** → halt the flow (same mechanism as a blocking gate).
129
144
  - **Edit** → the typed note becomes this phase's `output`, so you can inject
130
145
  guidance mid-run: reference it downstream with `{steps.<id>.output}`.
131
- - **Non-interactive** runs (headless/CI/print mode) **auto-approve** and record it.
146
+ - **Non-interactive** runs (headless/CI/print mode) **auto-reject** and record it — approval gates are safety boundaries that must never be silently bypassed.
147
+ - **Background (detached)** runs **auto-reject** (no interactive approver) — downstream sees the rejection; the flow continues (fail-open).
132
148
 
133
149
  ```jsonc
134
150
  { "id": "checkpoint", "type": "approval", "dependsOn": ["plan"],
@@ -169,9 +185,10 @@ Use hyphens in ids, never underscores. Sub-flow phases reference each other in
169
185
  their **own** `{steps.x.output}` namespace (no parent-id prefixing needed).
170
186
 
171
187
  **Fail-open & limits:** if the `def` doesn't parse, has the wrong shape, or fails
172
- validation, the phase fails *open* it's marked failed with a `defError`, the
173
- upstream output is preserved, and the run continues (use `optional: true` on the
174
- flow phase so a bad plan never aborts the run). An **empty** `phases` array is a
188
+ validation, the phase completes with `status: "done"` and carries a `defError`
189
+ diagnostic field; downstream phases receive empty output. Authors who want a
190
+ hard failure can add a gate that checks for `defError`. The run continues
191
+ (add `optional: true` on the flow phase so a bad plan never aborts the run). An **empty** `phases` array is a
175
192
  valid no-op (the planner decided there's nothing to do). Inline nesting is capped
176
193
  at `MAX_DYNAMIC_NESTING` (5) to bound runaway self-spawning.
177
194
 
@@ -216,7 +233,7 @@ A `tournament` phase runs `variants` competing attempts in parallel, then a
216
233
  (`mode: "aggregate"`). Use it when one shot is unreliable and you want the best
217
234
  of several drafts, or a synthesis of diverse approaches.
218
235
 
219
- - `variants` — the competing attempts: a number (run the same `task` N times) or an array of `{task, agent?}` for genuinely different approaches.
236
+ - `variants` — a number specifying how many competing variants to spawn from `task` (default 3, max 20). For genuinely different approaches, use the `branches` field instead: an explicit array of `{task, agent?}` definitions.
220
237
  - `mode` — `"best"` (judge picks one winner, default) or `"aggregate"` (judge merges all into one output).
221
238
  - `judge` — the judge's rubric/instructions (how to choose or merge).
222
239
  - `judgeAgent` — *(optional)* the agent that runs the judge step; defaults to the phase `agent`.
@@ -434,19 +451,28 @@ Quick reference:
434
451
 
435
452
  ## Actions
436
453
 
437
- - `action: "run"` — run an inline `define` (a one-off DAG) **or** a saved `name` (with optional `args`). Use `define` for an ad-hoc flow; use `name` to invoke something previously saved.
454
+ - `action: "run"` — run an inline `define` (a one-off DAG) **or** a saved `name` (with optional `args`). Use `define` for an ad-hoc flow; use `name` to invoke something previously saved. Add `detach: true` to run in the background (returns immediately with the runId; poll the store for status).
438
455
  - `action: "save"` — persist `define` (scope `project` — default, committed/shared — or `user`); it becomes `/tf:<name>`. On a name collision, project overrides user.
439
456
  - `action: "resume"` — continue a paused/failed run by `runId`.
440
457
  - `action: "list"` — list saved flows. `action: "verify"` — static-check a `define` (zero tokens). `action: "agents"` — list available agents.
441
458
 
459
+ ## Background (detached) runs
460
+
461
+ Add `detach: true` to `action: "run"` to spawn the flow in a detached child process. The tool returns immediately with the `runId`; the flow continues running even if the host session exits. Status is polled via the store (`/tf runs` or `action: "resume"`).
462
+
463
+ - **Approval phases auto-reject** in detached mode (no interactive approver). Downstream phases see the rejection; the flow continues (fail-open).
464
+ - **Crash resilience:** if the detached process crashes, the store persists `status: "failed"`; resume with `action: "resume"`.
465
+ - **Same flow, both modes:** a flow can run foreground or background — `detach` is a dispatch-time decision, not a flow property.
466
+
442
467
  ## Operating a run (lifecycle, resume, inspection)
443
468
 
444
- A run moves through: **running →** `completed` (a `final` phase produced output) **/** `blocked` (a gate emitted BLOCK, an `approval` was rejected, or the `budget` cap was hit) **/** `failed` (a non-`optional` phase errored) **/** `paused` (the run was aborted). `failed` and `paused` runs are resumable; `blocked` is terminal (fix the gate/budget and re-run).
469
+ A run moves through: **running →** `completed` (a `final` phase produced output) **/** `blocked` (a gate emitted BLOCK, an `approval` was rejected, or the `budget` cap was hit) **/** `failed` (a non-`optional` phase errored) **/** `paused` (the run was aborted). `failed` and `paused` runs are resumable.
445
470
 
446
- - **Resume is cache-aware.** `action: "resume"` re-runs only what didn't finish: every phase already `done` is reused from its recorded output (within-run cache), so resuming after a crash or a `blocked`/`failed` stop never repeats completed work. A phase that was mid-flight is re-executed cleanly (stale `error`/`endedAt` are cleared first).
471
+ - **`blocked` runs:** a blocked status halts the current run — the flow status is set to `blocked` and remaining phases are skipped. Re-running the flow resumes from the last completed state: `done` phases with matching input hashes are skipped; blocked/failed/skipped phases are re-attempted. Fix the gate condition or budget before re-running.
472
+ - **Resume is cache-aware.** `action: "resume"` re-runs only what didn't finish: every phase already `done` is reused from its recorded output (within-run cache), so resuming after a crash or a failed/blocked stop never repeats completed work. A phase that was mid-flight is re-executed cleanly (stale `error`/`endedAt` are cleared first).
447
473
  - **When to resume vs. re-run.** Resume when the inputs are unchanged and you just want to continue/retry the tail (fixed a gate, raised the budget, approved a checkpoint). Re-run from scratch when the task or upstream inputs changed — resume would reuse now-stale outputs. (For reuse *across* runs, opt a phase into `cache: {scope:"cross-run"}` — see configuration.md.)
448
474
  - **Budget mid-run.** When the run-wide `budget` is exceeded, remaining phases are skipped and an in-flight `map`/`parallel` stops spawning new items; the run ends `blocked` with the partial outputs preserved.
449
- - **Inspect runs.** `/tf runs` lists recent runs with status; `/tf show <name>` prints a saved flow's definition. Run state lives at `<project .pi>/taskflows/runs/<runId>.json` (gitignored).
475
+ - **Inspect runs.** `/tf runs` lists recent runs with status; `/tf show <name>` prints a saved flow's definition. Run state lives at `<project .pi>/taskflows/runs/<flowName>/<runId>.json` (gitignored).
450
476
 
451
477
  ## User commands
452
478
 
@@ -286,7 +286,7 @@ for the design.
286
286
  ### `ttl` (cross-run only)
287
287
 
288
288
  Max age before a cross-run hit is treated as a miss: e.g. `"30m"`, `"6h"`, `"7d"`.
289
- Omit for no time bound. A hit older than the TTL re-executes the phase.
289
+ Omit for no time bound. A hit older than the TTL re-executes the phase. Cross-run cache entries are hard-evicted after 90 days regardless of per-entry TTL. This ceiling is not configurable.
290
290
 
291
291
  ### `fingerprint` (cross-run only)
292
292
 
@@ -298,7 +298,7 @@ Each entry is one of:
298
298
  | Entry | Becomes a miss when… | Resolves to |
299
299
  |-------|----------------------|-------------|
300
300
  | `git:HEAD` / `git:<ref>` | the commit moves | the resolved SHA (30s timeout → `<timeout>`; no git → `<no-git>`) |
301
- | `glob:<pattern>` | the **set of matching paths** changes | sorted path list (mtime-free) |
301
+ | `glob:<pattern>` | the **set of matching paths** or their metadata changes | sorted path list with size + mtime (content-hashed globs use `glob!:` instead, which is mtime-independent) |
302
302
  | `glob!:<pattern>` | the **contents** of matching files change | content hashes (capped at 5000 matches) |
303
303
  | `file:<path>` | that file's content changes | sha256 of the file (>10 MB or missing → `<skip>`/`<missing>`) |
304
304
  | `env:<NAME>` | the env var changes | the env value |
@@ -333,7 +333,7 @@ Each entry is one of:
333
333
  |------|------|---------|
334
334
  | User-scoped flow | `~/.pi/agent/taskflows/<name>.json` | personal |
335
335
  | Project-scoped flow | `<nearest .pi>/taskflows/<name>.json` | ✅ commit to share |
336
- | Run state (resume) | `<project .pi>/taskflows/runs/<runId>.json` | ❌ gitignore |
336
+ | Run state (resume) | `<project .pi>/taskflows/runs/<flowName>/<runId>.json` | ❌ gitignore |
337
337
 
338
338
  - `action: "save"` takes `scope: "project"` (default) or `"user"`.
339
339
  - Saved flows auto-register as `/tf:<name>` (immediately for the current session,