pi-taskflow 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -156,10 +156,10 @@ declaratively, no scripting:
156
156
  { "id": "triage", "type": "agent", "agent": "analyst", "output": "json",
157
157
  "task": "Classify the bug. Output ONLY {\"severity\":\"high\"} or {\"severity\":\"low\"}." },
158
158
  { "id": "deep", "when": "{steps.triage.json.severity} == high", "dependsOn": ["triage"],
159
- "agent": "executor_code", "task": "Root-cause and patch it.",
159
+ "agent": "executor-code", "task": "Root-cause and patch it.",
160
160
  "retry": { "max": 2, "backoffMs": 500 } },
161
161
  { "id": "quick", "when": "{steps.triage.json.severity} == low", "dependsOn": ["triage"],
162
- "agent": "executor_fast", "task": "Apply the quick fix." },
162
+ "agent": "executor-fast", "task": "Apply the quick fix." },
163
163
  { "id": "approve", "type": "approval", "join": "any", "dependsOn": ["deep", "quick"],
164
164
  "task": "Review the fix before it ships." },
165
165
  { "id": "ship", "type": "agent", "dependsOn": ["approve"],
@@ -25,7 +25,7 @@
25
25
  {
26
26
  "id": "implement",
27
27
  "type": "agent",
28
- "agent": "executor_code",
28
+ "agent": "executor-code",
29
29
  "dependsOn": ["approve", "plan"],
30
30
  "task": "Implement the approved plan for {args.target}.\nPlan:\n{steps.plan.output}\nExtra human guidance (if any):\n{steps.approve.output}",
31
31
  "retry": { "max": 1, "backoffMs": 1000 }
@@ -48,6 +48,20 @@ export function isFailed(r: RunResult): boolean {
48
48
  return r.exitCode !== 0 || r.stopReason === "error" || r.stopReason === "aborted";
49
49
  }
50
50
 
51
/**
 * Case-insensitive pattern for failure text that suggests a retryable provider
 * problem: rate limiting, "too many requests", overload, HTTP 429/502/503/504,
 * "service unavailable" / "temporarily unavailable", timeouts, and dropped
 * connections (ECONNRESET, ETIMEDOUT, "socket hang up").
 *
 * The pattern deliberately has no `g`/`y` flag, so `.test()` keeps no state
 * between calls.
 */
const TRANSIENT_ERROR_RE =
  /rate[_\s-]?limit|too\s+many\s+requests|overloaded|\b429\b|\b503\b|\b502\b|\b504\b|service\s+unavailable|temporarily\s+unavailable|timeout|timed?\s+out|econnreset|etimedout|socket\s+hang\s*up/i;

/**
 * Heuristic check: does this result look like a transient provider failure
 * that is worth retrying with backoff inside the current run?
 *
 * The error message, stderr and output are all scanned, because the marker
 * text may appear in any of them. Being a text heuristic, it can also match
 * when those words merely occur in the agent's own output.
 *
 * @param r - Result of a single agent attempt.
 * @returns `false` for aborted runs (an abort is never retried); otherwise
 *   whether any scanned field matches {@link TRANSIENT_ERROR_RE}.
 */
export function isTransientError(r: RunResult): boolean {
  const wasAborted = r.stopReason === "aborted";
  if (wasAborted) {
    return false;
  }
  // Missing fields contribute an empty string so they can never match.
  const haystack = [r.errorMessage ?? "", r.stderr ?? "", r.output ?? ""].join(" ");
  return TRANSIENT_ERROR_RE.test(haystack);
}
64
+
51
65
  /** Placeholder written to a failed phase's `output` so downstream interpolation
52
66
  * can detect "upstream failed" without being polluted by raw HTML/JSON. */
53
67
  export const TRANSPORT_ERROR_PLACEHOLDER = "(upstream error: subagent failed; see error)";
@@ -14,7 +14,7 @@ import * as path from "node:path";
14
14
  import * as fs from "node:fs";
15
15
  import type { AgentConfig } from "./agents.ts";
16
16
  import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
17
- import { isFailed, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
17
+ import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
18
18
  import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
19
19
  import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
20
20
  import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
@@ -314,9 +314,20 @@ async function executePhase(
314
314
 
315
315
  // Wrap each subagent call in the phase's retry policy. Usage is summed across
316
316
  // attempts; the attempt count rides along on the result for the TUI.
317
+ //
318
+ // Even without an explicit `phase.retry`, transient provider errors (rate
319
+ // limits, overload, 5xx, timeouts) are retried with backoff so a momentary
320
+ // 429 is absorbed inside this run instead of bubbling up and provoking the
321
+ // calling agent to re-invoke the whole tool (which stacks duplicate progress
322
+ // blocks in the transcript).
317
323
  const retry = phase.retry;
324
+ const DEFAULT_TRANSIENT_RETRIES = 3;
325
+ const DEFAULT_TRANSIENT_BACKOFF_MS = 2000;
326
+ const DEFAULT_TRANSIENT_FACTOR = 2;
318
327
  const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
319
- const maxAttempts = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
328
+ const explicitMax = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
329
+ // Allow enough attempts to cover whichever policy applies on a given attempt.
330
+ const maxAttempts = Math.max(explicitMax, 1 + DEFAULT_TRANSIENT_RETRIES);
320
331
  const usages: UsageStats[] = [];
321
332
  let last: RunResult | undefined;
322
333
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
@@ -330,10 +341,21 @@ async function executePhase(
330
341
  if (!isFailed(last)) break;
331
342
  // Stop retrying on abort or once the run is over budget.
332
343
  if (deps.signal?.aborted || overBudget(state).over) break;
333
- if (attempt < maxAttempts - 1) {
334
- const wait = Math.min(60000, Math.round((retry?.backoffMs ?? 0) * (retry?.factor ?? 1) ** attempt));
335
- await delay(wait, deps.signal);
336
- }
344
+ // Decide whether THIS failure warrants another attempt. Explicit retry
345
+ // policy covers all failures up to its cap; the transient fallback covers
346
+ // only retryable provider errors. A non-transient failure with no explicit
347
+ // policy stops immediately (no point burning attempts on a hard error).
348
+ const withinExplicit = attempt < explicitMax - 1;
349
+ const transient = isTransientError(last);
350
+ const withinTransient = transient && attempt < DEFAULT_TRANSIENT_RETRIES;
351
+ if (!withinExplicit && !withinTransient) break;
352
+ // Backoff: prefer the explicit policy's curve when the phase defines one
353
+ // (covers transient retries too, and keeps tests fast with backoffMs:0),
354
+ // otherwise use the transient defaults.
355
+ const baseMs = retry ? (retry.backoffMs ?? 0) : DEFAULT_TRANSIENT_BACKOFF_MS;
356
+ const factor = retry ? (retry.factor ?? 1) : DEFAULT_TRANSIENT_FACTOR;
357
+ const wait = Math.min(60000, Math.round(baseMs * factor ** attempt));
358
+ if (wait > 0) await delay(wait, deps.signal);
337
359
  }
338
360
  // Aborted before any attempt ran → return a clean aborted result (no crash).
339
361
  if (!last) {
@@ -741,45 +763,8 @@ function safeProgress(deps: RuntimeDeps, state: RunState): void {
741
763
  /**
742
764
  * Execute a full taskflow. Mutates and persists `state` as it progresses.
743
765
  */
744
- function ensureImplicitGate(def: Taskflow): void {
745
- // Respect explicit opt-out
746
- if ((def as any).implicitGate === false) return;
747
-
748
- const hasGate = def.phases.some(
749
- (p) => p.type === "gate" || p.type === "approval" || p.id === "_implicit-gate",
750
- );
751
- if (hasGate || def.phases.length === 0) return;
752
-
753
- // The last existing phase is the effective "final" phase — pin it so the
754
- // injected gate doesn't become the finalOutput.
755
- const lastPhase = def.phases[def.phases.length - 1];
756
- if (!lastPhase.final && !def.phases.some((p) => p.final)) {
757
- lastPhase.final = true;
758
- }
759
-
760
- const allIds = def.phases.map((p) => p.id);
761
- def.phases.push({
762
- id: "_implicit-gate",
763
- type: "gate",
764
- dependsOn: allIds,
765
- agent: "reviewer",
766
- task: `Review all phase outputs from this taskflow for accuracy and consistency.
767
-
768
- For each upstream phase, scan its output for:
769
- 1. **Factual accuracy**: Any file paths, line numbers, or code snippets that are wrong?
770
- 2. **Internal contradictions**: Do any phases contradict each other?
771
- 3. **Completeness**: Is any output truncated, empty, or anomalously short?
772
- 4. **Hallucination markers**: Wrong file names, impossible line ranges, circular logic, information not in the given context.
773
-
774
- Output:
775
- - If ALL outputs look consistent and plausible: output **VERDICT: PASS** with a one-line summary.
776
- - If ANY issues found: output **VERDICT: BLOCK** listing each issue with the phase ID and specific concern.`,
777
- });
778
- }
779
-
780
766
  export async function executeTaskflow(state: RunState, deps: RuntimeDeps): Promise<RuntimeResult> {
781
767
  const def: Taskflow = state.def;
782
- ensureImplicitGate(def);
783
768
  try {
784
769
  return await runTaskflowLayers(state, deps);
785
770
  } catch (e) {
@@ -147,12 +147,6 @@ export const TaskflowSchema = Type.Object(
147
147
  }),
148
148
  ),
149
149
  phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
150
- implicitGate: Type.Optional(
151
- Type.Boolean({
152
- description: "When true (default), a reviewer gate is auto-injected after all phases if no explicit gate or approval exists",
153
- default: true,
154
- }),
155
- ),
156
150
  },
157
151
  { additionalProperties: false },
158
152
  );
@@ -342,6 +336,16 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
342
336
  if (p.join && !JOIN_MODES.includes(p.join as JoinMode)) {
343
337
  errors.push(`Phase '${p.id}': unknown join mode '${p.join}'`);
344
338
  }
339
+
340
+ // Agent name convention: hyphens only (per AGENTS.md naming convention)
341
+ if (p.agent && typeof p.agent === "string" && p.agent.includes("_")) {
342
+ errors.push(`Phase '${p.id}': agent name '${p.agent}' uses underscores — use hyphens (e.g. 'executor-code' not 'executor_code')`);
343
+ }
344
+
345
+ // Phase id convention: hyphens only (consistent with agent naming)
346
+ if (p.id && p.id.includes("_")) {
347
+ errors.push(`Phase '${p.id}': id uses underscores — use hyphens for consistency with agent naming convention`);
348
+ }
345
349
  }
346
350
 
347
351
  // dependsOn / from references must exist
@@ -355,6 +359,15 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
355
359
  }
356
360
  }
357
361
 
362
+ // Agent name format validation (AGENTS.md naming convention: hyphens only, no underscores)
363
+ const VALID_AGENT_RE = /^[a-z][a-z0-9-]*$/;
364
+ for (const p of flow.phases) {
365
+ if (!p?.id) continue;
366
+ if (p.agent && !VALID_AGENT_RE.test(p.agent)) {
367
+ errors.push(`Phase '${p.id}': agent '${p.agent}' has invalid name format (expected lowercase alphanumeric with hyphens)`);
368
+ }
369
+ }
370
+
358
371
  // Cycle detection (Kahn)
359
372
  if (errors.length === 0) {
360
373
  const cycle = detectCycle(flow.phases as Phase[]);
@@ -3,7 +3,15 @@
3
3
  *
4
4
  * Definitions: .pi/taskflows/<name>.json (project)
5
5
  * ~/.pi/agent/taskflows/<name>.json (user)
6
- * Run state: .pi/taskflows/runs/<runId>.json (resume support)
6
+ * Run state: .pi/taskflows/runs/<sanitizedFlowName>/<runId>.json
7
+ * Index: .pi/taskflows/runs/index.json (lookup accelerator)
8
+ *
9
+ * Legacy layout (v0.0.8 and earlier):
10
+ * .pi/taskflows/runs/<runId>.json (flat, still readable)
11
+ *
12
+ * v0.0.9 refactor: per-flow subdirectory layout + lightweight index + file
13
+ * lock + TTL/cap cleanup. Full backward compatibility with the flat layout
14
+ * is maintained: loadRun and listRuns still discover legacy flat files.
7
15
  */
8
16
 
9
17
  import * as crypto from "node:crypto";
@@ -66,6 +74,403 @@ export interface RunState {
66
74
  cwd: string;
67
75
  }
68
76
 
77
+ // ---------------------------------------------------------------------------
78
+ // Index entry — lightweight lookup record persisted in runs/index.json.
79
+ // Enables listRuns to find files without a full directory scan. Every
80
+ // non-terminal run and every terminal run within the retention window has an
81
+ // index entry; missing/stale entries are tolerated via degradation (rebuild).
82
+ // ---------------------------------------------------------------------------
83
+
84
/**
 * One record of the run index (`runs/index.json`): just enough metadata to
 * locate and triage a run without opening its full state file.
 */
export interface RunIndexEntry {
  /** Identifier of the run this entry points at. */
  runId: string;
  /** Name of the flow the run belongs to (unsanitised, as stored on the run). */
  flowName: string;
  /** Run status as of the moment the entry was written. */
  status: RunState["status"];
  /** Creation timestamp copied from the run state (presumably epoch ms — confirm against RunState). */
  createdAt: number;
  /** Last-save timestamp copied from the run state; compared against `Date.now()` during cleanup. */
  updatedAt: number;
  /** Location of the run file relative to the runs root, e.g. "test-flow/test-roundtrip-001.json". */
  relPath: string;
}
93
+
94
+ // ---------------------------------------------------------------------------
95
+ // File-lock constants
96
+ // ---------------------------------------------------------------------------
97
+
98
/**
 * Age in milliseconds (30 s) beyond which an existing lock file is treated as
 * orphaned — e.g. left behind by a crash or `kill -9` — and may be stolen.
 */
const LOCK_STALE_MS = 30 * 1000;
/** Pause in milliseconds between acquisition attempts while the lock is held by someone else. */
const LOCK_POLL_MS = 50;
/**
 * Default time in milliseconds (10 s) a caller waits for a lock before
 * acquisition throws. Note this is shorter than the stale threshold above.
 */
const LOCK_TIMEOUT_MS = 10 * 1000;
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Cleanup throttle
107
+ // ---------------------------------------------------------------------------
108
+
109
/** Throttle window in milliseconds (60 s): opportunistic cleanup runs at most once per window. */
const CLEANUP_INTERVAL_MS = 60 * 1000;
/** Default cap on how many terminal runs are retained. */
const DEFAULT_MAX_KEPT_TERMINAL = 100;
/** Default maximum age, in days, of a terminal run before it is removed. */
const DEFAULT_MAX_AGE_DAYS = 30;

/**
 * Timestamp (from `Date.now()`) of the most recent cleanup pass. Module-level
 * so the throttle survives across calls; 0 means "never ran".
 */
let lastCleanupAt = 0;
118
+
119
+ // ---------------------------------------------------------------------------
120
+ // Internal helpers — path construction & sanitisation
121
+ // ---------------------------------------------------------------------------
122
+
123
/**
 * Sanitise a flow name into a directory name that is safe to create directly
 * under the runs root.
 *
 * Steps:
 *  1. Every run of characters outside `[A-Za-z0-9_.-]` becomes a single `_`.
 *  2. Leading dots are collapsed to `_`. The allow-list keeps `.`, so without
 *     this step a flow literally named "." or ".." would resolve OUTSIDE the
 *     runs root (write-side path traversal), and ".git"-style names would
 *     create hidden directories.
 *  3. An empty result becomes "_".
 *  4. Names that would collide with the index file or its lock, which live in
 *     the same directory ("index.json", "index.json.lock"), are prefixed with
 *     `_`. The comparison ignores case so case-insensitive filesystems are
 *     covered too.
 *
 * @param flowName - Arbitrary flow name (no charset is guaranteed).
 * @returns A non-empty single path component that never starts with a dot and
 *   never equals a reserved file name of the runs root.
 */
function safeFlowDirName(flowName: string): string {
  let safe = flowName.replace(/[^\w.-]+/g, "_");
  // Collapse leading dots: blocks ".", "..", and hidden-dir names like ".git".
  safe = safe.replace(/^\.+/, "_");
  if (safe === "") return "_";

  // The runs root also holds index.json and index.json.lock; a flow directory
  // with one of those names would shadow (or be blocked by) those files.
  const reservedNames = ["index.json", "index.json.lock"];
  if (reservedNames.includes(safe.toLowerCase())) {
    return `_${safe}`;
  }
  return safe;
}
139
+
140
/**
 * Resolve the directory holding all run files of one flow:
 * `<runsRoot>/<sanitised flow name>`.
 */
function flowRunDir(runsRoot: string, flowName: string): string {
  const dirName = safeFlowDirName(flowName);
  return path.join(runsRoot, dirName);
}
144
+
145
/**
 * Resolve the run-state file of a run in the per-flow layout:
 * `<runsRoot>/<sanitised flow name>/<runId>.json`.
 */
function runFilePath(runsRoot: string, flowName: string, runId: string): string {
  const flowDir = flowRunDir(runsRoot, flowName);
  const fileName = `${runId}.json`;
  return path.join(flowDir, fileName);
}
149
+
150
/** Resolve the location of the run index file, `<runsRoot>/index.json`. */
function indexPath(runsRoot: string): string {
  const indexFileName = "index.json";
  return path.join(runsRoot, indexFileName);
}
154
+
155
/**
 * Resolve the lock file that serialises every read-modify-write cycle on the
 * run index: `<runsRoot>/index.json.lock`.
 */
function indexLockPath(runsRoot: string): string {
  const lockFileName = "index.json.lock";
  return path.join(runsRoot, lockFileName);
}
159
+
160
/**
 * Resolve the per-run lock file, which sits next to the run file:
 * `<runsRoot>/<sanitised flow name>/<runId>.json.lock`.
 */
function lockPathForRun(runsRoot: string, flowName: string, runId: string): string {
  const flowDir = flowRunDir(runsRoot, flowName);
  const lockFileName = `${runId}.json.lock`;
  return path.join(flowDir, lockFileName);
}
164
+
165
/**
 * Cheap safety check for a runId before it is used to build a filesystem path.
 *
 * This is a deny-list, not a charset allow-list: it accepts any non-empty
 * string that contains no path separator (`/` or `\`) and no NUL byte. It does
 * NOT verify that the id has the shape of a generated run id.
 *
 * @param runId - Candidate run identifier (may come from untrusted input).
 * @returns `true` when the id is a non-empty string free of separators and NUL.
 */
function validateRunId(runId: string): boolean {
  if (typeof runId !== "string" || runId.length === 0) {
    return false;
  }
  const forbidden = ["/", "\\", "\0"];
  return !forbidden.some((ch) => runId.includes(ch));
}
178
+
179
+ // ---------------------------------------------------------------------------
180
+ // File-lock primitives — zero-dependency, using O_CREAT|O_EXCL (atomic)
181
+ // ---------------------------------------------------------------------------
182
+
183
/**
 * Acquire a file lock by atomically creating `lockPath`.
 *
 * Creation uses the `wx` open flag (create, fail if the path exists), so only
 * one caller can create the file. The file is stamped with the holder's pid
 * and a timestamp for diagnostics.
 *
 * Stale locks (mtime older than LOCK_STALE_MS) are stolen by renaming the lock
 * to a unique "graveyard" name and then retrying the create. Of several
 * stealers racing on the same stale file only one rename can succeed; the
 * others see ENOENT and simply retry.
 * NOTE(review): `rename` acts on the path, not on the inode that was stat'ed,
 * so a slow stealer could in principle rename away a *fresh* lock that a
 * faster stealer created in between — confirm whether that window matters.
 *
 * Every path that does not make progress (lock held, or a stale lock that
 * cannot be moved) goes through the timeout check and the poll sleep, so the
 * loop can neither spin hot nor run forever.
 *
 * @param lockPath - Absolute or relative path of the lock file.
 * @param timeoutMs - Maximum time to wait for a held lock.
 * @throws Error "Lock timeout after …" when the lock is still held after
 *   `timeoutMs`; any unexpected filesystem error (anything other than
 *   EEXIST on create or ENOENT on stat) is rethrown as-is.
 */
function acquireLock(lockPath: string, timeoutMs: number = LOCK_TIMEOUT_MS): void {
  const start = Date.now();
  // Ensure parent directory exists (lock file lives inside the flow subdir).
  fs.mkdirSync(path.dirname(lockPath), { recursive: true });
  // One reusable cell for Atomics.wait; its value stays 0, so every wait
  // simply sleeps for the poll interval.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));

  while (true) {
    let fd: number | undefined;
    try {
      fd = fs.openSync(lockPath, "wx");
    } catch (e: unknown) {
      if ((e as NodeJS.ErrnoException).code !== "EEXIST") throw e;
    }

    if (fd !== undefined) {
      // We created the lock file. Always close the descriptor, and if the
      // stamp could not be written remove the file again so a half-created
      // lock does not block everyone until it goes stale.
      let stamped = false;
      try {
        fs.writeFileSync(fd, JSON.stringify({ pid: process.pid, ts: Date.now() }));
        stamped = true;
      } finally {
        fs.closeSync(fd);
        if (!stamped) {
          try { fs.unlinkSync(lockPath); } catch { /* best effort */ }
        }
      }
      return; // lock acquired
    }

    // Lock file exists — find out whether it is stale.
    let stale = false;
    try {
      stale = Date.now() - fs.statSync(lockPath).mtimeMs > LOCK_STALE_MS;
    } catch (e: unknown) {
      // ENOENT: the holder released it between openSync and statSync — the
      // lock is free right now, so retry immediately. Anything else (EACCES,
      // EIO, …) will not fix itself by looping, so surface it.
      if ((e as NodeJS.ErrnoException).code === "ENOENT") continue;
      throw e;
    }

    if (stale) {
      // The graveyard name is unique per process + attempt.
      const grave = `${lockPath}.stale.${process.pid}.${crypto.randomBytes(4).toString("hex")}`;
      try {
        fs.renameSync(lockPath, grave);
        // We won the steal — discard the graveyard copy and retry the loop,
        // where openSync('wx') will create a fresh lock.
        try { fs.unlinkSync(grave); } catch { /* ignore */ }
        continue;
      } catch (e: unknown) {
        // ENOENT: another stealer (or the holder) removed it first — retry.
        if ((e as NodeJS.ErrnoException).code === "ENOENT") continue;
        // Any other failure means we cannot move the file; treat the lock as
        // held so the timeout below still bounds the wait.
      }
    }

    if (Date.now() - start > timeoutMs) {
      throw new Error(`Lock timeout after ${timeoutMs}ms waiting for ${path.basename(lockPath)}`);
    }
    // Synchronous sleep without burning CPU.
    Atomics.wait(sleepCell, 0, 0, LOCK_POLL_MS);
  }
}
240
+
241
/**
 * Release a file lock by removing its lock file.
 *
 * Strictly best effort: every unlink error is swallowed — including ENOENT,
 * which occurs when the file is already gone (released elsewhere or stolen
 * as stale).
 * NOTE(review): there is no ownership check, so if the lock was stolen and
 * re-created by another process this removes that process's lock — confirm
 * whether that is acceptable.
 */
function releaseLock(lockPath: string): void {
  try {
    fs.unlinkSync(lockPath);
  } catch {
    // ENOENT or any other failure — intentionally ignored.
  }
}
248
+
249
/**
 * Run `fn` while holding the file lock at `lockPath`.
 *
 * The lock is released in a `finally`, so it is dropped whether `fn` returns
 * or throws. `fn` is invoked synchronously; if it returns a promise, the lock
 * is released as soon as the promise is returned, not when it settles.
 *
 * @param lockPath - Lock file guarding the critical section.
 * @param fn - Critical section to execute.
 * @returns Whatever `fn` returns.
 * @throws Whatever lock acquisition or `fn` throws.
 */
function withLock<T>(lockPath: string, fn: () => T): T {
  acquireLock(lockPath);
  let outcome: T;
  try {
    outcome = fn();
  } finally {
    releaseLock(lockPath);
  }
  return outcome;
}
260
+
261
+ // ---------------------------------------------------------------------------
262
+ // Index CRUD
263
+ // ---------------------------------------------------------------------------
264
+
265
/**
 * Project a full run state down to its lightweight index record.
 *
 * @param state - Run state to summarise; only identity, status and timestamps are read.
 * @param relPath - Location of the run file relative to the runs root.
 * @returns A new index entry; the run state is not modified.
 */
function extractIndexEntry(state: RunState, relPath: string): RunIndexEntry {
  const { runId, flowName, status, createdAt, updatedAt } = state;
  return { runId, flowName, status, createdAt, updatedAt, relPath };
}
278
+
279
/**
 * Load the run index from disk.
 *
 * Never throws: a missing, unreadable or unparsable index — or one whose
 * top-level value is not an array — yields an empty list. Individual entries
 * are kept only if they carry a string `runId` and a string `relPath`; no
 * other field is validated.
 *
 * @param runsRoot - Root directory of all run state.
 * @returns The minimally validated index entries (possibly empty).
 */
function readIndex(runsRoot: string): RunIndexEntry[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(indexPath(runsRoot), "utf-8"));
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) {
    return [];
  }
  const hasRequiredFields = (e: RunIndexEntry): boolean =>
    Boolean(e) && typeof e.runId === "string" && typeof e.relPath === "string";
  return (parsed as RunIndexEntry[]).filter(hasRequiredFields);
}
293
+
294
/**
 * Replace the whole run index with `entries`, serialised as pretty-printed
 * JSON and written through the atomic file writer. Callers are responsible
 * for holding the index lock.
 */
function writeIndex(runsRoot: string, entries: RunIndexEntry[]): void {
  const target = indexPath(runsRoot);
  const serialised = JSON.stringify(entries, null, 2);
  writeFileAtomic(target, serialised);
}
298
+
299
+ /** Upsert a single entry by runId (read → mutate → write). */
300
+ /**
301
+ * Upsert a single entry by runId (read → mutate → write).
302
+ *
303
+ * Guarded by a dedicated index lock so concurrent saveRun calls for *different*
304
+ * runIds (each holding only its own per-run lock) cannot interleave their
305
+ * read-modify-write of the shared index and lose each other's entries
306
+ * (risk-reviewer v0.0.9 audit, M1). The per-run lock protects the run file;
307
+ * this index lock protects the shared index.
308
+ */
309
+ function updateIndexEntry(runsRoot: string, entry: RunIndexEntry): void {
310
+ withLock(indexLockPath(runsRoot), () => {
311
+ const entries = readIndex(runsRoot);
312
+ const idx = entries.findIndex((e) => e.runId === entry.runId);
313
+ if (idx >= 0) {
314
+ entries[idx] = entry;
315
+ } else {
316
+ entries.push(entry);
317
+ }
318
+ writeIndex(runsRoot, entries);
319
+ });
320
+ }
321
+
322
+ // Note: removeIndexEntry is available but not currently called; cleanupTerminalRuns
323
+ // rewrites the full index instead. Kept as a comment for future use.
324
+
325
/**
 * Rebuild the run index from what is actually on disk and persist it.
 *
 * Two sources are scanned, in this order:
 *  1. every sub-directory of the runs root (per-flow layout), and
 *  2. `*.json` files directly in the runs root (legacy flat layout),
 *     excluding `index.json`.
 * In both scans, file names containing ".lock" are ignored and unreadable or
 * unparsable files are skipped silently.
 *
 * Entries are keyed by the `runId` stored inside each file. A flat file never
 * replaces an entry found in a sub-directory; among sub-directories, a later
 * directory overwrites an earlier one for the same runId.
 *
 * The scan itself runs without a lock; only the final index write happens
 * under the index lock.
 *
 * @param runsRoot - Root directory of all run state.
 * @returns The rebuilt entries (also written to the index file).
 */
function rebuildIndex(runsRoot: string): RunIndexEntry[] {
  const byRunId = new Map<string, RunIndexEntry>();

  const looksLikeRunFile = (name: string): boolean =>
    name.endsWith(".json") && !name.includes(".lock");

  // Read and parse one run file; undefined when it is unreadable, corrupt,
  // or does not carry a string runId.
  const loadState = (absPath: string): RunState | undefined => {
    try {
      const state = JSON.parse(fs.readFileSync(absPath, "utf-8")) as RunState;
      return state && typeof state.runId === "string" ? state : undefined;
    } catch {
      return undefined;
    }
  };

  // --- Per-flow sub-directories -------------------------------------------
  let flowDirs: string[];
  try {
    flowDirs = fs
      .readdirSync(runsRoot, { withFileTypes: true })
      .filter((dirent) => dirent.isDirectory())
      .map((dirent) => dirent.name);
  } catch {
    flowDirs = [];
  }

  for (const dirName of flowDirs) {
    const dirPath = path.join(runsRoot, dirName);
    let names: string[];
    try {
      names = fs.readdirSync(dirPath).filter((name) => looksLikeRunFile(name));
    } catch {
      continue;
    }
    for (const name of names) {
      const state = loadState(path.join(dirPath, name));
      if (state) {
        byRunId.set(state.runId, extractIndexEntry(state, `${dirName}/${name}`));
      }
    }
  }

  // --- Legacy flat files (runs/*.json, index.json excluded) ---------------
  let flatNames: string[];
  try {
    flatNames = fs
      .readdirSync(runsRoot)
      .filter((name) => name !== "index.json" && looksLikeRunFile(name));
  } catch {
    flatNames = [];
  }

  for (const name of flatNames) {
    // Cheap pre-check on the file stem avoids reading files already covered.
    const stem = name.replace(/\.json$/, "");
    if (byRunId.has(stem)) continue;
    const state = loadState(path.join(runsRoot, name));
    if (state && !byRunId.has(state.runId)) {
      byRunId.set(state.runId, extractIndexEntry(state, name));
    }
  }

  const rebuilt = [...byRunId.values()];
  // Persist under the index lock so the write cannot interleave with a
  // concurrent index update or cleanup.
  withLock(indexLockPath(runsRoot), () => writeIndex(runsRoot, rebuilt));
  return rebuilt;
}
389
+
390
+ // ---------------------------------------------------------------------------
391
+ // TTL / cap cleanup
392
+ // ---------------------------------------------------------------------------
393
+
394
/**
 * Remove excess and expired terminal (completed/failed) runs.
 *
 * Intended to be called opportunistically after a save. Throttled to at most
 * one pass per CLEANUP_INTERVAL_MS (the throttle is stamped before any work,
 * so a failed pass is not retried until the next window). Entries with any
 * other status are never touched.
 *
 * A terminal run is removed when it is older than `maxAgeDays` (by
 * `updatedAt`) or when it falls outside the newest `maxKeep` terminal runs.
 *
 * Concurrency: the index is re-read, pruned and rewritten inside the index
 * lock so the rewrite reflects the latest committed state. Run files, lock
 * files and empty flow directories are deleted after the lock is released to
 * keep the critical section short.
 *
 * Best effort by design: if the index lock cannot be taken or the index
 * cannot be rewritten, the pass is abandoned without deleting anything and
 * without throwing, so the caller's (already completed) save is not reported
 * as failed.
 *
 * Safety: a `relPath` read from the index is only unlinked when it resolves
 * to a location strictly inside `runsRoot`, and the runs root itself is never
 * passed to `rmdir`.
 *
 * @param runsRoot - Root directory of all run state.
 * @param maxKeep - Maximum number of terminal runs to retain.
 * @param maxAgeDays - Maximum age of a terminal run in days.
 */
function cleanupTerminalRuns(
  runsRoot: string,
  maxKeep: number = DEFAULT_MAX_KEPT_TERMINAL,
  maxAgeDays: number = DEFAULT_MAX_AGE_DAYS,
): void {
  const now = Date.now();
  if (now - lastCleanupAt < CLEANUP_INTERVAL_MS) return;
  lastCleanupAt = now;

  const maxAgeMs = maxAgeDays * 86_400_000;
  const toRemove: RunIndexEntry[] = [];

  try {
    withLock(indexLockPath(runsRoot), () => {
      const active: RunIndexEntry[] = [];
      const terminal: RunIndexEntry[] = [];
      for (const e of readIndex(runsRoot)) {
        if (e.status === "completed" || e.status === "failed") {
          terminal.push(e);
        } else {
          active.push(e);
        }
      }

      // Newest first, so the index position doubles as the retention rank.
      terminal.sort((a, b) => b.updatedAt - a.updatedAt);

      const kept: RunIndexEntry[] = [];
      terminal.forEach((e, rank) => {
        const expiredByAge = now - e.updatedAt > maxAgeMs;
        const excessByCount = rank >= maxKeep;
        if (expiredByAge || excessByCount) {
          toRemove.push(e);
        } else {
          kept.push(e);
        }
      });

      if (toRemove.length === 0) return;

      // Commit the pruned index while still holding the lock.
      writeIndex(runsRoot, [...active, ...kept]);
    });
  } catch {
    // Lock timeout or index write failure. The index was not pruned, so no
    // file may be deleted; give up until the next cleanup window.
    return;
  }

  if (toRemove.length === 0) return;

  // Delete run files + lock files (outside the index lock).
  const flowDirs = new Set<string>();
  for (const e of toRemove) {
    const filePath = path.join(runsRoot, e.relPath);
    // Containment check: relPath comes from a JSON file on disk, so never
    // follow it outside (or onto) the runs root.
    const rel = path.relative(runsRoot, filePath);
    const escapesRoot =
      rel === "" || rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);
    if (escapesRoot) continue;

    try { fs.unlinkSync(filePath); } catch { /* already gone */ }
    // Also remove any orphaned lock file.
    try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
    flowDirs.add(path.dirname(filePath));
  }

  // Remove flow subdirectories that became empty. Legacy flat entries live
  // directly in the runs root, which must never be removed.
  for (const dirPath of flowDirs) {
    if (path.relative(runsRoot, dirPath) === "") continue;
    try { fs.rmdirSync(dirPath); } catch { /* ENOTEMPTY or ENOENT — ignore */ }
  }
}
469
+
470
+ // ---------------------------------------------------------------------------
471
+ // Original helpers (unchanged)
472
+ // ---------------------------------------------------------------------------
473
+
69
474
  function userFlowsDir(): string {
70
475
  return path.join(getAgentDir(), "taskflows");
71
476
  }
@@ -160,90 +565,172 @@ export function newRunId(flowName: string): string {
160
565
  return `${safe}-${Date.now().toString(36)}-${crypto.randomBytes(3).toString("hex")}`;
161
566
  }
162
567
 
568
/**
 * Write a run state to disk and record it in the run index.
 *
 * The state is serialised to the file named by `runFilePath`, inside the
 * per-flow directory returned by `flowRunDir`, and the matching index entry is
 * refreshed through `updateIndexEntry`. Both steps run inside the `withLock`
 * callback for this run's own lock path. Afterwards `cleanupTerminalRuns` is
 * invoked on the runs root.
 *
 * F-009: `updatedAt` is stamped on a shallow copy, so the object handed in by
 * the caller is never mutated.
 *
 * @param state - Run state to persist; its `cwd`, `flowName` and `runId`
 *   decide where the file is written.
 */
export function saveRun(state: RunState): void {
  const runsRoot = runsDir(state.cwd);
  const perFlowDir = flowRunDir(runsRoot, state.flowName);
  fs.mkdirSync(perFlowDir, { recursive: true });

  // Copy first, then stamp: a shallow clone is enough because this function
  // only serialises the state and never touches nested objects
  // (v0.0.6 audit, F-009).
  const snapshot = { ...state, updatedAt: Date.now() };
  const runFile = runFilePath(runsRoot, state.flowName, state.runId);
  const runLock = lockPathForRun(runsRoot, state.flowName, state.runId);

  withLock(runLock, () => {
    writeFileAtomic(runFile, JSON.stringify(snapshot, null, 2));
    // The index stores "<flowDir>/<file>" relative to the runs root, always
    // joined with a forward slash.
    const relPath = `${path.basename(perFlowDir)}/${path.basename(runFile)}`;
    updateIndexEntry(runsRoot, extractIndexEntry(snapshot, relPath));
  });

  // Opportunistic cleanup of terminal runs. NOTE(review): the original comment
  // says this is throttled to once per CLEANUP_INTERVAL_MS — the throttle lives
  // in cleanupTerminalRuns, not here.
  cleanupTerminalRuns(runsRoot);
}
172
599
 
600
/**
 * Load one run by its id.
 *
 * Candidate locations are tried from cheapest to most expensive, and the first
 * one that yields a state wins:
 *   1. INDEX — the entry in the index whose `runId` matches, read via its
 *      `relPath`.
 *   2. SUBDIR SCAN — `<subdir>/<runId>.json` for every directory directly
 *      under the runs root.
 *   3. FLAT FALLBACK — `<runsRoot>/<runId>.json` (legacy layout).
 *
 * Every candidate is read through `tryReadRunFile`, so the path-traversal and
 * symlink guards apply to each path touched.
 *
 * @param cwd - Working directory used to locate the runs root.
 * @param runId - Id of the run; ids rejected by `validateRunId` yield `null`.
 * @returns The stored run state, or `null` when no candidate could be read.
 */
export function loadRun(cwd: string, runId: string): RunState | null {
  if (!validateRunId(runId)) return null;

  const runsRoot = runsDir(cwd);
  const fileName = `${runId}.json`;

  // 1. Index lookup. A stale entry (file gone or corrupt) is not fatal: we
  //    simply continue with the slower strategies below.
  const indexed = readIndex(runsRoot).find((candidate) => candidate.runId === runId);
  if (indexed) {
    const fromIndex = tryReadRunFile(runsRoot, path.join(runsRoot, indexed.relPath));
    if (fromIndex) return fromIndex;
  }

  // 2. Scan each immediate subdirectory for "<runId>.json".
  let subdirNames: string[] = [];
  try {
    subdirNames = fs
      .readdirSync(runsRoot, { withFileTypes: true })
      .filter((dirent) => dirent.isDirectory())
      .map((dirent) => dirent.name);
  } catch {
    // Runs root missing or unreadable — nothing to scan.
    subdirNames = [];
  }

  for (const subdir of subdirNames) {
    const fromSubdir = tryReadRunFile(runsRoot, path.join(runsRoot, subdir, fileName));
    if (fromSubdir) return fromSubdir;
  }

  // 3. Legacy flat layout: the run file sits directly in the runs root.
  const fromFlat = tryReadRunFile(runsRoot, path.join(runsRoot, fileName));
  return fromFlat ? fromFlat : null;
}
647
+
648
/**
 * Safely read and parse a run file that must live inside `runsRoot`.
 *
 * Guards, applied in order:
 *   1. Lexical containment — `filePath`, taken relative to `runsRoot`, must not
 *      be `..`, start with `../`, or be absolute.
 *   2. Physical containment — the same test after resolving symlinks on both
 *      paths with `fs.realpathSync`, so a symlink placed inside the runs
 *      directory cannot point the read at an arbitrary file.
 *   3. Shape — the parsed JSON must be a non-null, non-array object. A file
 *      holding a bare number, string, boolean or array is not a run state and
 *      is rejected instead of being returned under a `RunState` cast.
 *
 * @param runsRoot - Directory that is allowed to contain run files.
 * @param filePath - Candidate run file path.
 * @returns The parsed run state, or `null` on any guard violation, missing
 *   file, read error or JSON syntax error. Never throws.
 */
function tryReadRunFile(runsRoot: string, filePath: string): RunState | null {
  // True when a relative path climbs out of the directory it is relative to.
  // The separator suffix keeps legitimate names such as "..foo" from being
  // flagged; an absolute result means a different root/drive altogether.
  const escapesRoot = (rel: string): boolean =>
    rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);

  // Lexical traversal guard.
  if (escapesRoot(path.relative(runsRoot, filePath))) return null;

  // Resolve symlinks on both runsRoot and the file so the containment check
  // compares consistent physical paths (e.g. macOS, where /var → /private/var).
  let realRoot: string;
  let realFile: string;
  try {
    realRoot = fs.realpathSync(runsRoot);
    realFile = fs.realpathSync(filePath);
  } catch {
    // Either path does not exist or cannot be resolved.
    return null;
  }
  if (escapesRoot(path.relative(realRoot, realFile))) return null;

  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(realFile, "utf-8"));
    // Valid JSON is not necessarily a run record: only plain objects qualify.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      return null;
    }
    return parsed as RunState;
  } catch {
    // Unreadable file (including directories) or malformed JSON.
    return null;
  }
}
221
674
 
675
/**
 * List the most recent runs, newest first.
 *
 * Candidates come from the run index; when the index yields no entries it is
 * rebuilt from the filesystem via `rebuildIndex`. Legacy flat files sitting
 * directly in the runs root that the index does not know about are merged in.
 *
 * Every file — legacy or indexed — is read through `tryReadRunFile`, so the
 * same traversal/symlink guards used by `loadRun` apply here too; a tampered
 * `relPath` in the index cannot make this function read outside the runs root.
 *
 * Candidates are ordered by their index `updatedAt` (entries whose timestamp
 * is missing or NaN sort last, keeping the comparator well-defined) and are
 * then read one by one until `limit` valid runs have been collected. Files
 * that vanished or are corrupt are skipped without consuming a slot.
 *
 * F-010: runs whose own `updatedAt` is non-numeric/NaN are dropped.
 *
 * @param cwd - Working directory used to locate the runs root.
 * @param limit - Maximum number of runs to return (default 20). Fractions are
 *   truncated; zero, negative or NaN values yield an empty list.
 * @returns Up to `limit` run states, sorted by index `updatedAt` descending.
 */
export function listRuns(cwd: string, limit = 20): RunState[] {
  const root = runsDir(cwd);
  if (!fs.existsSync(root)) return [];

  // Index-first path; an empty result means the index is missing or corrupt
  // (or there are simply no runs), so fall back to a filesystem rebuild.
  let indexed = readIndex(root);
  if (indexed.length === 0) {
    indexed = rebuildIndex(root);
  }

  // Work on a copy so merging legacy entries never mutates an array owned by
  // the index helpers.
  const candidates = [...indexed];
  const knownRunIds = new Set(candidates.map((e) => e.runId));

  // Merge legacy flat files not yet in the index.
  let flatFiles: string[];
  try {
    flatFiles = fs.readdirSync(root).filter(
      (f) => f.endsWith(".json") && f !== "index.json" && !f.includes(".lock"),
    );
  } catch {
    flatFiles = [];
  }

  for (const file of flatFiles) {
    // Cheap pre-check on the file name before paying for a read.
    if (knownRunIds.has(file.replace(/\.json$/, ""))) continue;
    const legacy = tryReadRunFile(root, path.join(root, file));
    if (legacy && typeof legacy.runId === "string" && !knownRunIds.has(legacy.runId)) {
      candidates.push(extractIndexEntry(legacy, file));
      knownRunIds.add(legacy.runId);
    }
  }

  const hasValidTimestamp = (v: { updatedAt: unknown }): boolean =>
    typeof v.updatedAt === "number" && !Number.isNaN(v.updatedAt);

  // Invalid timestamps sort as "oldest"; the explicit three-way comparison
  // avoids the NaN that `-Infinity - -Infinity` would produce.
  const sortKey = (v: { updatedAt: unknown }): number =>
    hasValidTimestamp(v) ? (v.updatedAt as number) : Number.NEGATIVE_INFINITY;
  candidates.sort((a, b) => {
    const ka = sortKey(a);
    const kb = sortKey(b);
    return ka === kb ? 0 : kb > ka ? 1 : -1;
  });

  // Read full RunState records until the limit is reached. Written as
  // `!(length < max)` so that a NaN limit stops immediately instead of never.
  const max = Math.trunc(limit);
  const runs: RunState[] = [];
  for (const e of candidates) {
    if (!(runs.length < max)) break;
    const state = tryReadRunFile(root, path.join(root, e.relPath));
    // Skip files deleted since the index was built, corrupt files, and
    // records failing the F-010 timestamp check.
    if (state && hasValidTimestamp(state)) runs.push(state);
  }
  return runs;
}
248
735
 
249
736
  /** Stable hash of a phase's resolved task + inputs, for resume caching. */
@@ -257,6 +744,8 @@ export function hashInput(...parts: string[]): string {
257
744
  * a crash or concurrent write from leaving a half-written, corrupt JSON file.
258
745
  */
259
746
  function writeFileAtomic(filePath: string, data: string): void {
747
+ // Ensure parent directory exists.
748
+ fs.mkdirSync(path.dirname(filePath), { recursive: true });
260
749
  const tmp = `${filePath}.${process.pid}.${crypto.randomBytes(4).toString("hex")}.tmp`;
261
750
  try {
262
751
  fs.writeFileSync(tmp, data, "utf-8");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-taskflow",
3
- "version": "0.0.9",
3
+ "version": "0.0.10",
4
4
  "description": "Lightweight workflow orchestration for the Pi coding agent — declarative multi-phase taskflows with dynamic fan-out, isolated subagent context, resumable runs, and saveable commands.",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -106,7 +106,7 @@ routing. Use `join: "any"` on the merge phase so it runs whichever branch fired:
106
106
  { "id": "triage", "type": "agent", "agent": "analyst", "output": "json",
107
107
  "task": "Classify the task. Output ONLY {\"route\":\"deep\"} or {\"route\":\"quick\"}." },
108
108
  { "id": "deep", "when": "{steps.triage.json.route} == deep", "dependsOn": ["triage"], "agent": "analyst", "task": "..." },
109
- { "id": "quick", "when": "{steps.triage.json.route} == quick", "dependsOn": ["triage"], "agent": "executor_fast", "task": "..." },
109
+ { "id": "quick", "when": "{steps.triage.json.route} == quick", "dependsOn": ["triage"], "agent": "executor-fast", "task": "..." },
110
110
  { "id": "report", "type": "reduce", "from": ["deep","quick"], "join": "any",
111
111
  "dependsOn": ["deep","quick"], "agent": "writer", "task": "...", "final": true }
112
112
  ```