pi-taskflow 0.0.22 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,447 @@
1
+ /**
2
+ * Shared Context Tree — the file-based blackboard + supervision-tree store.
3
+ *
4
+ * This module is the IPC substrate that lets isolated subagent processes share
5
+ * context with each other (a horizontal blackboard) and report results upward
6
+ * so a parent can react (a vertical supervision tree). It deliberately reuses
7
+ * the SAME atomic-write + file-lock primitives as the run store (`store.ts`),
8
+ * so it inherits the project's "all file ops are atomic" invariant for free.
9
+ *
10
+ * On-disk layout, rooted at PI_TASKFLOW_CTX_DIR (one directory per run):
11
+ *
12
+ * <ctxDir>/
13
+ * ├── tree.json the node tree (who spawned whom + status)
14
+ * ├── tree.json.lock lock guarding tree.json RMW cycles
15
+ * ├── findings/
16
+ * │ ├── <nodeId>.json findings written by one node (last-write-wins per key)
17
+ * │ └── <nodeId>.json.lock
18
+ * ├── reports/
19
+ * │ └── <nodeId>.json a node's upward report ({summary, structured?})
20
+ * └── pending/
21
+ * └── <nodeId>-<seq>.json a ctx_spawn intent the runtime will pick up
22
+ *
23
+ * Why per-node findings files (not one shared findings.json): sibling subagents
24
+ * run concurrently. Giving each node its OWN file means concurrent writers never
25
+ * contend on the same lock — a node only locks its own file. A reader unions the
26
+ * relevant nodes' files (its ancestors + completed siblings). This is the same
27
+ * "shard by writer" trick the run index uses to avoid a global write bottleneck.
28
+ */
29
+
30
+ import * as crypto from "node:crypto";
31
+ import * as fs from "node:fs";
32
+ import * as path from "node:path";
33
+ import { validateRunId, withLock, writeFileAtomic } from "./store.ts";
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // Guards (size + key charset). A subagent is untrusted input from the LLM's
37
+ // point of view; cap what it can write so a runaway tool call can't fill the
38
+ // disk or smuggle a path-traversal key.
39
+ // ---------------------------------------------------------------------------
40
+
41
/** One kibibyte; the byte caps below are written as multiples of it. */
const KIB = 1024;

/** Byte cap for one findings value, measured on its JSON.stringify output (256 KB). */
export const MAX_VALUE_BYTES = 256 * KIB;

/** Byte cap for the summary string of a single report. */
export const MAX_REPORT_BYTES = 256 * KIB;

/** Byte cap for a report's structured payload, measured on its JSON.stringify output. */
export const MAX_STRUCTURED_BYTES = 256 * KIB;

/** Byte cap for the task prompt of a single ctx_spawn assignment. */
export const MAX_TASK_BYTES = 64 * KIB;

/** Highest number of distinct keys a single node may write. */
export const MAX_KEYS_PER_NODE = 256;

/** Highest number of assignments one ctx_spawn call may queue. */
export const MAX_SPAWN_ASSIGNMENTS = 16;

/** Byte cap for one ctx_spawn `subflow` payload, measured on its JSON.stringify output (256 KB). */
export const MAX_SUBFLOW_BYTES = 256 * KIB;
55
+
56
/**
 * Shape of a findings/report key: 1–128 characters drawn from `[A-Za-z0-9._-]`.
 * The charset contains no path separator, so a key can never name a directory.
 */
const KEY_RE = /^[A-Za-z0-9._-]{1,128}$/;

/**
 * Decide whether `key` may be used as a findings/report key.
 *
 * A key is accepted only when it is a string, matches `KEY_RE`, contains no
 * `..` sequence, and is not the literal `__proto__`.
 *
 * @param key candidate key (checked at runtime even though it is typed as string).
 * @returns true when the key is safe to store, false otherwise.
 */
export function isValidKey(key: string): boolean {
  if (typeof key !== "string" || !KEY_RE.test(key)) return false;
  if (key.includes("..")) return false;
  // `__proto__` satisfies the charset, but findings are stored with a plain
  // `map[key] = value` assignment on an ordinary object. For this one name the
  // assignment goes through the inherited accessor instead of creating an own
  // property, so the value would never reach the serialized file. Reject it up
  // front rather than accept a write that is silently lost.
  return key !== "__proto__";
}
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Types
65
+ // ---------------------------------------------------------------------------
66
+
67
/** Lifecycle state recorded for a node in the supervision tree. */
export type NodeStatus =
  | "running"
  | "done"
  | "failed";

/** A single entry of the supervision tree — one subagent task. */
export interface TreeNode {
  /** Identifier of the node; its findings and report files are named after it. */
  nodeId: string;
  /** Phase the node was registered under. */
  phaseId: string;
  /** `nodeId` of the node that spawned this one; left out for a root node. */
  parentNodeId?: string;
  /** Current lifecycle state. */
  status: NodeStatus;
  /** `Date.now()` value captured when the node was first registered. */
  createdAt: number;
  /** `Date.now()` value captured on the most recent register/status update. */
  updatedAt: number;
}

/** Content of tree.json: a flat node list, linked into a tree through `parentNodeId`. */
export interface ContextTree {
  nodes: Array<TreeNode>;
}
82
+
83
/**
 * The findings of one node: a flat map from key to an arbitrary JSON value.
 * Writing an existing key replaces its value (last write wins).
 */
export type FindingsMap = { [key: string]: unknown };

/** The upward report a node leaves for its parent. */
export interface NodeReport {
  /** Node that wrote the report. */
  nodeId: string;
  /** Free-text summary of the node's result. */
  summary: string;
  /** Optional machine-readable payload accompanying the summary. */
  structured?: unknown;
  /** `Date.now()` value captured when the report was written. */
  at: number;
}
92
+
93
/**
 * One unit of work inside a queued ctx_spawn intent. An assignment carries
 * EITHER a flat task OR an inline sub-flow (DAG), never both:
 *
 * - `task` + `agent`: a single prompt string, executed by the agent named in
 *   `agent`.
 * - `subflow` + `defaultAgent`: an inline Taskflow (`{phases:[...]}` or a bare
 *   phases array) that the runtime validates and runs as a nested sub-flow;
 *   inner phases that name no agent of their own fall back to `defaultAgent`.
 *
 * The two agent fields are kept separate on purpose: `agent` answers "who runs
 * this task", while `defaultAgent` answers "who runs inner phases that did not
 * say".
 */
export interface SpawnAssignment {
  /** Prompt for a flat assignment. */
  task?: string;
  /** Executor of `task`. */
  agent?: string;
  /** Inline sub-flow definition; its shape is validated by the runtime, not here. */
  subflow?: unknown;
  /** Fallback agent for sub-flow phases that do not name one. */
  defaultAgent?: string;
}

/** Content of one pending-spawn file; the runtime picks it up after the node finishes. */
export interface PendingSpawn {
  /** Node that queued the intent. */
  parentNodeId: string;
  /** The assignments queued by that call. */
  assignments: SpawnAssignment[];
  /** `Date.now()` value captured when the intent was queued. */
  at: number;
}
117
+
118
+ // ---------------------------------------------------------------------------
119
+ // Path helpers
120
+ // ---------------------------------------------------------------------------
121
+
122
/** Path of the node-tree file inside a ctx directory. */
function treePath(ctxDir: string): string {
  const fileName = "tree.json";
  return path.join(ctxDir, fileName);
}

/** Path of the lock file that guards read-modify-write cycles on the tree file. */
function treeLockPath(ctxDir: string): string {
  const fileName = "tree.json.lock";
  return path.join(ctxDir, fileName);
}

/** Directory holding one findings file per node. */
function findingsDir(ctxDir: string): string {
  const dirName = "findings";
  return path.join(ctxDir, dirName);
}

/** Path of the findings file owned by `nodeId`. */
function findingsPath(ctxDir: string, nodeId: string): string {
  return path.join(ctxDir, "findings", nodeId + ".json");
}

/** Path of the lock file paired with the findings file of `nodeId`. */
function findingsLockPath(ctxDir: string, nodeId: string): string {
  return path.join(ctxDir, "findings", nodeId + ".json.lock");
}

/** Directory holding one report file per node. */
function reportsDir(ctxDir: string): string {
  const dirName = "reports";
  return path.join(ctxDir, dirName);
}

/** Path of the report file owned by `nodeId`. */
function reportPath(ctxDir: string, nodeId: string): string {
  return path.join(ctxDir, "reports", nodeId + ".json");
}

/** Directory holding queued ctx_spawn intents. */
function pendingDir(ctxDir: string): string {
  const dirName = "pending";
  return path.join(ctxDir, dirName);
}
146
+
147
/**
 * Compute the ctx directory of a run: `<runsRoot>/ctx/<runId>`.
 *
 * @throws Error when `runId` is rejected by `validateRunId`.
 */
export function ctxDirFor(runsRoot: string, runId: string): string {
  const runIdIsSafe = validateRunId(runId);
  if (!runIdIsSafe) {
    throw new Error(`Unsafe runId for ctx dir: ${runId}`);
  }
  const segments = [runsRoot, "ctx", runId];
  return path.join(...segments);
}
152
+
153
/**
 * Create the ctx directory and its `findings/`, `reports/` and `pending/`
 * subdirectories. Every directory is created with `recursive: true`, so
 * calling this again on an existing layout is harmless.
 *
 * @returns the `ctxDir` that was passed in, so calls can be chained.
 */
export function initCtxDir(ctxDir: string): string {
  const layout = [ctxDir, findingsDir(ctxDir), reportsDir(ctxDir), pendingDir(ctxDir)];
  for (const dir of layout) {
    fs.mkdirSync(dir, { recursive: true });
  }
  return ctxDir;
}
164
+
165
+ // ---------------------------------------------------------------------------
166
+ // Tree
167
+ // ---------------------------------------------------------------------------
168
+
169
/**
 * Load the node tree of a ctx directory.
 *
 * A missing, unreadable or unparsable tree file — or one whose `nodes` is not
 * an array — yields an empty tree rather than an error.
 */
export function readTree(ctxDir: string): ContextTree {
  let parsed: ContextTree;
  try {
    parsed = JSON.parse(fs.readFileSync(treePath(ctxDir), "utf-8")) as ContextTree;
  } catch {
    // Missing or corrupt file: fall back to an empty tree.
    return { nodes: [] };
  }
  const usable = Boolean(parsed) && Array.isArray(parsed.nodes);
  return usable ? parsed : { nodes: [] };
}
179
+
180
/**
 * Insert a node into the tree, or refresh it when the `nodeId` is already
 * present. Because the operation is an upsert keyed on `nodeId`, a resumed run
 * that re-registers a phase does not produce a duplicate entry (duplicates
 * would double-count ancestor findings).
 *
 * On refresh, `phaseId`, `parentNodeId`, `status` and `updatedAt` are
 * replaced; every other stored field (notably `createdAt`) is kept.
 *
 * The read-modify-write of the tree file runs inside `withLock` on the tree
 * lock file.
 *
 * @throws Error when `nodeId` is rejected by `validateRunId`.
 */
export function registerNode(
  ctxDir: string,
  nodeId: string,
  phaseId: string,
  parentNodeId: string | undefined,
  status: NodeStatus = "running",
): void {
  if (!validateRunId(nodeId)) {
    throw new Error(`Unsafe nodeId: ${nodeId}`);
  }
  withLock(treeLockPath(ctxDir), () => {
    const tree = readTree(ctxDir);
    const stamp = Date.now();
    const position = tree.nodes.findIndex((entry) => entry.nodeId === nodeId);
    if (position < 0) {
      // First registration: createdAt and updatedAt share one timestamp.
      tree.nodes.push({ nodeId, phaseId, parentNodeId, status, createdAt: stamp, updatedAt: stamp });
    } else {
      const previous = tree.nodes[position]!;
      tree.nodes[position] = { ...previous, phaseId, parentNodeId, status, updatedAt: stamp };
    }
    writeFileAtomic(treePath(ctxDir), JSON.stringify(tree, null, 2));
  });
}
212
+
213
/**
 * Change the status of an already-registered node and bump its `updatedAt`.
 * When no node with that id exists, the tree file is left untouched.
 *
 * The read-modify-write runs inside `withLock` on the tree lock file.
 */
export function setNodeStatus(ctxDir: string, nodeId: string, status: NodeStatus): void {
  withLock(treeLockPath(ctxDir), () => {
    const tree = readTree(ctxDir);
    const target = tree.nodes.find((entry) => entry.nodeId === nodeId);
    if (target) {
      target.status = status;
      target.updatedAt = Date.now();
      writeFileAtomic(treePath(ctxDir), JSON.stringify(tree, null, 2));
    }
  });
}
223
+
224
/**
 * Count how many parent links lie between a node and the top of its chain
 * (a node without a parent has depth 0; so does an id that is not in the tree).
 *
 * A link is counted as soon as the current node names a parent, even when that
 * parent is not present in the tree. Nodes already visited are remembered so a
 * cyclic parent chain terminates.
 */
export function nodeDepth(tree: ContextTree, nodeId: string): number {
  const lookup = (id: string) => tree.nodes.find((entry) => entry.nodeId === id);
  const visited = new Set<string>();
  let hops = 0;
  let cursor = lookup(nodeId);
  while (cursor !== undefined) {
    const parentId = cursor.parentNodeId;
    if (!parentId || visited.has(cursor.nodeId)) break;
    visited.add(cursor.nodeId);
    hops += 1;
    cursor = lookup(parentId);
  }
  return hops;
}
237
+
238
/**
 * List the ids on a node's parent chain, nearest first (parent, grandparent, …).
 * The node itself is never part of the result.
 *
 * A parent id is listed even when no node with that id exists in the tree; the
 * walk simply ends there. Ids already seen (including the start node) stop the
 * walk, so a cyclic chain terminates.
 */
export function ancestorIds(tree: ContextTree, nodeId: string): string[] {
  const lookup = (id: string) => tree.nodes.find((entry) => entry.nodeId === id);
  const chain: string[] = [];
  const visited = new Set<string>([nodeId]);
  let cursor = lookup(nodeId);
  for (;;) {
    const parentId = cursor?.parentNodeId;
    if (!parentId || visited.has(parentId)) return chain;
    chain.push(parentId);
    visited.add(parentId);
    cursor = lookup(parentId);
  }
}
251
+
252
+ // ---------------------------------------------------------------------------
253
+ // Findings (the horizontal blackboard)
254
+ // ---------------------------------------------------------------------------
255
+
256
/**
 * Load the findings file of a single node.
 *
 * Returns an empty map when the id is rejected by `validateRunId` (no path is
 * ever built from such an id), when the file is missing or unparsable, or when
 * its content is not a JSON object.
 */
export function readNodeFindings(ctxDir: string, nodeId: string): FindingsMap {
  if (!validateRunId(nodeId)) return {};
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(findingsPath(ctxDir, nodeId), "utf-8"));
  } catch {
    // Missing or corrupt file: treat as "no findings".
    return {};
  }
  const isPlainMap = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isPlainMap ? (parsed as FindingsMap) : {};
}
267
+
268
/**
 * Store one finding in the findings file of `nodeId`; writing an existing key
 * replaces its value (last write wins).
 *
 * Only the node's own lock file is taken, so sibling nodes writing their own
 * findings never wait on each other.
 *
 * @param value any JSON-serializable value; `undefined`/`null` are stored as `null`.
 * @throws Error when `nodeId` is rejected by `validateRunId`, when `key` fails
 *   `isValidKey`, when the value cannot be represented as JSON, when its
 *   serialized form exceeds `MAX_VALUE_BYTES`, or when adding a new key would
 *   exceed `MAX_KEYS_PER_NODE`. Callers surface these as tool errors.
 */
export function writeFinding(ctxDir: string, nodeId: string, key: string, value: unknown): void {
  if (!validateRunId(nodeId)) throw new Error(`Unsafe nodeId: ${nodeId}`);
  if (!isValidKey(key)) throw new Error(`Invalid finding key '${key}' (allowed: [A-Za-z0-9._-], <=128 chars, no '..').`);
  // JSON.stringify yields undefined (not a string) for functions and symbols;
  // report that clearly instead of failing inside Buffer.byteLength.
  const serialized: string | undefined = JSON.stringify(value ?? null);
  if (typeof serialized !== "string") {
    throw new Error(`Finding '${key}' is not JSON-serializable.`);
  }
  if (Buffer.byteLength(serialized, "utf-8") > MAX_VALUE_BYTES) {
    throw new Error(`Finding '${key}' exceeds ${MAX_VALUE_BYTES} bytes.`);
  }
  fs.mkdirSync(findingsDir(ctxDir), { recursive: true });
  withLock(findingsLockPath(ctxDir, nodeId), () => {
    const findings = readNodeFindings(ctxDir, nodeId);
    // Own-property test: `key in findings` would also be true for inherited
    // names such as "constructor" or "toString", letting those keys slip past
    // the per-node key cap.
    const isNewKey = !Object.prototype.hasOwnProperty.call(findings, key);
    if (isNewKey && Object.keys(findings).length >= MAX_KEYS_PER_NODE) {
      throw new Error(`Node '${nodeId}' exceeds ${MAX_KEYS_PER_NODE} findings keys.`);
    }
    // Store the round-tripped value so the map holds exactly what the file will.
    findings[key] = JSON.parse(serialized);
    writeFileAtomic(findingsPath(ctxDir, nodeId), JSON.stringify(findings, null, 2));
  });
}
290
+
291
/**
 * Collect the findings a node is allowed to see: its OWN findings, those of
 * its ancestors, and those of every other node whose status is already `done`.
 *
 * Restricting non-ancestor nodes to `done` keeps a reader from observing the
 * half-written blackboard of a sibling that is still running; a sibling's
 * findings become visible once it has completed. A node always sees its own.
 *
 * Key conflicts are resolved by scope, nearer scope winning:
 * own > ancestors > completed others.
 *
 * @param key optional — when given, only that key's value is returned.
 * @returns without `key`: the merged map. With `key`: the stored value, or
 *   `undefined` when the key is invalid or no visible node has written it.
 *   An id rejected by `validateRunId` yields `{}` (or `undefined` with `key`).
 */
export function readVisibleFindings(
  ctxDir: string,
  nodeId: string,
  key?: string,
): FindingsMap | unknown {
  if (!validateRunId(nodeId)) return key !== undefined ? undefined : {};
  // An invalid key can never match a stored finding; answer before touching disk.
  if (key !== undefined && !isValidKey(key)) return undefined;
  const tree = readTree(ctxDir);
  const ancestors = new Set(ancestorIds(tree, nodeId));
  // Build one map per priority layer; they are merged lowest priority first.
  const completedOthers: FindingsMap = {};
  const ancestorFindings: FindingsMap = {};
  for (const n of tree.nodes) {
    if (n.nodeId === nodeId) continue;
    const f = readNodeFindings(ctxDir, n.nodeId);
    if (ancestors.has(n.nodeId)) {
      Object.assign(ancestorFindings, f);
    } else if (n.status === "done") {
      Object.assign(completedOthers, f);
    }
  }
  const own = readNodeFindings(ctxDir, nodeId);
  const merged: FindingsMap = { ...completedOthers, ...ancestorFindings, ...own };
  if (key === undefined) return merged;
  // Own-property lookup: a plain `merged[key]` would hand back inherited
  // Object.prototype members (e.g. the `constructor` function) for keys that
  // nobody ever wrote.
  return Object.prototype.hasOwnProperty.call(merged, key) ? merged[key] : undefined;
}
331
+
332
+ // ---------------------------------------------------------------------------
333
+ // Reports (the vertical upward channel)
334
+ // ---------------------------------------------------------------------------
335
+
336
/**
 * Write (or overwrite) the upward report of `nodeId`.
 *
 * `summary` is coerced with `String(summary ?? "")` before it is measured and
 * stored. `structured` is size-checked only when it is not `undefined`.
 *
 * @throws Error when `nodeId` is rejected by `validateRunId`, when the summary
 *   exceeds `MAX_REPORT_BYTES`, or when the serialized `structured` payload
 *   exceeds `MAX_STRUCTURED_BYTES`.
 */
export function writeReport(ctxDir: string, nodeId: string, summary: string, structured?: unknown): void {
  if (!validateRunId(nodeId)) {
    throw new Error(`Unsafe nodeId: ${nodeId}`);
  }
  const summaryText = String(summary ?? "");
  if (Buffer.byteLength(summaryText, "utf-8") > MAX_REPORT_BYTES) {
    throw new Error(`Report summary exceeds ${MAX_REPORT_BYTES} bytes.`);
  }
  if (structured !== undefined) {
    const structuredBytes = Buffer.byteLength(JSON.stringify(structured ?? null), "utf-8");
    if (structuredBytes > MAX_STRUCTURED_BYTES) {
      throw new Error(`Report 'structured' payload exceeds ${MAX_STRUCTURED_BYTES} bytes.`);
    }
  }
  fs.mkdirSync(reportsDir(ctxDir), { recursive: true });
  // Deliberately unlocked: a report file has exactly one writer (the node that
  // owns it, a single process) and the write is a pure overwrite, so there is
  // no read-modify-write window to protect — unlike findings, which lock.
  const report: NodeReport = { nodeId, summary: summaryText, structured, at: Date.now() };
  writeFileAtomic(reportPath(ctxDir, nodeId), JSON.stringify(report, null, 2));
}
351
+
352
/**
 * Load the report of a node.
 *
 * @returns the parsed report, or `undefined` when the id is rejected by
 *   `validateRunId`, the file is missing or unparsable, or the parsed value
 *   has no string `summary`.
 */
export function readReport(ctxDir: string, nodeId: string): NodeReport | undefined {
  if (!validateRunId(nodeId)) return undefined;
  let parsed: NodeReport;
  try {
    parsed = JSON.parse(fs.readFileSync(reportPath(ctxDir, nodeId), "utf-8")) as NodeReport;
  } catch {
    // No report (or an unreadable one).
    return undefined;
  }
  return parsed && typeof parsed.summary === "string" ? parsed : undefined;
}
363
+
364
+ // ---------------------------------------------------------------------------
365
+ // Pending spawns (ctx_spawn intents → runtime supervision loop)
366
+ // ---------------------------------------------------------------------------
367
+
368
/**
 * Validate one ctx_spawn assignment and reduce it to the fields that belong to
 * its kind: `{subflow, defaultAgent}` or `{task, agent}`. Non-string agent
 * fields become `undefined`.
 */
function sanitizeSpawnAssignment(candidate: SpawnAssignment): SpawnAssignment {
  if (!candidate || typeof candidate !== "object") {
    throw new Error("Each ctx_spawn assignment must be an object with 'task' or 'subflow'.");
  }
  const { task, subflow } = candidate;
  const taskGiven = typeof task === "string" && task.trim().length > 0;
  const subflowGiven = subflow !== undefined && subflow !== null;

  // Exactly one of task / subflow must be present.
  if (!taskGiven && !subflowGiven) {
    throw new Error("Each ctx_spawn assignment needs exactly one of 'task' (non-empty string) or 'subflow' (object).");
  }
  if (taskGiven && subflowGiven) {
    throw new Error("A ctx_spawn assignment has both 'task' and 'subflow' — provide exactly one.");
  }

  if (subflowGiven) {
    const subflowBytes = Buffer.byteLength(JSON.stringify(subflow), "utf-8");
    if (subflowBytes > MAX_SUBFLOW_BYTES) {
      throw new Error(`ctx_spawn subflow exceeds ${MAX_SUBFLOW_BYTES} bytes.`);
    }
    const defaultAgent = typeof candidate.defaultAgent === "string" ? candidate.defaultAgent : undefined;
    return { subflow, defaultAgent };
  }

  // Only the flat-task case remains.
  if (Buffer.byteLength(task as string, "utf-8") > MAX_TASK_BYTES) {
    throw new Error(`ctx_spawn task exceeds ${MAX_TASK_BYTES} bytes.`);
  }
  const agent = typeof candidate.agent === "string" ? candidate.agent : undefined;
  return { task: task as string, agent };
}

/**
 * Queue a ctx_spawn intent for the runtime to pick up. Every call writes its
 * own file under `pending/`, named `<parentNodeId>-<seq>.json`.
 *
 * @returns the number of assignments that were queued.
 * @throws Error when `parentNodeId` is rejected by `validateRunId`, when
 *   `assignments` is not a non-empty array of at most `MAX_SPAWN_ASSIGNMENTS`
 *   entries, or when any entry is malformed or oversized.
 */
export function queueSpawn(ctxDir: string, parentNodeId: string, assignments: SpawnAssignment[]): number {
  if (!validateRunId(parentNodeId)) {
    throw new Error(`Unsafe nodeId: ${parentNodeId}`);
  }
  const isNonEmptyList = Array.isArray(assignments) && assignments.length > 0;
  if (!isNonEmptyList) {
    throw new Error("ctx_spawn requires a non-empty assignments array.");
  }
  if (assignments.length > MAX_SPAWN_ASSIGNMENTS) {
    throw new Error(`ctx_spawn limited to ${MAX_SPAWN_ASSIGNMENTS} assignments per call.`);
  }
  const clean: SpawnAssignment[] = assignments.map((entry) => sanitizeSpawnAssignment(entry));

  fs.mkdirSync(pendingDir(ctxDir), { recursive: true });
  // The sequence token combines the clock with crypto-random bytes, so two
  // calls from the same parent within one millisecond still get distinct file
  // names and cannot overwrite each other.
  const timePart = Date.now().toString(36);
  const randomPart = crypto.randomBytes(6).toString("hex");
  const seq = `${timePart}-${randomPart}`;
  const payload: PendingSpawn = { parentNodeId, assignments: clean, at: Date.now() };
  const target = path.join(pendingDir(ctxDir), `${parentNodeId}-${seq}.json`);
  writeFileAtomic(target, JSON.stringify(payload, null, 2));
  return clean.length;
}
411
+
412
/**
 * Drain (read + delete) every pending spawn intent queued by `parentNodeId`
 * and return their assignments as one flat list, in file-name order. Used by
 * the runtime supervision loop after a node's subagent finishes.
 *
 * Candidate files are found by the `<parentNodeId>-` name prefix, but that
 * prefix is also shared by files of any node whose id merely starts with
 * `<parentNodeId>-`. The `parentNodeId` recorded inside each payload is
 * therefore checked, and files that name a different owner are left in place
 * for that owner to drain. Files that cannot be read or parsed are deleted
 * without contributing assignments.
 *
 * INVARIANT: only call this AFTER the parent subagent process has exited. The
 * read-then-unlink is not directory-locked, so a concurrent queueSpawn from a
 * still-running parent could be missed. The runtime drains post-exit, so no
 * concurrent writer exists.
 *
 * @returns the drained assignments; `[]` when the id is rejected by
 *   `validateRunId` or the pending directory cannot be listed.
 */
export function drainPendingSpawns(ctxDir: string, parentNodeId: string): SpawnAssignment[] {
  if (!validateRunId(parentNodeId)) return [];
  const dir = pendingDir(ctxDir);
  const prefix = `${parentNodeId}-`;
  let files: string[];
  try {
    files = fs.readdirSync(dir).filter((f) => f.startsWith(prefix) && f.endsWith(".json"));
  } catch {
    return [];
  }
  const out: SpawnAssignment[] = [];
  for (const f of files.sort()) {
    const full = path.join(dir, f);
    let parsed: Partial<PendingSpawn> | null | undefined;
    try {
      parsed = JSON.parse(fs.readFileSync(full, "utf-8")) as Partial<PendingSpawn> | null;
    } catch {
      /* unreadable or corrupt → contributes nothing, still removed below */
      parsed = undefined;
    }
    const owner = parsed && typeof parsed === "object" ? parsed.parentNodeId : undefined;
    if (typeof owner === "string" && owner !== parentNodeId) {
      // Queued by another node whose id shares this prefix: not ours to
      // return or delete.
      continue;
    }
    if (parsed && Array.isArray(parsed.assignments)) out.push(...parsed.assignments);
    try {
      fs.unlinkSync(full);
    } catch {
      /* already gone */
    }
  }
  return out;
}