pi-taskflow 0.0.22 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +77 -0
- package/README.md +174 -46
- package/extensions/context-store.ts +447 -0
- package/extensions/index.ts +135 -0
- package/extensions/runner.ts +96 -3
- package/extensions/runtime.ts +310 -13
- package/extensions/schema.ts +34 -6
- package/extensions/store.ts +17 -4
- package/extensions/workspace.ts +206 -0
- package/package.json +6 -2
- package/skills/taskflow/SKILL.md +104 -0
package/extensions/schema.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import * as path from "node:path";
|
|
9
9
|
import { StringEnum } from "@earendil-works/pi-ai";
|
|
10
10
|
import { Type, type Static } from "typebox";
|
|
11
|
+
import { WORKSPACE_KEYWORDS } from "./workspace.ts";
|
|
11
12
|
|
|
12
13
|
// ---------------------------------------------------------------------------
|
|
13
14
|
// Phase types
|
|
@@ -208,7 +209,7 @@ const PhaseSchema = Type.Object(
|
|
|
208
209
|
model: Type.Optional(Type.String({ description: "Model override for this phase" })),
|
|
209
210
|
thinking: Type.Optional(Type.String({ description: "Thinking level override for this phase" })),
|
|
210
211
|
tools: Type.Optional(Type.Array(Type.String(), { description: "Restrict tools for this phase's agent" })),
|
|
211
|
-
cwd: Type.Optional(Type.String({ description: "Working directory for this phase's subagent" })),
|
|
212
|
+
cwd: Type.Optional(Type.String({ description: "Working directory for this phase's subagent. A literal path, or a reserved keyword: 'temp' (ephemeral dir, removed after the phase), 'dedicated' (persistent dir under the run state, kept), or 'worktree' (a git worktree on a throwaway branch, removed after the phase)." })),
|
|
212
213
|
final: Type.Optional(Type.Boolean({ description: "Mark this phase's output as the workflow result" })),
|
|
213
214
|
optional: Type.Optional(
|
|
214
215
|
Type.Boolean({ description: "If true, a failure does not abort the run", default: false }),
|
|
@@ -240,6 +241,12 @@ const PhaseSchema = Type.Object(
|
|
|
240
241
|
}),
|
|
241
242
|
),
|
|
242
243
|
cache: Type.Optional(CacheSchema),
|
|
244
|
+
shareContext: Type.Optional(
|
|
245
|
+
Type.Boolean({
|
|
246
|
+
description:
|
|
247
|
+
"Opt into the Shared Context Tree for this phase: the subagent gets ctx_read/ctx_write (a blackboard shared with siblings/ancestors, to avoid re-reading files) and ctx_report/ctx_spawn (report upward + queue child tasks the runtime picks up). Default false — existing flows are unaffected.",
|
|
248
|
+
}),
|
|
249
|
+
),
|
|
243
250
|
},
|
|
244
251
|
{ additionalProperties: false },
|
|
245
252
|
);
|
|
@@ -271,6 +278,12 @@ export const TaskflowSchema = Type.Object(
|
|
|
271
278
|
default: false,
|
|
272
279
|
}),
|
|
273
280
|
),
|
|
281
|
+
contextSharing: Type.Optional(
|
|
282
|
+
Type.Boolean({
|
|
283
|
+
description:
|
|
284
|
+
"Enable the Shared Context Tree for ALL phases in this flow (shorthand for setting shareContext on every phase). Default false.",
|
|
285
|
+
}),
|
|
286
|
+
),
|
|
274
287
|
phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
|
|
275
288
|
},
|
|
276
289
|
{ additionalProperties: false },
|
|
@@ -485,11 +498,18 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
485
498
|
if (typeof p.concurrency === "number" && p.concurrency > MAX_DYNAMIC_CONCURRENCY) {
|
|
486
499
|
errors.push(`Dynamic sub-flow phase '${p.id}': concurrency too high (${p.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
|
|
487
500
|
}
|
|
488
|
-
// cwd containment: a generated phase may not escape the run's cwd
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
501
|
+
// cwd containment: a generated phase may not escape the run's cwd, and
|
|
502
|
+
// may not request a reserved workspace keyword (temp/dedicated/worktree)
|
|
503
|
+
// — LLM-authored sub-flows must not allocate isolated dirs or git
|
|
504
|
+
// worktrees that mutate the repo. Only author-written flows may.
|
|
505
|
+
if (typeof p.cwd === "string") {
|
|
506
|
+
if (WORKSPACE_KEYWORDS.includes(p.cwd as (typeof WORKSPACE_KEYWORDS)[number])) {
|
|
507
|
+
errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' is a reserved workspace keyword not allowed in generated flows`);
|
|
508
|
+
} else if (root) {
|
|
509
|
+
const resolved = path.resolve(root, p.cwd);
|
|
510
|
+
if (resolved !== root && !resolved.startsWith(root + path.sep)) {
|
|
511
|
+
errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
|
|
512
|
+
}
|
|
493
513
|
}
|
|
494
514
|
}
|
|
495
515
|
}
|
|
@@ -508,6 +528,14 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
508
528
|
if (ids.has(p.id)) errors.push(`Duplicate phase id: ${p.id}`);
|
|
509
529
|
ids.add(p.id);
|
|
510
530
|
|
|
531
|
+
// When a phase opts into the Shared Context Tree, its id becomes a filesystem
|
|
532
|
+
// node id; restrict the charset so two ids can't sanitize to the same node
|
|
533
|
+
// (which would silently merge their blackboards). Non-sharing phases are
|
|
534
|
+
// unaffected (full backward compat).
|
|
535
|
+
if ((p.shareContext === true || flow.contextSharing === true) && !/^[A-Za-z0-9._-]+$/.test(p.id)) {
|
|
536
|
+
errors.push(`Phase '${p.id}': ids used with context sharing must match [A-Za-z0-9._-]+`);
|
|
537
|
+
}
|
|
538
|
+
|
|
511
539
|
const type = (p.type ?? "agent") as PhaseType;
|
|
512
540
|
if (!PHASE_TYPES.includes(type)) errors.push(`Phase '${p.id}': unknown type '${type}'`);
|
|
513
541
|
|
package/extensions/store.ts
CHANGED
|
@@ -190,13 +190,14 @@ function lockPathForRun(runsRoot: string, flowName: string, runId: string): stri
|
|
|
190
190
|
* Validate that a runId looks safe before performing any filesystem access.
|
|
191
191
|
* Legitimate runIds are produced by newRunId() and contain only [A-Za-z0-9._-].
|
|
192
192
|
*/
|
|
193
|
-
function validateRunId(runId: string): boolean {
|
|
193
|
+
export function validateRunId(runId: string): boolean {
  // Runtime guard kept for untyped callers: anything that is not a non-empty
  // string is rejected outright.
  if (typeof runId !== "string" || runId.length === 0) return false;

  // "." contains no separator and no "..", yet joining it onto a directory
  // resolves to that directory itself (path.join(root, "ctx", ".") is
  // root/ctx). A recursive delete keyed on such an id would wipe the whole
  // parent tree, so it is rejected explicitly.
  if (runId === ".") return false;

  // No path separators, NUL bytes, or ".." traversal tokens anywhere in the id.
  const forbidden = ["/", "\\", "\0", ".."];
  return !forbidden.some((token) => runId.includes(token));
}
|
|
202
203
|
|
|
@@ -509,6 +510,16 @@ function cleanupTerminalRuns(
|
|
|
509
510
|
try { fs.unlinkSync(filePath); } catch { /* already gone */ }
|
|
510
511
|
// Also remove any orphaned lock file.
|
|
511
512
|
try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
|
|
513
|
+
// Also remove the per-run Shared Context Tree directory (C6). Orphaned
|
|
514
|
+
// ctx dirs would otherwise accumulate under runs/ctx/ over many runs.
|
|
515
|
+
try { fs.rmSync(path.join(runsRoot, "ctx", e.runId), { recursive: true, force: true }); } catch { /* ignore */ }
|
|
516
|
+
// Also remove the per-run isolated-workspace dir tree (cwd:"dedicated").
|
|
517
|
+
// `dedicated` workspaces are persistent by design; reclaim them once the
|
|
518
|
+
// run is pruned. The dir name uses the same sanitization as workspace.ts.
|
|
519
|
+
try {
|
|
520
|
+
const wsSeg = e.runId.replace(/[^A-Za-z0-9._-]/g, "_").replace(/^\.+/, "_").slice(0, 100) || "phase";
|
|
521
|
+
fs.rmSync(path.join(runsRoot, "ws", wsSeg), { recursive: true, force: true });
|
|
522
|
+
} catch { /* ignore */ }
|
|
512
523
|
}
|
|
513
524
|
|
|
514
525
|
// Remove empty flow subdirectories.
|
|
@@ -622,7 +633,7 @@ export function saveFlow(
|
|
|
622
633
|
|
|
623
634
|
// --- Run state ---
|
|
624
635
|
|
|
625
|
-
function runsDir(cwd: string): string {
|
|
636
|
+
export function runsDir(cwd: string): string {
|
|
626
637
|
// Safe non-null assertion: create=true guarantees a non-null return because
|
|
627
638
|
// findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
|
|
628
639
|
const projDir = findProjectFlowsDir(cwd, true)!;
|
|
@@ -636,7 +647,9 @@ export function cacheDir(cwd: string): string {
|
|
|
636
647
|
}
|
|
637
648
|
|
|
638
649
|
export function newRunId(flowName: string): string {
  // Build the human-readable prefix in three steps:
  //   1. squash every run of characters outside [\w.-] into a single "_";
  //   2. fold any run of two or more dots into "_" so the id can never carry
  //      a ".." traversal token (validateRunId rejects "..");
  //   3. cap the prefix at 24 characters.
  const prefix = flowName
    .replace(/[^\w.-]+/g, "_")
    .replace(/\.{2,}/g, "_")
    .slice(0, 24);

  // Base-36 millisecond timestamp plus 3 random bytes (6 hex chars).
  const stamp = Date.now().toString(36);
  const nonce = crypto.randomBytes(3).toString("hex");

  return [prefix, stamp, nonce].join("-");
}
|
|
642
655
|
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-phase workspace isolation ("worktree isolation", STRATEGY H2).
|
|
3
|
+
*
|
|
4
|
+
* By default a phase's `cwd` is a literal path (or inherited from the run).
|
|
5
|
+
* Three reserved keywords ask the runtime to ALLOCATE an isolated working
|
|
6
|
+
* directory for the phase's subagent(s) and tear it down afterwards:
|
|
7
|
+
*
|
|
8
|
+
* - `"temp"` — an ephemeral dir under the OS tmpdir; removed when the
|
|
9
|
+
* phase finishes (success or failure). For scratch work that
|
|
10
|
+
* must not touch the main tree.
|
|
11
|
+
* - `"dedicated"` — a persistent dir under the run's own state directory
|
|
12
|
+
* (`<runs>/ws/<runId>/<phaseId>`); kept after the phase so
|
|
13
|
+
* its artifacts survive for inspection / downstream reuse.
|
|
14
|
+
* Idempotent across resume (same path for the same phase).
|
|
15
|
+
* - `"worktree"` — a real `git worktree` on a throwaway branch, rooted at the
|
|
16
|
+
* run's git repo; removed (`git worktree remove --force`)
|
|
17
|
+
* when the phase finishes. For changes you want to diff /
|
|
18
|
+
* commit / discard in isolation. Falls back to a `temp` dir
|
|
19
|
+
* (fail-open) when the base dir is not a git work tree.
|
|
20
|
+
*
|
|
21
|
+
* Invariants honoured (AGENTS.md "Critical invariants"):
|
|
22
|
+
* - Fail-open: any allocation/teardown error degrades gracefully and never
|
|
23
|
+
* sinks the phase (a failed allocation falls back to the base cwd).
|
|
24
|
+
* - No new deps: OS tmpdir via `fs.mkdtemp`, git via `child_process` (already
|
|
25
|
+
* a peer of the runner). No third-party libraries.
|
|
26
|
+
* - Resume-safe: `dedicated` is deterministic per (runId, phaseId) so a resume
|
|
27
|
+
* reuses the same dir; `temp`/`worktree` are re-allocated cleanly.
|
|
28
|
+
* - Path containment: `dedicated` dirs are contained under the run dir;
|
|
29
|
+
* sanitized phase ids prevent traversal.
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { spawnSync } from "node:child_process";
|
|
33
|
+
import fs from "node:fs";
|
|
34
|
+
import os from "node:os";
|
|
35
|
+
import path from "node:path";
|
|
36
|
+
|
|
37
|
+
/**
 * The reserved `cwd` keywords that trigger workspace allocation.
 *
 * This tuple is the single source of truth: both the `WorkspaceKind` union
 * and the `isWorkspaceKeyword` guard are derived from it.
 */
export const WORKSPACE_KEYWORDS = ["temp", "dedicated", "worktree"] as const;

/** Union of the reserved keywords (`"temp" | "dedicated" | "worktree"`). */
export type WorkspaceKind = (typeof WORKSPACE_KEYWORDS)[number];

/**
 * Type guard: is `cwd` one of the reserved workspace keywords?
 *
 * @param cwd A phase's `cwd` value, or `undefined` when none was given.
 * @returns `true` only for an exact (case-sensitive) match against
 *   `WORKSPACE_KEYWORDS`; `undefined` and every other string yield `false`.
 */
export function isWorkspaceKeyword(cwd: string | undefined): cwd is WorkspaceKind {
  // Derive from the constant instead of re-listing the literals, so adding a
  // keyword in one place cannot leave the guard out of date.
  return cwd !== undefined && (WORKSPACE_KEYWORDS as readonly string[]).includes(cwd);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Handle describing the working directory a phase's subagent(s) run in,
 * together with the means to release it afterwards.
 */
export interface Workspace {
  /** Directory the phase's subagent(s) run in. */
  dir: string;
  /**
   * The kind of workspace that was really allocated. On a fail-open
   * degradation this can differ from the kind that was requested;
   * `"inherited"` means the phase simply runs in its base cwd.
   */
  kind: WorkspaceKind | "inherited";
  /** Releases the workspace; meant to be invoked after the phase completes. */
  teardown(): void;
  /** Only set for `worktree` workspaces: the throwaway branch name (diagnostic). */
  branch?: string;
  /** Non-fatal diagnostic, present when allocation degraded (e.g. worktree → temp). */
  note?: string;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Build the "no isolation" workspace handle: the phase runs directly in
 * `baseCwd` and teardown does nothing.
 *
 * @param baseCwd Directory the phase runs in.
 * @param note Optional diagnostic explaining why no isolation was applied.
 */
function inherited(baseCwd: string, note?: string): Workspace {
  const noop = (): void => {};
  const handle: Workspace = {
    dir: baseCwd,
    kind: "inherited",
    note,
    teardown: noop,
  };
  return handle;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Turn an arbitrary id into a string that is safe to use as one path segment.
 *
 * - every character outside `[A-Za-z0-9._-]` becomes `_`;
 * - a leading run of dots collapses to a single `_` (so the segment can be
 *   neither `.` nor `..`, nor a dot-file);
 * - the result is capped at 100 characters;
 * - an empty result falls back to the literal `"phase"`.
 */
function safeSegment(id: string): string {
  const sanitized = id.replace(/[^A-Za-z0-9._-]/g, "_");
  const withoutLeadingDots = sanitized.replace(/^\.+/, "_");
  const capped = withoutLeadingDots.slice(0, 100);
  return capped === "" ? "phase" : capped;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Best-effort recursive delete, restricted to directories this module
 * allocates itself (under the OS tmpdir, or under a caller-supplied root).
 *
 * The containment check is defense-in-depth so that a future caller cannot
 * turn `rmrf` into an arbitrary-path delete:
 *
 * - a path is accepted when it lies STRICTLY inside the OS tmpdir. The tmpdir
 *   itself is never deleted, because wiping it would destroy unrelated
 *   processes' files;
 * - a path is also accepted when it equals, or lies inside, one of
 *   `allowedRoots` (the caller opted into those explicitly).
 *
 * Anything else is silently ignored. All errors are swallowed on purpose:
 * cleanup must never fail the phase (fail-open).
 *
 * @param dir Directory to remove.
 * @param allowedRoots Extra roots, besides the OS tmpdir, under which deletion
 *   is permitted.
 */
function rmrf(dir: string, allowedRoots?: string[]): void {
  try {
    const target = path.resolve(dir);
    const tmpRoot = path.resolve(os.tmpdir());

    // Hard stop: the shared OS temp directory itself is never a valid target,
    // even if a caller lists it in `allowedRoots`.
    if (target === tmpRoot) return;

    const isInside = (root: string): boolean => target.startsWith(root + path.sep);
    const extraRoots = (allowedRoots ?? []).map((r) => path.resolve(r));
    const contained =
      isInside(tmpRoot) || extraRoots.some((root) => target === root || isInside(root));
    if (!contained) return; // refuse to delete outside our own allocation roots

    fs.rmSync(target, { recursive: true, force: true });
  } catch {
    /* fail-open: best-effort cleanup */
  }
}
|
|
88
|
+
|
|
89
|
+
/**
 * Report whether `dir` lies inside a git work tree.
 *
 * Runs `git -C <dir> rev-parse --is-inside-work-tree` with a 5 s timeout and
 * answers `true` only when git exits 0 and prints `true`. Every other outcome
 * (non-zero exit, different output, a thrown error) yields `false`.
 */
function isGitRepo(dir: string): boolean {
  try {
    const probe = spawnSync("git", ["-C", dir, "rev-parse", "--is-inside-work-tree"], {
      encoding: "utf-8",
      timeout: 5000,
    });
    if (probe.status !== 0) return false;
    return String(probe.stdout).trim() === "true";
  } catch {
    return false;
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Resolve the top-level directory of the git work tree that contains `dir`.
 *
 * Runs `git -C <dir> rev-parse --show-toplevel` with a 5 s timeout.
 *
 * @returns The trimmed path git printed, or `undefined` when git exits
 *   non-zero, prints nothing, or the call throws.
 */
function gitToplevel(dir: string): string | undefined {
  try {
    const probe = spawnSync("git", ["-C", dir, "rev-parse", "--show-toplevel"], {
      encoding: "utf-8",
      timeout: 5000,
    });
    if (probe.status !== 0) return undefined;
    const top = String(probe.stdout).trim();
    return top === "" ? undefined : top;
  } catch {
    return undefined;
  }
}
|
|
115
|
+
|
|
116
|
+
/** Inputs to `allocateWorkspace`. */
interface AllocOpts {
  /** Directory the phase would run in if no isolation were requested. */
  baseCwd: string;
  /** Id of the run; used in `dedicated` directory paths and in worktree branch names. */
  runId: string;
  /** Id of the phase; used as a path segment and as part of the branch name. */
  phaseId: string;
  /** Root of the run state directory (`runsDir(cwd)`), under which `dedicated` workspaces live. */
  runsRoot: string;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Allocate an isolated workspace for a phase.
 *
 * Always returns a usable handle (fail-open): when allocation fails the phase
 * runs in a degraded workspace and `note` records what actually happened.
 *
 * - `"temp"`: a fresh directory under the OS tmpdir, removed on teardown.
 *   On failure, falls back to `baseCwd`.
 * - `"dedicated"`: `<runsRoot>/ws/<runId>/<phaseId>` (both ids sanitized),
 *   created if missing and never removed by teardown. The path is the same
 *   for the same (runId, phaseId). On failure, falls back to `baseCwd`.
 * - `"worktree"`: `git worktree add -b <throwaway branch> <tmp dir> HEAD`.
 *   Teardown removes the worktree and deletes the branch. When `baseCwd` is
 *   not in a git work tree, or git fails, it degrades to `"temp"`; if that
 *   fails too, it degrades to `baseCwd`.
 *
 * @param kind Which reserved keyword was requested.
 * @param opts Base cwd, run/phase ids and the run state root.
 * @returns The workspace handle; `kind` reflects what was really allocated.
 */
export function allocateWorkspace(kind: WorkspaceKind, opts: AllocOpts): Workspace {
  const { baseCwd, runId, phaseId, runsRoot } = opts;
  const seg = safeSegment(phaseId);

  if (kind === "temp") {
    try {
      const dir = fs.mkdtempSync(path.join(os.tmpdir(), `pi-tf-ws-${seg}-`));
      return { dir, kind: "temp", teardown: () => rmrf(dir) };
    } catch (e) {
      return inherited(baseCwd, `temp workspace alloc failed: ${errMsg(e)}`);
    }
  }

  if (kind === "dedicated") {
    try {
      // Deterministic per (runId, phaseId) → resume reuses the same dir.
      const dir = path.join(runsRoot, "ws", safeSegment(runId), seg);
      fs.mkdirSync(dir, { recursive: true });
      // Persistent by design: teardown is a no-op (kept for inspection).
      return { dir, kind: "dedicated", teardown() {} };
    } catch (e) {
      return inherited(baseCwd, `dedicated workspace alloc failed: ${errMsg(e)}`);
    }
  }

  // kind === "worktree"

  // Degrade to a temp dir and report truthfully what the phase ended up with.
  // The temp allocation can itself fail and fall back to the base cwd; in that
  // case the note must not claim a temp dir was used.
  const degradeToTemp = (reason: string, tempSuffix: string): Workspace => {
    const fb = allocateWorkspace("temp", opts);
    if (fb.kind === "temp") return { ...fb, note: `${reason}${tempSuffix}` };
    const detail = fb.note ? `; ${fb.note}` : "";
    return { ...fb, note: `${reason}${detail}; running in the base cwd` };
  };

  if (!isGitRepo(baseCwd)) {
    // Fail-open: not a git repo → the phase still gets isolation, just
    // without git semantics.
    return degradeToTemp(
      "worktree requested but base cwd is not a git work tree",
      "; used a temp dir instead",
    );
  }

  const top = gitToplevel(baseCwd) ?? baseCwd;

  // safeSegment output is path-safe but not always a valid git ref component:
  // git rejects ".." anywhere in a ref name and any component ending in
  // ".lock". Fold both so an unusual id does not make `worktree add` fail.
  const refSafe = (s: string): string => s.replace(/\.{2,}/g, "_").replace(/\.lock$/, "_lock");
  const branch = `tf/${refSafe(safeSegment(runId))}/${refSafe(seg)}-${Date.now().toString(36)}`;

  let dir: string;
  try {
    dir = fs.mkdtempSync(path.join(os.tmpdir(), `pi-tf-wt-${seg}-`));
  } catch (e) {
    return degradeToTemp(`worktree temp path alloc failed: ${errMsg(e)}`, "");
  }

  // `git worktree add -b <branch> <dir>` creates the dir's contents itself, so
  // remove the empty mkdtemp dir first and let git recreate it.
  rmrf(dir);
  const add = spawnSync("git", ["-C", top, "worktree", "add", "-b", branch, dir, "HEAD"], {
    encoding: "utf-8",
    timeout: 60000,
  });
  if (add.status !== 0) {
    // Clean up anything git may have left behind before degrading.
    rmrf(dir);
    return degradeToTemp(
      `git worktree add failed (${String(add.stderr).trim().slice(0, 200)})`,
      "; used a temp dir instead",
    );
  }

  const teardown = () => {
    // Remove the worktree, then delete its throwaway branch. Both best-effort.
    try {
      spawnSync("git", ["-C", top, "worktree", "remove", "--force", dir], { timeout: 30000 });
    } catch {
      /* fall through to rmrf */
    }
    rmrf(dir);
    try {
      spawnSync("git", ["-C", top, "branch", "-D", branch], { timeout: 10000 });
    } catch {
      /* fail-open: leftover branch is harmless */
    }
  };
  return { dir, kind: "worktree", branch, teardown };
}
|
|
203
|
+
|
|
204
|
+
/**
 * Extract a printable message from a caught value: the `message` of an
 * `Error`, otherwise the value's string form.
 */
function errMsg(e: unknown): string {
  if (e instanceof Error) {
    return e.message;
  }
  return String(e);
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-taskflow",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.23",
|
|
4
4
|
"description": "A declarative, verifiable graph of task nodes for the Pi coding agent — not a workflow you script, but a DAG you declare: statically verified before it runs, with dynamic fan-out, gates, isolated subagent context, resumable runs, and saveable commands.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -37,8 +37,12 @@
|
|
|
37
37
|
],
|
|
38
38
|
"scripts": {
|
|
39
39
|
"typecheck": "tsc --noEmit",
|
|
40
|
-
"test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test test
|
|
40
|
+
"test": "PI_TASKFLOW_BUILTIN_AGENTS_DIR= node --experimental-strip-types --test 'test/*.test.ts'",
|
|
41
41
|
"test:e2e": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e.mts",
|
|
42
|
+
"test:e2e-context": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e-context.mts",
|
|
43
|
+
"test:e2e-context-value": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e-context-value.mts",
|
|
44
|
+
"test:e2e-team": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e-team.mts",
|
|
45
|
+
"test:e2e-spawn-subflow": "PI_TASKFLOW_PI_BIN=pi node --experimental-strip-types test/e2e-spawn-subflow.mts",
|
|
42
46
|
"test:dogfood-cache": "node --experimental-strip-types test/dogfood-cache.mts"
|
|
43
47
|
},
|
|
44
48
|
"pi": {
|
package/skills/taskflow/SKILL.md
CHANGED
|
@@ -253,6 +253,34 @@ of several drafts, or a synthesis of diverse approaches.
|
|
|
253
253
|
}
|
|
254
254
|
```
|
|
255
255
|
|
|
256
|
+
### Workspace isolation (`cwd` keywords)
|
|
257
|
+
|
|
258
|
+
A phase's `cwd` is normally a literal path (or inherited from the run). Three
|
|
259
|
+
**reserved keywords** instead ask the runtime to allocate an isolated working
|
|
260
|
+
directory for the phase's subagent and tear it down afterwards — so a phase can
|
|
261
|
+
do scratch work, or mutate files, without touching the main tree:
|
|
262
|
+
|
|
263
|
+
| `cwd` value | what the runtime does | lifecycle |
|
|
264
|
+
|-------------|-----------------------|-----------|
|
|
265
|
+
| `"temp"` | makes an ephemeral dir under the OS tmpdir | removed when the phase finishes |
|
|
266
|
+
| `"dedicated"` | makes a persistent dir under the run state (`runs/ws/<runId>/<phaseId>`) | **kept** for inspection; deterministic per phase (resume reuses it) |
|
|
267
|
+
| `"worktree"` | `git worktree add` on a throwaway branch off `HEAD` | `git worktree remove` + branch delete when the phase finishes |
|
|
268
|
+
|
|
269
|
+
```jsonc
|
|
270
|
+
{ "id": "experiment", "type": "agent", "agent": "executor", "cwd": "worktree",
|
|
271
|
+
"task": "Try the risky refactor and run the tests. Your edits are isolated in a git worktree." }
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
- **Fail-open.** If allocation fails (e.g. `worktree` requested but the repo
|
|
275
|
+
isn't a git work tree), the phase degrades — `worktree`→`temp`, and any other
|
|
276
|
+
failure → the base cwd — and records a `warnings` diagnostic. A phase never
|
|
277
|
+
fails to run because of isolation.
|
|
278
|
+
- **Security.** The keywords are honoured only in **author-written** flows.
|
|
279
|
+
An LLM-authored sub-flow (`flow{def}` / `ctx_spawn` subflow) that asks for a
|
|
280
|
+
reserved keyword is **rejected at validation** — generated plans cannot
|
|
281
|
+
allocate worktrees or temp dirs that mutate the repo.
|
|
282
|
+
- A literal path is passed through unchanged (fully backward-compatible).
|
|
283
|
+
|
|
256
284
|
### Budget (cost / token caps)
|
|
257
285
|
|
|
258
286
|
Add a run-wide ceiling at the top level. When accumulated cost/tokens exceed it,
|
|
@@ -434,6 +462,82 @@ Use the shorthand if you literally just want `a → b → c → d`:
|
|
|
434
462
|
…or write the full DAG with explicit `dependsOn` (so reviewers/fixers can run
|
|
435
463
|
in parallel against multiple review streams when you want that).
|
|
436
464
|
|
|
465
|
+
### Shared Context Tree (blackboard + supervision) — opt-in
|
|
466
|
+
|
|
467
|
+
By default subagents are fully isolated: they share nothing and only return a
|
|
468
|
+
final output string. Opt a phase into the **Shared Context Tree** with
|
|
469
|
+
`shareContext: true` (or set `contextSharing: true` at the flow level for every
|
|
470
|
+
phase) to give its subagent four extra tools backed by a per-run, file-based
|
|
471
|
+
blackboard:
|
|
472
|
+
|
|
473
|
+
| tool | direction | use |
|
|
474
|
+
|------|-----------|-----|
|
|
475
|
+
| `ctx_write(key, value)` | horizontal | publish a finding so siblings/descendants can reuse it (avoid re-reading the same files) |
|
|
476
|
+
| `ctx_read(key?)` | horizontal | read findings visible to this node: its own + ancestors' + **completed** other nodes' (omit `key` to list all) |
|
|
477
|
+
| `ctx_report(summary, structured?)` | vertical ↑ | report a result upward to the parent |
|
|
478
|
+
| `ctx_spawn(assignments[])` | vertical ↓ | delegate child tasks; after this node finishes the runtime runs each child (isolated) and **folds their reports into this phase's output**. Each assignment is either a flat `{task, agent?}` OR a `{subflow, defaultAgent?}` — an inline plan `{phases:[...]}` (a dependency-bearing DAG) the runtime validates and runs as a nested sub-flow |
|
|
479
|
+
|
|
480
|
+
Visibility is eventually-consistent: a sibling's findings become visible once
|
|
481
|
+
that sibling **completes** (a running sibling's half-written blackboard is
|
|
482
|
+
hidden). On key conflicts, a node's own findings take precedence over its ancestors', which in turn take precedence over those of other completed nodes.
|
|
483
|
+
|
|
484
|
+
Use it when fan-out items share expensive context (one map item maps the repo,
|
|
485
|
+
the rest read its findings), or when a task should discover work at runtime and
|
|
486
|
+
delegate it (`ctx_spawn`) rather than the author pre-declaring every branch.
|
|
487
|
+
|
|
488
|
+
**Spawning a sub-graph (not just flat tasks).** A `ctx_spawn` assignment can be
|
|
489
|
+
a whole inline plan instead of a single task — use `subflow` when the delegated
|
|
490
|
+
work has multiple coordinated steps with dependencies:
|
|
491
|
+
|
|
492
|
+
```jsonc
|
|
493
|
+
ctx_spawn({ assignments: [
|
|
494
|
+
{ task: "quick standalone check", agent: "analyst" }, // flat task
|
|
495
|
+
{ subflow: { // a DAG
|
|
496
|
+
phases: [
|
|
497
|
+
{ id: "scan", type: "agent", agent: "scout", task: "list endpoints" },
|
|
498
|
+
{ id: "audit", type: "map", over: "{steps.scan.json}", task: "audit {item}", dependsOn: ["scan"] },
|
|
499
|
+
{ id: "sum", type: "reduce", from: ["audit"], task: "summarize", dependsOn: ["audit"], final: true }
|
|
500
|
+
]
|
|
501
|
+
},
|
|
502
|
+
defaultAgent: "analyst" // inner phases without their own `agent` use this
|
|
503
|
+
}
|
|
504
|
+
] })
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
The subflow is validated (cycles / dangling refs / dead-ends) before it runs;
|
|
508
|
+
a bad plan fails **open** (a diagnostic is folded into the report, the run
|
|
509
|
+
continues). `agent` (flat task) = who executes; `defaultAgent` (subflow) =
|
|
510
|
+
fallback for inner phases — different fields because the semantics differ.
|
|
511
|
+
Nesting is bounded: spawn-subflows and `flow{def}` share one depth counter
|
|
512
|
+
capped at `MAX_DYNAMIC_NESTING` (5), so neither can multiply with the other.
|
|
513
|
+
|
|
514
|
+
```jsonc
|
|
515
|
+
{ "id": "survey", "type": "agent", "agent": "scout", "shareContext": true,
|
|
516
|
+
"task": "Map the API surface. ctx_write key 'endpoints' with the JSON list so the auditors don't re-scan." },
|
|
517
|
+
{ "id": "audit", "type": "map", "over": "{steps.survey.json}", "shareContext": true,
|
|
518
|
+
"dependsOn": ["survey"], "agent": "analyst",
|
|
519
|
+
"task": "ctx_read 'endpoints' for shared context, then audit {item} for missing auth." }
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
Guards & limits: ids used with sharing must match `[A-Za-z0-9._-]+`; keys are
|
|
523
|
+
`[A-Za-z0-9._-]` (≤128 chars); values ≤256 KB; ≤256 keys/node; `ctx_spawn`
|
|
524
|
+
≤16 tasks/call, task ≤64 KB, depth-capped at 5. All bookkeeping is fail-open
|
|
525
|
+
(it can never sink a phase) and the per-run blackboard is cleaned up with the
|
|
526
|
+
run. Backward compatible: flows that don't opt in behave exactly as before.
|
|
527
|
+
|
|
528
|
+
You do **not** need to teach the tools in your `task` text — enabling
|
|
529
|
+
`shareContext` auto-appends usage guidance to the subagent's system prompt
|
|
530
|
+
(read-first discipline, publish reusable findings, report up, delegate on
|
|
531
|
+
fan-out). Mentioning a specific key in the task (e.g. "ctx_write the endpoint
|
|
532
|
+
list under 'endpoints'") just makes the cross-phase contract explicit.
|
|
533
|
+
|
|
534
|
+
**Producer tip (learned from real runs):** the phase that *publishes* shared
|
|
535
|
+
context should be a **capable** agent (high thinking), and the `ctx_write`
|
|
536
|
+
should be framed as its **primary deliverable** ("if you did not call ctx_write
|
|
537
|
+
you failed the task"). A fast / `thinking: off` agent asked to "survey AND
|
|
538
|
+
ctx_write" will often do the survey and skip the write. Consumers (the agents
|
|
539
|
+
that `ctx_read`) can be lighter — reading is a single reliable step.
|
|
540
|
+
|
|
437
541
|
## Configuration
|
|
438
542
|
|
|
439
543
|
For the full set of knobs — per-phase `model`/`thinking`/`tools`/`cwd`, the
|