pi-taskflow 0.0.22 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +77 -0
- package/README.md +174 -46
- package/extensions/context-store.ts +447 -0
- package/extensions/index.ts +135 -0
- package/extensions/runner.ts +96 -3
- package/extensions/runtime.ts +310 -13
- package/extensions/schema.ts +34 -6
- package/extensions/store.ts +17 -4
- package/extensions/workspace.ts +206 -0
- package/package.json +6 -2
- package/skills/taskflow/SKILL.md +104 -0
package/extensions/runner.ts
CHANGED
|
@@ -60,6 +60,14 @@ export interface RunOptions {
|
|
|
60
60
|
* the prior behaviour (no idle timeout). Defaults to DEFAULT_IDLE_TIMEOUT_MS.
|
|
61
61
|
*/
|
|
62
62
|
idleTimeoutMs?: number;
|
|
63
|
+
/**
|
|
64
|
+
* Shared Context Tree (opt-in). When set, the spawned subagent receives
|
|
65
|
+
* PI_TASKFLOW_CTX_DIR + PI_TASKFLOW_NODE_ID in its environment and is loaded
|
|
66
|
+
* with this extension via `--extension`, so it can register the ctx_* tools
|
|
67
|
+
* (read/write/report/spawn) that read & write the per-run blackboard.
|
|
68
|
+
*/
|
|
69
|
+
ctxDir?: string;
|
|
70
|
+
nodeId?: string;
|
|
63
71
|
}
|
|
64
72
|
|
|
65
73
|
/**
|
|
@@ -71,6 +79,41 @@ export interface RunOptions {
|
|
|
71
79
|
*/
|
|
72
80
|
const DEFAULT_IDLE_TIMEOUT_MS = 5 * 60_000;
|
|
73
81
|
|
|
82
|
+
/**
 * Names of the Shared Context Tree tools a subagent may invoke while context
 * sharing is enabled. Declared `as const` so the elements keep their literal
 * string types.
 */
export const CTX_TOOL_NAMES = [
	"ctx_read",
	"ctx_write",
	"ctx_report",
	"ctx_spawn",
] as const;
|
|
84
|
+
|
|
85
|
+
/**
 * Prompt text appended to a subagent's system prompt when the Shared Context
 * Tree is enabled for its phase. Registering the ctx_* tools only makes them
 * AVAILABLE; this text is what nudges the model to actually USE them with the
 * intended discipline (read before exploring, publish reusable findings,
 * report upward, delegate when work fans out). Deliberately short and
 * imperative.
 *
 * The text is assembled from four paragraphs separated by a blank line; lines
 * within a paragraph are joined with a single newline.
 */
export const CTX_TOOLS_GUIDANCE = (() => {
	const heading = ["## Shared Context Tree (you are part of a coordinated team of agents)"];
	const intro = [
		"You are one agent in a tree working a shared goal, with a shared blackboard",
		"and an upward report channel. Use these tools deliberately \u2014 they save tokens",
		"and prevent the team from duplicating work:",
	];
	const toolRules = [
		"- ctx_read(key?): BEFORE exploring the codebase or re-reading files, call",
		" ctx_read with no arguments to see what teammates already discovered. If a",
		" finding you need already exists, REUSE it instead of re-deriving it.",
		"- ctx_write(key, value): when you discover something other agents will likely",
		" need (a file map, an endpoint list, an interface, a config value), publish it",
		" under a short key (e.g. 'endpoints', 'db.schema'). Keep values concise and",
		" structured (JSON) so others can consume them directly.",
		"- ctx_report(summary, structured?): when you finish, report your result upward",
		" so the parent task and downstream steps can see it. Lead with the outcome.",
		"- ctx_spawn(assignments[]): if you discover the work should fan out into",
		" independent sub-tasks, delegate them as child agents. They run after you",
		" finish and their reports are folded back into your output. Only spawn when it",
		" genuinely parallelizes \u2014 otherwise just do the work yourself.",
	];
	const defaultHabit = [
		"Default habit: ctx_read first, do the work (reusing shared findings), ctx_write",
		"anything reusable, then ctx_report your result.",
	];
	return [heading, intro, toolRules, defaultHabit]
		.map((paragraph) => paragraph.join("\n"))
		.join("\n\n");
})();
|
|
116
|
+
|
|
74
117
|
/**
 * Report whether a finished run counts as a failure: either the process exit
 * code is non-zero, or the stop reason is "error" or "aborted".
 */
export function isFailed(r: RunResult): boolean {
	if (r.exitCode !== 0) return true;
	const reason = r.stopReason;
	return reason === "error" || reason === "aborted";
}
|
|
@@ -281,6 +324,25 @@ function getPiInvocation(args: string[]): { command: string; args: string[] } {
|
|
|
281
324
|
return { command: "pi", args };
|
|
282
325
|
}
|
|
283
326
|
|
|
327
|
+
/**
 * Resolve the path to this extension's entry file, so a spawned subagent can be
 * launched with `--extension <path>` and register the ctx_* tools.
 *
 * Resolution order:
 *  1. `PI_TASKFLOW_EXT_PATH` from the environment, returned verbatim when set.
 *  2. `index.ts` next to this module, if that file exists on disk.
 *
 * @returns the entry-file path, or `undefined` if it cannot be resolved (the
 *   subagent then simply runs without the ctx tools — fail-open: context
 *   sharing degrades to "no sharing").
 */
export function ctxExtensionPath(): string | undefined {
	const override = process.env.PI_TASKFLOW_EXT_PATH;
	if (override) return override;
	try {
		// `URL#pathname` is percent-encoded ("my%20dir") and, for Windows file
		// URLs, carries a leading slash before the drive letter ("/C:/..."). Used
		// raw, the existence check below fails for install paths containing spaces
		// or non-ASCII characters and for every Windows path, silently disabling
		// context sharing. Decode and normalise it into a real filesystem path.
		// (Equivalent in intent to `fileURLToPath` from `node:url`, written inline
		// so this block needs no additional import.)
		let modulePath = decodeURIComponent(new URL(import.meta.url).pathname);
		if (process.platform === "win32" && /^\/[A-Za-z]:/.test(modulePath)) {
			modulePath = modulePath.slice(1);
		}
		const entry = path.join(path.dirname(modulePath), "index.ts");
		if (fs.existsSync(entry)) return entry;
	} catch {
		/* fall through — malformed URL/encoding or fs error: degrade to "no sharing" */
	}
	return undefined;
}
|
|
345
|
+
|
|
284
346
|
/**
|
|
285
347
|
* Run a single subagent task. Resolves the agent from `agents` by name and
|
|
286
348
|
* spawns an isolated pi process, returning structured output + usage.
|
|
@@ -310,7 +372,15 @@ export async function runAgentTask(
|
|
|
310
372
|
|
|
311
373
|
const model = opts.model ?? agent.model;
|
|
312
374
|
const thinking = opts.thinking ?? agent.thinking ?? globalThinking;
|
|
313
|
-
const
|
|
375
|
+
const ctxEnabledEarly = Boolean(opts.ctxDir && opts.nodeId);
|
|
376
|
+
let tools = opts.tools ?? agent.tools;
|
|
377
|
+
// If the agent restricts tools to a whitelist, the ctx_* tools we register
|
|
378
|
+
// would be filtered out by `--tools` even though they're registered. When
|
|
379
|
+
// context sharing is on, extend the whitelist so the subagent can actually
|
|
380
|
+
// call them. (No whitelist = all tools available = nothing to do.)
|
|
381
|
+
if (ctxEnabledEarly && tools && tools.length > 0) {
|
|
382
|
+
tools = [...new Set([...tools, ...CTX_TOOL_NAMES])];
|
|
383
|
+
}
|
|
314
384
|
|
|
315
385
|
const args: string[] = ["--mode", "json", "-p", "--no-session"];
|
|
316
386
|
if (model) args.push("--model", model);
|
|
@@ -332,18 +402,40 @@ export async function runAgentTask(
|
|
|
332
402
|
};
|
|
333
403
|
|
|
334
404
|
try {
|
|
335
|
-
|
|
405
|
+
const ctxEnabled = Boolean(opts.ctxDir && opts.nodeId);
|
|
406
|
+
// Build the appended system prompt = the agent's own prompt PLUS, when the
|
|
407
|
+
// Shared Context Tree is enabled for this phase, a guidance block that tells
|
|
408
|
+
// the subagent the ctx_* tools exist and the discipline for using them.
|
|
409
|
+
// Without this the model only sees terse tool descriptions and rarely uses
|
|
410
|
+
// them proactively (capability != usage).
|
|
411
|
+
const appendedPrompt = [agent.systemPrompt.trim(), ctxEnabled ? CTX_TOOLS_GUIDANCE : ""]
|
|
412
|
+
.filter(Boolean)
|
|
413
|
+
.join("\n\n");
|
|
414
|
+
if (appendedPrompt) {
|
|
336
415
|
// Allocate the temp dir + path BEFORE any fallible I/O so that if
|
|
337
416
|
// writeFile throws, tmpPromptDir/tmpPromptPath are already set and
|
|
338
417
|
// the finally block can clean up the directory (F-004).
|
|
339
418
|
tmpPromptDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), "pi-taskflow-"));
|
|
340
419
|
const safeName = agent.name.replace(/[^\w.-]+/g, "_");
|
|
341
420
|
tmpPromptPath = path.join(tmpPromptDir, `prompt-${safeName}.md`);
|
|
342
|
-
await writePromptToTempFile(tmpPromptPath,
|
|
421
|
+
await writePromptToTempFile(tmpPromptPath, appendedPrompt);
|
|
343
422
|
args.push("--append-system-prompt", tmpPromptPath);
|
|
344
423
|
}
|
|
345
424
|
args.push(`Task: ${task}`);
|
|
346
425
|
|
|
426
|
+
// Shared Context Tree opt-in: load THIS extension into the subagent so it
|
|
427
|
+
// can register the ctx_* tools, and pass the blackboard dir + node id via
|
|
428
|
+
// env. `--extension` is the explicit, self-documenting fallback that does
|
|
429
|
+
// not rely on the subagent auto-discovering user/project extensions in
|
|
430
|
+
// `-p` mode. The env vars drive the dual-identity branch in index.ts.
|
|
431
|
+
const ctxEnv: Record<string, string> = {};
|
|
432
|
+
if (opts.ctxDir && opts.nodeId) {
|
|
433
|
+
const selfPath = ctxExtensionPath();
|
|
434
|
+
if (selfPath) args.push("--extension", selfPath);
|
|
435
|
+
ctxEnv.PI_TASKFLOW_CTX_DIR = opts.ctxDir;
|
|
436
|
+
ctxEnv.PI_TASKFLOW_NODE_ID = opts.nodeId;
|
|
437
|
+
}
|
|
438
|
+
|
|
347
439
|
let wasAborted = false;
|
|
348
440
|
let idleTimedOut = false;
|
|
349
441
|
let killedBySignal: string | undefined;
|
|
@@ -353,6 +445,7 @@ export async function runAgentTask(
|
|
|
353
445
|
cwd: opts.cwd ?? defaultCwd,
|
|
354
446
|
shell: false,
|
|
355
447
|
stdio: ["ignore", "pipe", "pipe"],
|
|
448
|
+
env: { ...process.env, ...ctxEnv },
|
|
356
449
|
});
|
|
357
450
|
if (proc.pid) activeChildren.add(proc.pid);
|
|
358
451
|
let buffer = "";
|
package/extensions/runtime.ts
CHANGED
|
@@ -18,8 +18,10 @@ import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, r
|
|
|
18
18
|
import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
|
|
19
19
|
import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, MAX_DYNAMIC_MAP_ITEMS, MAX_DYNAMIC_NESTING, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode, validateTaskflow } from "./schema.ts";
|
|
20
20
|
import { verifyTaskflow } from "./verify.ts";
|
|
21
|
-
import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
|
|
21
|
+
import { hashInput, newRunId, type PhaseState, type RunState, runsDir } from "./store.ts";
|
|
22
22
|
import { CacheStore, resolveFingerprint } from "./cache.ts";
|
|
23
|
+
import { ctxDirFor, drainPendingSpawns, initCtxDir, registerNode, setNodeStatus, type SpawnAssignment } from "./context-store.ts";
|
|
24
|
+
import { allocateWorkspace, isWorkspaceKeyword, type Workspace } from "./workspace.ts";
|
|
23
25
|
|
|
24
26
|
/** A human-in-the-loop approval request raised by an `approval` phase. */
|
|
25
27
|
export interface ApprovalRequest {
|
|
@@ -55,6 +57,10 @@ export interface RuntimeDeps {
|
|
|
55
57
|
cacheStore?: CacheStore;
|
|
56
58
|
/** Internal: sub-flow call stack, for recursion detection. */
|
|
57
59
|
_stack?: string[];
|
|
60
|
+
/** Internal: pre-resolved Shared Context Tree dir for this run (sub-flows inherit the parent's). */
|
|
61
|
+
_ctxDir?: string;
|
|
62
|
+
/** Internal: an isolated workspace dir override for the current phase (worktree isolation). */
|
|
63
|
+
_cwdOverride?: string;
|
|
58
64
|
}
|
|
59
65
|
|
|
60
66
|
export interface RuntimeResult {
|
|
@@ -372,7 +378,179 @@ async function resolvePhaseContext(
|
|
|
372
378
|
return result;
|
|
373
379
|
}
|
|
374
380
|
|
|
381
|
+
/**
 * (Documentation for `runSpawnedChildren`, which is defined further down in
 * this file — not for the `SpawnedResult` interface that follows.)
 *
 * Supervision loop: run the child tasks a parent node queued via ctx_spawn.
 * Each child is an isolated subagent registered under the parent in the tree.
 * Children themselves may share context (and recursively spawn, up to the depth
 * cap enforced inside the ctx_spawn tool). Returns a markdown block of the
 * children's reports to fold into the parent phase's output, or undefined.
 *
 * Fail-open: a child failure is recorded in its report text but never throws.
 */
|
|
390
|
+
/** Outcome of running the children a node queued via ctx_spawn. */
interface SpawnedResult {
	/**
	 * Markdown block of the children's reports, ready to be appended to the
	 * parent's output; `undefined` when no child produced a report.
	 */
	reports: string | undefined;
	/** Aggregated token usage of the children that ran. */
	usage: UsageStats;
}
|
|
395
|
+
|
|
396
|
+
/**
 * (Documentation for `runInlineSubflow`, which is defined below
 * `resolveEffCwd` — not for `resolveEffCwd` itself.)
 *
 * Run an inline sub-flow queued via `ctx_spawn({subflow})`. Reuses the SAME
 * validation + execution machinery as a `flow{def}` phase (normalizeInlineDef →
 * validateTaskflow(dynamic) → verifyTaskflow → nested executeTaskflow), so a
 * spawned DAG is held to the same safety bar as an author-written one.
 *
 * Crucially it extends `deps._stack` with a `def:spawn-<childNodeId>` frame so
 * the existing inline-nesting guard counts spawn-subflows AND flow{def} on the
 * SAME counter — neither axis can independently reach MAX_DYNAMIC_NESTING and
 * multiply with the other (verdict Issue 1). Failures are fail-open: a bad
 * subflow returns a diagnostic string, never throws.
 */
|
|
408
|
+
/**
 * Compute the directory a phase actually executes in.
 *
 * Precedence:
 *  1. `deps._cwdOverride` — an allocated workspace dir (set by the executePhase
 *     wrapper for isolated `temp`/`dedicated`/`worktree` cwds).
 *  2. `deps.cwd` when `phase.cwd` is a reserved workspace keyword — keywords are
 *     resolved upstream into a real dir and must never reach a runner.
 *  3. The literal `phase.cwd`, falling back to `deps.cwd` when it is unset.
 *
 * Single source of truth — do not inline this formula (divergence here caused
 * two isolation-leak bugs in the 0.0.23 review).
 */
function resolveEffCwd(deps: RuntimeDeps, phase: Phase): string {
	if (deps._cwdOverride != null) return deps._cwdOverride;
	if (isWorkspaceKeyword(phase.cwd)) return deps.cwd;
	return phase.cwd ?? deps.cwd;
}
|
|
419
|
+
|
|
420
|
+
/**
 * Run an inline sub-flow that a node queued via `ctx_spawn({subflow})`.
 *
 * The spec goes through the same pipeline as the visible code applies here:
 * normalizeInlineDef → validateTaskflow (with `dynamic: true`) → verifyTaskflow
 * → clampSubFlowBudget → nested executeTaskflow. The nested run's `_stack` is
 * extended with a `def:spawn-<childNodeId>` frame so the `def:`-prefixed
 * nesting counter checked at the top of this function also counts spawned
 * sub-flows.
 *
 * Fail-open: every rejection or failure is returned as a parenthesised
 * diagnostic string in `output`; this function does not throw.
 *
 * @param subflowSpec   Untrusted sub-flow definition supplied by the spawning agent.
 * @param defaultAgent  Agent assigned to inner phases that do not name their own.
 * @param childNodeId   Context-tree node id of the spawned child; also used to
 *                      name the normalized def and the recursion-stack frame.
 * @param phase         The parent phase (used to resolve the working directory).
 * @param deps          Runtime dependencies of the parent run.
 * @param state         The parent run's state (source of the budget clamp and flow name).
 * @returns the sub-flow's final output (or a diagnostic) plus the usage summed
 *          over its phases.
 */
async function runInlineSubflow(
	subflowSpec: unknown,
	defaultAgent: string | undefined,
	childNodeId: string,
	phase: Phase,
	deps: RuntimeDeps,
	state: RunState,
): Promise<{ output: string; usage: UsageStats }> {
	const stack = deps._stack ?? [];
	const inlineDepth = stack.filter((s) => s.startsWith("def:")).length;
	if (inlineDepth >= MAX_DYNAMIC_NESTING) {
		return { output: `(spawned subflow rejected: nesting exceeded MAX_DYNAMIC_NESTING (${MAX_DYNAMIC_NESTING}))`, usage: emptyUsage() };
	}
	const wrapped = normalizeInlineDef(subflowSpec, childNodeId);
	if (!wrapped) return { output: "(spawned subflow is not a Taskflow / phases array)", usage: emptyUsage() };
	if (wrapped.phases.length === 0) return { output: "(spawned subflow had zero phases — no-op)", usage: emptyUsage() };
	// Inner phases without their own agent inherit the assignment's defaultAgent.
	if (defaultAgent) {
		for (const p of wrapped.phases as Phase[]) if (!p.agent) p.agent = defaultAgent;
	}
	// One directory anchors validation (cwd containment), the sub-run state and
	// the nested deps.
	const subflowCwd = resolveEffCwd(deps, phase);
	const v = validateTaskflow(wrapped, { dynamic: true, cwd: subflowCwd });
	if (!v.ok) return { output: `(spawned subflow failed validation: ${v.errors.join("; ")})`, usage: emptyUsage() };
	const ver = verifyTaskflow({ name: wrapped.name, phases: wrapped.phases as Phase[], budget: wrapped.budget, concurrency: wrapped.concurrency });
	if (!ver.ok) {
		const errs = ver.issues.filter((i) => i.severity === "error").map((i) => i.message);
		return { output: `(spawned subflow failed verification: ${errs.join("; ")})`, usage: emptyUsage() };
	}
	const subDef = clampSubFlowBudget(wrapped, state.def.budget);
	const now = Date.now();
	const subState: RunState = {
		runId: newRunId(subDef.name),
		flowName: subDef.name,
		def: subDef,
		args: resolveArgs(subDef, {}),
		status: "running",
		phases: {},
		createdAt: now,
		updatedAt: now,
		cwd: subflowCwd,
	};
	try {
		const subResult = await executeTaskflow(subState, {
			...deps,
			cwd: subflowCwd,
			// The parent phase's isolated workspace (if any) applies only to the
			// parent — each spawned sub-phase resolves its own cwd. Clear the
			// override so the whole subflow doesn't inherit the parent's dir
			// (mirrors the `flow` phase handler discipline).
			_cwdOverride: undefined,
			// Don't let spawned sub-phases persist the parent's run state.
			persist: undefined,
			// Unify the nesting counter across both recursion axes (verdict Issue 1).
			_stack: [...stack, state.flowName, `def:spawn-${childNodeId}`],
			_ctxDir: deps._ctxDir,
			onProgress: undefined,
		});
		// Sum every sub-phase's usage so the parent's budget guard sees spawn spend
		// (verdict Issue 2).
		const usage = aggregateUsage(Object.values(subResult.state.phases).map((p) => p.usage ?? emptyUsage()));
		return { output: subResult.finalOutput ?? "", usage };
	} catch (e) {
		// Sub-phases that completed before the failure still burned tokens; report
		// whatever was recorded on the state we handed in rather than zero, so the
		// parent's budget guard is not blinded by a late failure.
		// NOTE(review): assumes executeTaskflow records phase results on the state
		// object it is given; if it works on a copy this sums nothing, which matches
		// the previous behaviour.
		const spent = aggregateUsage(Object.values(subState.phases).map((p) => p.usage ?? emptyUsage()));
		return { output: `(spawned subflow failed: ${e instanceof Error ? e.message : String(e)})`, usage: spent };
	}
}
|
|
375
485
|
|
|
486
|
+
/**
 * Supervision loop: run the child tasks a parent node queued via ctx_spawn.
 *
 * Children run sequentially, at most MAX_DYNAMIC_MAP_ITEMS of them, and the
 * loop stops early once the run is aborted or over budget. Each child is either
 * an inline sub-flow (`a.subflow` set) or a single agent task. A single-agent
 * child may itself have queued spawns, which are drained and run recursively.
 *
 * Fail-open: a child failure is recorded in its report text and never throws.
 * Context-tree bookkeeping (registerNode / setNodeStatus / drainPendingSpawns)
 * is best-effort, so a bookkeeping error can neither abort the remaining
 * children nor replace a child's real output with an error string.
 *
 * @param assignments   Spawn intents queued by the parent node.
 * @param ctxDir        The run's Shared Context Tree directory.
 * @param parentNodeId  Tree node id of the spawning parent.
 * @param phase         The phase the parent belongs to (model/tools/cwd source).
 * @param deps          Runtime dependencies.
 * @param state         Run state, consulted for the budget guard.
 * @param run           Task runner used for single-agent children.
 * @returns a markdown block of child reports (undefined when none ran) and the
 *          aggregated usage of everything that ran.
 */
async function runSpawnedChildren(
	assignments: SpawnAssignment[],
	ctxDir: string,
	parentNodeId: string,
	phase: Phase,
	deps: RuntimeDeps,
	state: RunState,
	run: typeof runAgentTask,
): Promise<SpawnedResult> {
	// Context-tree bookkeeping must never sink a child or the batch.
	const bestEffort = (fn: () => void): void => {
		try {
			fn();
		} catch {
			/* fail-open */
		}
	};
	const capped = assignments.slice(0, MAX_DYNAMIC_MAP_ITEMS);
	const lines: string[] = [];
	const usages: UsageStats[] = [];
	// Effective cwd for flat spawned tasks: honour a workspace override and never
	// pass a reserved keyword through to the runner.
	const spawnCwd = resolveEffCwd(deps, phase);
	let idx = 0;
	for (const a of capped) {
		if (deps.signal?.aborted || overBudget(state).over) break;
		idx++;
		const childNodeId = `${parentNodeId}--c${idx}`.replace(/[^A-Za-z0-9._-]+/g, "_");
		const isSubflow = a.subflow !== undefined && a.subflow !== null;
		// Placeholder label until the agent is resolved inside the try below, so a
		// resolution failure is reported for this child instead of aborting the batch.
		let agentName = isSubflow ? "(subflow)" : "(unresolved)";
		bestEffort(() => {
			registerNode(ctxDir, childNodeId, `${phase.id}:spawn`, parentNodeId, "running");
		});
		let out = "";
		try {
			if (isSubflow) {
				const sub = await runInlineSubflow(a.subflow, a.defaultAgent ?? phase.agent, childNodeId, phase, deps, state);
				out = sub.output;
				usages.push(sub.usage);
				// NOTE(review): runInlineSubflow reports failures only through its
				// output text, so a failed sub-flow is still marked "done" here.
				bestEffort(() => {
					setNodeStatus(ctxDir, childNodeId, "done");
				});
			} else {
				agentName = resolveAgent(a.agent ?? phase.agent, deps, state);
				const r = await run(
					spawnCwd,
					deps.agents,
					agentName,
					a.task ?? "",
					{ model: phase.model, thinking: phase.thinking, tools: phase.tools, cwd: spawnCwd, signal: deps.signal, ctxDir, nodeId: childNodeId },
					deps.globalThinking,
				);
				out = r.output ?? "";
				if (r.usage) usages.push(r.usage);
				bestEffort(() => {
					setNodeStatus(ctxDir, childNodeId, isFailed(r) ? "failed" : "done");
				});
				// A child may itself have queued spawns — recurse (depth-capped by the tool).
				let grand: SpawnAssignment[] = [];
				bestEffort(() => {
					grand = drainPendingSpawns(ctxDir, childNodeId);
				});
				if (grand.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
					const rec = await runSpawnedChildren(grand, ctxDir, childNodeId, phase, deps, state, run);
					if (rec.reports) out += rec.reports;
					usages.push(rec.usage);
				}
			}
		} catch (e) {
			bestEffort(() => {
				setNodeStatus(ctxDir, childNodeId, "failed");
			});
			out = `(spawned child failed: ${e instanceof Error ? e.message : String(e)})`;
		}
		lines.push(`### spawned child ${idx} (${agentName})\n${out}`);
	}
	const usage = aggregateUsage(usages);
	if (lines.length === 0) return { reports: undefined, usage };
	return { reports: `\n\n<!-- ctx_spawn: ${lines.length} child report(s) -->\n${lines.join("\n\n")}`, usage };
}
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
/**
|
|
549
|
+
* Public phase executor. Resolves an isolated workspace when `phase.cwd` is a
|
|
550
|
+
* reserved keyword (`temp`/`dedicated`/`worktree`), runs the phase against it,
|
|
551
|
+
* and tears it down afterwards. All allocation is fail-open: a failed allocation
|
|
552
|
+
* degrades to the base cwd so a phase never fails to run because of isolation.
|
|
553
|
+
*/
|
|
376
554
|
async function executePhase(
|
|
377
555
|
phase: Phase,
|
|
378
556
|
state: RunState,
|
|
@@ -380,11 +558,75 @@ async function executePhase(
|
|
|
380
558
|
prior: PhaseState | undefined,
|
|
381
559
|
emitProgress: () => void,
|
|
382
560
|
_retryDepth = 0,
|
|
561
|
+
): Promise<PhaseState> {
|
|
562
|
+
// Non-keyword cwd (or none): no workspace lifecycle — run directly.
|
|
563
|
+
if (!isWorkspaceKeyword(phase.cwd)) {
|
|
564
|
+
return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth);
|
|
565
|
+
}
|
|
566
|
+
let ws: Workspace | undefined;
|
|
567
|
+
try {
|
|
568
|
+
ws = allocateWorkspace(phase.cwd, {
|
|
569
|
+
baseCwd: deps.cwd,
|
|
570
|
+
runId: state.runId,
|
|
571
|
+
phaseId: phase.id,
|
|
572
|
+
runsRoot: runsDir(deps.cwd),
|
|
573
|
+
});
|
|
574
|
+
} catch {
|
|
575
|
+
ws = undefined; // fail-open: run in the base cwd
|
|
576
|
+
}
|
|
577
|
+
const innerDeps: RuntimeDeps = ws ? { ...deps, _cwdOverride: ws.dir } : deps;
|
|
578
|
+
try {
|
|
579
|
+
const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth);
|
|
580
|
+
if (ws && (ws.kind !== "inherited" || ws.note)) {
|
|
581
|
+
const tag = ws.kind === "inherited" ? "workspace" : `workspace:${ws.kind}`;
|
|
582
|
+
const msg = ws.note ? `${tag} — ${ws.note}` : `${tag} at ${ws.dir}`;
|
|
583
|
+
ps.warnings = [...(ps.warnings ?? []), msg];
|
|
584
|
+
}
|
|
585
|
+
return ps;
|
|
586
|
+
} finally {
|
|
587
|
+
try {
|
|
588
|
+
ws?.teardown();
|
|
589
|
+
} catch {
|
|
590
|
+
/* fail-open: teardown best-effort */
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
async function executePhaseInner(
|
|
596
|
+
phase: Phase,
|
|
597
|
+
state: RunState,
|
|
598
|
+
deps: RuntimeDeps,
|
|
599
|
+
prior: PhaseState | undefined,
|
|
600
|
+
emitProgress: () => void,
|
|
601
|
+
_retryDepth = 0,
|
|
383
602
|
): Promise<PhaseState> {
|
|
384
603
|
const type = phase.type ?? "agent";
|
|
385
604
|
const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
|
|
386
605
|
const previousOutput = lastCompletedOutput(state, phase);
|
|
387
606
|
const run = deps.runTask ?? runAgentTask;
|
|
607
|
+
// Effective working directory for THIS phase's execution. When an isolated
|
|
608
|
+
// workspace was allocated (worktree isolation), `_cwdOverride` is its dir and
|
|
609
|
+
// takes precedence; otherwise a literal `phase.cwd` (non-keyword) or the run
|
|
610
|
+
// cwd is used. Keyword cwds are never passed to a runner (they're resolved
|
|
611
|
+
// upstream in the executePhase wrapper).
|
|
612
|
+
const effCwd = resolveEffCwd(deps, phase);
|
|
613
|
+
|
|
614
|
+
// Shared Context Tree opt-in (per-phase or flow-wide). When on, the subagent
|
|
615
|
+
// gets ctx_* tools backed by a per-run blackboard directory. nodeId is
|
|
616
|
+
// deterministic per phase so a resume re-uses the same tree node (idempotent
|
|
617
|
+
// upsert in registerNode prevents duplication). Sub-items (map/parallel) get
|
|
618
|
+
// a suffixed nodeId so concurrent siblings write to distinct findings files.
|
|
619
|
+
const sharing = (phase.shareContext ?? state.def.contextSharing) === true;
|
|
620
|
+
let ctxDir: string | undefined;
|
|
621
|
+
if (sharing) {
|
|
622
|
+
try {
|
|
623
|
+
ctxDir = deps._ctxDir ?? initCtxDir(ctxDirFor(runsDir(deps.cwd), state.runId));
|
|
624
|
+
} catch {
|
|
625
|
+
ctxDir = undefined; // fail-open: degrade to no sharing
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
const nodeIdFor = (suffix?: string): string =>
|
|
629
|
+
`${phase.id}${suffix ? `-${suffix}` : ""}`.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
388
630
|
|
|
389
631
|
// Resolve context pre-read files once, before any type branching.
|
|
390
632
|
// The content is prepended to every task so the subagent never spends
|
|
@@ -399,7 +641,7 @@ async function executePhase(
|
|
|
399
641
|
const cc: PhaseCacheCtx = {
|
|
400
642
|
scope: cacheScope,
|
|
401
643
|
ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
|
|
402
|
-
fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint,
|
|
644
|
+
fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint, effCwd) : "",
|
|
403
645
|
store: deps.cacheStore ?? new CacheStore(deps.cwd),
|
|
404
646
|
prior,
|
|
405
647
|
phaseId: phase.id,
|
|
@@ -410,9 +652,9 @@ async function executePhase(
|
|
|
410
652
|
preRead,
|
|
411
653
|
};
|
|
412
654
|
|
|
413
|
-
const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
|
|
655
|
+
const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void, ctxNodeId?: string) =>
|
|
414
656
|
run(
|
|
415
|
-
|
|
657
|
+
effCwd,
|
|
416
658
|
deps.agents,
|
|
417
659
|
agentName,
|
|
418
660
|
task,
|
|
@@ -420,9 +662,11 @@ async function executePhase(
|
|
|
420
662
|
model: phase.model,
|
|
421
663
|
thinking: phase.thinking,
|
|
422
664
|
tools: phase.tools,
|
|
423
|
-
cwd:
|
|
665
|
+
cwd: effCwd,
|
|
424
666
|
signal: deps.signal,
|
|
425
667
|
onLive,
|
|
668
|
+
ctxDir: ctxDir,
|
|
669
|
+
nodeId: ctxDir ? ctxNodeId : undefined,
|
|
426
670
|
},
|
|
427
671
|
deps.globalThinking,
|
|
428
672
|
);
|
|
@@ -439,7 +683,7 @@ async function executePhase(
|
|
|
439
683
|
const DEFAULT_TRANSIENT_RETRIES = 3;
|
|
440
684
|
const DEFAULT_TRANSIENT_BACKOFF_MS = 2000;
|
|
441
685
|
const DEFAULT_TRANSIENT_FACTOR = 2;
|
|
442
|
-
const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
|
|
686
|
+
const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void, ctxNodeId?: string): Promise<RunResult> => {
|
|
443
687
|
const explicitMax = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
|
|
444
688
|
// Allow enough attempts to cover whichever policy applies on a given attempt.
|
|
445
689
|
const maxAttempts = Math.max(explicitMax, 1 + DEFAULT_TRANSIENT_RETRIES);
|
|
@@ -447,7 +691,7 @@ async function executePhase(
|
|
|
447
691
|
let last: RunResult | undefined;
|
|
448
692
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
449
693
|
if (deps.signal?.aborted) break;
|
|
450
|
-
last = await baseRun(agentName, task, onLive);
|
|
694
|
+
last = await baseRun(agentName, task, onLive, ctxNodeId);
|
|
451
695
|
usages.push(last.usage);
|
|
452
696
|
// B6: aggregate and surface cumulative usage before the retry decision,
|
|
453
697
|
// so the TUI / budget guard see the in-flight spend on every attempt.
|
|
@@ -537,16 +781,37 @@ async function executePhase(
|
|
|
537
781
|
}
|
|
538
782
|
running++;
|
|
539
783
|
refresh();
|
|
784
|
+
if (ctxDir) {
|
|
785
|
+
try { registerNode(ctxDir, nodeIdFor(String(idx)), phase.id, undefined, "running"); } catch { /* fail-open */ }
|
|
786
|
+
}
|
|
540
787
|
const r = await runOne(it.agent, it.task, (l) => {
|
|
541
788
|
liveUsages[idx] = l.usage;
|
|
542
789
|
if (l.text) latestText = l.text;
|
|
543
790
|
if (l.model) latestModel = l.model;
|
|
544
791
|
refresh();
|
|
545
|
-
});
|
|
792
|
+
}, ctxDir ? nodeIdFor(String(idx)) : undefined);
|
|
546
793
|
running--;
|
|
547
794
|
done++;
|
|
548
795
|
if (isFailed(r)) failed++;
|
|
549
796
|
liveUsages[idx] = r.usage;
|
|
797
|
+
if (ctxDir) {
|
|
798
|
+
try {
|
|
799
|
+
const itemNid = nodeIdFor(String(idx));
|
|
800
|
+
setNodeStatus(ctxDir, itemNid, isFailed(r) ? "failed" : "done");
|
|
801
|
+
// A fan-out item may itself ctx_spawn children. Without this drain a
|
|
802
|
+
// map/parallel item's spawn intents are silently orphaned (the
|
|
803
|
+
// post-run drain below only covers single-agent phases).
|
|
804
|
+
const spawned = drainPendingSpawns(ctxDir, itemNid);
|
|
805
|
+
if (spawned.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
|
|
806
|
+
const child = await runSpawnedChildren(spawned, ctxDir, itemNid, phase, deps, state, run);
|
|
807
|
+
if (child.reports) r.output = `${r.output ?? ""}${child.reports}`;
|
|
808
|
+
if (child.usage) {
|
|
809
|
+
r.usage = aggregateUsage([r.usage ?? emptyUsage(), child.usage]);
|
|
810
|
+
liveUsages[idx] = r.usage;
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
} catch { /* fail-open */ }
|
|
814
|
+
}
|
|
550
815
|
refresh();
|
|
551
816
|
return r;
|
|
552
817
|
});
|
|
@@ -606,11 +871,32 @@ async function executePhase(
|
|
|
606
871
|
const cached = cachedPhase(cc, inputHash);
|
|
607
872
|
if (cached) return cached;
|
|
608
873
|
|
|
609
|
-
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
|
|
874
|
+
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress), nodeIdFor());
|
|
610
875
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
611
876
|
if (refWarning) ps.warnings = [...(ps.warnings ?? []), refWarning];
|
|
612
877
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
613
878
|
|
|
879
|
+
// Shared Context Tree: register this node, mark its terminal status, and
|
|
880
|
+
// pick up any ctx_spawn intents the subagent queued. The spawned child
|
|
881
|
+
// tasks run here (supervision loop) and their reports are folded into this
|
|
882
|
+
// phase's output so the parent — and downstream phases — can see them.
|
|
883
|
+
if (ctxDir) {
|
|
884
|
+
try {
|
|
885
|
+
const nid = nodeIdFor();
|
|
886
|
+
registerNode(ctxDir, nid, phase.id, undefined, ps.status === "failed" ? "failed" : "done");
|
|
887
|
+
const spawned = drainPendingSpawns(ctxDir, nid);
|
|
888
|
+
if (spawned.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
|
|
889
|
+
const child = await runSpawnedChildren(spawned, ctxDir, nid, phase, deps, state, run);
|
|
890
|
+
if (child.reports) ps.output = `${ps.output ?? ""}${child.reports}`;
|
|
891
|
+
// Fold spawned spend into this phase's usage so the run-wide budget
|
|
892
|
+
// guard accounts for it (verdict Issue 2).
|
|
893
|
+
ps.usage = aggregateUsage([ps.usage ?? emptyUsage(), child.usage]);
|
|
894
|
+
}
|
|
895
|
+
} catch {
|
|
896
|
+
/* fail-open: context-tree bookkeeping must never sink the phase */
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
|
|
614
900
|
// onBlock:retry — re-execute upstream + gate until pass or max attempts.
|
|
615
901
|
if (type === "gate" && ps.gate?.verdict === "block") {
|
|
616
902
|
const onBlockV: string = phase.onBlock ?? "halt";
|
|
@@ -624,10 +910,16 @@ async function executePhase(
|
|
|
624
910
|
// H2: cap nested retry depth to prevent exponential re-execution
|
|
625
911
|
// when a gate's upstream dependency is itself a gate with onBlock:retry
|
|
626
912
|
if (_retryDepth < MAX_RETRY_DEPTH) {
|
|
913
|
+
// Re-executing upstream deps must NOT inherit this gate's isolated
|
|
914
|
+
// workspace — each dep resolves its own cwd. Strip the override.
|
|
915
|
+
// NOTE: we intentionally pass the gate's `prior` (not the dep's own
|
|
916
|
+
// completed state) so the dep does NOT cache-hit and actually
|
|
917
|
+
// RE-RUNS — re-running upstream is the whole point of onBlock:retry.
|
|
918
|
+
const { _cwdOverride: _dropGateWs, ...depsForUpstream } = deps;
|
|
627
919
|
for (const depId of phase.dependsOn ?? []) {
|
|
628
920
|
const d = state.def.phases.find((p) => p.id === depId);
|
|
629
921
|
if (!d) continue;
|
|
630
|
-
const dPs = await executePhase(d, state,
|
|
922
|
+
const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1);
|
|
631
923
|
state.phases[depId] = dPs;
|
|
632
924
|
}
|
|
633
925
|
}
|
|
@@ -814,7 +1106,7 @@ async function executePhase(
|
|
|
814
1106
|
}
|
|
815
1107
|
// Validate with `dynamic` hardening (breadth caps + cwd containment) since
|
|
816
1108
|
// this content is LLM-authored / untrusted. cwd anchors containment checks.
|
|
817
|
-
const dynCwd =
|
|
1109
|
+
const dynCwd = effCwd;
|
|
818
1110
|
const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
|
|
819
1111
|
if (!v.ok) {
|
|
820
1112
|
return defFailOpen(`inline def failed validation: ${v.errors.join("; ")}`);
|
|
@@ -873,7 +1165,7 @@ async function executePhase(
|
|
|
873
1165
|
phases: {},
|
|
874
1166
|
createdAt: Date.now(),
|
|
875
1167
|
updatedAt: Date.now(),
|
|
876
|
-
cwd:
|
|
1168
|
+
cwd: effCwd,
|
|
877
1169
|
};
|
|
878
1170
|
// B8: pass this flow phase's preRead content to every sub-flow phase by
|
|
879
1171
|
// wrapping runTask — sub-phase preRead still gets prepended on top of it.
|
|
@@ -885,9 +1177,14 @@ async function executePhase(
|
|
|
885
1177
|
// Override deps.cwd with the flow phase's own cwd so that sub-flow
|
|
886
1178
|
// phases without an explicit cwd derive their subagents from the
|
|
887
1179
|
// flow's cwd (not the caller's cwd).
|
|
888
|
-
cwd:
|
|
1180
|
+
cwd: effCwd,
|
|
1181
|
+
// The workspace override applies only to THIS flow phase, not to the
|
|
1182
|
+
// nested sub-phases (each resolves its own cwd). Clear it so the child
|
|
1183
|
+
// phases don't all inherit this phase's isolated dir as an override.
|
|
1184
|
+
_cwdOverride: undefined,
|
|
889
1185
|
runTask: subRunTask,
|
|
890
1186
|
_stack: hasDef ? [...stack, state.flowName, recursionKey] : [...stack, state.flowName],
|
|
1187
|
+
_ctxDir: ctxDir ?? deps._ctxDir,
|
|
891
1188
|
persist: undefined,
|
|
892
1189
|
onProgress: () => {
|
|
893
1190
|
if (live) {
|