pi-taskflow 0.0.21 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +91 -0
- package/README.md +174 -46
- package/extensions/approval-view.ts +11 -57
- package/extensions/context-store.ts +447 -0
- package/extensions/index.ts +142 -3
- package/extensions/interpolate.ts +18 -7
- package/extensions/runner.ts +96 -3
- package/extensions/runs-view.ts +69 -3
- package/extensions/runtime.ts +331 -16
- package/extensions/schema.ts +34 -6
- package/extensions/store.ts +17 -4
- package/extensions/workspace.ts +206 -0
- package/package.json +6 -2
- package/skills/taskflow/SKILL.md +104 -0
package/extensions/runtime.ts
CHANGED
|
@@ -18,8 +18,10 @@ import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, r
|
|
|
18
18
|
import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
|
|
19
19
|
import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, MAX_DYNAMIC_MAP_ITEMS, MAX_DYNAMIC_NESTING, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode, validateTaskflow } from "./schema.ts";
|
|
20
20
|
import { verifyTaskflow } from "./verify.ts";
|
|
21
|
-
import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
|
|
21
|
+
import { hashInput, newRunId, type PhaseState, type RunState, runsDir } from "./store.ts";
|
|
22
22
|
import { CacheStore, resolveFingerprint } from "./cache.ts";
|
|
23
|
+
import { ctxDirFor, drainPendingSpawns, initCtxDir, registerNode, setNodeStatus, type SpawnAssignment } from "./context-store.ts";
|
|
24
|
+
import { allocateWorkspace, isWorkspaceKeyword, type Workspace } from "./workspace.ts";
|
|
23
25
|
|
|
24
26
|
/** A human-in-the-loop approval request raised by an `approval` phase. */
|
|
25
27
|
export interface ApprovalRequest {
|
|
@@ -55,6 +57,10 @@ export interface RuntimeDeps {
|
|
|
55
57
|
cacheStore?: CacheStore;
|
|
56
58
|
/** Internal: sub-flow call stack, for recursion detection. */
|
|
57
59
|
_stack?: string[];
|
|
60
|
+
/** Internal: pre-resolved Shared Context Tree dir for this run (sub-flows inherit the parent's). */
|
|
61
|
+
_ctxDir?: string;
|
|
62
|
+
/** Internal: an isolated workspace dir override for the current phase (worktree isolation). */
|
|
63
|
+
_cwdOverride?: string;
|
|
58
64
|
}
|
|
59
65
|
|
|
60
66
|
export interface RuntimeResult {
|
|
@@ -87,8 +93,7 @@ function buildInterpolationContext(
|
|
|
87
93
|
return { args: state.args, steps, previousOutput, locals };
|
|
88
94
|
}
|
|
89
95
|
|
|
90
|
-
function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState {
|
|
91
|
-
const failed = isFailed(r);
|
|
96
|
+
function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJson: boolean): PhaseState { const failed = isFailed(r);
|
|
92
97
|
const attempts = attemptsOf(r);
|
|
93
98
|
// For failed phases, embed the error info in the output so downstream
|
|
94
99
|
// phases (and the user) can see what went wrong. The raw r.output is
|
|
@@ -110,6 +115,22 @@ function resultToPhaseState(id: string, r: RunResult, inputHash: string, parseJs
|
|
|
110
115
|
};
|
|
111
116
|
}
|
|
112
117
|
|
|
118
|
+
/**
 * Report interpolation placeholders that could not be resolved (the
 * `missing[]` list produced by `interpolate()`).
 *
 * The placeholders are de-duplicated (first occurrence wins the position),
 * wrapped in braces, and joined into a single message. The message is written
 * to the console with a `[taskflow] phase '<id>':` prefix and also returned so
 * the caller can append it to `PhaseState.warnings`.
 *
 * @param phaseId - Id of the phase whose task contained the placeholders.
 * @param missing - Unresolved placeholder names; duplicates are allowed.
 * @returns The warning text (without the console prefix), or `undefined` when
 *   `missing` is empty — in which case nothing is logged.
 */
function warnUnresolvedRefs(phaseId: string, missing: string[]): string | undefined {
	if (missing.length === 0) return undefined;

	// Set preserves insertion order, so the message lists refs in first-seen order.
	const placeholders = Array.from(new Set(missing), (ref) => `{${ref}}`);
	const msg = `unresolved refs in task: ${placeholders.join(", ")} — left intact (check dependsOn / placeholder spelling)`;

	console.warn(`[taskflow] phase '${phaseId}': ${msg}`);
	return msg;
}
|
|
133
|
+
|
|
113
134
|
/** Attempts recorded by the retry wrapper (defaults to 1). */
|
|
114
135
|
function attemptsOf(r: RunResult): number {
|
|
115
136
|
const a = r.attempts;
|
|
@@ -357,7 +378,179 @@ async function resolvePhaseContext(
|
|
|
357
378
|
return result;
|
|
358
379
|
}
|
|
359
380
|
|
|
381
|
+
/**
|
|
382
|
+
 * Supervision loop (this comment documents `runSpawnedChildren`, defined further below): run the child tasks a parent node queued via ctx_spawn.
|
|
383
|
+
* Each child is an isolated subagent registered under the parent in the tree.
|
|
384
|
+
* Children themselves may share context (and recursively spawn, up to the depth
|
|
385
|
+
* cap enforced inside the ctx_spawn tool). Returns a markdown block of the
|
|
386
|
+
* children's reports to fold into the parent phase's output, or undefined.
|
|
387
|
+
*
|
|
388
|
+
* Fail-open: a child failure is recorded in its report text but never throws.
|
|
389
|
+
*/
|
|
390
|
+
/**
 * Outcome of running the children a node queued via `ctx_spawn`: the text to
 * fold into the parent's output plus the token usage the children consumed.
 */
interface SpawnedResult {
	/** Markdown block of child reports, or `undefined` when no child ran. */
	reports: string | undefined;
	/** Aggregated usage across every child that ran (including nested spawns). */
	usage: UsageStats;
}
|
|
395
|
+
|
|
396
|
+
/**
|
|
397
|
+
 * (This comment documents `runInlineSubflow`, defined after `resolveEffCwd` below.) Run an inline sub-flow queued via `ctx_spawn({subflow})`. Reuses the SAME
|
|
398
|
+
* validation + execution machinery as a `flow{def}` phase (normalizeInlineDef →
|
|
399
|
+
* validateTaskflow(dynamic) → verifyTaskflow → nested executeTaskflow), so a
|
|
400
|
+
* spawned DAG is held to the same safety bar as an author-written one.
|
|
401
|
+
*
|
|
402
|
+
* Crucially it extends `deps._stack` with a `def:spawn-<childNodeId>` frame so
|
|
403
|
+
* the existing inline-nesting guard counts spawn-subflows AND flow{def} on the
|
|
404
|
+
* SAME counter — neither axis can independently reach MAX_DYNAMIC_NESTING and
|
|
405
|
+
* multiply with the other (verdict Issue 1). Failures are fail-open: a bad
|
|
406
|
+
* subflow returns a diagnostic string, never throws.
|
|
407
|
+
*/
|
|
408
|
+
/**
 * Compute the directory a phase actually executes in.
 *
 * Resolution order:
 *  1. `deps._cwdOverride` — set when an isolated workspace was allocated for
 *     the phase; it always wins.
 *  2. `deps.cwd` when `phase.cwd` is a reserved workspace keyword, so the
 *     keyword itself is never handed to a runner as if it were a path.
 *  3. The literal `phase.cwd`, falling back to `deps.cwd` when unset.
 *
 * Keep this as the single place the formula lives; the original author notes
 * that inlined copies diverged and caused isolation leaks during the 0.0.23
 * review.
 *
 * @param deps - Runtime dependencies carrying the run cwd and optional override.
 * @param phase - The phase whose working directory is being resolved.
 * @returns The directory to pass to runners for this phase.
 */
function resolveEffCwd(deps: RuntimeDeps, phase: Phase): string {
	const override = deps._cwdOverride;
	if (override != null) return override;

	if (isWorkspaceKeyword(phase.cwd)) return deps.cwd;

	return phase.cwd ?? deps.cwd;
}
|
|
419
|
+
|
|
420
|
+
/**
 * Execute an inline sub-flow that a node queued through `ctx_spawn({subflow})`.
 *
 * Steps, in order:
 *  1. Reject when the call stack already holds `MAX_DYNAMIC_NESTING` frames
 *     prefixed with `def:`.
 *  2. Normalise the spec with `normalizeInlineDef`; reject non-flows and flows
 *     with zero phases.
 *  3. Give every inner phase that has no agent the supplied `defaultAgent`
 *     (this mutates the normalised definition's phase objects in place).
 *  4. Validate in `dynamic` mode against the phase's effective cwd, then run
 *     `verifyTaskflow`; reject on either failure.
 *  5. Clamp the sub-flow budget against the parent flow's budget and run it via
 *     a nested `executeTaskflow`.
 *
 * Rejections, and any exception thrown by the nested `executeTaskflow`, are
 * returned as a parenthesised diagnostic string in `output` with empty usage.
 *
 * @param subflowSpec - The raw spec supplied by the spawning node.
 * @param defaultAgent - Agent assigned to inner phases that name none.
 * @param childNodeId - Context-tree node id of the spawned child; also used
 *   for the `def:spawn-<id>` stack frame.
 * @param phase - The parent phase (source of the effective cwd).
 * @param deps - Runtime dependencies of the parent phase.
 * @param state - Run state of the parent flow.
 * @returns The sub-flow's final output (or a diagnostic) and the summed usage
 *   of all of its phases.
 */
async function runInlineSubflow(
	subflowSpec: unknown,
	defaultAgent: string | undefined,
	childNodeId: string,
	phase: Phase,
	deps: RuntimeDeps,
	state: RunState,
): Promise<{ output: string; usage: UsageStats }> {
	// Every early exit has the same shape: a diagnostic and zero usage.
	const rejected = (output: string): { output: string; usage: UsageStats } => ({ output, usage: emptyUsage() });

	const callStack = deps._stack ?? [];
	let inlineFrames = 0;
	for (const frame of callStack) {
		if (frame.startsWith("def:")) inlineFrames++;
	}
	if (inlineFrames >= MAX_DYNAMIC_NESTING) {
		return rejected(`(spawned subflow rejected: nesting exceeded MAX_DYNAMIC_NESTING (${MAX_DYNAMIC_NESTING}))`);
	}

	const inlineDef = normalizeInlineDef(subflowSpec, childNodeId);
	if (!inlineDef) return rejected("(spawned subflow is not a Taskflow / phases array)");
	if (inlineDef.phases.length === 0) return rejected("(spawned subflow had zero phases — no-op)");

	// Inner phases without their own agent inherit the assignment's defaultAgent.
	if (defaultAgent) {
		for (const inner of inlineDef.phases as Phase[]) {
			if (!inner.agent) inner.agent = defaultAgent;
		}
	}

	const subCwd = resolveEffCwd(deps, phase);

	const validation = validateTaskflow(inlineDef, { dynamic: true, cwd: subCwd });
	if (!validation.ok) {
		return rejected(`(spawned subflow failed validation: ${validation.errors.join("; ")})`);
	}

	const verification = verifyTaskflow({
		name: inlineDef.name,
		phases: inlineDef.phases as Phase[],
		budget: inlineDef.budget,
		concurrency: inlineDef.concurrency,
	});
	if (!verification.ok) {
		const messages = verification.issues
			.filter((issue) => issue.severity === "error")
			.map((issue) => issue.message);
		return rejected(`(spawned subflow failed verification: ${messages.join("; ")})`);
	}

	const subDef = clampSubFlowBudget(inlineDef, state.def.budget);
	const subState: RunState = {
		runId: newRunId(subDef.name),
		flowName: subDef.name,
		def: subDef,
		args: resolveArgs(subDef, {}),
		status: "running",
		phases: {},
		createdAt: Date.now(),
		updatedAt: Date.now(),
		cwd: subCwd,
	};

	try {
		const nested = await executeTaskflow(subState, {
			...deps,
			cwd: subCwd,
			// The parent's isolated workspace applies to the parent only; each
			// sub-phase resolves its own cwd, so the override is cleared.
			_cwdOverride: undefined,
			// Sub-phases must not persist the parent's run state.
			persist: undefined,
			// The `def:` frame makes spawned sub-flows count toward the same
			// nesting limit checked at the top of this function.
			_stack: [...callStack, state.flowName, `def:spawn-${childNodeId}`],
			_ctxDir: deps._ctxDir,
			onProgress: undefined,
		});
		// Sum every sub-phase's usage so the caller can fold it into the parent.
		const perPhase = Object.values(nested.state.phases).map((p) => p.usage ?? emptyUsage());
		return { output: nested.finalOutput ?? "", usage: aggregateUsage(perPhase) };
	} catch (e) {
		return rejected(`(spawned subflow failed: ${e instanceof Error ? e.message : String(e)})`);
	}
}
|
|
360
485
|
|
|
486
|
+
/**
 * Supervision loop: run the child tasks a node queued via `ctx_spawn`.
 *
 * At most `MAX_DYNAMIC_MAP_ITEMS` assignments are run, sequentially; the loop
 * stops early when the run is aborted or `overBudget(state)` reports the budget
 * is exceeded. Each child is registered under `parentNodeId` in the context
 * tree and is either:
 *  - an inline sub-flow (`a.subflow` set) executed through `runInlineSubflow`
 *    and always marked "done" afterwards, or
 *  - a flat agent task executed through `run`, marked "failed"/"done" from
 *    `isFailed`, after which any spawns the child itself queued are drained
 *    and run recursively.
 *
 * Fail-open: a child failure is recorded in its report text and never throws.
 * To honour that, context-tree bookkeeping (`registerNode` / `setNodeStatus`)
 * is best-effort and agent resolution happens inside the guarded region, so a
 * bookkeeping or resolution error cannot abort the loop and discard the
 * reports and usage of children that already ran.
 *
 * @param assignments - Spawn intents drained for the parent node.
 * @param ctxDir - Context-tree directory for the run.
 * @param parentNodeId - Tree node id of the spawning node.
 * @param phase - The phase that owns the spawning node (source of model,
 *   thinking, tools and the fallback agent).
 * @param deps - Runtime dependencies (abort signal, agents, cwd override, …).
 * @param state - Run state, consulted for the budget check.
 * @param run - The task runner to use for flat agent tasks.
 * @returns A markdown block of child reports (or `undefined` when no child
 *   ran) together with the aggregated usage of everything that ran.
 */
async function runSpawnedChildren(
	assignments: SpawnAssignment[],
	ctxDir: string,
	parentNodeId: string,
	phase: Phase,
	deps: RuntimeDeps,
	state: RunState,
	run: typeof runAgentTask,
): Promise<SpawnedResult> {
	const capped = assignments.slice(0, MAX_DYNAMIC_MAP_ITEMS);
	if (capped.length < assignments.length) {
		// Surface the truncation instead of dropping queued work silently.
		console.warn(
			`[taskflow] phase '${phase.id}': ${assignments.length - capped.length} queued spawn(s) beyond MAX_DYNAMIC_MAP_ITEMS (${MAX_DYNAMIC_MAP_ITEMS}) were dropped`,
		);
	}
	const lines: string[] = [];
	const usages: UsageStats[] = [];
	// Effective cwd for flat spawned tasks: honour a workspace override and never
	// pass a reserved keyword through to the runner.
	const spawnCwd = resolveEffCwd(deps, phase);

	// Tree bookkeeping must never sink a child or the loop (fail-open).
	const markStatus = (nodeId: string, status: "done" | "failed"): void => {
		try {
			setNodeStatus(ctxDir, nodeId, status);
		} catch {
			/* fail-open: status bookkeeping is best-effort */
		}
	};

	let idx = 0;
	for (const a of capped) {
		if (deps.signal?.aborted || overBudget(state).over) break;
		idx++;
		const childNodeId = `${parentNodeId}--c${idx}`.replace(/[^A-Za-z0-9._-]+/g, "_");
		const isSubflow = a.subflow !== undefined && a.subflow !== null;
		// Placeholder label used in the report if agent resolution itself fails.
		let agentName = isSubflow ? "(subflow)" : (a.agent ?? phase.agent ?? "(unresolved)");
		try {
			registerNode(ctxDir, childNodeId, `${phase.id}:spawn`, parentNodeId, "running");
		} catch {
			/* fail-open: same discipline as the map/parallel fan-out path */
		}
		let out = "";
		try {
			if (isSubflow) {
				const sub = await runInlineSubflow(a.subflow, a.defaultAgent ?? phase.agent, childNodeId, phase, deps, state);
				out = sub.output;
				usages.push(sub.usage);
				markStatus(childNodeId, "done");
			} else {
				agentName = resolveAgent(a.agent ?? phase.agent, deps, state);
				const r = await run(
					spawnCwd,
					deps.agents,
					agentName,
					a.task ?? "",
					{ model: phase.model, thinking: phase.thinking, tools: phase.tools, cwd: spawnCwd, signal: deps.signal, ctxDir, nodeId: childNodeId },
					deps.globalThinking,
				);
				out = r.output ?? "";
				if (r.usage) usages.push(r.usage);
				markStatus(childNodeId, isFailed(r) ? "failed" : "done");
				// A child may itself have queued spawns — recurse. (The original
				// author notes the depth cap is enforced inside the ctx_spawn tool.)
				const grand = drainPendingSpawns(ctxDir, childNodeId);
				if (grand.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
					const rec = await runSpawnedChildren(grand, ctxDir, childNodeId, phase, deps, state, run);
					if (rec.reports) out += rec.reports;
					usages.push(rec.usage);
				}
			}
		} catch (e) {
			markStatus(childNodeId, "failed");
			out = `(spawned child failed: ${e instanceof Error ? e.message : String(e)})`;
		}
		lines.push(`### spawned child ${idx} (${agentName})\n${out}`);
	}
	const usage = aggregateUsage(usages);
	if (lines.length === 0) return { reports: undefined, usage };
	return { reports: `\n\n<!-- ctx_spawn: ${lines.length} child report(s) -->\n${lines.join("\n\n")}`, usage };
}
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
/**
|
|
549
|
+
* Public phase executor. Resolves an isolated workspace when `phase.cwd` is a
|
|
550
|
+
* reserved keyword (`temp`/`dedicated`/`worktree`), runs the phase against it,
|
|
551
|
+
* and tears it down afterwards. All allocation is fail-open: a failed allocation
|
|
552
|
+
* degrades to the base cwd so a phase never fails to run because of isolation.
|
|
553
|
+
*/
|
|
361
554
|
async function executePhase(
|
|
362
555
|
phase: Phase,
|
|
363
556
|
state: RunState,
|
|
@@ -365,11 +558,75 @@ async function executePhase(
|
|
|
365
558
|
prior: PhaseState | undefined,
|
|
366
559
|
emitProgress: () => void,
|
|
367
560
|
_retryDepth = 0,
|
|
561
|
+
): Promise<PhaseState> {
|
|
562
|
+
// Non-keyword cwd (or none): no workspace lifecycle — run directly.
|
|
563
|
+
if (!isWorkspaceKeyword(phase.cwd)) {
|
|
564
|
+
return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth);
|
|
565
|
+
}
|
|
566
|
+
let ws: Workspace | undefined;
|
|
567
|
+
try {
|
|
568
|
+
ws = allocateWorkspace(phase.cwd, {
|
|
569
|
+
baseCwd: deps.cwd,
|
|
570
|
+
runId: state.runId,
|
|
571
|
+
phaseId: phase.id,
|
|
572
|
+
runsRoot: runsDir(deps.cwd),
|
|
573
|
+
});
|
|
574
|
+
} catch {
|
|
575
|
+
ws = undefined; // fail-open: run in the base cwd
|
|
576
|
+
}
|
|
577
|
+
const innerDeps: RuntimeDeps = ws ? { ...deps, _cwdOverride: ws.dir } : deps;
|
|
578
|
+
try {
|
|
579
|
+
const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth);
|
|
580
|
+
if (ws && (ws.kind !== "inherited" || ws.note)) {
|
|
581
|
+
const tag = ws.kind === "inherited" ? "workspace" : `workspace:${ws.kind}`;
|
|
582
|
+
const msg = ws.note ? `${tag} — ${ws.note}` : `${tag} at ${ws.dir}`;
|
|
583
|
+
ps.warnings = [...(ps.warnings ?? []), msg];
|
|
584
|
+
}
|
|
585
|
+
return ps;
|
|
586
|
+
} finally {
|
|
587
|
+
try {
|
|
588
|
+
ws?.teardown();
|
|
589
|
+
} catch {
|
|
590
|
+
/* fail-open: teardown best-effort */
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
async function executePhaseInner(
|
|
596
|
+
phase: Phase,
|
|
597
|
+
state: RunState,
|
|
598
|
+
deps: RuntimeDeps,
|
|
599
|
+
prior: PhaseState | undefined,
|
|
600
|
+
emitProgress: () => void,
|
|
601
|
+
_retryDepth = 0,
|
|
368
602
|
): Promise<PhaseState> {
|
|
369
603
|
const type = phase.type ?? "agent";
|
|
370
604
|
const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
|
|
371
605
|
const previousOutput = lastCompletedOutput(state, phase);
|
|
372
606
|
const run = deps.runTask ?? runAgentTask;
|
|
607
|
+
// Effective working directory for THIS phase's execution. When an isolated
|
|
608
|
+
// workspace was allocated (worktree isolation), `_cwdOverride` is its dir and
|
|
609
|
+
// takes precedence; otherwise a literal `phase.cwd` (non-keyword) or the run
|
|
610
|
+
// cwd is used. Keyword cwds are never passed to a runner (they're resolved
|
|
611
|
+
// upstream in the executePhase wrapper).
|
|
612
|
+
const effCwd = resolveEffCwd(deps, phase);
|
|
613
|
+
|
|
614
|
+
// Shared Context Tree opt-in (per-phase or flow-wide). When on, the subagent
|
|
615
|
+
// gets ctx_* tools backed by a per-run blackboard directory. nodeId is
|
|
616
|
+
// deterministic per phase so a resume re-uses the same tree node (idempotent
|
|
617
|
+
// upsert in registerNode prevents duplication). Sub-items (map/parallel) get
|
|
618
|
+
// a suffixed nodeId so concurrent siblings write to distinct findings files.
|
|
619
|
+
const sharing = (phase.shareContext ?? state.def.contextSharing) === true;
|
|
620
|
+
let ctxDir: string | undefined;
|
|
621
|
+
if (sharing) {
|
|
622
|
+
try {
|
|
623
|
+
ctxDir = deps._ctxDir ?? initCtxDir(ctxDirFor(runsDir(deps.cwd), state.runId));
|
|
624
|
+
} catch {
|
|
625
|
+
ctxDir = undefined; // fail-open: degrade to no sharing
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
const nodeIdFor = (suffix?: string): string =>
|
|
629
|
+
`${phase.id}${suffix ? `-${suffix}` : ""}`.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
373
630
|
|
|
374
631
|
// Resolve context pre-read files once, before any type branching.
|
|
375
632
|
// The content is prepended to every task so the subagent never spends
|
|
@@ -384,7 +641,7 @@ async function executePhase(
|
|
|
384
641
|
const cc: PhaseCacheCtx = {
|
|
385
642
|
scope: cacheScope,
|
|
386
643
|
ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
|
|
387
|
-
fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint,
|
|
644
|
+
fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint, effCwd) : "",
|
|
388
645
|
store: deps.cacheStore ?? new CacheStore(deps.cwd),
|
|
389
646
|
prior,
|
|
390
647
|
phaseId: phase.id,
|
|
@@ -395,9 +652,9 @@ async function executePhase(
|
|
|
395
652
|
preRead,
|
|
396
653
|
};
|
|
397
654
|
|
|
398
|
-
const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
|
|
655
|
+
const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void, ctxNodeId?: string) =>
|
|
399
656
|
run(
|
|
400
|
-
|
|
657
|
+
effCwd,
|
|
401
658
|
deps.agents,
|
|
402
659
|
agentName,
|
|
403
660
|
task,
|
|
@@ -405,9 +662,11 @@ async function executePhase(
|
|
|
405
662
|
model: phase.model,
|
|
406
663
|
thinking: phase.thinking,
|
|
407
664
|
tools: phase.tools,
|
|
408
|
-
cwd:
|
|
665
|
+
cwd: effCwd,
|
|
409
666
|
signal: deps.signal,
|
|
410
667
|
onLive,
|
|
668
|
+
ctxDir: ctxDir,
|
|
669
|
+
nodeId: ctxDir ? ctxNodeId : undefined,
|
|
411
670
|
},
|
|
412
671
|
deps.globalThinking,
|
|
413
672
|
);
|
|
@@ -424,7 +683,7 @@ async function executePhase(
|
|
|
424
683
|
const DEFAULT_TRANSIENT_RETRIES = 3;
|
|
425
684
|
const DEFAULT_TRANSIENT_BACKOFF_MS = 2000;
|
|
426
685
|
const DEFAULT_TRANSIENT_FACTOR = 2;
|
|
427
|
-
const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
|
|
686
|
+
const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void, ctxNodeId?: string): Promise<RunResult> => {
|
|
428
687
|
const explicitMax = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
|
|
429
688
|
// Allow enough attempts to cover whichever policy applies on a given attempt.
|
|
430
689
|
const maxAttempts = Math.max(explicitMax, 1 + DEFAULT_TRANSIENT_RETRIES);
|
|
@@ -432,7 +691,7 @@ async function executePhase(
|
|
|
432
691
|
let last: RunResult | undefined;
|
|
433
692
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
434
693
|
if (deps.signal?.aborted) break;
|
|
435
|
-
last = await baseRun(agentName, task, onLive);
|
|
694
|
+
last = await baseRun(agentName, task, onLive, ctxNodeId);
|
|
436
695
|
usages.push(last.usage);
|
|
437
696
|
// B6: aggregate and surface cumulative usage before the retry decision,
|
|
438
697
|
// so the TUI / budget guard see the in-flight spend on every attempt.
|
|
@@ -522,16 +781,37 @@ async function executePhase(
|
|
|
522
781
|
}
|
|
523
782
|
running++;
|
|
524
783
|
refresh();
|
|
784
|
+
if (ctxDir) {
|
|
785
|
+
try { registerNode(ctxDir, nodeIdFor(String(idx)), phase.id, undefined, "running"); } catch { /* fail-open */ }
|
|
786
|
+
}
|
|
525
787
|
const r = await runOne(it.agent, it.task, (l) => {
|
|
526
788
|
liveUsages[idx] = l.usage;
|
|
527
789
|
if (l.text) latestText = l.text;
|
|
528
790
|
if (l.model) latestModel = l.model;
|
|
529
791
|
refresh();
|
|
530
|
-
});
|
|
792
|
+
}, ctxDir ? nodeIdFor(String(idx)) : undefined);
|
|
531
793
|
running--;
|
|
532
794
|
done++;
|
|
533
795
|
if (isFailed(r)) failed++;
|
|
534
796
|
liveUsages[idx] = r.usage;
|
|
797
|
+
if (ctxDir) {
|
|
798
|
+
try {
|
|
799
|
+
const itemNid = nodeIdFor(String(idx));
|
|
800
|
+
setNodeStatus(ctxDir, itemNid, isFailed(r) ? "failed" : "done");
|
|
801
|
+
// A fan-out item may itself ctx_spawn children. Without this drain a
|
|
802
|
+
// map/parallel item's spawn intents are silently orphaned (the
|
|
803
|
+
// post-run drain below only covers single-agent phases).
|
|
804
|
+
const spawned = drainPendingSpawns(ctxDir, itemNid);
|
|
805
|
+
if (spawned.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
|
|
806
|
+
const child = await runSpawnedChildren(spawned, ctxDir, itemNid, phase, deps, state, run);
|
|
807
|
+
if (child.reports) r.output = `${r.output ?? ""}${child.reports}`;
|
|
808
|
+
if (child.usage) {
|
|
809
|
+
r.usage = aggregateUsage([r.usage ?? emptyUsage(), child.usage]);
|
|
810
|
+
liveUsages[idx] = r.usage;
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
} catch { /* fail-open */ }
|
|
814
|
+
}
|
|
535
815
|
refresh();
|
|
536
816
|
return r;
|
|
537
817
|
});
|
|
@@ -582,17 +862,41 @@ async function executePhase(
|
|
|
582
862
|
return ps;
|
|
583
863
|
}
|
|
584
864
|
}
|
|
585
|
-
const
|
|
865
|
+
const interp = interpolate(phase.task ?? "", ctx);
|
|
866
|
+
const text = interp.text;
|
|
867
|
+
const refWarning = warnUnresolvedRefs(phase.id, interp.missing);
|
|
586
868
|
const fullTask = preRead + text;
|
|
587
869
|
const agentName = resolveAgent(phase.agent, deps, state);
|
|
588
870
|
const inputHash = cacheKey(cc, [phase.id, agentName, phase.model ?? "", fullTask]);
|
|
589
871
|
const cached = cachedPhase(cc, inputHash);
|
|
590
872
|
if (cached) return cached;
|
|
591
873
|
|
|
592
|
-
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
|
|
874
|
+
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress), nodeIdFor());
|
|
593
875
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
876
|
+
if (refWarning) ps.warnings = [...(ps.warnings ?? []), refWarning];
|
|
594
877
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
595
878
|
|
|
879
|
+
// Shared Context Tree: register this node, mark its terminal status, and
|
|
880
|
+
// pick up any ctx_spawn intents the subagent queued. The spawned child
|
|
881
|
+
// tasks run here (supervision loop) and their reports are folded into this
|
|
882
|
+
// phase's output so the parent — and downstream phases — can see them.
|
|
883
|
+
if (ctxDir) {
|
|
884
|
+
try {
|
|
885
|
+
const nid = nodeIdFor();
|
|
886
|
+
registerNode(ctxDir, nid, phase.id, undefined, ps.status === "failed" ? "failed" : "done");
|
|
887
|
+
const spawned = drainPendingSpawns(ctxDir, nid);
|
|
888
|
+
if (spawned.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
|
|
889
|
+
const child = await runSpawnedChildren(spawned, ctxDir, nid, phase, deps, state, run);
|
|
890
|
+
if (child.reports) ps.output = `${ps.output ?? ""}${child.reports}`;
|
|
891
|
+
// Fold spawned spend into this phase's usage so the run-wide budget
|
|
892
|
+
// guard accounts for it (verdict Issue 2).
|
|
893
|
+
ps.usage = aggregateUsage([ps.usage ?? emptyUsage(), child.usage]);
|
|
894
|
+
}
|
|
895
|
+
} catch {
|
|
896
|
+
/* fail-open: context-tree bookkeeping must never sink the phase */
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
|
|
596
900
|
// onBlock:retry — re-execute upstream + gate until pass or max attempts.
|
|
597
901
|
if (type === "gate" && ps.gate?.verdict === "block") {
|
|
598
902
|
const onBlockV: string = phase.onBlock ?? "halt";
|
|
@@ -606,10 +910,16 @@ async function executePhase(
|
|
|
606
910
|
// H2: cap nested retry depth to prevent exponential re-execution
|
|
607
911
|
// when a gate's upstream dependency is itself a gate with onBlock:retry
|
|
608
912
|
if (_retryDepth < MAX_RETRY_DEPTH) {
|
|
913
|
+
// Re-executing upstream deps must NOT inherit this gate's isolated
|
|
914
|
+
// workspace — each dep resolves its own cwd. Strip the override.
|
|
915
|
+
// NOTE: we intentionally pass the gate's `prior` (not the dep's own
|
|
916
|
+
// completed state) so the dep does NOT cache-hit and actually
|
|
917
|
+
// RE-RUNS — re-running upstream is the whole point of onBlock:retry.
|
|
918
|
+
const { _cwdOverride: _dropGateWs, ...depsForUpstream } = deps;
|
|
609
919
|
for (const depId of phase.dependsOn ?? []) {
|
|
610
920
|
const d = state.def.phases.find((p) => p.id === depId);
|
|
611
921
|
if (!d) continue;
|
|
612
|
-
const dPs = await executePhase(d, state,
|
|
922
|
+
const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1);
|
|
613
923
|
state.phases[depId] = dPs;
|
|
614
924
|
}
|
|
615
925
|
}
|
|
@@ -796,7 +1106,7 @@ async function executePhase(
|
|
|
796
1106
|
}
|
|
797
1107
|
// Validate with `dynamic` hardening (breadth caps + cwd containment) since
|
|
798
1108
|
// this content is LLM-authored / untrusted. cwd anchors containment checks.
|
|
799
|
-
const dynCwd =
|
|
1109
|
+
const dynCwd = effCwd;
|
|
800
1110
|
const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
|
|
801
1111
|
if (!v.ok) {
|
|
802
1112
|
return defFailOpen(`inline def failed validation: ${v.errors.join("; ")}`);
|
|
@@ -855,7 +1165,7 @@ async function executePhase(
|
|
|
855
1165
|
phases: {},
|
|
856
1166
|
createdAt: Date.now(),
|
|
857
1167
|
updatedAt: Date.now(),
|
|
858
|
-
cwd:
|
|
1168
|
+
cwd: effCwd,
|
|
859
1169
|
};
|
|
860
1170
|
// B8: pass this flow phase's preRead content to every sub-flow phase by
|
|
861
1171
|
// wrapping runTask — sub-phase preRead still gets prepended on top of it.
|
|
@@ -867,9 +1177,14 @@ async function executePhase(
|
|
|
867
1177
|
// Override deps.cwd with the flow phase's own cwd so that sub-flow
|
|
868
1178
|
// phases without an explicit cwd derive their subagents from the
|
|
869
1179
|
// flow's cwd (not the caller's cwd).
|
|
870
|
-
cwd:
|
|
1180
|
+
cwd: effCwd,
|
|
1181
|
+
// The workspace override applies only to THIS flow phase, not to the
|
|
1182
|
+
// nested sub-phases (each resolves its own cwd). Clear it so the child
|
|
1183
|
+
// phases don't all inherit this phase's isolated dir as an override.
|
|
1184
|
+
_cwdOverride: undefined,
|
|
871
1185
|
runTask: subRunTask,
|
|
872
1186
|
_stack: hasDef ? [...stack, state.flowName, recursionKey] : [...stack, state.flowName],
|
|
1187
|
+
_ctxDir: ctxDir ?? deps._ctxDir,
|
|
873
1188
|
persist: undefined,
|
|
874
1189
|
onProgress: () => {
|
|
875
1190
|
if (live) {
|
package/extensions/schema.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import * as path from "node:path";
|
|
9
9
|
import { StringEnum } from "@earendil-works/pi-ai";
|
|
10
10
|
import { Type, type Static } from "typebox";
|
|
11
|
+
import { WORKSPACE_KEYWORDS } from "./workspace.ts";
|
|
11
12
|
|
|
12
13
|
// ---------------------------------------------------------------------------
|
|
13
14
|
// Phase types
|
|
@@ -208,7 +209,7 @@ const PhaseSchema = Type.Object(
|
|
|
208
209
|
model: Type.Optional(Type.String({ description: "Model override for this phase" })),
|
|
209
210
|
thinking: Type.Optional(Type.String({ description: "Thinking level override for this phase" })),
|
|
210
211
|
tools: Type.Optional(Type.Array(Type.String(), { description: "Restrict tools for this phase's agent" })),
|
|
211
|
-
cwd: Type.Optional(Type.String({ description: "Working directory for this phase's subagent" })),
|
|
212
|
+
cwd: Type.Optional(Type.String({ description: "Working directory for this phase's subagent. A literal path, or a reserved keyword: 'temp' (ephemeral dir, removed after the phase), 'dedicated' (persistent dir under the run state, kept), or 'worktree' (a git worktree on a throwaway branch, removed after the phase)." })),
|
|
212
213
|
final: Type.Optional(Type.Boolean({ description: "Mark this phase's output as the workflow result" })),
|
|
213
214
|
optional: Type.Optional(
|
|
214
215
|
Type.Boolean({ description: "If true, a failure does not abort the run", default: false }),
|
|
@@ -240,6 +241,12 @@ const PhaseSchema = Type.Object(
|
|
|
240
241
|
}),
|
|
241
242
|
),
|
|
242
243
|
cache: Type.Optional(CacheSchema),
|
|
244
|
+
shareContext: Type.Optional(
|
|
245
|
+
Type.Boolean({
|
|
246
|
+
description:
|
|
247
|
+
"Opt into the Shared Context Tree for this phase: the subagent gets ctx_read/ctx_write (a blackboard shared with siblings/ancestors, to avoid re-reading files) and ctx_report/ctx_spawn (report upward + queue child tasks the runtime picks up). Default false — existing flows are unaffected.",
|
|
248
|
+
}),
|
|
249
|
+
),
|
|
243
250
|
},
|
|
244
251
|
{ additionalProperties: false },
|
|
245
252
|
);
|
|
@@ -271,6 +278,12 @@ export const TaskflowSchema = Type.Object(
|
|
|
271
278
|
default: false,
|
|
272
279
|
}),
|
|
273
280
|
),
|
|
281
|
+
contextSharing: Type.Optional(
|
|
282
|
+
Type.Boolean({
|
|
283
|
+
description:
|
|
284
|
+
"Enable the Shared Context Tree for ALL phases in this flow (shorthand for setting shareContext on every phase). Default false.",
|
|
285
|
+
}),
|
|
286
|
+
),
|
|
274
287
|
phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
|
|
275
288
|
},
|
|
276
289
|
{ additionalProperties: false },
|
|
@@ -485,11 +498,18 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
485
498
|
if (typeof p.concurrency === "number" && p.concurrency > MAX_DYNAMIC_CONCURRENCY) {
|
|
486
499
|
errors.push(`Dynamic sub-flow phase '${p.id}': concurrency too high (${p.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
|
|
487
500
|
}
|
|
488
|
-
// cwd containment: a generated phase may not escape the run's cwd
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
501
|
+
// cwd containment: a generated phase may not escape the run's cwd, and
|
|
502
|
+
// may not request a reserved workspace keyword (temp/dedicated/worktree)
|
|
503
|
+
// — LLM-authored sub-flows must not allocate isolated dirs or git
|
|
504
|
+
// worktrees that mutate the repo. Only author-written flows may.
|
|
505
|
+
if (typeof p.cwd === "string") {
|
|
506
|
+
if (WORKSPACE_KEYWORDS.includes(p.cwd as (typeof WORKSPACE_KEYWORDS)[number])) {
|
|
507
|
+
errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' is a reserved workspace keyword not allowed in generated flows`);
|
|
508
|
+
} else if (root) {
|
|
509
|
+
const resolved = path.resolve(root, p.cwd);
|
|
510
|
+
if (resolved !== root && !resolved.startsWith(root + path.sep)) {
|
|
511
|
+
errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
|
|
512
|
+
}
|
|
493
513
|
}
|
|
494
514
|
}
|
|
495
515
|
}
|
|
@@ -508,6 +528,14 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
508
528
|
if (ids.has(p.id)) errors.push(`Duplicate phase id: ${p.id}`);
|
|
509
529
|
ids.add(p.id);
|
|
510
530
|
|
|
531
|
+
// When a phase opts into the Shared Context Tree, its id becomes a filesystem
|
|
532
|
+
// node id; restrict the charset so two ids can't sanitize to the same node
|
|
533
|
+
// (which would silently merge their blackboards). Non-sharing phases are
|
|
534
|
+
// unaffected (full backward compat).
|
|
535
|
+
if ((p.shareContext === true || flow.contextSharing === true) && !/^[A-Za-z0-9._-]+$/.test(p.id)) {
|
|
536
|
+
errors.push(`Phase '${p.id}': ids used with context sharing must match [A-Za-z0-9._-]+`);
|
|
537
|
+
}
|
|
538
|
+
|
|
511
539
|
const type = (p.type ?? "agent") as PhaseType;
|
|
512
540
|
if (!PHASE_TYPES.includes(type)) errors.push(`Phase '${p.id}': unknown type '${type}'`);
|
|
513
541
|
|
package/extensions/store.ts
CHANGED
|
@@ -190,13 +190,14 @@ function lockPathForRun(runsRoot: string, flowName: string, runId: string): stri
|
|
|
190
190
|
* Validate that a runId looks safe before performing any filesystem access.
|
|
191
191
|
* Legitimate runIds are produced by newRunId() and contain only [A-Za-z0-9._-].
|
|
192
192
|
*/
|
|
193
|
-
function validateRunId(runId: string): boolean {
|
|
193
|
+
export function validateRunId(runId: string): boolean {
	// Runtime guard in addition to the static type: reject non-strings and the
	// empty string before doing any substring checks.
	if (typeof runId !== "string" || runId.length === 0) return false;

	// A lone "." contains no separator and no "..", yet path.join(root, ".")
	// collapses to `root` itself. Run ids are joined into paths that are later
	// removed recursively, so "." must never be accepted as a segment.
	if (runId === ".") return false;

	// Path separators (POSIX and Windows), NUL bytes, and any ".." traversal
	// token disqualify the id.
	const unsafeTokens = ["/", "\\", "\0", ".."];
	return !unsafeTokens.some((token) => runId.includes(token));
}
|
|
202
203
|
|
|
@@ -509,6 +510,16 @@ function cleanupTerminalRuns(
|
|
|
509
510
|
try { fs.unlinkSync(filePath); } catch { /* already gone */ }
|
|
510
511
|
// Also remove any orphaned lock file.
|
|
511
512
|
try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
|
|
513
|
+
// Also remove the per-run Shared Context Tree directory (C6). Orphaned
|
|
514
|
+
// ctx dirs would otherwise accumulate under runs/ctx/ over many runs.
|
|
515
|
+
try { fs.rmSync(path.join(runsRoot, "ctx", e.runId), { recursive: true, force: true }); } catch { /* ignore */ }
|
|
516
|
+
// Also remove the per-run isolated-workspace dir tree (cwd:"dedicated").
|
|
517
|
+
// `dedicated` workspaces are persistent by design; reclaim them once the
|
|
518
|
+
// run is pruned. The dir name uses the same sanitization as workspace.ts.
|
|
519
|
+
try {
|
|
520
|
+
const wsSeg = e.runId.replace(/[^A-Za-z0-9._-]/g, "_").replace(/^\.+/, "_").slice(0, 100) || "phase";
|
|
521
|
+
fs.rmSync(path.join(runsRoot, "ws", wsSeg), { recursive: true, force: true });
|
|
522
|
+
} catch { /* ignore */ }
|
|
512
523
|
}
|
|
513
524
|
|
|
514
525
|
// Remove empty flow subdirectories.
|
|
@@ -622,7 +633,7 @@ export function saveFlow(
|
|
|
622
633
|
|
|
623
634
|
// --- Run state ---
|
|
624
635
|
|
|
625
|
-
function runsDir(cwd: string): string {
|
|
636
|
+
export function runsDir(cwd: string): string {
|
|
626
637
|
// Safe non-null assertion: create=true guarantees a non-null return because
|
|
627
638
|
// findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
|
|
628
639
|
const projDir = findProjectFlowsDir(cwd, true)!;
|
|
@@ -636,7 +647,9 @@ export function cacheDir(cwd: string): string {
|
|
|
636
647
|
}
|
|
637
648
|
|
|
638
649
|
export function newRunId(flowName: string): string {
	// Build the human-readable prefix from the flow name:
	//   1. every run of characters outside [A-Za-z0-9_.-] becomes one "_";
	//   2. every run of two or more dots becomes "_", so the prefix can never
	//      carry a ".." traversal token (validateRunId rejects "..");
	//   3. the result is capped at 24 characters.
	const prefix = flowName
		.replace(/[^\w.-]+/g, "_")
		.replace(/\.{2,}/g, "_")
		.slice(0, 24);

	// Base-36 millisecond timestamp, then 3 random bytes rendered as 6 hex chars.
	const stamp = Date.now().toString(36);
	const nonce = crypto.randomBytes(3).toString("hex");

	return [prefix, stamp, nonce].join("-");
}
|
|
642
655
|
|