pi-taskflow 0.0.22 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,8 +18,10 @@ import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, r
18
18
  import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
19
19
  import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, MAX_DYNAMIC_MAP_ITEMS, MAX_DYNAMIC_NESTING, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode, validateTaskflow } from "./schema.ts";
20
20
  import { verifyTaskflow } from "./verify.ts";
21
- import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
21
+ import { hashInput, newRunId, type PhaseState, type RunState, runsDir } from "./store.ts";
22
22
  import { CacheStore, resolveFingerprint } from "./cache.ts";
23
+ import { ctxDirFor, drainPendingSpawns, initCtxDir, registerNode, setNodeStatus, type SpawnAssignment } from "./context-store.ts";
24
+ import { allocateWorkspace, isWorkspaceKeyword, type Workspace } from "./workspace.ts";
23
25
 
24
26
  /** A human-in-the-loop approval request raised by an `approval` phase. */
25
27
  export interface ApprovalRequest {
@@ -55,6 +57,10 @@ export interface RuntimeDeps {
55
57
  cacheStore?: CacheStore;
56
58
  /** Internal: sub-flow call stack, for recursion detection. */
57
59
  _stack?: string[];
60
+ /** Internal: pre-resolved Shared Context Tree dir for this run (sub-flows inherit the parent's). */
61
+ _ctxDir?: string;
62
+ /** Internal: an isolated workspace dir override for the current phase (worktree isolation). */
63
+ _cwdOverride?: string;
58
64
  }
59
65
 
60
66
  export interface RuntimeResult {
@@ -372,7 +378,179 @@ async function resolvePhaseContext(
372
378
  return result;
373
379
  }
374
380
 
/*
 * NOTE: the description below belongs to `runSpawnedChildren` (defined further
 * down in this file), not to the `SpawnedResult` interface that follows. It is
 * kept as a plain block comment so it is not read as a second doc comment on
 * the interface.
 *
 * Supervision loop: run the child tasks a parent node queued via ctx_spawn.
 * Each child is an isolated subagent registered under the parent in the tree.
 * Children themselves may share context (and recursively spawn, up to the depth
 * cap enforced inside the ctx_spawn tool). Returns a markdown block of the
 * children's reports to fold into the parent phase's output, or undefined.
 *
 * Fail-open: a child failure is recorded in its report text but never throws.
 */

/** What a spawned child contributed: its folded report text + the tokens it burned. */
interface SpawnedResult {
  /** Markdown block of child reports to append to the parent's output; `undefined` when no child ran. */
  reports: string | undefined;
  /** Aggregated usage of every child that ran, including recursively spawned descendants. */
  usage: UsageStats;
}
395
+
/**
 * The effective working directory for a phase's execution. Honours an allocated
 * workspace override (`_cwdOverride`, set by the executePhase wrapper for
 * isolated `temp`/`dedicated`/`worktree` cwds) and never passes a reserved
 * keyword through to a runner (keywords are resolved upstream into a real dir).
 * Single source of truth — do not inline this formula (divergence here caused
 * two isolation-leak bugs in the 0.0.23 review).
 *
 * @param deps  Runtime dependencies; supplies `_cwdOverride` and the run-level `cwd`.
 * @param phase Phase whose `cwd` (literal path, keyword, or unset) is being resolved.
 * @returns The override if set; otherwise the phase's literal cwd, falling back
 *          to `deps.cwd` when the phase cwd is unset or a reserved keyword.
 */
function resolveEffCwd(deps: RuntimeDeps, phase: Phase): string {
  if (deps._cwdOverride != null) return deps._cwdOverride;
  // A keyword that reaches this point was not (or could not be) allocated: use the run cwd.
  if (isWorkspaceKeyword(phase.cwd)) return deps.cwd;
  return phase.cwd ?? deps.cwd;
}

/**
 * Run an inline sub-flow queued via `ctx_spawn({subflow})`. Reuses the SAME
 * validation + execution machinery as a `flow{def}` phase (normalizeInlineDef →
 * validateTaskflow(dynamic) → verifyTaskflow → nested executeTaskflow), so a
 * spawned DAG is held to the same safety bar as an author-written one.
 *
 * Crucially it extends `deps._stack` with a `def:spawn-<childNodeId>` frame so
 * the existing inline-nesting guard counts spawn-subflows AND flow{def} on the
 * SAME counter — neither axis can independently reach MAX_DYNAMIC_NESTING and
 * multiply with the other (verdict Issue 1). Failures are fail-open: a bad
 * subflow returns a diagnostic string, never throws.
 *
 * @param subflowSpec  Untrusted sub-flow definition taken from the spawn assignment.
 * @param defaultAgent Agent assigned to inner phases that do not name their own.
 * @param childNodeId  Context-tree node id of this spawn; also names the stack frame.
 * @param phase        Parent phase; used to resolve the working directory.
 * @param deps         Runtime dependencies of the parent phase.
 * @param state        Parent run state; its budget clamps the sub-flow's budget.
 * @returns The sub-flow's final output (or a parenthesised diagnostic) and the
 *          summed usage of its phases (empty usage on any rejection or failure).
 */
async function runInlineSubflow(
  subflowSpec: unknown,
  defaultAgent: string | undefined,
  childNodeId: string,
  phase: Phase,
  deps: RuntimeDeps,
  state: RunState,
): Promise<{ output: string; usage: UsageStats }> {
  // Every rejection path reports a diagnostic with zero spend.
  const reject = (output: string): { output: string; usage: UsageStats } => ({ output, usage: emptyUsage() });

  const stack = deps._stack ?? [];
  const inlineDepth = stack.filter((s) => s.startsWith("def:")).length;
  if (inlineDepth >= MAX_DYNAMIC_NESTING) {
    return reject(`(spawned subflow rejected: nesting exceeded MAX_DYNAMIC_NESTING (${MAX_DYNAMIC_NESTING}))`);
  }
  const wrapped = normalizeInlineDef(subflowSpec, childNodeId);
  if (!wrapped) return reject("(spawned subflow is not a Taskflow / phases array)");
  if (wrapped.phases.length === 0) return reject("(spawned subflow had zero phases — no-op)");
  // Inner phases without their own agent inherit the assignment's defaultAgent.
  if (defaultAgent) {
    for (const p of wrapped.phases as Phase[]) if (!p.agent) p.agent = defaultAgent;
  }
  // One cwd anchors validation containment, the sub-run state and the nested deps.
  const dynCwd = resolveEffCwd(deps, phase);
  const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
  if (!v.ok) return reject(`(spawned subflow failed validation: ${v.errors.join("; ")})`);
  const ver = verifyTaskflow({ name: wrapped.name, phases: wrapped.phases as Phase[], budget: wrapped.budget, concurrency: wrapped.concurrency });
  if (!ver.ok) {
    const errs = ver.issues.filter((i) => i.severity === "error").map((i) => i.message);
    return reject(`(spawned subflow failed verification: ${errs.join("; ")})`);
  }
  const subDef = clampSubFlowBudget(wrapped, state.def.budget);
  const subState: RunState = {
    runId: newRunId(subDef.name),
    flowName: subDef.name,
    def: subDef,
    args: resolveArgs(subDef, {}),
    status: "running",
    phases: {},
    createdAt: Date.now(),
    updatedAt: Date.now(),
    cwd: dynCwd,
  };
  try {
    const subResult = await executeTaskflow(subState, {
      ...deps,
      cwd: dynCwd,
      // The parent phase's isolated workspace (if any) applies only to the
      // parent — each spawned sub-phase resolves its own cwd. Clear the
      // override so the whole subflow doesn't inherit the parent's dir
      // (mirrors the `flow` phase handler discipline).
      _cwdOverride: undefined,
      // Don't let spawned sub-phases persist the parent's run state.
      persist: undefined,
      // Unify the nesting counter across both recursion axes (verdict Issue 1).
      _stack: [...stack, state.flowName, `def:spawn-${childNodeId}`],
      _ctxDir: deps._ctxDir,
      onProgress: undefined,
    });
    // Sum every sub-phase's usage so the parent's budget guard sees spawn spend
    // (verdict Issue 2).
    const usage = aggregateUsage(Object.values(subResult.state.phases).map((p) => p.usage ?? emptyUsage()));
    return { output: subResult.finalOutput ?? "", usage };
  } catch (e) {
    return reject(`(spawned subflow failed: ${e instanceof Error ? e.message : String(e)})`);
  }
}
375
485
 
/**
 * Supervision loop: run the child tasks a parent node queued via ctx_spawn.
 * Each child is an isolated subagent registered under the parent in the tree.
 * Children themselves may share context (and recursively spawn, up to the depth
 * cap enforced inside the ctx_spawn tool).
 *
 * Children run one after another, at most MAX_DYNAMIC_MAP_ITEMS per parent, and
 * the loop stops early once the run is aborted or over budget.
 *
 * Fail-open: a child failure is recorded in its report text but never throws.
 * Context-tree bookkeeping (node registration and status updates) is
 * best-effort for the same reason: an error there must not discard the reports
 * and usage already collected from earlier children.
 *
 * @param assignments  Spawn intents drained for `parentNodeId`.
 * @param ctxDir       Shared Context Tree directory for this run.
 * @param parentNodeId Tree node the children are registered under.
 * @param phase        Parent phase; children inherit its agent (unless the
 *                     assignment names one), model, thinking and tools.
 * @param deps         Runtime dependencies (agents, abort signal, cwd, ...).
 * @param state        Run state, consulted by the budget guard.
 * @param run          Task runner used for flat (non-subflow) children.
 * @returns A markdown block of the children's reports to fold into the parent
 *          phase's output (`undefined` when no child ran) plus their summed usage.
 */
async function runSpawnedChildren(
  assignments: SpawnAssignment[],
  ctxDir: string,
  parentNodeId: string,
  phase: Phase,
  deps: RuntimeDeps,
  state: RunState,
  run: typeof runAgentTask,
): Promise<SpawnedResult> {
  const capped = assignments.slice(0, MAX_DYNAMIC_MAP_ITEMS);
  const lines: string[] = [];
  const usages: UsageStats[] = [];
  // Effective cwd for flat spawned tasks: honour a workspace override and never
  // pass a reserved keyword through to the runner.
  const spawnCwd = resolveEffCwd(deps, phase);
  // Best-effort status update: tree bookkeeping must never sink the supervision loop.
  const markStatus = (nodeId: string, status: Parameters<typeof setNodeStatus>[2]): void => {
    try {
      setNodeStatus(ctxDir, nodeId, status);
    } catch {
      /* fail-open */
    }
  };
  let idx = 0;
  for (const a of capped) {
    if (deps.signal?.aborted || overBudget(state).over) break;
    idx++;
    const childNodeId = `${parentNodeId}--c${idx}`.replace(/[^A-Za-z0-9._-]+/g, "_");
    const isSubflow = a.subflow !== undefined && a.subflow !== null;
    const agentName = isSubflow ? "(subflow)" : resolveAgent(a.agent ?? phase.agent, deps, state);
    try {
      registerNode(ctxDir, childNodeId, `${phase.id}:spawn`, parentNodeId, "running");
    } catch {
      /* fail-open: the child still runs even if it could not be registered */
    }
    let out = "";
    try {
      if (isSubflow) {
        const sub = await runInlineSubflow(a.subflow, a.defaultAgent ?? phase.agent, childNodeId, phase, deps, state);
        out = sub.output;
        usages.push(sub.usage);
        markStatus(childNodeId, "done");
      } else {
        const r = await run(
          spawnCwd,
          deps.agents,
          agentName,
          a.task ?? "",
          { model: phase.model, thinking: phase.thinking, tools: phase.tools, cwd: spawnCwd, signal: deps.signal, ctxDir, nodeId: childNodeId },
          deps.globalThinking,
        );
        out = r.output ?? "";
        if (r.usage) usages.push(r.usage);
        markStatus(childNodeId, isFailed(r) ? "failed" : "done");
        // A child may itself have queued spawns — recurse (depth-capped by the tool).
        const grand = drainPendingSpawns(ctxDir, childNodeId);
        if (grand.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
          const rec = await runSpawnedChildren(grand, ctxDir, childNodeId, phase, deps, state, run);
          if (rec.reports) out += rec.reports;
          usages.push(rec.usage);
        }
      }
    } catch (e) {
      markStatus(childNodeId, "failed");
      out = `(spawned child failed: ${e instanceof Error ? e.message : String(e)})`;
    }
    lines.push(`### spawned child ${idx} (${agentName})\n${out}`);
  }
  const usage = aggregateUsage(usages);
  if (lines.length === 0) return { reports: undefined, usage };
  return { reports: `\n\n<!-- ctx_spawn: ${lines.length} child report(s) -->\n${lines.join("\n\n")}`, usage };
}
546
+
547
+
548
+ /**
549
+ * Public phase executor. Resolves an isolated workspace when `phase.cwd` is a
550
+ * reserved keyword (`temp`/`dedicated`/`worktree`), runs the phase against it,
551
+ * and tears it down afterwards. All allocation is fail-open: a failed allocation
552
+ * degrades to the base cwd so a phase never fails to run because of isolation.
553
+ */
376
554
  async function executePhase(
377
555
  phase: Phase,
378
556
  state: RunState,
@@ -380,11 +558,75 @@ async function executePhase(
380
558
  prior: PhaseState | undefined,
381
559
  emitProgress: () => void,
382
560
  _retryDepth = 0,
561
+ ): Promise<PhaseState> {
562
+ // Non-keyword cwd (or none): no workspace lifecycle — run directly.
563
+ if (!isWorkspaceKeyword(phase.cwd)) {
564
+ return executePhaseInner(phase, state, deps, prior, emitProgress, _retryDepth);
565
+ }
566
+ let ws: Workspace | undefined;
567
+ try {
568
+ ws = allocateWorkspace(phase.cwd, {
569
+ baseCwd: deps.cwd,
570
+ runId: state.runId,
571
+ phaseId: phase.id,
572
+ runsRoot: runsDir(deps.cwd),
573
+ });
574
+ } catch {
575
+ ws = undefined; // fail-open: run in the base cwd
576
+ }
577
+ const innerDeps: RuntimeDeps = ws ? { ...deps, _cwdOverride: ws.dir } : deps;
578
+ try {
579
+ const ps = await executePhaseInner(phase, state, innerDeps, prior, emitProgress, _retryDepth);
580
+ if (ws && (ws.kind !== "inherited" || ws.note)) {
581
+ const tag = ws.kind === "inherited" ? "workspace" : `workspace:${ws.kind}`;
582
+ const msg = ws.note ? `${tag} — ${ws.note}` : `${tag} at ${ws.dir}`;
583
+ ps.warnings = [...(ps.warnings ?? []), msg];
584
+ }
585
+ return ps;
586
+ } finally {
587
+ try {
588
+ ws?.teardown();
589
+ } catch {
590
+ /* fail-open: teardown best-effort */
591
+ }
592
+ }
593
+ }
594
+
595
+ async function executePhaseInner(
596
+ phase: Phase,
597
+ state: RunState,
598
+ deps: RuntimeDeps,
599
+ prior: PhaseState | undefined,
600
+ emitProgress: () => void,
601
+ _retryDepth = 0,
383
602
  ): Promise<PhaseState> {
384
603
  const type = phase.type ?? "agent";
385
604
  const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
386
605
  const previousOutput = lastCompletedOutput(state, phase);
387
606
  const run = deps.runTask ?? runAgentTask;
607
+ // Effective working directory for THIS phase's execution. When an isolated
608
+ // workspace was allocated (worktree isolation), `_cwdOverride` is its dir and
609
+ // takes precedence; otherwise a literal `phase.cwd` (non-keyword) or the run
610
+ // cwd is used. Keyword cwds are never passed to a runner (they're resolved
611
+ // upstream in the executePhase wrapper).
612
+ const effCwd = resolveEffCwd(deps, phase);
613
+
614
+ // Shared Context Tree opt-in (per-phase or flow-wide). When on, the subagent
615
+ // gets ctx_* tools backed by a per-run blackboard directory. nodeId is
616
+ // deterministic per phase so a resume re-uses the same tree node (idempotent
617
+ // upsert in registerNode prevents duplication). Sub-items (map/parallel) get
618
+ // a suffixed nodeId so concurrent siblings write to distinct findings files.
619
+ const sharing = (phase.shareContext ?? state.def.contextSharing) === true;
620
+ let ctxDir: string | undefined;
621
+ if (sharing) {
622
+ try {
623
+ ctxDir = deps._ctxDir ?? initCtxDir(ctxDirFor(runsDir(deps.cwd), state.runId));
624
+ } catch {
625
+ ctxDir = undefined; // fail-open: degrade to no sharing
626
+ }
627
+ }
628
+ const nodeIdFor = (suffix?: string): string =>
629
+ `${phase.id}${suffix ? `-${suffix}` : ""}`.replace(/[^A-Za-z0-9._-]+/g, "_");
388
630
 
389
631
  // Resolve context pre-read files once, before any type branching.
390
632
  // The content is prepended to every task so the subagent never spends
@@ -399,7 +641,7 @@ async function executePhase(
399
641
  const cc: PhaseCacheCtx = {
400
642
  scope: cacheScope,
401
643
  ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
402
- fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint, phase.cwd ?? deps.cwd) : "",
644
+ fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint, effCwd) : "",
403
645
  store: deps.cacheStore ?? new CacheStore(deps.cwd),
404
646
  prior,
405
647
  phaseId: phase.id,
@@ -410,9 +652,9 @@ async function executePhase(
410
652
  preRead,
411
653
  };
412
654
 
413
- const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
655
+ const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void, ctxNodeId?: string) =>
414
656
  run(
415
- deps.cwd,
657
+ effCwd,
416
658
  deps.agents,
417
659
  agentName,
418
660
  task,
@@ -420,9 +662,11 @@ async function executePhase(
420
662
  model: phase.model,
421
663
  thinking: phase.thinking,
422
664
  tools: phase.tools,
423
- cwd: phase.cwd,
665
+ cwd: effCwd,
424
666
  signal: deps.signal,
425
667
  onLive,
668
+ ctxDir: ctxDir,
669
+ nodeId: ctxDir ? ctxNodeId : undefined,
426
670
  },
427
671
  deps.globalThinking,
428
672
  );
@@ -439,7 +683,7 @@ async function executePhase(
439
683
  const DEFAULT_TRANSIENT_RETRIES = 3;
440
684
  const DEFAULT_TRANSIENT_BACKOFF_MS = 2000;
441
685
  const DEFAULT_TRANSIENT_FACTOR = 2;
442
- const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void): Promise<RunResult> => {
686
+ const runOne = async (agentName: string, task: string, onLive?: (l: LiveUpdate) => void, ctxNodeId?: string): Promise<RunResult> => {
443
687
  const explicitMax = Math.max(1, 1 + Math.max(0, Math.floor(retry?.max ?? 0)));
444
688
  // Allow enough attempts to cover whichever policy applies on a given attempt.
445
689
  const maxAttempts = Math.max(explicitMax, 1 + DEFAULT_TRANSIENT_RETRIES);
@@ -447,7 +691,7 @@ async function executePhase(
447
691
  let last: RunResult | undefined;
448
692
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
449
693
  if (deps.signal?.aborted) break;
450
- last = await baseRun(agentName, task, onLive);
694
+ last = await baseRun(agentName, task, onLive, ctxNodeId);
451
695
  usages.push(last.usage);
452
696
  // B6: aggregate and surface cumulative usage before the retry decision,
453
697
  // so the TUI / budget guard see the in-flight spend on every attempt.
@@ -537,16 +781,37 @@ async function executePhase(
537
781
  }
538
782
  running++;
539
783
  refresh();
784
+ if (ctxDir) {
785
+ try { registerNode(ctxDir, nodeIdFor(String(idx)), phase.id, undefined, "running"); } catch { /* fail-open */ }
786
+ }
540
787
  const r = await runOne(it.agent, it.task, (l) => {
541
788
  liveUsages[idx] = l.usage;
542
789
  if (l.text) latestText = l.text;
543
790
  if (l.model) latestModel = l.model;
544
791
  refresh();
545
- });
792
+ }, ctxDir ? nodeIdFor(String(idx)) : undefined);
546
793
  running--;
547
794
  done++;
548
795
  if (isFailed(r)) failed++;
549
796
  liveUsages[idx] = r.usage;
797
+ if (ctxDir) {
798
+ try {
799
+ const itemNid = nodeIdFor(String(idx));
800
+ setNodeStatus(ctxDir, itemNid, isFailed(r) ? "failed" : "done");
801
+ // A fan-out item may itself ctx_spawn children. Without this drain a
802
+ // map/parallel item's spawn intents are silently orphaned (the
803
+ // post-run drain below only covers single-agent phases).
804
+ const spawned = drainPendingSpawns(ctxDir, itemNid);
805
+ if (spawned.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
806
+ const child = await runSpawnedChildren(spawned, ctxDir, itemNid, phase, deps, state, run);
807
+ if (child.reports) r.output = `${r.output ?? ""}${child.reports}`;
808
+ if (child.usage) {
809
+ r.usage = aggregateUsage([r.usage ?? emptyUsage(), child.usage]);
810
+ liveUsages[idx] = r.usage;
811
+ }
812
+ }
813
+ } catch { /* fail-open */ }
814
+ }
550
815
  refresh();
551
816
  return r;
552
817
  });
@@ -606,11 +871,32 @@ async function executePhase(
606
871
  const cached = cachedPhase(cc, inputHash);
607
872
  if (cached) return cached;
608
873
 
609
- const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
874
+ const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress), nodeIdFor());
610
875
  const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
611
876
  if (refWarning) ps.warnings = [...(ps.warnings ?? []), refWarning];
612
877
  if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
613
878
 
879
+ // Shared Context Tree: register this node, mark its terminal status, and
880
+ // pick up any ctx_spawn intents the subagent queued. The spawned child
881
+ // tasks run here (supervision loop) and their reports are folded into this
882
+ // phase's output so the parent — and downstream phases — can see them.
883
+ if (ctxDir) {
884
+ try {
885
+ const nid = nodeIdFor();
886
+ registerNode(ctxDir, nid, phase.id, undefined, ps.status === "failed" ? "failed" : "done");
887
+ const spawned = drainPendingSpawns(ctxDir, nid);
888
+ if (spawned.length > 0 && !deps.signal?.aborted && !overBudget(state).over) {
889
+ const child = await runSpawnedChildren(spawned, ctxDir, nid, phase, deps, state, run);
890
+ if (child.reports) ps.output = `${ps.output ?? ""}${child.reports}`;
891
+ // Fold spawned spend into this phase's usage so the run-wide budget
892
+ // guard accounts for it (verdict Issue 2).
893
+ ps.usage = aggregateUsage([ps.usage ?? emptyUsage(), child.usage]);
894
+ }
895
+ } catch {
896
+ /* fail-open: context-tree bookkeeping must never sink the phase */
897
+ }
898
+ }
899
+
614
900
  // onBlock:retry — re-execute upstream + gate until pass or max attempts.
615
901
  if (type === "gate" && ps.gate?.verdict === "block") {
616
902
  const onBlockV: string = phase.onBlock ?? "halt";
@@ -624,10 +910,16 @@ async function executePhase(
624
910
  // H2: cap nested retry depth to prevent exponential re-execution
625
911
  // when a gate's upstream dependency is itself a gate with onBlock:retry
626
912
  if (_retryDepth < MAX_RETRY_DEPTH) {
913
+ // Re-executing upstream deps must NOT inherit this gate's isolated
914
+ // workspace — each dep resolves its own cwd. Strip the override.
915
+ // NOTE: we intentionally pass the gate's `prior` (not the dep's own
916
+ // completed state) so the dep does NOT cache-hit and actually
917
+ // RE-RUNS — re-running upstream is the whole point of onBlock:retry.
918
+ const { _cwdOverride: _dropGateWs, ...depsForUpstream } = deps;
627
919
  for (const depId of phase.dependsOn ?? []) {
628
920
  const d = state.def.phases.find((p) => p.id === depId);
629
921
  if (!d) continue;
630
- const dPs = await executePhase(d, state, deps, prior, emitProgress, _retryDepth + 1);
922
+ const dPs = await executePhase(d, state, depsForUpstream, prior, emitProgress, _retryDepth + 1);
631
923
  state.phases[depId] = dPs;
632
924
  }
633
925
  }
@@ -814,7 +1106,7 @@ async function executePhase(
814
1106
  }
815
1107
  // Validate with `dynamic` hardening (breadth caps + cwd containment) since
816
1108
  // this content is LLM-authored / untrusted. cwd anchors containment checks.
817
- const dynCwd = phase.cwd ?? deps.cwd;
1109
+ const dynCwd = effCwd;
818
1110
  const v = validateTaskflow(wrapped, { dynamic: true, cwd: dynCwd });
819
1111
  if (!v.ok) {
820
1112
  return defFailOpen(`inline def failed validation: ${v.errors.join("; ")}`);
@@ -873,7 +1165,7 @@ async function executePhase(
873
1165
  phases: {},
874
1166
  createdAt: Date.now(),
875
1167
  updatedAt: Date.now(),
876
- cwd: phase.cwd ?? deps.cwd,
1168
+ cwd: effCwd,
877
1169
  };
878
1170
  // B8: pass this flow phase's preRead content to every sub-flow phase by
879
1171
  // wrapping runTask — sub-phase preRead still gets prepended on top of it.
@@ -885,9 +1177,14 @@ async function executePhase(
885
1177
  // Override deps.cwd with the flow phase's own cwd so that sub-flow
886
1178
  // phases without an explicit cwd derive their subagents from the
887
1179
  // flow's cwd (not the caller's cwd).
888
- cwd: phase.cwd ?? deps.cwd,
1180
+ cwd: effCwd,
1181
+ // The workspace override applies only to THIS flow phase, not to the
1182
+ // nested sub-phases (each resolves its own cwd). Clear it so the child
1183
+ // phases don't all inherit this phase's isolated dir as an override.
1184
+ _cwdOverride: undefined,
889
1185
  runTask: subRunTask,
890
1186
  _stack: hasDef ? [...stack, state.flowName, recursionKey] : [...stack, state.flowName],
1187
+ _ctxDir: ctxDir ?? deps._ctxDir,
891
1188
  persist: undefined,
892
1189
  onProgress: () => {
893
1190
  if (live) {
@@ -8,6 +8,7 @@
8
8
  import * as path from "node:path";
9
9
  import { StringEnum } from "@earendil-works/pi-ai";
10
10
  import { Type, type Static } from "typebox";
11
+ import { WORKSPACE_KEYWORDS } from "./workspace.ts";
11
12
 
12
13
  // ---------------------------------------------------------------------------
13
14
  // Phase types
@@ -208,7 +209,7 @@ const PhaseSchema = Type.Object(
208
209
  model: Type.Optional(Type.String({ description: "Model override for this phase" })),
209
210
  thinking: Type.Optional(Type.String({ description: "Thinking level override for this phase" })),
210
211
  tools: Type.Optional(Type.Array(Type.String(), { description: "Restrict tools for this phase's agent" })),
211
- cwd: Type.Optional(Type.String({ description: "Working directory for this phase's subagent" })),
212
+ cwd: Type.Optional(Type.String({ description: "Working directory for this phase's subagent. A literal path, or a reserved keyword: 'temp' (ephemeral dir, removed after the phase), 'dedicated' (persistent dir under the run state, kept), or 'worktree' (a git worktree on a throwaway branch, removed after the phase)." })),
212
213
  final: Type.Optional(Type.Boolean({ description: "Mark this phase's output as the workflow result" })),
213
214
  optional: Type.Optional(
214
215
  Type.Boolean({ description: "If true, a failure does not abort the run", default: false }),
@@ -240,6 +241,12 @@ const PhaseSchema = Type.Object(
240
241
  }),
241
242
  ),
242
243
  cache: Type.Optional(CacheSchema),
244
+ shareContext: Type.Optional(
245
+ Type.Boolean({
246
+ description:
247
+ "Opt into the Shared Context Tree for this phase: the subagent gets ctx_read/ctx_write (a blackboard shared with siblings/ancestors, to avoid re-reading files) and ctx_report/ctx_spawn (report upward + queue child tasks the runtime picks up). Default false — existing flows are unaffected.",
248
+ }),
249
+ ),
243
250
  },
244
251
  { additionalProperties: false },
245
252
  );
@@ -271,6 +278,12 @@ export const TaskflowSchema = Type.Object(
271
278
  default: false,
272
279
  }),
273
280
  ),
281
+ contextSharing: Type.Optional(
282
+ Type.Boolean({
283
+ description:
284
+ "Enable the Shared Context Tree for ALL phases in this flow (shorthand for setting shareContext on every phase). Default false.",
285
+ }),
286
+ ),
274
287
  phases: Type.Array(PhaseSchema, { minItems: 1, description: "Ordered phase definitions (DAG via dependsOn)" }),
275
288
  },
276
289
  { additionalProperties: false },
@@ -485,11 +498,18 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
485
498
  if (typeof p.concurrency === "number" && p.concurrency > MAX_DYNAMIC_CONCURRENCY) {
486
499
  errors.push(`Dynamic sub-flow phase '${p.id}': concurrency too high (${p.concurrency}, max ${MAX_DYNAMIC_CONCURRENCY})`);
487
500
  }
488
- // cwd containment: a generated phase may not escape the run's cwd.
489
- if (typeof p.cwd === "string" && root) {
490
- const resolved = path.resolve(root, p.cwd);
491
- if (resolved !== root && !resolved.startsWith(root + path.sep)) {
492
- errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
501
+ // cwd containment: a generated phase may not escape the run's cwd, and
502
+ // may not request a reserved workspace keyword (temp/dedicated/worktree)
503
+ // LLM-authored sub-flows must not allocate isolated dirs or git
504
+ // worktrees that mutate the repo. Only author-written flows may.
505
+ if (typeof p.cwd === "string") {
506
+ if (WORKSPACE_KEYWORDS.includes(p.cwd as (typeof WORKSPACE_KEYWORDS)[number])) {
507
+ errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' is a reserved workspace keyword not allowed in generated flows`);
508
+ } else if (root) {
509
+ const resolved = path.resolve(root, p.cwd);
510
+ if (resolved !== root && !resolved.startsWith(root + path.sep)) {
511
+ errors.push(`Dynamic sub-flow phase '${p.id}': cwd '${p.cwd}' escapes the run directory`);
512
+ }
493
513
  }
494
514
  }
495
515
  }
@@ -508,6 +528,14 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
508
528
  if (ids.has(p.id)) errors.push(`Duplicate phase id: ${p.id}`);
509
529
  ids.add(p.id);
510
530
 
531
+ // When a phase opts into the Shared Context Tree, its id becomes a filesystem
532
+ // node id; restrict the charset so two ids can't sanitize to the same node
533
+ // (which would silently merge their blackboards). Non-sharing phases are
534
+ // unaffected (full backward compat).
535
+ if ((p.shareContext === true || flow.contextSharing === true) && !/^[A-Za-z0-9._-]+$/.test(p.id)) {
536
+ errors.push(`Phase '${p.id}': ids used with context sharing must match [A-Za-z0-9._-]+`);
537
+ }
538
+
511
539
  const type = (p.type ?? "agent") as PhaseType;
512
540
  if (!PHASE_TYPES.includes(type)) errors.push(`Phase '${p.id}': unknown type '${type}'`);
513
541
 
@@ -190,13 +190,14 @@ function lockPathForRun(runsRoot: string, flowName: string, runId: string): stri
190
190
  * Validate that a runId looks safe before performing any filesystem access.
191
191
  * Legitimate runIds are produced by newRunId() and contain only [A-Za-z0-9._-].
192
192
  */
export function validateRunId(runId: string): boolean {
  // The runtime type check is intentional: the value is about to be used in a
  // filesystem path, so a non-string must be rejected rather than coerced.
  if (typeof runId !== "string" || runId.length === 0) return false;
  // A lone "." resolves to the containing directory once joined into a path, so
  // it is as unsafe as ".." for a recursive remove. newRunId() always emits
  // "<name>-<time>-<hex>", so no legitimate id is affected.
  if (runId === ".") return false;
  // Path separators, NUL, and the ".." traversal token are never legitimate.
  const forbidden = ["/", "\\", "\0", ".."];
  return !forbidden.some((token) => runId.includes(token));
}
202
203
 
@@ -509,6 +510,16 @@ function cleanupTerminalRuns(
509
510
  try { fs.unlinkSync(filePath); } catch { /* already gone */ }
510
511
  // Also remove any orphaned lock file.
511
512
  try { fs.unlinkSync(filePath + ".lock"); } catch { /* ignore */ }
513
+ // Also remove the per-run Shared Context Tree directory (C6). Orphaned
514
+ // ctx dirs would otherwise accumulate under runs/ctx/ over many runs.
515
+ try { fs.rmSync(path.join(runsRoot, "ctx", e.runId), { recursive: true, force: true }); } catch { /* ignore */ }
516
+ // Also remove the per-run isolated-workspace dir tree (cwd:"dedicated").
517
+ // `dedicated` workspaces are persistent by design; reclaim them once the
518
+ // run is pruned. The dir name uses the same sanitization as workspace.ts.
519
+ try {
520
+ const wsSeg = e.runId.replace(/[^A-Za-z0-9._-]/g, "_").replace(/^\.+/, "_").slice(0, 100) || "phase";
521
+ fs.rmSync(path.join(runsRoot, "ws", wsSeg), { recursive: true, force: true });
522
+ } catch { /* ignore */ }
512
523
  }
513
524
 
514
525
  // Remove empty flow subdirectories.
@@ -622,7 +633,7 @@ export function saveFlow(
622
633
 
623
634
  // --- Run state ---
624
635
 
625
- function runsDir(cwd: string): string {
636
+ export function runsDir(cwd: string): string {
626
637
  // Safe non-null assertion: create=true guarantees a non-null return because
627
638
  // findProjectFlowsDirInternal falls back to path.join(cwd, ".pi", "taskflows").
628
639
  const projDir = findProjectFlowsDir(cwd, true)!;
@@ -636,7 +647,9 @@ export function cacheDir(cwd: string): string {
636
647
  }
637
648
 
/**
 * Build a fresh run id of the form `<name>-<time>-<hex>`.
 *
 * The name part is the flow name reduced to `[A-Za-z0-9_.-]`, with any run of
 * two or more dots folded to `_` so the id can never contain a `..` traversal
 * token (validateRunId rejects `..`), then truncated to 24 characters. The
 * time part is the current epoch milliseconds in base 36 and the last part is
 * three random bytes in hex.
 *
 * @param flowName Human-readable flow name; may contain arbitrary characters.
 * @returns A new run id.
 */
export function newRunId(flowName: string): string {
  const namePart = flowName
    .replace(/[^\w.-]+/g, "_")
    .replace(/\.{2,}/g, "_")
    .slice(0, 24);
  const timePart = Date.now().toString(36);
  const randomPart = crypto.randomBytes(3).toString("hex");
  return [namePart, timePart, randomPart].join("-");
}
642
655