pi-taskflow 0.0.12 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,11 +13,12 @@
13
13
  import * as path from "node:path";
14
14
  import * as fs from "node:fs";
15
15
  import type { AgentConfig } from "./agents.ts";
16
- import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
16
+ import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
17
17
  import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
18
18
  import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
19
- import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
19
+ import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
20
20
  import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
21
+ import { CacheStore, resolveFingerprint } from "./cache.ts";
21
22
 
22
23
  /** A human-in-the-loop approval request raised by an `approval` phase. */
23
24
  export interface ApprovalRequest {
@@ -49,6 +50,8 @@ export interface RuntimeDeps {
49
50
  requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
50
51
  /** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
51
52
  loadFlow?: (name: string) => Taskflow | undefined;
53
+ /** Cross-run memoization store. Omit to construct a default one for `deps.cwd`. */
54
+ cacheStore?: CacheStore;
52
55
  /** Internal: sub-flow call stack, for recursion detection. */
53
56
  _stack?: string[];
54
57
  }
@@ -295,6 +298,23 @@ async function executePhase(
295
298
  const ctx = buildInterpolationContext(state, previousOutput);
296
299
  const preRead = await resolvePhaseContext(phase, ctx);
297
300
 
301
+ // Resolve this phase's cache policy once. Default scope is "run-only" (the
302
+ // historical within-run resume behavior). Only "cross-run" phases resolve a
303
+ // fingerprint and consult the persistent store.
304
+ const cacheScope: CacheScope = (phase.cache?.scope ?? "run-only") as CacheScope;
305
+ const cc: PhaseCacheCtx = {
306
+ scope: cacheScope,
307
+ ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
308
+ fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint, phase.cwd ?? deps.cwd) : "",
309
+ store: deps.cacheStore ?? new CacheStore(deps.cwd),
310
+ prior,
311
+ phaseId: phase.id,
312
+ flowName: state.flowName,
313
+ runId: state.runId,
314
+ thinking: phase.thinking,
315
+ tools: phase.tools,
316
+ };
317
+
298
318
  const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
299
319
  run(
300
320
  deps.cwd,
@@ -437,13 +457,14 @@ async function executePhase(
437
457
  const { text } = interpolate(phase.task ?? "", ctx);
438
458
  const fullTask = preRead + text;
439
459
  const agentName = resolveAgent(phase.agent, deps, state);
440
- const inputHash = hashInput(phase.id, agentName, phase.model ?? "", fullTask);
441
- const cached = cachedPhase(prior, inputHash);
460
+ const inputHash = cacheKey(cc, [phase.id, agentName, phase.model ?? "", fullTask]);
461
+ const cached = cachedPhase(cc, inputHash);
442
462
  if (cached) return cached;
443
463
 
444
464
  const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
445
465
  const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
446
466
  if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
467
+ recordCache(cc, ps);
447
468
  return ps;
448
469
  }
449
470
 
@@ -455,12 +476,14 @@ async function executePhase(
455
476
  task: preRead + r.text,
456
477
  };
457
478
  });
458
- const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(branches));
459
- const cached = cachedPhase(prior, inputHash);
479
+ const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(branches)]);
480
+ const cached = cachedPhase(cc, inputHash);
460
481
  if (cached) return cached;
461
482
 
462
483
  const results = await runFanout(branches);
463
- return mergePhaseState(phase.id, results, inputHash, parseJson);
484
+ const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
485
+ recordCache(cc, ps);
486
+ return ps;
464
487
  }
465
488
 
466
489
  if (type === "map") {
@@ -485,19 +508,21 @@ async function executePhase(
485
508
  task: preRead + interpolate(phase.task ?? "", localCtx).text,
486
509
  };
487
510
  });
488
- const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(tasks));
489
- const cached = cachedPhase(prior, inputHash);
511
+ const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(tasks)]);
512
+ const cached = cachedPhase(cc, inputHash);
490
513
  if (cached) return cached;
491
514
 
492
515
  const results = await runFanout(tasks);
493
- return mergePhaseState(phase.id, results, inputHash, parseJson);
516
+ const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
517
+ recordCache(cc, ps);
518
+ return ps;
494
519
  }
495
520
 
496
521
  if (type === "approval") {
497
522
  const ctx = buildInterpolationContext(state, previousOutput);
498
523
  const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
499
524
  const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
500
- const cached = cachedPhase(prior, inputHash);
525
+ const cached = cachedPhase(cc, inputHash);
501
526
  if (cached) return cached;
502
527
 
503
528
  // Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
@@ -547,8 +572,8 @@ async function executePhase(
547
572
  provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
548
573
  }
549
574
  const subArgs = resolveArgs(subDef, provided);
550
- const inputHash = hashInput(phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs));
551
- const cached = cachedPhase(prior, inputHash);
575
+ const inputHash = cacheKey(cc, [phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs)]);
576
+ const cached = cachedPhase(cc, inputHash);
552
577
  if (cached) return cached;
553
578
 
554
579
  const live = state.phases[phase.id];
@@ -600,7 +625,7 @@ async function executePhase(
600
625
  },
601
626
  });
602
627
  const sp = Object.values(subState.phases);
603
- return {
628
+ const flowPs: PhaseState = {
604
629
  id: phase.id,
605
630
  status: subResult.ok ? "done" : "failed",
606
631
  output: subResult.finalOutput,
@@ -619,6 +644,207 @@ async function executePhase(
619
644
  inputHash,
620
645
  endedAt: Date.now(),
621
646
  };
647
+ recordCache(cc, flowPs);
648
+ return flowPs;
649
+ }
650
+
651
+ // loop-until-done: run the body repeatedly until `until` is truthy, the output
652
+ // converges to a fixed point, or maxIterations is hit (always terminates).
653
+ if (type === "loop") {
654
+ const agentName = resolveAgent(phase.agent, deps, state);
655
+ const rawMax = phase.maxIterations ?? LOOP_DEFAULT_MAX_ITERATIONS;
656
+ const maxIters = Math.max(1, Math.min(LOOP_HARD_MAX_ITERATIONS, Math.floor(rawMax)));
657
+ const convergence = phase.convergence ?? true;
658
+
659
+ const usages: UsageStats[] = [];
660
+ const loopWarnings: string[] = [];
661
+ let lastOutput = "";
662
+ let prevOutput: string | undefined;
663
+ let iterations = 0;
664
+ let stop: NonNullable<PhaseState["loop"]>["stop"] = "maxIterations";
665
+ let failedResult: RunResult | undefined;
666
+
667
+ for (let i = 1; i <= maxIters; i++) {
668
+ if (deps.signal?.aborted) {
669
+ stop = "failed";
670
+ break;
671
+ }
672
+ iterations = i;
673
+ // The body sees its iteration number and the prior iteration's output.
674
+ const bodyCtx = buildInterpolationContext(state, previousOutput, {
675
+ loop: { iteration: i, lastOutput, maxIterations: maxIters },
676
+ });
677
+ const body = preRead + interpolate(phase.task ?? "", bodyCtx).text;
678
+ const r = await runOne(agentName, body, liveSink(state, phase.id, emitProgress));
679
+ usages.push(r.usage);
680
+ if (isFailed(r)) {
681
+ failedResult = r;
682
+ stop = "failed";
683
+ break;
684
+ }
685
+ prevOutput = lastOutput;
686
+ lastOutput = r.output;
687
+
688
+ // Expose this iteration's output as {steps.<thisId>.output|json} so the
689
+ // `until` condition can inspect it (e.g. "{steps.refine.json.done}==true").
690
+ // Loop locals ({loop.iteration} etc.) are available to the condition too.
691
+ const untilCtx = buildInterpolationContext(state, previousOutput, {
692
+ loop: { iteration: i, lastOutput, maxIterations: maxIters },
693
+ });
694
+ untilCtx.steps[phase.id] = { output: lastOutput, json: safeParse(lastOutput) };
695
+ const { value: done, error: condErr } = tryEvaluateCondition(phase.until ?? "", untilCtx);
696
+ // A malformed condition must not spin forever: stop and surface a warning
697
+ // so the author learns the `until` never actually evaluated.
698
+ if (condErr) {
699
+ loopWarnings.push(`loop 'until' could not be evaluated (stopped early): ${condErr}`);
700
+ stop = "until";
701
+ break;
702
+ }
703
+ if (done) {
704
+ stop = "until";
705
+ break;
706
+ }
707
+ // Fixed-point convergence: identical consecutive output ⇒ further work is wasted.
708
+ if (convergence && prevOutput !== undefined && prevOutput === lastOutput) {
709
+ stop = "converged";
710
+ break;
711
+ }
712
+ }
713
+
714
+ const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
715
+ if (failedResult) {
716
+ return {
717
+ id: phase.id,
718
+ status: "failed",
719
+ output: lastOutput || undefined,
720
+ usage: aggUsage,
721
+ error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
722
+ loop: { iterations, stop: "failed" },
723
+ warnings: loopWarnings.length ? loopWarnings : undefined,
724
+ inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
725
+ endedAt: Date.now(),
726
+ };
727
+ }
728
+ return {
729
+ id: phase.id,
730
+ status: "done",
731
+ output: lastOutput,
732
+ json: parseJson ? safeParse(lastOutput) : undefined,
733
+ usage: aggUsage,
734
+ loop: { iterations, stop },
735
+ warnings: loopWarnings.length ? loopWarnings : undefined,
736
+ inputHash: hashInput(phase.id, "loop", phase.until ?? "", String(iterations)),
737
+ endedAt: Date.now(),
738
+ };
739
+ }
740
+
741
+ // tournament: spawn N competing variants, then a judge picks the best (or
742
+ // synthesizes an aggregate). Combines the parallel fan-out with a gate-style
743
+ // verdict, expressed as a single declarative phase.
744
+ if (type === "tournament") {
745
+ const mode = (phase.mode ?? "best") as TournamentMode;
746
+ // Competitors: explicit `branches` win; otherwise N copies of `task`.
747
+ let competitors: Array<{ agent: string; task: string }>;
748
+ if (phase.branches && phase.branches.length > 0) {
749
+ competitors = phase.branches.map((b) => ({
750
+ agent: resolveAgent(b.agent ?? phase.agent, deps, state),
751
+ task: preRead + interpolate(b.task, ctx).text,
752
+ }));
753
+ } else {
754
+ const n = Math.max(2, Math.min(TOURNAMENT_HARD_MAX_VARIANTS, Math.floor(phase.variants ?? TOURNAMENT_DEFAULT_VARIANTS)));
755
+ const body = preRead + interpolate(phase.task ?? "", ctx).text;
756
+ competitors = Array.from({ length: n }, () => ({ agent: resolveAgent(phase.agent, deps, state), task: body }));
757
+ }
758
+
759
+ const results = await runFanout(competitors);
760
+ const ran = results.filter((r) => r.stopReason !== "budget-skipped");
761
+ const ok = ran.filter((r) => !isFailed(r));
762
+ const variantUsage = aggregateUsage(results.map((r) => r.usage));
763
+ // Winner numbers are 1-based over `ran` (exactly what the judge is shown).
764
+ // Using indexOf on the stable `ran` array is reference-based and correct even
765
+ // when two variants produce byte-identical output.
766
+ const ranIdx = (r: RunResult) => ran.indexOf(r) + 1;
767
+
768
+ // All competitors failed → the tournament fails (nothing to judge).
769
+ if (ok.length === 0) {
770
+ return {
771
+ id: phase.id,
772
+ status: "failed",
773
+ usage: variantUsage,
774
+ error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
775
+ tournament: { variants: competitors.length, winner: 0, mode },
776
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
777
+ endedAt: Date.now(),
778
+ };
779
+ }
780
+ // Only one competitor survived → no contest; it wins by default (skip judge).
781
+ if (ok.length === 1) {
782
+ return {
783
+ id: phase.id,
784
+ status: "done",
785
+ output: ok[0].output,
786
+ json: parseJson ? safeParse(ok[0].output) : undefined,
787
+ usage: variantUsage,
788
+ model: ok[0].model,
789
+ tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
790
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
791
+ endedAt: Date.now(),
792
+ };
793
+ }
794
+
795
+ // Build the judge prompt: label every variant output, then the rubric.
796
+ const labelled = ran
797
+ .map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
798
+ .join("\n\n---\n\n");
799
+ const rubric =
800
+ interpolate(phase.judge ?? "", ctx).text.trim() ||
801
+ "You are judging competing answers to the same task. Pick the single best variant on correctness, completeness, and clarity.";
802
+ const directive =
803
+ mode === "best"
804
+ ? `End your reply with a line exactly: WINNER: <number> (1–${ran.length}), choosing the strongest eligible variant.`
805
+ : `Synthesize the strongest possible answer by combining the best parts of the eligible variants. Then end with a line: WINNER: <number> indicating which variant contributed most.`;
806
+ const judgeTask = `${rubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
807
+ const judgeAgent = resolveAgent(phase.judgeAgent ?? phase.agent, deps, state);
808
+ const judgeRes = await runOne(judgeAgent, judgeTask, liveSink(state, phase.id, emitProgress));
809
+ const judgeUsage = aggregateUsage([variantUsage, judgeRes.usage]);
810
+
811
+ if (isFailed(judgeRes)) {
812
+ // Judge failed: fall back to the first eligible variant (fail-open, never
813
+ // lose the work). Report the variant we actually used, not a hardcoded 1.
814
+ return {
815
+ id: phase.id,
816
+ status: "done",
817
+ output: ok[0].output,
818
+ json: parseJson ? safeParse(ok[0].output) : undefined,
819
+ usage: judgeUsage,
820
+ model: ok[0].model,
821
+ warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
822
+ tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
823
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
824
+ endedAt: Date.now(),
825
+ };
826
+ }
827
+
828
+ const { winner, reason } = parseTournamentWinner(judgeRes.output, ran.length);
829
+ const winnerResult = ran[winner - 1];
830
+ const winnerIneligible = !winnerResult || isFailed(winnerResult);
831
+ // In 'best' mode the output is the winning variant verbatim; in 'aggregate'
832
+ // mode it is the judge's synthesized answer.
833
+ const chosen = winnerIneligible ? ok[0] : winnerResult;
834
+ const winnerIdx = ranIdx(chosen);
835
+ const output = mode === "aggregate" ? judgeRes.output : chosen.output;
836
+ return {
837
+ id: phase.id,
838
+ status: "done",
839
+ output,
840
+ json: parseJson ? safeParse(output) : undefined,
841
+ usage: judgeUsage,
842
+ model: mode === "aggregate" ? judgeRes.model : chosen.model,
843
+ warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
844
+ tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
845
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length), mode),
846
+ endedAt: Date.now(),
847
+ };
622
848
  }
623
849
 
624
850
  return {
@@ -657,13 +883,89 @@ function lastCompletedOutput(state: RunState, phase: Phase): string | undefined
657
883
  return undefined;
658
884
  }
659
885
 
660
- function cachedPhase(prior: PhaseState | undefined, inputHash: string): PhaseState | null {
661
- if (prior && prior.status === "done" && prior.inputHash === inputHash) {
662
- return { ...prior, status: "done" };
886
/**
 * Cache policy for one phase, resolved a single time at the top of
 * executePhase. Every phase-type branch is handed this object so it can fold
 * the same identity into its inputHash and query / populate the cross-run
 * store in a uniform way.
 */
interface PhaseCacheCtx {
  // --- policy -------------------------------------------------------------
  /** Reuse scope requested for the phase ("run-only" when the phase sets none). */
  scope: CacheScope;
  /** Optional maximum age, in milliseconds, handed to the store on lookup. */
  ttlMs?: number;
  /** Pre-resolved fingerprint string; empty when there is nothing to fold in. */
  fingerprint: string;

  // --- lookup sources -----------------------------------------------------
  /** Persistent store consulted and written for "cross-run" phases. */
  store: CacheStore;
  /** This phase's state from the run being resumed, if there is one. */
  prior: PhaseState | undefined;

  // --- provenance ---------------------------------------------------------
  phaseId: string;
  flowName: string;
  runId: string;

  // --- execution config that is part of the cache identity ----------------
  // Both settings materially affect what the subagent produces, so they must
  // be part of the key; otherwise a config change could silently serve a stale
  // cross-run hit.
  thinking?: string;
  tools?: string[];
}
906
+
907
/**
 * Derive the final cache key for a phase from its base hash parts.
 *
 * The hashed identity is, in order: the flow name (so two flows sharing a
 * phase.id + task + model cannot collide), the caller's base parts, the
 * per-phase thinking/tools config (changing either changes the subagent's
 * output), and finally the resolved world-state fingerprint when one is set.
 */
function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
  const identity: string[] = [
    `flow:${cc.flowName}`,
    ...baseParts,
    `think:${cc.thinking ?? ""}`,
    `tools:${JSON.stringify(cc.tools ?? [])}`,
  ];
  // An empty fingerprint contributes nothing: it is not hashed as an extra part.
  if (cc.fingerprint) identity.push(cc.fingerprint);
  return hashInput(...identity);
}
916
+
917
/**
 * Look up a reusable result for a phase, honoring its cache scope:
 *   - "off":       nothing is ever reused, not even within the same run.
 *   - "run-only":  only the prior state of this run is considered (the
 *                  historical resume behavior).
 *   - "cross-run": the prior state is tried first, then the persistent store.
 *
 * A cross-run hit is returned as a fresh "done" state with zeroed usage and
 * `cacheHit: "cross-run"` recording where the value came from.
 * Returns null when nothing can be reused.
 */
function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
  const { scope, prior } = cc;
  if (scope === "off") return null;

  // Within-run resume: allowed for every scope except "off".
  if (prior?.status === "done" && prior.inputHash === inputHash) {
    return { ...prior, status: "done" };
  }

  // Cross-run memoization is opt-in.
  if (scope !== "cross-run") return null;

  const entry = cc.store.get(inputHash, cc.ttlMs);
  if (!entry) return null;

  return {
    id: cc.phaseId,
    status: "done",
    inputHash,
    output: entry.output,
    json: entry.json,
    model: entry.model,
    usage: emptyUsage(),
    cacheHit: "cross-run",
    endedAt: Date.now(),
  };
}
666
951
 
952
/**
 * Write a freshly computed phase result to the cross-run store.
 *
 * Nothing is stored unless the phase opted into "cross-run", finished with
 * status "done", carries an inputHash to key the entry by, and was not itself
 * served from the cache (re-storing a value we just read would be pointless).
 */
function recordCache(cc: PhaseCacheCtx, ps: PhaseState): void {
  const key = ps.inputHash;
  if (cc.scope !== "cross-run" || ps.status !== "done" || !key || ps.cacheHit) return;

  const { flowName, phaseId, runId } = cc;
  cc.store.put({
    key,
    createdAt: Date.now(),
    output: ps.output,
    json: ps.json,
    model: ps.model,
    flowName,
    phaseId,
    runId,
  });
}
968
+
667
969
  /**
668
970
  * Resolve an agent name against available agents. Falls back to the default
669
971
  * agent if the requested agent isn't found, logging a warning via safeEmit.
@@ -722,6 +1024,29 @@ function asReason(v: unknown): string | undefined {
722
1024
  return typeof v === "string" && v.trim() ? v.trim() : undefined;
723
1025
  }
724
1026
 
1027
/**
 * Parse a judge's pick of the winning variant.
 *
 * Two forms are recognized, tried in this order:
 *   1. A parsed object (as produced by `safeParse`) with a numeric or
 *      numeric-string `winner`, `best`, or `choice` field; an optional
 *      `reason` field is passed through `asReason`.
 *   2. A `WINNER: n` (or `WINNER = n`, optionally `#n`) marker anywhere in the
 *      text, matched case-insensitively; when several appear the last one wins.
 *
 * The picked number is floored and clamped into [1, max(1, count)].
 * Fail-open: when no verdict can be read, variant 1 is returned with an
 * explanatory reason so the work is never lost.
 *
 * @param output Raw judge output.
 * @param count  Number of variants the judge was shown.
 * @returns The 1-based winner index plus an optional reason.
 */
export function parseTournamentWinner(output: string, count: number): { winner: number; reason?: string } {
  const upper = Math.max(1, count);
  const clamp = (n: number) => Math.min(Math.max(1, Math.floor(n)), upper);

  const json = safeParse(output);
  if (json && typeof json === "object") {
    const o = json as Record<string, unknown>;
    const raw = o.winner ?? o.best ?? o.choice;
    // Number("") and Number("  ") are 0, which would be clamped to 1 and
    // reported as a genuine pick. Treat blank strings as "no verdict" so they
    // fall through to the WINNER-line scan and the documented fallback.
    const n =
      typeof raw === "number"
        ? raw
        : typeof raw === "string" && raw.trim() !== ""
          ? Number(raw)
          : NaN;
    if (Number.isFinite(n)) return { winner: clamp(n), reason: asReason(o.reason) };
  }

  const matches = [...output.matchAll(/WINNER\s*[:=]\s*#?\s*(\d+)/gi)];
  if (matches.length) {
    const n = Number(matches[matches.length - 1][1]);
    if (Number.isFinite(n)) return { winner: clamp(n) };
  }

  return { winner: 1, reason: "no parseable winner; defaulted to variant 1" };
}
1049
+
725
1050
  /**
726
1051
  * Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
727
1052
  *
@@ -13,11 +13,27 @@ import { Type, type Static } from "typebox";
13
13
  // Phase types
14
14
  // ---------------------------------------------------------------------------
15
15
 
16
- export const PHASE_TYPES = ["agent", "parallel", "map", "gate", "reduce", "approval", "flow"] as const;
16
+ export const PHASE_TYPES = ["agent", "parallel", "map", "gate", "reduce", "approval", "flow", "loop", "tournament"] as const;
17
17
  export type PhaseType = (typeof PHASE_TYPES)[number];
18
18
 
19
+ /** Loop iteration bounds. Authors may lower the max; the hard cap is a runaway guard. */
20
+ export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
21
+ export const LOOP_HARD_MAX_ITERATIONS = 100;
22
+
23
+ /** Tournament competitor bounds. */
24
+ export const TOURNAMENT_DEFAULT_VARIANTS = 3;
25
+ export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
26
+ export const TOURNAMENT_MODES = ["best", "aggregate"] as const;
27
+ export type TournamentMode = (typeof TOURNAMENT_MODES)[number];
28
+
19
29
  export const OUTPUT_FORMATS = ["text", "json"] as const;
20
30
  export const JOIN_MODES = ["all", "any"] as const;
31
+ export const CACHE_SCOPES = ["run-only", "cross-run", "off"] as const;
32
+ export type CacheScope = (typeof CACHE_SCOPES)[number];
33
+ /** Allowed fingerprint entry prefixes. `glob!:` = content-hash variant of `glob:`. */
34
+ export const CACHE_FINGERPRINT_PREFIXES = ["git:", "glob:", "glob!:", "file:", "env:"] as const;
35
+ /** Phase types that must NOT be cached across runs (a fresh result is required each run). */
36
+ export const CACHE_CROSS_RUN_BLOCKED_TYPES = ["gate", "approval", "loop", "tournament"] as const;
21
37
 
22
38
  const ParallelTaskSchema = Type.Object(
23
39
  {
@@ -39,6 +55,36 @@ const RetrySchema = Type.Object(
39
55
  { additionalProperties: false },
40
56
  );
41
57
 
58
+ /**
59
+ * Per-phase cache policy. Defaults to `run-only` which is exactly the historical
60
+ * behavior (within-run resume only). `cross-run` opts a phase into the persistent
61
+ * cross-run memoization store; see docs/rfc-cross-run-memoization.md.
62
+ */
63
+ const CacheSchema = Type.Object(
64
+ {
65
+ scope: Type.Optional(
66
+ StringEnum(CACHE_SCOPES, {
67
+ description:
68
+ "Cache reuse scope. 'run-only' (default) = within-run resume only (historical behavior); 'cross-run' = reuse identical-input results from any prior run; 'off' = never reuse (even within-run).",
69
+ default: "run-only",
70
+ }),
71
+ ),
72
+ ttl: Type.Optional(
73
+ Type.String({
74
+ description:
75
+ "Max cache age before a cross-run hit is treated as a miss, e.g. '30m', '6h', '7d'. Omit for no time bound.",
76
+ }),
77
+ ),
78
+ fingerprint: Type.Optional(
79
+ Type.Array(Type.String(), {
80
+ description:
81
+ "Extra freshness inputs folded into the cache key so 'the world changed' becomes a cache miss. Each entry: 'git:HEAD' | 'glob:<pattern>' | 'glob!:<pattern>' (content-hash) | 'file:<path>' | 'env:<NAME>'.",
82
+ }),
83
+ ),
84
+ },
85
+ { additionalProperties: false },
86
+ );
87
+
42
88
  /** Run-wide cost / token ceiling. Exceeding it halts the run (remaining phases skipped). */
43
89
  const BudgetSchema = Type.Object(
44
90
  {
@@ -79,6 +125,51 @@ const PhaseSchema = Type.Object(
79
125
  }),
80
126
  ),
81
127
 
128
+ // loop-until-done
129
+ until: Type.Optional(
130
+ Type.String({
131
+ description:
132
+ "[loop] Stop condition evaluated after each iteration. The iteration's output is exposed as {steps.<thisId>.output}/.json. Supports the same operators as `when`. The loop stops when this is truthy, on convergence, or at maxIterations. A parse error stops the loop (fail-safe).",
133
+ }),
134
+ ),
135
+ maxIterations: Type.Optional(
136
+ Type.Number({
137
+ description: `[loop] Hard cap on iterations (default ${LOOP_DEFAULT_MAX_ITERATIONS}, max ${LOOP_HARD_MAX_ITERATIONS}). The loop always terminates within this bound even if 'until' never becomes truthy.`,
138
+ default: LOOP_DEFAULT_MAX_ITERATIONS,
139
+ }),
140
+ ),
141
+ convergence: Type.Optional(
142
+ Type.Boolean({
143
+ description:
144
+ "[loop] When true (default), stop early if an iteration's output is identical to the previous one (a fixed point — further iterations would not change anything).",
145
+ default: true,
146
+ }),
147
+ ),
148
+
149
+ // tournament: N variants compete, a judge picks the best (or aggregates)
150
+ variants: Type.Optional(
151
+ Type.Number({
152
+ description: `[tournament] Number of competing variants to spawn from 'task' (default ${TOURNAMENT_DEFAULT_VARIANTS}, max ${TOURNAMENT_HARD_MAX_VARIANTS}). Ignored when 'branches' is provided (those become the variants instead).`,
153
+ default: TOURNAMENT_DEFAULT_VARIANTS,
154
+ }),
155
+ ),
156
+ judge: Type.Optional(
157
+ Type.String({
158
+ description:
159
+ "[tournament] Judge prompt. The numbered variant outputs are injected before it. To pick a winner, end with a line like 'WINNER: <n>' or return JSON {\"winner\": <n>}. Defaults to a sensible built-in rubric.",
160
+ }),
161
+ ),
162
+ judgeAgent: Type.Optional(
163
+ Type.String({ description: "[tournament] Agent that runs the judge step (defaults to the phase 'agent')." }),
164
+ ),
165
+ mode: Type.Optional(
166
+ StringEnum(TOURNAMENT_MODES, {
167
+ description:
168
+ "[tournament] 'best' (default): output is the winning variant verbatim. 'aggregate': output is the judge's synthesized answer combining the variants.",
169
+ default: "best",
170
+ }),
171
+ ),
172
+
82
173
  dependsOn: Type.Optional(Type.Array(Type.String(), { description: "Phase ids this phase depends on" })),
83
174
  join: Type.Optional(
84
175
  StringEnum(JOIN_MODES, {
@@ -115,6 +206,7 @@ const PhaseSchema = Type.Object(
115
206
  default: 8000,
116
207
  }),
117
208
  ),
209
+ cache: Type.Optional(CacheSchema),
118
210
  },
119
211
  { additionalProperties: false },
120
212
  );
@@ -157,6 +249,7 @@ export type Taskflow = Static<typeof TaskflowSchema>;
157
249
  export type ArgSpec = Static<typeof ArgSpecSchema>;
158
250
  export type RetryPolicy = Static<typeof RetrySchema>;
159
251
  export type Budget = Static<typeof BudgetSchema>;
252
+ export type CachePolicy = Static<typeof CacheSchema>;
160
253
  export type JoinMode = (typeof JOIN_MODES)[number];
161
254
 
162
255
  // ---------------------------------------------------------------------------
@@ -260,6 +353,21 @@ export interface ValidationResult {
260
353
  warnings: string[];
261
354
  }
262
355
 
356
/**
 * Parse a TTL string like '30m', '6h', '7d', '500ms', '90s' into milliseconds.
 *
 * Accepted form: a non-negative decimal number, optional whitespace, then an
 * optional unit (`ms`, `s`, `m`, `h`, `d`; case-insensitive). A value with no
 * unit is taken as milliseconds. Surrounding whitespace is ignored.
 *
 * @param ttl The TTL string to parse.
 * @returns The duration in milliseconds, or null when the input is not a
 *          string, is malformed, is not strictly positive, or is so large
 *          that the millisecond value is not a finite number.
 */
export function parseTtlMs(ttl: string): number | null {
  // Runtime guard: callers may pass unvalidated (non-string) definition data.
  if (typeof ttl !== "string") return null;

  const m = /^(\d+(?:\.\d+)?)\s*(ms|s|m|h|d)?$/i.exec(ttl.trim());
  if (!m) return null;

  const n = Number(m[1]);
  if (!Number.isFinite(n) || n <= 0) return null;

  const unit = (m[2] ?? "ms").toLowerCase();
  const mult: Record<string, number> = { ms: 1, s: 1000, m: 60_000, h: 3_600_000, d: 86_400_000 };
  const factor = mult[unit];
  if (factor === undefined) return null;

  // A huge but finite magnitude can overflow to Infinity once scaled; report
  // that as invalid rather than returning a non-finite "duration".
  const ms = n * factor;
  return Number.isFinite(ms) ? ms : null;
}
370
+
263
371
  export interface ValidationOptions {
264
372
  /** Resolved invocation args, used for runtime checks like missing `{args.X}`. */
265
373
  args?: Record<string, unknown>;
@@ -320,6 +428,36 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
320
428
  if (type === "flow") {
321
429
  if (!p.use) errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name)`);
322
430
  }
431
+ if (type === "loop") {
432
+ if (!p.task) errors.push(`Phase '${p.id}' (loop) requires 'task' (the iteration body)`);
433
+ if (!p.until) errors.push(`Phase '${p.id}' (loop) requires 'until' (the stop condition)`);
434
+ if (p.maxIterations !== undefined) {
435
+ if (typeof p.maxIterations !== "number" || !Number.isFinite(p.maxIterations) || p.maxIterations < 1) {
436
+ errors.push(`Phase '${p.id}' (loop): maxIterations must be a number >= 1`);
437
+ } else if (p.maxIterations > LOOP_HARD_MAX_ITERATIONS) {
438
+ errors.push(`Phase '${p.id}' (loop): maxIterations must be <= ${LOOP_HARD_MAX_ITERATIONS}`);
439
+ }
440
+ }
441
+ }
442
+ if (type === "tournament") {
443
+ const hasBranches = Array.isArray(p.branches) && p.branches.length > 0;
444
+ if (!hasBranches && !p.task) {
445
+ errors.push(`Phase '${p.id}' (tournament) requires 'task' (the competitor prompt) or non-empty 'branches'`);
446
+ }
447
+ if (p.variants !== undefined) {
448
+ if (typeof p.variants !== "number" || !Number.isFinite(p.variants) || p.variants < 2) {
449
+ errors.push(`Phase '${p.id}' (tournament): variants must be a number >= 2`);
450
+ } else if (p.variants > TOURNAMENT_HARD_MAX_VARIANTS) {
451
+ errors.push(`Phase '${p.id}' (tournament): variants must be <= ${TOURNAMENT_HARD_MAX_VARIANTS}`);
452
+ }
453
+ }
454
+ if (hasBranches && p.branches!.length < 2) {
455
+ errors.push(`Phase '${p.id}' (tournament): 'branches' needs at least 2 competitors`);
456
+ }
457
+ if (p.mode && !TOURNAMENT_MODES.includes(p.mode as TournamentMode)) {
458
+ errors.push(`Phase '${p.id}' (tournament): unknown mode '${p.mode}'`);
459
+ }
460
+ }
323
461
  if (p.retry) {
324
462
  if (typeof p.retry.max !== "number" || p.retry.max < 0) {
325
463
  errors.push(`Phase '${p.id}': retry.max must be a number >= 0`);
@@ -337,6 +475,33 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
337
475
  errors.push(`Phase '${p.id}': unknown join mode '${p.join}'`);
338
476
  }
339
477
 
478
+ // Cache policy validation (cross-run memoization).
479
+ if (p.cache) {
480
+ const scope = p.cache.scope ?? "run-only";
481
+ if (!CACHE_SCOPES.includes(scope as CacheScope)) {
482
+ errors.push(`Phase '${p.id}': unknown cache.scope '${scope}' (expected one of ${CACHE_SCOPES.join(", ")})`);
483
+ }
484
+ // Gate B: gate/approval phases must produce a fresh result every run.
485
+ if (scope === "cross-run" && (CACHE_CROSS_RUN_BLOCKED_TYPES as readonly string[]).includes(type)) {
486
+ errors.push(
487
+ `Phase '${p.id}' (${type}): cache.scope 'cross-run' is not allowed for ${CACHE_CROSS_RUN_BLOCKED_TYPES.join("/")} phases — they must produce a fresh result each run. Use 'run-only'.`,
488
+ );
489
+ }
490
+ // Gate C: every fingerprint entry must use a known prefix (fail closed).
491
+ for (const fp of p.cache.fingerprint ?? []) {
492
+ const ok = CACHE_FINGERPRINT_PREFIXES.some((pre) => fp.startsWith(pre) && fp.length > pre.length);
493
+ if (!ok) {
494
+ errors.push(
495
+ `Phase '${p.id}': invalid cache.fingerprint entry '${fp}' (expected '<prefix><value>' with prefix one of ${CACHE_FINGERPRINT_PREFIXES.join(", ")})`,
496
+ );
497
+ }
498
+ }
499
+ // Gate D: TTL must parse to a positive duration when present.
500
+ if (p.cache.ttl !== undefined && parseTtlMs(p.cache.ttl) === null) {
501
+ errors.push(`Phase '${p.id}': invalid cache.ttl '${p.cache.ttl}' (expected e.g. '30m', '6h', '7d')`);
502
+ }
503
+ }
504
+
340
505
  // Agent name convention: hyphens only (per AGENTS.md naming convention)
341
506
  if (p.agent && typeof p.agent === "string" && p.agent.includes("_")) {
342
507
  errors.push(`Phase '${p.id}': agent name '${p.agent}' uses underscores — use hyphens (e.g. 'executor-code' not 'executor_code')`);