pi-taskflow 0.0.12 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,11 +13,12 @@
13
13
  import * as path from "node:path";
14
14
  import * as fs from "node:fs";
15
15
  import type { AgentConfig } from "./agents.ts";
16
- import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
16
+ import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
17
17
  import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
18
18
  import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
19
- import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
19
+ import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
20
20
  import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
21
+ import { CacheStore, resolveFingerprint } from "./cache.ts";
21
22
 
22
23
  /** A human-in-the-loop approval request raised by an `approval` phase. */
23
24
  export interface ApprovalRequest {
@@ -49,6 +50,8 @@ export interface RuntimeDeps {
49
50
  requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
50
51
  /** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
51
52
  loadFlow?: (name: string) => Taskflow | undefined;
53
+ /** Cross-run memoization store. Omit to construct a default one for `deps.cwd`. */
54
+ cacheStore?: CacheStore;
52
55
  /** Internal: sub-flow call stack, for recursion detection. */
53
56
  _stack?: string[];
54
57
  }
@@ -283,6 +286,7 @@ async function executePhase(
283
286
  deps: RuntimeDeps,
284
287
  prior: PhaseState | undefined,
285
288
  emitProgress: () => void,
289
+ _retryDepth = 0,
286
290
  ): Promise<PhaseState> {
287
291
  const type = phase.type ?? "agent";
288
292
  const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
@@ -295,6 +299,23 @@ async function executePhase(
295
299
  const ctx = buildInterpolationContext(state, previousOutput);
296
300
  const preRead = await resolvePhaseContext(phase, ctx);
297
301
 
302
+ // Resolve this phase's cache policy once. Default scope is "run-only" (the
303
+ // historical within-run resume behavior). Only "cross-run" phases resolve a
304
+ // fingerprint and consult the persistent store.
305
+ const cacheScope: CacheScope = (phase.cache?.scope ?? "run-only") as CacheScope;
306
+ const cc: PhaseCacheCtx = {
307
+ scope: cacheScope,
308
+ ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
309
+ fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint, phase.cwd ?? deps.cwd) : "",
310
+ store: deps.cacheStore ?? new CacheStore(deps.cwd),
311
+ prior,
312
+ phaseId: phase.id,
313
+ flowName: state.flowName,
314
+ runId: state.runId,
315
+ thinking: phase.thinking,
316
+ tools: phase.tools,
317
+ };
318
+
298
319
  const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
299
320
  run(
300
321
  deps.cwd,
@@ -434,16 +455,92 @@ async function executePhase(
434
455
  // interpolated task. gate additionally parses a verdict; reduce simply pulls
435
456
  // its inputs from `from` phases (already exposed via interpolation).
436
457
  if (type === "agent" || type === "gate" || type === "reduce") {
458
+ // Eval gate: zero-token machine checks before the LLM gate.
459
+ if (type === "gate" && Array.isArray(phase.eval) && phase.eval.length > 0) {
460
+ const evalCtx = buildInterpolationContext(state, previousOutput);
461
+ let allPassed = true;
462
+ for (const check of phase.eval) {
463
+ let expr = check;
464
+ // Pre-process `contains` expressions: "{steps.x.output} contains PASS"
465
+ // Convert to: interpolate LHS, check RHS substring inclusion.
466
+ const containsIdx = expr.indexOf(" contains ");
467
+ if (containsIdx > 0) {
468
+ const lhs = expr.slice(0, containsIdx).trim();
469
+ const rhs = expr.slice(containsIdx + " contains ".length).trim();
470
+ const lhsVal = interpolate(lhs, evalCtx);
471
+ const lhsStr = lhsVal.text;
472
+ if (!lhsStr.includes(rhs)) {
473
+ allPassed = false;
474
+ break;
475
+ }
476
+ continue;
477
+ }
478
+ if (!evaluateCondition(expr, evalCtx)) {
479
+ allPassed = false;
480
+ break;
481
+ }
482
+ }
483
+ if (allPassed) {
484
+ // All evals passed — skip the LLM gate, return an auto-pass.
485
+ const inputHash = cacheKey(cc, [phase.id, "eval-skip"]);
486
+ const ps: PhaseState = {
487
+ id: phase.id,
488
+ status: "done",
489
+ output: "PASS (eval checks passed — no LLM call)",
490
+ gate: { verdict: "pass" },
491
+ usage: emptyUsage(),
492
+ inputHash,
493
+ endedAt: Date.now(),
494
+ };
495
+ recordCache(cc, ps);
496
+ return ps;
497
+ }
498
+ }
437
499
  const { text } = interpolate(phase.task ?? "", ctx);
438
500
  const fullTask = preRead + text;
439
501
  const agentName = resolveAgent(phase.agent, deps, state);
440
- const inputHash = hashInput(phase.id, agentName, phase.model ?? "", fullTask);
441
- const cached = cachedPhase(prior, inputHash);
502
+ const inputHash = cacheKey(cc, [phase.id, agentName, phase.model ?? "", fullTask]);
503
+ const cached = cachedPhase(cc, inputHash);
442
504
  if (cached) return cached;
443
505
 
444
506
  const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
445
507
  const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
446
508
  if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
509
+
510
+ // onBlock:retry — re-execute upstream + gate until pass or max attempts.
511
+ if (type === "gate" && ps.gate?.verdict === "block") {
512
+ const onBlockV: string = phase.onBlock ?? "halt";
513
+ const MAX_RETRY_DEPTH = 3;
514
+ let attempt = 0;
515
+ let gatePs = ps;
516
+ while (onBlockV === "retry" && attempt < (phase.retry?.max ?? 1)) {
517
+ // H1: guard against unbounded spend and user abort
518
+ if (deps.signal?.aborted || overBudget(state).over) break;
519
+ attempt++;
520
+ // H2: cap nested retry depth to prevent exponential re-execution
521
+ // when a gate's upstream dependency is itself a gate with onBlock:retry
522
+ if (_retryDepth < MAX_RETRY_DEPTH) {
523
+ for (const depId of phase.dependsOn ?? []) {
524
+ const d = state.def.phases.find((p) => p.id === depId);
525
+ if (!d) continue;
526
+ const dPs = await executePhase(d, state, deps, prior, emitProgress, _retryDepth + 1);
527
+ state.phases[depId] = dPs;
528
+ }
529
+ }
530
+ const retryCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
531
+ const retryText = interpolate(phase.task ?? "", retryCtx).text;
532
+ const retryTask = preRead + retryText;
533
+ const retryIH = cacheKey(cc, [phase.id, agentName, phase.model ?? "", retryTask]);
534
+ const retryR = await runOne(agentName, retryTask, liveSink(state, phase.id, emitProgress));
535
+ gatePs = resultToPhaseState(phase.id, retryR, retryIH, parseJson);
536
+ if (gatePs.status === "done") gatePs.gate = parseGateVerdict(retryR.output);
537
+ if (gatePs.gate?.verdict !== "block" || overBudget(state).over) break;
538
+ }
539
+ gatePs.attempts = (ps.attempts ?? 0) + attempt;
540
+ recordCache(cc, gatePs);
541
+ return gatePs;
542
+ }
543
+ recordCache(cc, ps);
447
544
  return ps;
448
545
  }
449
546
 
@@ -455,12 +552,14 @@ async function executePhase(
455
552
  task: preRead + r.text,
456
553
  };
457
554
  });
458
- const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(branches));
459
- const cached = cachedPhase(prior, inputHash);
555
+ const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(branches)]);
556
+ const cached = cachedPhase(cc, inputHash);
460
557
  if (cached) return cached;
461
558
 
462
559
  const results = await runFanout(branches);
463
- return mergePhaseState(phase.id, results, inputHash, parseJson);
560
+ const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
561
+ recordCache(cc, ps);
562
+ return ps;
464
563
  }
465
564
 
466
565
  if (type === "map") {
@@ -485,19 +584,21 @@ async function executePhase(
485
584
  task: preRead + interpolate(phase.task ?? "", localCtx).text,
486
585
  };
487
586
  });
488
- const inputHash = hashInput(phase.id, phase.model ?? "", JSON.stringify(tasks));
489
- const cached = cachedPhase(prior, inputHash);
587
+ const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(tasks)]);
588
+ const cached = cachedPhase(cc, inputHash);
490
589
  if (cached) return cached;
491
590
 
492
591
  const results = await runFanout(tasks);
493
- return mergePhaseState(phase.id, results, inputHash, parseJson);
592
+ const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
593
+ recordCache(cc, ps);
594
+ return ps;
494
595
  }
495
596
 
496
597
  if (type === "approval") {
497
598
  const ctx = buildInterpolationContext(state, previousOutput);
498
599
  const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
499
600
  const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
500
- const cached = cachedPhase(prior, inputHash);
601
+ const cached = cachedPhase(cc, inputHash);
501
602
  if (cached) return cached;
502
603
 
503
604
  // Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
@@ -547,8 +648,8 @@ async function executePhase(
547
648
  provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
548
649
  }
549
650
  const subArgs = resolveArgs(subDef, provided);
550
- const inputHash = hashInput(phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs));
551
- const cached = cachedPhase(prior, inputHash);
651
+ const inputHash = cacheKey(cc, [phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs)]);
652
+ const cached = cachedPhase(cc, inputHash);
552
653
  if (cached) return cached;
553
654
 
554
655
  const live = state.phases[phase.id];
@@ -600,7 +701,7 @@ async function executePhase(
600
701
  },
601
702
  });
602
703
  const sp = Object.values(subState.phases);
603
- return {
704
+ const flowPs: PhaseState = {
604
705
  id: phase.id,
605
706
  status: subResult.ok ? "done" : "failed",
606
707
  output: subResult.finalOutput,
@@ -619,6 +720,207 @@ async function executePhase(
619
720
  inputHash,
620
721
  endedAt: Date.now(),
621
722
  };
723
+ recordCache(cc, flowPs);
724
+ return flowPs;
725
+ }
726
+
727
+ // loop-until-done: run the body repeatedly until `until` is truthy, the output
728
+ // converges to a fixed point, or maxIterations is hit (always terminates).
729
+ if (type === "loop") {
730
+ const agentName = resolveAgent(phase.agent, deps, state);
731
+ const rawMax = phase.maxIterations ?? LOOP_DEFAULT_MAX_ITERATIONS;
732
+ const maxIters = Math.max(1, Math.min(LOOP_HARD_MAX_ITERATIONS, Math.floor(rawMax)));
733
+ const convergence = phase.convergence ?? true;
734
+
735
+ const usages: UsageStats[] = [];
736
+ const loopWarnings: string[] = [];
737
+ let lastOutput = "";
738
+ let prevOutput: string | undefined;
739
+ let iterations = 0;
740
+ let stop: NonNullable<PhaseState["loop"]>["stop"] = "maxIterations";
741
+ let failedResult: RunResult | undefined;
742
+
743
+ for (let i = 1; i <= maxIters; i++) {
744
+ if (deps.signal?.aborted) {
745
+ stop = "failed";
746
+ break;
747
+ }
748
+ iterations = i;
749
+ // The body sees its iteration number and the prior iteration's output.
750
+ const bodyCtx = buildInterpolationContext(state, previousOutput, {
751
+ loop: { iteration: i, lastOutput, maxIterations: maxIters },
752
+ });
753
+ const body = preRead + interpolate(phase.task ?? "", bodyCtx).text;
754
+ const r = await runOne(agentName, body, liveSink(state, phase.id, emitProgress));
755
+ usages.push(r.usage);
756
+ if (isFailed(r)) {
757
+ failedResult = r;
758
+ stop = "failed";
759
+ break;
760
+ }
761
+ prevOutput = lastOutput;
762
+ lastOutput = r.output;
763
+
764
+ // Expose this iteration's output as {steps.<thisId>.output|json} so the
765
+ // `until` condition can inspect it (e.g. "{steps.refine.json.done}==true").
766
+ // Loop locals ({loop.iteration} etc.) are available to the condition too.
767
+ const untilCtx = buildInterpolationContext(state, previousOutput, {
768
+ loop: { iteration: i, lastOutput, maxIterations: maxIters },
769
+ });
770
+ untilCtx.steps[phase.id] = { output: lastOutput, json: safeParse(lastOutput) };
771
+ const { value: done, error: condErr } = tryEvaluateCondition(phase.until ?? "", untilCtx);
772
+ // A malformed condition must not spin forever: stop and surface a warning
773
+ // so the author learns the `until` never actually evaluated.
774
+ if (condErr) {
775
+ loopWarnings.push(`loop 'until' could not be evaluated (stopped early): ${condErr}`);
776
+ stop = "until";
777
+ break;
778
+ }
779
+ if (done) {
780
+ stop = "until";
781
+ break;
782
+ }
783
+ // Fixed-point convergence: identical consecutive output ⇒ further work is wasted.
784
+ if (convergence && prevOutput !== undefined && prevOutput === lastOutput) {
785
+ stop = "converged";
786
+ break;
787
+ }
788
+ }
789
+
790
+ const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
791
+ if (failedResult) {
792
+ return {
793
+ id: phase.id,
794
+ status: "failed",
795
+ output: lastOutput || undefined,
796
+ usage: aggUsage,
797
+ error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
798
+ loop: { iterations, stop: "failed" },
799
+ warnings: loopWarnings.length ? loopWarnings : undefined,
800
+ inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
801
+ endedAt: Date.now(),
802
+ };
803
+ }
804
+ return {
805
+ id: phase.id,
806
+ status: "done",
807
+ output: lastOutput,
808
+ json: parseJson ? safeParse(lastOutput) : undefined,
809
+ usage: aggUsage,
810
+ loop: { iterations, stop },
811
+ warnings: loopWarnings.length ? loopWarnings : undefined,
812
+ inputHash: hashInput(phase.id, "loop", phase.until ?? "", String(iterations)),
813
+ endedAt: Date.now(),
814
+ };
815
+ }
816
+
817
+ // tournament: spawn N competing variants, then a judge picks the best (or
818
+ // synthesizes an aggregate). Combines the parallel fan-out with a gate-style
819
+ // verdict, expressed as a single declarative phase.
820
+ if (type === "tournament") {
821
+ const mode = (phase.mode ?? "best") as TournamentMode;
822
+ // Competitors: explicit `branches` win; otherwise N copies of `task`.
823
+ let competitors: Array<{ agent: string; task: string }>;
824
+ if (phase.branches && phase.branches.length > 0) {
825
+ competitors = phase.branches.map((b) => ({
826
+ agent: resolveAgent(b.agent ?? phase.agent, deps, state),
827
+ task: preRead + interpolate(b.task, ctx).text,
828
+ }));
829
+ } else {
830
+ const n = Math.max(2, Math.min(TOURNAMENT_HARD_MAX_VARIANTS, Math.floor(phase.variants ?? TOURNAMENT_DEFAULT_VARIANTS)));
831
+ const body = preRead + interpolate(phase.task ?? "", ctx).text;
832
+ competitors = Array.from({ length: n }, () => ({ agent: resolveAgent(phase.agent, deps, state), task: body }));
833
+ }
834
+
835
+ const results = await runFanout(competitors);
836
+ const ran = results.filter((r) => r.stopReason !== "budget-skipped");
837
+ const ok = ran.filter((r) => !isFailed(r));
838
+ const variantUsage = aggregateUsage(results.map((r) => r.usage));
839
+ // Winner numbers are 1-based over `ran` (exactly what the judge is shown).
840
+ // Using indexOf on the stable `ran` array is reference-based and correct even
841
+ // when two variants produce byte-identical output.
842
+ const ranIdx = (r: RunResult) => ran.indexOf(r) + 1;
843
+
844
+ // All competitors failed → the tournament fails (nothing to judge).
845
+ if (ok.length === 0) {
846
+ return {
847
+ id: phase.id,
848
+ status: "failed",
849
+ usage: variantUsage,
850
+ error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
851
+ tournament: { variants: competitors.length, winner: 0, mode },
852
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
853
+ endedAt: Date.now(),
854
+ };
855
+ }
856
+ // Only one competitor survived → no contest; it wins by default (skip judge).
857
+ if (ok.length === 1) {
858
+ return {
859
+ id: phase.id,
860
+ status: "done",
861
+ output: ok[0].output,
862
+ json: parseJson ? safeParse(ok[0].output) : undefined,
863
+ usage: variantUsage,
864
+ model: ok[0].model,
865
+ tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
866
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
867
+ endedAt: Date.now(),
868
+ };
869
+ }
870
+
871
+ // Build the judge prompt: label every variant output, then the rubric.
872
+ const labelled = ran
873
+ .map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
874
+ .join("\n\n---\n\n");
875
+ const rubric =
876
+ interpolate(phase.judge ?? "", ctx).text.trim() ||
877
+ "You are judging competing answers to the same task. Pick the single best variant on correctness, completeness, and clarity.";
878
+ const directive =
879
+ mode === "best"
880
+ ? `End your reply with a line exactly: WINNER: <number> (1–${ran.length}), choosing the strongest eligible variant.`
881
+ : `Synthesize the strongest possible answer by combining the best parts of the eligible variants. Then end with a line: WINNER: <number> indicating which variant contributed most.`;
882
+ const judgeTask = `${rubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
883
+ const judgeAgent = resolveAgent(phase.judgeAgent ?? phase.agent, deps, state);
884
+ const judgeRes = await runOne(judgeAgent, judgeTask, liveSink(state, phase.id, emitProgress));
885
+ const judgeUsage = aggregateUsage([variantUsage, judgeRes.usage]);
886
+
887
+ if (isFailed(judgeRes)) {
888
+ // Judge failed: fall back to the first eligible variant (fail-open, never
889
+ // lose the work). Report the variant we actually used, not a hardcoded 1.
890
+ return {
891
+ id: phase.id,
892
+ status: "done",
893
+ output: ok[0].output,
894
+ json: parseJson ? safeParse(ok[0].output) : undefined,
895
+ usage: judgeUsage,
896
+ model: ok[0].model,
897
+ warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
898
+ tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
899
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
900
+ endedAt: Date.now(),
901
+ };
902
+ }
903
+
904
+ const { winner, reason } = parseTournamentWinner(judgeRes.output, ran.length);
905
+ const winnerResult = ran[winner - 1];
906
+ const winnerIneligible = !winnerResult || isFailed(winnerResult);
907
+ // In 'best' mode the output is the winning variant verbatim; in 'aggregate'
908
+ // mode it is the judge's synthesized answer.
909
+ const chosen = winnerIneligible ? ok[0] : winnerResult;
910
+ const winnerIdx = ranIdx(chosen);
911
+ const output = mode === "aggregate" ? judgeRes.output : chosen.output;
912
+ return {
913
+ id: phase.id,
914
+ status: "done",
915
+ output,
916
+ json: parseJson ? safeParse(output) : undefined,
917
+ usage: judgeUsage,
918
+ model: mode === "aggregate" ? judgeRes.model : chosen.model,
919
+ warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
920
+ tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
921
+ inputHash: hashInput(phase.id, "tournament", String(competitors.length), mode),
922
+ endedAt: Date.now(),
923
+ };
622
924
  }
623
925
 
624
926
  return {
@@ -657,13 +959,89 @@ function lastCompletedOutput(state: RunState, phase: Phase): string | undefined
657
959
  return undefined;
658
960
  }
659
961
 
660
- function cachedPhase(prior: PhaseState | undefined, inputHash: string): PhaseState | null {
661
- if (prior && prior.status === "done" && prior.inputHash === inputHash) {
662
- return { ...prior, status: "done" };
962
/**
 * Cache settings for a single phase. executePhase builds one of these up front
 * and passes it to cacheKey / cachedPhase / recordCache, so every phase-type
 * branch derives its key and consults the stores the same way.
 */
interface PhaseCacheCtx {
  // ── Policy ────────────────────────────────────────────────────────────────
  /** Reuse policy: "off", "run-only", or "cross-run". */
  scope: CacheScope;
  /** TTL in milliseconds handed to the cross-run store lookup; absent when none was configured. */
  ttlMs?: number;
  /** Pre-resolved fingerprint string; when empty, nothing extra is appended to the key. */
  fingerprint: string;

  // ── Lookup sources ────────────────────────────────────────────────────────
  /** Persistent store read on cross-run lookups and written by recordCache. */
  store: CacheStore;
  /** This phase's state from the earlier attempt of the same run, if any (within-run resume). */
  prior: PhaseState | undefined;

  // ── Provenance recorded with each stored entry ────────────────────────────
  phaseId: string;
  /** Also folded into the cache key so that different flows never share entries. */
  flowName: string;
  runId: string;

  // ── Execution config that is part of the cache identity ───────────────────
  // Both values influence what the subagent produces, so cacheKey hashes them;
  // otherwise a config change could be answered with a stale cross-run entry.
  thinking?: string;
  tools?: string[];
}
982
+
983
/**
 * Build the final cache key for a phase.
 *
 * The hashed identity is, in order: the flow name (so two flows that share a
 * phase id, task and model cannot collide), the caller's `baseParts`, the
 * per-phase thinking and tools settings (either one changes the subagent's
 * output), and finally the resolved fingerprint when one is present.
 *
 * @param cc        Cache context resolved for the current phase.
 * @param baseParts Phase-type specific key material (id, agent, model, task, ...).
 * @returns The hash produced by `hashInput` over the assembled parts.
 */
function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
  const thinkingPart = `think:${cc.thinking ?? ""}`;
  const toolsPart = `tools:${JSON.stringify(cc.tools ?? [])}`;
  const identity = [`flow:${cc.flowName}`, ...baseParts, thinkingPart, toolsPart];
  // An empty fingerprint contributes nothing: it is left out, not hashed as "".
  if (cc.fingerprint) identity.push(cc.fingerprint);
  return hashInput(...identity);
}
992
+
993
/**
 * Look up a reusable result for a phase, according to its cache scope:
 *   - "off":       nothing is ever reused, not even within the same run.
 *   - "run-only":  only the prior state of this run may be reused.
 *   - "cross-run": the prior state is tried first, then the persistent store.
 *
 * A cross-run hit is returned as a fresh "done" state with zeroed usage and
 * `cacheHit: "cross-run"`; only output, json and model come from the stored entry.
 *
 * @param cc        Cache context resolved for the current phase.
 * @param inputHash Key for this phase's current inputs.
 * @returns The reusable phase state, or null when the phase has to run.
 */
function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
  const { scope, prior } = cc;
  if (scope === "off") return null;

  // Within-run resume: allowed for every scope except "off".
  if (prior?.status === "done" && prior.inputHash === inputHash) {
    return { ...prior, status: "done" };
  }

  // Everything below is opt-in cross-run memoization.
  if (scope !== "cross-run") return null;

  const entry = cc.store.get(inputHash, cc.ttlMs);
  if (!entry) return null;

  return {
    id: cc.phaseId,
    status: "done",
    inputHash,
    output: entry.output,
    json: entry.json,
    model: entry.model,
    usage: emptyUsage(),
    cacheHit: "cross-run",
    endedAt: Date.now(),
  };
}
666
1027
 
1028
/**
 * Persist a freshly computed phase result to the cross-run store.
 *
 * Nothing is written when:
 *   - the phase's scope is not "cross-run";
 *   - the phase did not finish "done", or it has no inputHash to key on;
 *   - the state itself came from the cache (`cacheHit` set);
 *   - the phase is a gate whose verdict is "block".
 *
 * The last rule exists because the entry written here carries only
 * output/json/model, and the cross-run hit path in cachedPhase rebuilds the
 * phase state from exactly those fields. A stored blocking gate would
 * therefore come back on the next run as a plain "done" phase with no
 * verdict. Leaving it out of the store means the gate is evaluated again.
 *
 * @param cc Cache context resolved for the current phase.
 * @param ps The phase state that was just produced.
 */
function recordCache(cc: PhaseCacheCtx, ps: PhaseState): void {
  if (cc.scope !== "cross-run") return;
  if (ps.status !== "done" || !ps.inputHash) return;
  if (ps.cacheHit) return; // don't re-store a value we just read from cache
  if (ps.gate?.verdict === "block") return; // the verdict cannot be restored from a cache entry
  cc.store.put({
    key: ps.inputHash,
    createdAt: Date.now(),
    output: ps.output,
    json: ps.json,
    model: ps.model,
    flowName: cc.flowName,
    phaseId: cc.phaseId,
    runId: cc.runId,
  });
}
1044
+
667
1045
  /**
668
1046
  * Resolve an agent name against available agents. Falls back to the default
669
1047
  * agent if the requested agent isn't found, logging a warning via safeEmit.
@@ -722,6 +1100,29 @@ function asReason(v: unknown): string | undefined {
722
1100
  return typeof v === "string" && v.trim() ? v.trim() : undefined;
723
1101
  }
724
1102
 
1103
/**
 * Parse a judge's pick of the winning variant.
 *
 * Accepted forms, tried in this order:
 *   1. A JSON object with a numeric (or numeric-string) `winner`, `best` or
 *      `choice` field; an optional `reason` string is passed through.
 *   2. A `WINNER: n` / `WINNER = n` line anywhere in the text (case-insensitive,
 *      optional `#` before the number). If several are present, the last wins.
 *
 * The pick is floored and clamped to [1, count]. Fail-open: when nothing
 * readable is found the result is variant 1 with an explanatory reason, so the
 * competitors' work is never lost.
 *
 * A blank or whitespace-only string in the JSON field is treated as "no pick".
 * `Number("")` is 0, which would otherwise be accepted and clamped to variant 1
 * without ever consulting a `WINNER:` line or reporting the fallback.
 *
 * @param output Raw judge output.
 * @param count  Number of variants the judge was shown.
 * @returns The 1-based winner index plus an optional reason.
 */
export function parseTournamentWinner(output: string, count: number): { winner: number; reason?: string } {
  const clamp = (n: number) => Math.min(Math.max(1, Math.floor(n)), Math.max(1, count));

  const json = safeParse(output);
  if (json && typeof json === "object") {
    const o = json as Record<string, unknown>;
    const raw = o.winner ?? o.best ?? o.choice;
    let n = NaN;
    if (typeof raw === "number") {
      n = raw;
    } else if (typeof raw === "string" && raw.trim() !== "") {
      n = Number(raw);
    }
    if (Number.isFinite(n)) return { winner: clamp(n), reason: asReason(o.reason) };
  }

  // Fall back to scanning for WINNER lines; the final one is the judge's last word.
  const matches = [...output.matchAll(/WINNER\s*[:=]\s*#?\s*(\d+)/gi)];
  if (matches.length) {
    const n = Number(matches[matches.length - 1][1]);
    if (Number.isFinite(n)) return { winner: clamp(n) };
  }

  return { winner: 1, reason: "no parseable winner; defaulted to variant 1" };
}
1125
+
725
1126
  /**
726
1127
  * Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
727
1128
  *