pi-taskflow 0.0.12 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -23
- package/extensions/cache.ts +263 -0
- package/extensions/index.ts +194 -118
- package/extensions/init.ts +607 -0
- package/extensions/render.ts +39 -0
- package/extensions/runtime.ts +418 -17
- package/extensions/schema.ts +179 -1
- package/extensions/store.ts +16 -2
- package/extensions/verify.ts +367 -0
- package/package.json +4 -3
package/extensions/runtime.ts
CHANGED
|
@@ -13,11 +13,12 @@
|
|
|
13
13
|
import * as path from "node:path";
|
|
14
14
|
import * as fs from "node:fs";
|
|
15
15
|
import type { AgentConfig } from "./agents.ts";
|
|
16
|
-
import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
|
|
16
|
+
import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
|
|
17
17
|
import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
|
|
18
18
|
import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
|
|
19
|
-
import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
|
|
19
|
+
import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
|
|
20
20
|
import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
|
|
21
|
+
import { CacheStore, resolveFingerprint } from "./cache.ts";
|
|
21
22
|
|
|
22
23
|
/** A human-in-the-loop approval request raised by an `approval` phase. */
|
|
23
24
|
export interface ApprovalRequest {
|
|
@@ -49,6 +50,8 @@ export interface RuntimeDeps {
|
|
|
49
50
|
requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
|
|
50
51
|
/** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
|
|
51
52
|
loadFlow?: (name: string) => Taskflow | undefined;
|
|
53
|
+
/** Cross-run memoization store. Omit to construct a default one for `deps.cwd`. */
|
|
54
|
+
cacheStore?: CacheStore;
|
|
52
55
|
/** Internal: sub-flow call stack, for recursion detection. */
|
|
53
56
|
_stack?: string[];
|
|
54
57
|
}
|
|
@@ -283,6 +286,7 @@ async function executePhase(
|
|
|
283
286
|
deps: RuntimeDeps,
|
|
284
287
|
prior: PhaseState | undefined,
|
|
285
288
|
emitProgress: () => void,
|
|
289
|
+
_retryDepth = 0,
|
|
286
290
|
): Promise<PhaseState> {
|
|
287
291
|
const type = phase.type ?? "agent";
|
|
288
292
|
const concurrency = phase.concurrency ?? state.def.concurrency ?? 8;
|
|
@@ -295,6 +299,23 @@ async function executePhase(
|
|
|
295
299
|
const ctx = buildInterpolationContext(state, previousOutput);
|
|
296
300
|
const preRead = await resolvePhaseContext(phase, ctx);
|
|
297
301
|
|
|
302
|
+
// Resolve this phase's cache policy once. Default scope is "run-only" (the
|
|
303
|
+
// historical within-run resume behavior). Only "cross-run" phases resolve a
|
|
304
|
+
// fingerprint and consult the persistent store.
|
|
305
|
+
const cacheScope: CacheScope = (phase.cache?.scope ?? "run-only") as CacheScope;
|
|
306
|
+
const cc: PhaseCacheCtx = {
|
|
307
|
+
scope: cacheScope,
|
|
308
|
+
ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
|
|
309
|
+
fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint, phase.cwd ?? deps.cwd) : "",
|
|
310
|
+
store: deps.cacheStore ?? new CacheStore(deps.cwd),
|
|
311
|
+
prior,
|
|
312
|
+
phaseId: phase.id,
|
|
313
|
+
flowName: state.flowName,
|
|
314
|
+
runId: state.runId,
|
|
315
|
+
thinking: phase.thinking,
|
|
316
|
+
tools: phase.tools,
|
|
317
|
+
};
|
|
318
|
+
|
|
298
319
|
const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
|
|
299
320
|
run(
|
|
300
321
|
deps.cwd,
|
|
@@ -434,16 +455,92 @@ async function executePhase(
|
|
|
434
455
|
// interpolated task. gate additionally parses a verdict; reduce simply pulls
|
|
435
456
|
// its inputs from `from` phases (already exposed via interpolation).
|
|
436
457
|
if (type === "agent" || type === "gate" || type === "reduce") {
|
|
458
|
+
// Eval gate: zero-token machine checks before the LLM gate.
|
|
459
|
+
if (type === "gate" && Array.isArray(phase.eval) && phase.eval.length > 0) {
|
|
460
|
+
const evalCtx = buildInterpolationContext(state, previousOutput);
|
|
461
|
+
let allPassed = true;
|
|
462
|
+
for (const check of phase.eval) {
|
|
463
|
+
let expr = check;
|
|
464
|
+
// Pre-process `contains` expressions: "{steps.x.output} contains PASS"
|
|
465
|
+
// Convert to: interpolate LHS, check RHS substring inclusion.
|
|
466
|
+
const containsIdx = expr.indexOf(" contains ");
|
|
467
|
+
if (containsIdx > 0) {
|
|
468
|
+
const lhs = expr.slice(0, containsIdx).trim();
|
|
469
|
+
const rhs = expr.slice(containsIdx + " contains ".length).trim();
|
|
470
|
+
const lhsVal = interpolate(lhs, evalCtx);
|
|
471
|
+
const lhsStr = lhsVal.text;
|
|
472
|
+
if (!lhsStr.includes(rhs)) {
|
|
473
|
+
allPassed = false;
|
|
474
|
+
break;
|
|
475
|
+
}
|
|
476
|
+
continue;
|
|
477
|
+
}
|
|
478
|
+
if (!evaluateCondition(expr, evalCtx)) {
|
|
479
|
+
allPassed = false;
|
|
480
|
+
break;
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
if (allPassed) {
|
|
484
|
+
// All evals passed — skip the LLM gate, return an auto-pass.
|
|
485
|
+
const inputHash = cacheKey(cc, [phase.id, "eval-skip"]);
|
|
486
|
+
const ps: PhaseState = {
|
|
487
|
+
id: phase.id,
|
|
488
|
+
status: "done",
|
|
489
|
+
output: "PASS (eval checks passed — no LLM call)",
|
|
490
|
+
gate: { verdict: "pass" },
|
|
491
|
+
usage: emptyUsage(),
|
|
492
|
+
inputHash,
|
|
493
|
+
endedAt: Date.now(),
|
|
494
|
+
};
|
|
495
|
+
recordCache(cc, ps);
|
|
496
|
+
return ps;
|
|
497
|
+
}
|
|
498
|
+
}
|
|
437
499
|
const { text } = interpolate(phase.task ?? "", ctx);
|
|
438
500
|
const fullTask = preRead + text;
|
|
439
501
|
const agentName = resolveAgent(phase.agent, deps, state);
|
|
440
|
-
const inputHash =
|
|
441
|
-
const cached = cachedPhase(
|
|
502
|
+
const inputHash = cacheKey(cc, [phase.id, agentName, phase.model ?? "", fullTask]);
|
|
503
|
+
const cached = cachedPhase(cc, inputHash);
|
|
442
504
|
if (cached) return cached;
|
|
443
505
|
|
|
444
506
|
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
|
|
445
507
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
446
508
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
509
|
+
|
|
510
|
+
// onBlock:retry — re-execute upstream + gate until pass or max attempts.
|
|
511
|
+
if (type === "gate" && ps.gate?.verdict === "block") {
|
|
512
|
+
const onBlockV: string = phase.onBlock ?? "halt";
|
|
513
|
+
const MAX_RETRY_DEPTH = 3;
|
|
514
|
+
let attempt = 0;
|
|
515
|
+
let gatePs = ps;
|
|
516
|
+
while (onBlockV === "retry" && attempt < (phase.retry?.max ?? 1)) {
|
|
517
|
+
// H1: guard against unbounded spend and user abort
|
|
518
|
+
if (deps.signal?.aborted || overBudget(state).over) break;
|
|
519
|
+
attempt++;
|
|
520
|
+
// H2: cap nested retry depth to prevent exponential re-execution
|
|
521
|
+
// when a gate's upstream dependency is itself a gate with onBlock:retry
|
|
522
|
+
if (_retryDepth < MAX_RETRY_DEPTH) {
|
|
523
|
+
for (const depId of phase.dependsOn ?? []) {
|
|
524
|
+
const d = state.def.phases.find((p) => p.id === depId);
|
|
525
|
+
if (!d) continue;
|
|
526
|
+
const dPs = await executePhase(d, state, deps, prior, emitProgress, _retryDepth + 1);
|
|
527
|
+
state.phases[depId] = dPs;
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
const retryCtx = buildInterpolationContext(state, lastCompletedOutput(state, phase));
|
|
531
|
+
const retryText = interpolate(phase.task ?? "", retryCtx).text;
|
|
532
|
+
const retryTask = preRead + retryText;
|
|
533
|
+
const retryIH = cacheKey(cc, [phase.id, agentName, phase.model ?? "", retryTask]);
|
|
534
|
+
const retryR = await runOne(agentName, retryTask, liveSink(state, phase.id, emitProgress));
|
|
535
|
+
gatePs = resultToPhaseState(phase.id, retryR, retryIH, parseJson);
|
|
536
|
+
if (gatePs.status === "done") gatePs.gate = parseGateVerdict(retryR.output);
|
|
537
|
+
if (gatePs.gate?.verdict !== "block" || overBudget(state).over) break;
|
|
538
|
+
}
|
|
539
|
+
gatePs.attempts = (ps.attempts ?? 0) + attempt;
|
|
540
|
+
recordCache(cc, gatePs);
|
|
541
|
+
return gatePs;
|
|
542
|
+
}
|
|
543
|
+
recordCache(cc, ps);
|
|
447
544
|
return ps;
|
|
448
545
|
}
|
|
449
546
|
|
|
@@ -455,12 +552,14 @@ async function executePhase(
|
|
|
455
552
|
task: preRead + r.text,
|
|
456
553
|
};
|
|
457
554
|
});
|
|
458
|
-
const inputHash =
|
|
459
|
-
const cached = cachedPhase(
|
|
555
|
+
const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(branches)]);
|
|
556
|
+
const cached = cachedPhase(cc, inputHash);
|
|
460
557
|
if (cached) return cached;
|
|
461
558
|
|
|
462
559
|
const results = await runFanout(branches);
|
|
463
|
-
|
|
560
|
+
const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
|
|
561
|
+
recordCache(cc, ps);
|
|
562
|
+
return ps;
|
|
464
563
|
}
|
|
465
564
|
|
|
466
565
|
if (type === "map") {
|
|
@@ -485,19 +584,21 @@ async function executePhase(
|
|
|
485
584
|
task: preRead + interpolate(phase.task ?? "", localCtx).text,
|
|
486
585
|
};
|
|
487
586
|
});
|
|
488
|
-
const inputHash =
|
|
489
|
-
const cached = cachedPhase(
|
|
587
|
+
const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(tasks)]);
|
|
588
|
+
const cached = cachedPhase(cc, inputHash);
|
|
490
589
|
if (cached) return cached;
|
|
491
590
|
|
|
492
591
|
const results = await runFanout(tasks);
|
|
493
|
-
|
|
592
|
+
const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
|
|
593
|
+
recordCache(cc, ps);
|
|
594
|
+
return ps;
|
|
494
595
|
}
|
|
495
596
|
|
|
496
597
|
if (type === "approval") {
|
|
497
598
|
const ctx = buildInterpolationContext(state, previousOutput);
|
|
498
599
|
const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
|
|
499
600
|
const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
|
|
500
|
-
const cached = cachedPhase(
|
|
601
|
+
const cached = cachedPhase(cc, inputHash);
|
|
501
602
|
if (cached) return cached;
|
|
502
603
|
|
|
503
604
|
// Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
|
|
@@ -547,8 +648,8 @@ async function executePhase(
|
|
|
547
648
|
provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
|
|
548
649
|
}
|
|
549
650
|
const subArgs = resolveArgs(subDef, provided);
|
|
550
|
-
const inputHash =
|
|
551
|
-
const cached = cachedPhase(
|
|
651
|
+
const inputHash = cacheKey(cc, [phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs)]);
|
|
652
|
+
const cached = cachedPhase(cc, inputHash);
|
|
552
653
|
if (cached) return cached;
|
|
553
654
|
|
|
554
655
|
const live = state.phases[phase.id];
|
|
@@ -600,7 +701,7 @@ async function executePhase(
|
|
|
600
701
|
},
|
|
601
702
|
});
|
|
602
703
|
const sp = Object.values(subState.phases);
|
|
603
|
-
|
|
704
|
+
const flowPs: PhaseState = {
|
|
604
705
|
id: phase.id,
|
|
605
706
|
status: subResult.ok ? "done" : "failed",
|
|
606
707
|
output: subResult.finalOutput,
|
|
@@ -619,6 +720,207 @@ async function executePhase(
|
|
|
619
720
|
inputHash,
|
|
620
721
|
endedAt: Date.now(),
|
|
621
722
|
};
|
|
723
|
+
recordCache(cc, flowPs);
|
|
724
|
+
return flowPs;
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
// loop-until-done: run the body repeatedly until `until` is truthy, the output
|
|
728
|
+
// converges to a fixed point, or maxIterations is hit (always terminates).
|
|
729
|
+
if (type === "loop") {
|
|
730
|
+
const agentName = resolveAgent(phase.agent, deps, state);
|
|
731
|
+
const rawMax = phase.maxIterations ?? LOOP_DEFAULT_MAX_ITERATIONS;
|
|
732
|
+
const maxIters = Math.max(1, Math.min(LOOP_HARD_MAX_ITERATIONS, Math.floor(rawMax)));
|
|
733
|
+
const convergence = phase.convergence ?? true;
|
|
734
|
+
|
|
735
|
+
const usages: UsageStats[] = [];
|
|
736
|
+
const loopWarnings: string[] = [];
|
|
737
|
+
let lastOutput = "";
|
|
738
|
+
let prevOutput: string | undefined;
|
|
739
|
+
let iterations = 0;
|
|
740
|
+
let stop: NonNullable<PhaseState["loop"]>["stop"] = "maxIterations";
|
|
741
|
+
let failedResult: RunResult | undefined;
|
|
742
|
+
|
|
743
|
+
for (let i = 1; i <= maxIters; i++) {
|
|
744
|
+
if (deps.signal?.aborted) {
|
|
745
|
+
stop = "failed";
|
|
746
|
+
break;
|
|
747
|
+
}
|
|
748
|
+
iterations = i;
|
|
749
|
+
// The body sees its iteration number and the prior iteration's output.
|
|
750
|
+
const bodyCtx = buildInterpolationContext(state, previousOutput, {
|
|
751
|
+
loop: { iteration: i, lastOutput, maxIterations: maxIters },
|
|
752
|
+
});
|
|
753
|
+
const body = preRead + interpolate(phase.task ?? "", bodyCtx).text;
|
|
754
|
+
const r = await runOne(agentName, body, liveSink(state, phase.id, emitProgress));
|
|
755
|
+
usages.push(r.usage);
|
|
756
|
+
if (isFailed(r)) {
|
|
757
|
+
failedResult = r;
|
|
758
|
+
stop = "failed";
|
|
759
|
+
break;
|
|
760
|
+
}
|
|
761
|
+
prevOutput = lastOutput;
|
|
762
|
+
lastOutput = r.output;
|
|
763
|
+
|
|
764
|
+
// Expose this iteration's output as {steps.<thisId>.output|json} so the
|
|
765
|
+
// `until` condition can inspect it (e.g. "{steps.refine.json.done}==true").
|
|
766
|
+
// Loop locals ({loop.iteration} etc.) are available to the condition too.
|
|
767
|
+
const untilCtx = buildInterpolationContext(state, previousOutput, {
|
|
768
|
+
loop: { iteration: i, lastOutput, maxIterations: maxIters },
|
|
769
|
+
});
|
|
770
|
+
untilCtx.steps[phase.id] = { output: lastOutput, json: safeParse(lastOutput) };
|
|
771
|
+
const { value: done, error: condErr } = tryEvaluateCondition(phase.until ?? "", untilCtx);
|
|
772
|
+
// A malformed condition must not spin forever: stop and surface a warning
|
|
773
|
+
// so the author learns the `until` never actually evaluated.
|
|
774
|
+
if (condErr) {
|
|
775
|
+
loopWarnings.push(`loop 'until' could not be evaluated (stopped early): ${condErr}`);
|
|
776
|
+
stop = "until";
|
|
777
|
+
break;
|
|
778
|
+
}
|
|
779
|
+
if (done) {
|
|
780
|
+
stop = "until";
|
|
781
|
+
break;
|
|
782
|
+
}
|
|
783
|
+
// Fixed-point convergence: identical consecutive output ⇒ further work is wasted.
|
|
784
|
+
if (convergence && prevOutput !== undefined && prevOutput === lastOutput) {
|
|
785
|
+
stop = "converged";
|
|
786
|
+
break;
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
|
|
791
|
+
if (failedResult) {
|
|
792
|
+
return {
|
|
793
|
+
id: phase.id,
|
|
794
|
+
status: "failed",
|
|
795
|
+
output: lastOutput || undefined,
|
|
796
|
+
usage: aggUsage,
|
|
797
|
+
error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
|
|
798
|
+
loop: { iterations, stop: "failed" },
|
|
799
|
+
warnings: loopWarnings.length ? loopWarnings : undefined,
|
|
800
|
+
inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
|
|
801
|
+
endedAt: Date.now(),
|
|
802
|
+
};
|
|
803
|
+
}
|
|
804
|
+
return {
|
|
805
|
+
id: phase.id,
|
|
806
|
+
status: "done",
|
|
807
|
+
output: lastOutput,
|
|
808
|
+
json: parseJson ? safeParse(lastOutput) : undefined,
|
|
809
|
+
usage: aggUsage,
|
|
810
|
+
loop: { iterations, stop },
|
|
811
|
+
warnings: loopWarnings.length ? loopWarnings : undefined,
|
|
812
|
+
inputHash: hashInput(phase.id, "loop", phase.until ?? "", String(iterations)),
|
|
813
|
+
endedAt: Date.now(),
|
|
814
|
+
};
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
// tournament: spawn N competing variants, then a judge picks the best (or
|
|
818
|
+
// synthesizes an aggregate). Combines the parallel fan-out with a gate-style
|
|
819
|
+
// verdict, expressed as a single declarative phase.
|
|
820
|
+
if (type === "tournament") {
|
|
821
|
+
const mode = (phase.mode ?? "best") as TournamentMode;
|
|
822
|
+
// Competitors: explicit `branches` win; otherwise N copies of `task`.
|
|
823
|
+
let competitors: Array<{ agent: string; task: string }>;
|
|
824
|
+
if (phase.branches && phase.branches.length > 0) {
|
|
825
|
+
competitors = phase.branches.map((b) => ({
|
|
826
|
+
agent: resolveAgent(b.agent ?? phase.agent, deps, state),
|
|
827
|
+
task: preRead + interpolate(b.task, ctx).text,
|
|
828
|
+
}));
|
|
829
|
+
} else {
|
|
830
|
+
const n = Math.max(2, Math.min(TOURNAMENT_HARD_MAX_VARIANTS, Math.floor(phase.variants ?? TOURNAMENT_DEFAULT_VARIANTS)));
|
|
831
|
+
const body = preRead + interpolate(phase.task ?? "", ctx).text;
|
|
832
|
+
competitors = Array.from({ length: n }, () => ({ agent: resolveAgent(phase.agent, deps, state), task: body }));
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
const results = await runFanout(competitors);
|
|
836
|
+
const ran = results.filter((r) => r.stopReason !== "budget-skipped");
|
|
837
|
+
const ok = ran.filter((r) => !isFailed(r));
|
|
838
|
+
const variantUsage = aggregateUsage(results.map((r) => r.usage));
|
|
839
|
+
// Winner numbers are 1-based over `ran` (exactly what the judge is shown).
|
|
840
|
+
// Using indexOf on the stable `ran` array is reference-based and correct even
|
|
841
|
+
// when two variants produce byte-identical output.
|
|
842
|
+
const ranIdx = (r: RunResult) => ran.indexOf(r) + 1;
|
|
843
|
+
|
|
844
|
+
// All competitors failed → the tournament fails (nothing to judge).
|
|
845
|
+
if (ok.length === 0) {
|
|
846
|
+
return {
|
|
847
|
+
id: phase.id,
|
|
848
|
+
status: "failed",
|
|
849
|
+
usage: variantUsage,
|
|
850
|
+
error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
|
|
851
|
+
tournament: { variants: competitors.length, winner: 0, mode },
|
|
852
|
+
inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
|
|
853
|
+
endedAt: Date.now(),
|
|
854
|
+
};
|
|
855
|
+
}
|
|
856
|
+
// Only one competitor survived → no contest; it wins by default (skip judge).
|
|
857
|
+
if (ok.length === 1) {
|
|
858
|
+
return {
|
|
859
|
+
id: phase.id,
|
|
860
|
+
status: "done",
|
|
861
|
+
output: ok[0].output,
|
|
862
|
+
json: parseJson ? safeParse(ok[0].output) : undefined,
|
|
863
|
+
usage: variantUsage,
|
|
864
|
+
model: ok[0].model,
|
|
865
|
+
tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
|
|
866
|
+
inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
|
|
867
|
+
endedAt: Date.now(),
|
|
868
|
+
};
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
// Build the judge prompt: label every variant output, then the rubric.
|
|
872
|
+
const labelled = ran
|
|
873
|
+
.map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
|
|
874
|
+
.join("\n\n---\n\n");
|
|
875
|
+
const rubric =
|
|
876
|
+
interpolate(phase.judge ?? "", ctx).text.trim() ||
|
|
877
|
+
"You are judging competing answers to the same task. Pick the single best variant on correctness, completeness, and clarity.";
|
|
878
|
+
const directive =
|
|
879
|
+
mode === "best"
|
|
880
|
+
? `End your reply with a line exactly: WINNER: <number> (1–${ran.length}), choosing the strongest eligible variant.`
|
|
881
|
+
: `Synthesize the strongest possible answer by combining the best parts of the eligible variants. Then end with a line: WINNER: <number> indicating which variant contributed most.`;
|
|
882
|
+
const judgeTask = `${rubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
|
|
883
|
+
const judgeAgent = resolveAgent(phase.judgeAgent ?? phase.agent, deps, state);
|
|
884
|
+
const judgeRes = await runOne(judgeAgent, judgeTask, liveSink(state, phase.id, emitProgress));
|
|
885
|
+
const judgeUsage = aggregateUsage([variantUsage, judgeRes.usage]);
|
|
886
|
+
|
|
887
|
+
if (isFailed(judgeRes)) {
|
|
888
|
+
// Judge failed: fall back to the first eligible variant (fail-open, never
|
|
889
|
+
// lose the work). Report the variant we actually used, not a hardcoded 1.
|
|
890
|
+
return {
|
|
891
|
+
id: phase.id,
|
|
892
|
+
status: "done",
|
|
893
|
+
output: ok[0].output,
|
|
894
|
+
json: parseJson ? safeParse(ok[0].output) : undefined,
|
|
895
|
+
usage: judgeUsage,
|
|
896
|
+
model: ok[0].model,
|
|
897
|
+
warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
|
|
898
|
+
tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
|
|
899
|
+
inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
|
|
900
|
+
endedAt: Date.now(),
|
|
901
|
+
};
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
const { winner, reason } = parseTournamentWinner(judgeRes.output, ran.length);
|
|
905
|
+
const winnerResult = ran[winner - 1];
|
|
906
|
+
const winnerIneligible = !winnerResult || isFailed(winnerResult);
|
|
907
|
+
// In 'best' mode the output is the winning variant verbatim; in 'aggregate'
|
|
908
|
+
// mode it is the judge's synthesized answer.
|
|
909
|
+
const chosen = winnerIneligible ? ok[0] : winnerResult;
|
|
910
|
+
const winnerIdx = ranIdx(chosen);
|
|
911
|
+
const output = mode === "aggregate" ? judgeRes.output : chosen.output;
|
|
912
|
+
return {
|
|
913
|
+
id: phase.id,
|
|
914
|
+
status: "done",
|
|
915
|
+
output,
|
|
916
|
+
json: parseJson ? safeParse(output) : undefined,
|
|
917
|
+
usage: judgeUsage,
|
|
918
|
+
model: mode === "aggregate" ? judgeRes.model : chosen.model,
|
|
919
|
+
warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
|
|
920
|
+
tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
|
|
921
|
+
inputHash: hashInput(phase.id, "tournament", String(competitors.length), mode),
|
|
922
|
+
endedAt: Date.now(),
|
|
923
|
+
};
|
|
622
924
|
}
|
|
623
925
|
|
|
624
926
|
return {
|
|
@@ -657,13 +959,89 @@ function lastCompletedOutput(state: RunState, phase: Phase): string | undefined
|
|
|
657
959
|
return undefined;
|
|
658
960
|
}
|
|
659
961
|
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
962
|
+
/**
 * Cache policy and identity for a single phase execution.
 *
 * Built once per `executePhase` call and handed to `cacheKey`, `cachedPhase`
 * and `recordCache`, so every phase-type branch derives its `inputHash` and
 * consults the cross-run store the same way.
 */
interface PhaseCacheCtx {
	// --- identity recorded alongside persisted entries ---

	/** Name of the flow being run; also folded into every cache key. */
	flowName: string;
	/** Id of the phase this context belongs to. */
	phaseId: string;
	/** Id of the current run. */
	runId: string;

	// --- policy ---

	/** Reuse policy: "off", "run-only" (within-run resume) or "cross-run". */
	scope: CacheScope;
	/** Optional time-to-live in milliseconds, passed to the store on lookup. */
	ttlMs?: number;
	/** Pre-resolved fingerprint string; `cacheKey` skips it when it is empty. */
	fingerprint: string;

	// --- lookup sources ---

	/** State of this phase from an earlier attempt, used for within-run resume. */
	prior: PhaseState | undefined;
	/** Persistent store consulted and written for "cross-run" phases. */
	store: CacheStore;

	// --- execution config that is part of the cache identity ---

	/**
	 * Per-phase `thinking` setting. It materially affects subagent output, so it
	 * must be hashed into the key; otherwise a config change could silently
	 * serve a stale cross-run hit.
	 */
	thinking?: string;
	/** Per-phase tool list, hashed into the key for the same reason as `thinking`. */
	tools?: string[];
}
|
|
982
|
+
|
|
983
|
+
/**
 * Build the final cache key (`inputHash`) for a phase.
 *
 * The hashed identity is, in order:
 *   1. `flow:<flowName>` — keeps different flows that share a phase id, task
 *      and model from colliding;
 *   2. the caller's `baseParts` (phase id, agent, model, task text, ...);
 *   3. `think:<thinking>` and `tools:<JSON tool list>` — changing either changes
 *      what the subagent produces;
 *   4. the resolved fingerprint, only when it is non-empty.
 *
 * @param cc        Per-phase cache context.
 * @param baseParts Phase-type-specific key parts.
 * @returns The hash produced by `hashInput` over all parts.
 */
function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
	const identity: string[] = [`flow:${cc.flowName}`];
	identity.push(...baseParts);
	identity.push(`think:${cc.thinking ?? ""}`);
	identity.push(`tools:${JSON.stringify(cc.tools ?? [])}`);
	// An empty fingerprint is left out entirely rather than hashed as "".
	if (cc.fingerprint) {
		identity.push(cc.fingerprint);
	}
	return hashInput(...identity);
}
|
|
992
|
+
|
|
993
|
+
/**
 * Look up a reusable result for a phase, honoring its cache scope.
 *
 *   - "off":       never reuse anything, not even within the same run.
 *   - "run-only":  reuse only the prior state of this phase (within-run resume).
 *   - "cross-run": try within-run resume first, then the persistent store.
 *
 * A cross-run hit is rebuilt from the stored entry's `output`, `json` and
 * `model` only; usage is zeroed and `cacheHit` is set to "cross-run". Other
 * `PhaseState` fields (for example a gate verdict) are not restored here.
 *
 * @param cc        Per-phase cache context.
 * @param inputHash Cache key for the current inputs.
 * @returns The reusable phase state, or `null` when the phase must run.
 */
function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
	const { scope, prior } = cc;

	if (scope === "off") {
		return null;
	}

	// Within-run resume: the prior attempt finished with exactly these inputs.
	if (prior?.status === "done" && prior.inputHash === inputHash) {
		return { ...prior, status: "done" };
	}

	// Everything below is opt-in cross-run memoization.
	if (scope !== "cross-run") {
		return null;
	}

	const entry = cc.store.get(inputHash, cc.ttlMs);
	if (!entry) {
		return null;
	}

	return {
		id: cc.phaseId,
		status: "done",
		inputHash,
		output: entry.output,
		json: entry.json,
		model: entry.model,
		usage: emptyUsage(),
		cacheHit: "cross-run",
		endedAt: Date.now(),
	};
}
|
|
666
1027
|
|
|
1028
|
+
/**
 * Persist a freshly computed phase result to the cross-run store.
 *
 * Nothing is written when:
 *   - the phase's scope is not "cross-run";
 *   - the phase did not finish "done", or it has no `inputHash` to key on;
 *   - the result was itself read from the cache (`cacheHit` set);
 *   - the result is a gate whose verdict is "block".
 *
 * The last rule exists because the stored entry keeps only `output`, `json`
 * and `model`. A later cross-run hit is rebuilt without the `gate` field, so
 * a persisted "block" would be replayed as an ordinary successful phase and
 * the block would be silently dropped. Blocked gates are therefore always
 * re-evaluated on the next run.
 *
 * This function adds no error handling of its own; any best-effort behavior
 * on write failure is up to `CacheStore.put`.
 *
 * @param cc Per-phase cache context (scope, store, flow/phase/run identity).
 * @param ps The phase state that was just produced.
 */
function recordCache(cc: PhaseCacheCtx, ps: PhaseState): void {
	if (cc.scope !== "cross-run") return;
	if (ps.status !== "done" || !ps.inputHash) return;
	if (ps.cacheHit) return; // don't re-store a value we just read from cache
	if (ps.gate?.verdict === "block") return; // the verdict would be lost on a later hit
	cc.store.put({
		key: ps.inputHash,
		createdAt: Date.now(),
		output: ps.output,
		json: ps.json,
		model: ps.model,
		flowName: cc.flowName,
		phaseId: cc.phaseId,
		runId: cc.runId,
	});
}
|
|
1044
|
+
|
|
667
1045
|
/**
|
|
668
1046
|
* Resolve an agent name against available agents. Falls back to the default
|
|
669
1047
|
* agent if the requested agent isn't found, logging a warning via safeEmit.
|
|
@@ -722,6 +1100,29 @@ function asReason(v: unknown): string | undefined {
|
|
|
722
1100
|
return typeof v === "string" && v.trim() ? v.trim() : undefined;
|
|
723
1101
|
}
|
|
724
1102
|
|
|
1103
|
+
/**
 * Parse a judge's pick of the winning variant.
 *
 * Two formats are accepted, tried in this order:
 *   1. JSON with a numeric (or numeric-string) `winner`, `best` or `choice`
 *      field; an optional string `reason` is passed through.
 *   2. A `WINNER: n` marker anywhere in the text (case-insensitive, `:` or `=`,
 *      optional `#`). Markdown emphasis around the colon is tolerated, so
 *      `**WINNER:** 2` and `**WINNER**: 2` both parse. When several markers
 *      appear, the last one wins.
 *
 * The parsed number is floored and clamped to `[1, count]`.
 *
 * Fail-open: if no pick can be read, variant 1 is returned together with a
 * `reason` saying so, so the work is never lost.
 *
 * @param output Raw judge output.
 * @param count  Number of variants the judge was shown (upper clamp bound).
 * @returns The 1-based winner index plus an optional reason.
 */
export function parseTournamentWinner(output: string, count: number): { winner: number; reason?: string } {
	const clamp = (n: number): number => Math.min(Math.max(1, Math.floor(n)), Math.max(1, count));

	const json = safeParse(output);
	if (json && typeof json === "object") {
		const o = json as Record<string, unknown>;
		const raw = o.winner ?? o.best ?? o.choice;
		// Number("") and Number("   ") are 0, which would be clamped to variant 1
		// and reported as a real pick. Treat blank strings as "no pick" instead.
		let n = NaN;
		if (typeof raw === "number") {
			n = raw;
		} else if (typeof raw === "string" && raw.trim() !== "") {
			n = Number(raw);
		}
		if (Number.isFinite(n)) return { winner: clamp(n), reason: asReason(o.reason) };
	}

	// `\**` on either side of the separator lets Markdown-bold verdicts match.
	const matches = [...output.matchAll(/WINNER\**\s*[:=]\s*\**\s*#?\s*(\d+)/gi)];
	if (matches.length) {
		const n = Number(matches[matches.length - 1][1]);
		if (Number.isFinite(n)) return { winner: clamp(n) };
	}

	return { winner: 1, reason: "no parseable winner; defaulted to variant 1" };
}
|
|
1125
|
+
|
|
725
1126
|
/**
|
|
726
1127
|
* Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
|
|
727
1128
|
*
|