pi-taskflow 0.0.12 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -23
- package/extensions/cache.ts +263 -0
- package/extensions/index.ts +147 -118
- package/extensions/init.ts +607 -0
- package/extensions/render.ts +39 -0
- package/extensions/runtime.ts +342 -17
- package/extensions/schema.ts +166 -1
- package/extensions/store.ts +16 -2
- package/package.json +4 -3
package/extensions/runtime.ts
CHANGED
|
@@ -13,11 +13,12 @@
|
|
|
13
13
|
import * as path from "node:path";
|
|
14
14
|
import * as fs from "node:fs";
|
|
15
15
|
import type { AgentConfig } from "./agents.ts";
|
|
16
|
-
import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse } from "./interpolate.ts";
|
|
16
|
+
import { coerceArray, evaluateCondition, interpolate, type InterpolationContext, safeParse, tryEvaluateCondition } from "./interpolate.ts";
|
|
17
17
|
import { isFailed, isTransientError, type LiveUpdate, mapWithConcurrencyLimit, runAgentTask, type RunResult } from "./runner.ts";
|
|
18
18
|
import { aggregateUsage, emptyUsage, type UsageStats } from "./usage.ts";
|
|
19
|
-
import { type Budget, dependenciesOf, finalPhase, type Phase, resolveArgs, type Taskflow, topoLayers } from "./schema.ts";
|
|
19
|
+
import { type Budget, type CacheScope, dependenciesOf, finalPhase, LOOP_DEFAULT_MAX_ITERATIONS, LOOP_HARD_MAX_ITERATIONS, parseTtlMs, type Phase, resolveArgs, type Taskflow, topoLayers, TOURNAMENT_DEFAULT_VARIANTS, TOURNAMENT_HARD_MAX_VARIANTS, type TournamentMode } from "./schema.ts";
|
|
20
20
|
import { hashInput, newRunId, type PhaseState, type RunState } from "./store.ts";
|
|
21
|
+
import { CacheStore, resolveFingerprint } from "./cache.ts";
|
|
21
22
|
|
|
22
23
|
/** A human-in-the-loop approval request raised by an `approval` phase. */
|
|
23
24
|
export interface ApprovalRequest {
|
|
@@ -49,6 +50,8 @@ export interface RuntimeDeps {
|
|
|
49
50
|
requestApproval?: (req: ApprovalRequest) => Promise<ApprovalDecision>;
|
|
50
51
|
/** Resolve a saved taskflow by name for `flow` (sub-workflow) phases. */
|
|
51
52
|
loadFlow?: (name: string) => Taskflow | undefined;
|
|
53
|
+
/** Cross-run memoization store. Omit to construct a default one for `deps.cwd`. */
|
|
54
|
+
cacheStore?: CacheStore;
|
|
52
55
|
/** Internal: sub-flow call stack, for recursion detection. */
|
|
53
56
|
_stack?: string[];
|
|
54
57
|
}
|
|
@@ -295,6 +298,23 @@ async function executePhase(
|
|
|
295
298
|
const ctx = buildInterpolationContext(state, previousOutput);
|
|
296
299
|
const preRead = await resolvePhaseContext(phase, ctx);
|
|
297
300
|
|
|
301
|
+
// Resolve this phase's cache policy once. Default scope is "run-only" (the
|
|
302
|
+
// historical within-run resume behavior). Only "cross-run" phases resolve a
|
|
303
|
+
// fingerprint and consult the persistent store.
|
|
304
|
+
const cacheScope: CacheScope = (phase.cache?.scope ?? "run-only") as CacheScope;
|
|
305
|
+
const cc: PhaseCacheCtx = {
|
|
306
|
+
scope: cacheScope,
|
|
307
|
+
ttlMs: phase.cache?.ttl ? (parseTtlMs(phase.cache.ttl) ?? undefined) : undefined,
|
|
308
|
+
fingerprint: cacheScope === "cross-run" ? resolveFingerprint(phase.cache?.fingerprint, phase.cwd ?? deps.cwd) : "",
|
|
309
|
+
store: deps.cacheStore ?? new CacheStore(deps.cwd),
|
|
310
|
+
prior,
|
|
311
|
+
phaseId: phase.id,
|
|
312
|
+
flowName: state.flowName,
|
|
313
|
+
runId: state.runId,
|
|
314
|
+
thinking: phase.thinking,
|
|
315
|
+
tools: phase.tools,
|
|
316
|
+
};
|
|
317
|
+
|
|
298
318
|
const baseRun = (agentName: string, task: string, onLive?: (l: LiveUpdate) => void) =>
|
|
299
319
|
run(
|
|
300
320
|
deps.cwd,
|
|
@@ -437,13 +457,14 @@ async function executePhase(
|
|
|
437
457
|
const { text } = interpolate(phase.task ?? "", ctx);
|
|
438
458
|
const fullTask = preRead + text;
|
|
439
459
|
const agentName = resolveAgent(phase.agent, deps, state);
|
|
440
|
-
const inputHash =
|
|
441
|
-
const cached = cachedPhase(
|
|
460
|
+
const inputHash = cacheKey(cc, [phase.id, agentName, phase.model ?? "", fullTask]);
|
|
461
|
+
const cached = cachedPhase(cc, inputHash);
|
|
442
462
|
if (cached) return cached;
|
|
443
463
|
|
|
444
464
|
const r = await runOne(agentName, fullTask, liveSink(state, phase.id, emitProgress));
|
|
445
465
|
const ps = resultToPhaseState(phase.id, r, inputHash, parseJson);
|
|
446
466
|
if (type === "gate" && ps.status === "done") ps.gate = parseGateVerdict(r.output);
|
|
467
|
+
recordCache(cc, ps);
|
|
447
468
|
return ps;
|
|
448
469
|
}
|
|
449
470
|
|
|
@@ -455,12 +476,14 @@ async function executePhase(
|
|
|
455
476
|
task: preRead + r.text,
|
|
456
477
|
};
|
|
457
478
|
});
|
|
458
|
-
const inputHash =
|
|
459
|
-
const cached = cachedPhase(
|
|
479
|
+
const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(branches)]);
|
|
480
|
+
const cached = cachedPhase(cc, inputHash);
|
|
460
481
|
if (cached) return cached;
|
|
461
482
|
|
|
462
483
|
const results = await runFanout(branches);
|
|
463
|
-
|
|
484
|
+
const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
|
|
485
|
+
recordCache(cc, ps);
|
|
486
|
+
return ps;
|
|
464
487
|
}
|
|
465
488
|
|
|
466
489
|
if (type === "map") {
|
|
@@ -485,19 +508,21 @@ async function executePhase(
|
|
|
485
508
|
task: preRead + interpolate(phase.task ?? "", localCtx).text,
|
|
486
509
|
};
|
|
487
510
|
});
|
|
488
|
-
const inputHash =
|
|
489
|
-
const cached = cachedPhase(
|
|
511
|
+
const inputHash = cacheKey(cc, [phase.id, phase.model ?? "", JSON.stringify(tasks)]);
|
|
512
|
+
const cached = cachedPhase(cc, inputHash);
|
|
490
513
|
if (cached) return cached;
|
|
491
514
|
|
|
492
515
|
const results = await runFanout(tasks);
|
|
493
|
-
|
|
516
|
+
const ps = mergePhaseState(phase.id, results, inputHash, parseJson);
|
|
517
|
+
recordCache(cc, ps);
|
|
518
|
+
return ps;
|
|
494
519
|
}
|
|
495
520
|
|
|
496
521
|
if (type === "approval") {
|
|
497
522
|
const ctx = buildInterpolationContext(state, previousOutput);
|
|
498
523
|
const message = interpolate(phase.task ?? "Approve to continue?", ctx).text;
|
|
499
524
|
const inputHash = hashInput(phase.id, phase.model ?? "", "approval", message);
|
|
500
|
-
const cached = cachedPhase(
|
|
525
|
+
const cached = cachedPhase(cc, inputHash);
|
|
501
526
|
if (cached) return cached;
|
|
502
527
|
|
|
503
528
|
// Non-interactive (headless/CI/tests): auto-approve, fail-open, but record it.
|
|
@@ -547,8 +572,8 @@ async function executePhase(
|
|
|
547
572
|
provided[k] = typeof v === "string" ? interpolate(v, ctx).text : v;
|
|
548
573
|
}
|
|
549
574
|
const subArgs = resolveArgs(subDef, provided);
|
|
550
|
-
const inputHash =
|
|
551
|
-
const cached = cachedPhase(
|
|
575
|
+
const inputHash = cacheKey(cc, [phase.id, `flow:${name}`, preRead, JSON.stringify(subArgs)]);
|
|
576
|
+
const cached = cachedPhase(cc, inputHash);
|
|
552
577
|
if (cached) return cached;
|
|
553
578
|
|
|
554
579
|
const live = state.phases[phase.id];
|
|
@@ -600,7 +625,7 @@ async function executePhase(
|
|
|
600
625
|
},
|
|
601
626
|
});
|
|
602
627
|
const sp = Object.values(subState.phases);
|
|
603
|
-
|
|
628
|
+
const flowPs: PhaseState = {
|
|
604
629
|
id: phase.id,
|
|
605
630
|
status: subResult.ok ? "done" : "failed",
|
|
606
631
|
output: subResult.finalOutput,
|
|
@@ -619,6 +644,207 @@ async function executePhase(
|
|
|
619
644
|
inputHash,
|
|
620
645
|
endedAt: Date.now(),
|
|
621
646
|
};
|
|
647
|
+
recordCache(cc, flowPs);
|
|
648
|
+
return flowPs;
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
// loop-until-done: run the body repeatedly until `until` is truthy, the output
|
|
652
|
+
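// converges to a fixed point, or maxIterations is hit (always terminates). It also stops early on a failed iteration, an abort signal, or an `until` that cannot be evaluated.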
|
|
653
|
+
if (type === "loop") {
|
|
654
|
+
const agentName = resolveAgent(phase.agent, deps, state);
|
|
655
|
+
const rawMax = phase.maxIterations ?? LOOP_DEFAULT_MAX_ITERATIONS;
|
|
656
|
+
const maxIters = Math.max(1, Math.min(LOOP_HARD_MAX_ITERATIONS, Math.floor(rawMax)));
|
|
657
|
+
const convergence = phase.convergence ?? true;
|
|
658
|
+
|
|
659
|
+
const usages: UsageStats[] = [];
|
|
660
|
+
const loopWarnings: string[] = [];
|
|
661
|
+
let lastOutput = "";
|
|
662
|
+
let prevOutput: string | undefined;
|
|
663
|
+
let iterations = 0;
|
|
664
|
+
let stop: NonNullable<PhaseState["loop"]>["stop"] = "maxIterations";
|
|
665
|
+
let failedResult: RunResult | undefined;
|
|
666
|
+
|
|
667
|
+
for (let i = 1; i <= maxIters; i++) {
|
|
668
|
+
if (deps.signal?.aborted) {
|
|
669
|
+
stop = "failed";
|
|
670
|
+
break;
|
|
671
|
+
}
|
|
672
|
+
iterations = i;
|
|
673
|
+
// The body sees its iteration number and the prior iteration's output.
|
|
674
|
+
const bodyCtx = buildInterpolationContext(state, previousOutput, {
|
|
675
|
+
loop: { iteration: i, lastOutput, maxIterations: maxIters },
|
|
676
|
+
});
|
|
677
|
+
const body = preRead + interpolate(phase.task ?? "", bodyCtx).text;
|
|
678
|
+
const r = await runOne(agentName, body, liveSink(state, phase.id, emitProgress));
|
|
679
|
+
usages.push(r.usage);
|
|
680
|
+
if (isFailed(r)) {
|
|
681
|
+
failedResult = r;
|
|
682
|
+
stop = "failed";
|
|
683
|
+
break;
|
|
684
|
+
}
|
|
685
|
+
prevOutput = lastOutput;
|
|
686
|
+
lastOutput = r.output;
|
|
687
|
+
|
|
688
|
+
// Expose this iteration's output as {steps.<thisId>.output|json} so the
|
|
689
|
+
// `until` condition can inspect it (e.g. "{steps.refine.json.done}==true").
|
|
690
|
+
// Loop locals ({loop.iteration} etc.) are available to the condition too.
|
|
691
|
+
const untilCtx = buildInterpolationContext(state, previousOutput, {
|
|
692
|
+
loop: { iteration: i, lastOutput, maxIterations: maxIters },
|
|
693
|
+
});
|
|
694
|
+
untilCtx.steps[phase.id] = { output: lastOutput, json: safeParse(lastOutput) };
|
|
695
|
+
const { value: done, error: condErr } = tryEvaluateCondition(phase.until ?? "", untilCtx);
|
|
696
|
+
// A malformed condition must not spin forever: stop and surface a warning
|
|
697
|
+
// so the author learns the `until` never actually evaluated.
|
|
698
|
+
if (condErr) {
|
|
699
|
+
loopWarnings.push(`loop 'until' could not be evaluated (stopped early): ${condErr}`);
|
|
700
|
+
stop = "until";
|
|
701
|
+
break;
|
|
702
|
+
}
|
|
703
|
+
if (done) {
|
|
704
|
+
stop = "until";
|
|
705
|
+
break;
|
|
706
|
+
}
|
|
707
|
+
// Fixed-point convergence: identical consecutive output ⇒ further work is wasted.
|
|
708
|
+
if (convergence && prevOutput !== undefined && prevOutput === lastOutput) {
|
|
709
|
+
stop = "converged";
|
|
710
|
+
break;
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
const aggUsage = usages.length ? aggregateUsage(usages) : emptyUsage();
|
|
715
|
+
if (failedResult) {
|
|
716
|
+
return {
|
|
717
|
+
id: phase.id,
|
|
718
|
+
status: "failed",
|
|
719
|
+
output: lastOutput || undefined,
|
|
720
|
+
usage: aggUsage,
|
|
721
|
+
error: failedResult.errorMessage || failedResult.stderr || `loop '${phase.id}' iteration ${iterations} failed`,
|
|
722
|
+
loop: { iterations, stop: "failed" },
|
|
723
|
+
warnings: loopWarnings.length ? loopWarnings : undefined,
|
|
724
|
+
inputHash: hashInput(phase.id, "loop", phase.until ?? ""),
|
|
725
|
+
endedAt: Date.now(),
|
|
726
|
+
};
|
|
727
|
+
}
|
|
728
|
+
return {
|
|
729
|
+
id: phase.id,
|
|
730
|
+
status: "done",
|
|
731
|
+
output: lastOutput,
|
|
732
|
+
json: parseJson ? safeParse(lastOutput) : undefined,
|
|
733
|
+
usage: aggUsage,
|
|
734
|
+
loop: { iterations, stop },
|
|
735
|
+
warnings: loopWarnings.length ? loopWarnings : undefined,
|
|
736
|
+
inputHash: hashInput(phase.id, "loop", phase.until ?? "", String(iterations)),
|
|
737
|
+
endedAt: Date.now(),
|
|
738
|
+
};
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
// tournament: spawn N competing variants, then a judge picks the best (or
|
|
742
|
+
// synthesizes an aggregate). Combines the parallel fan-out with a gate-style
|
|
743
|
+
// verdict, expressed as a single declarative phase.
|
|
744
|
+
if (type === "tournament") {
|
|
745
|
+
const mode = (phase.mode ?? "best") as TournamentMode;
|
|
746
|
+
// Competitors: explicit `branches` win; otherwise N copies of `task`.
|
|
747
|
+
let competitors: Array<{ agent: string; task: string }>;
|
|
748
|
+
if (phase.branches && phase.branches.length > 0) {
|
|
749
|
+
competitors = phase.branches.map((b) => ({
|
|
750
|
+
agent: resolveAgent(b.agent ?? phase.agent, deps, state),
|
|
751
|
+
task: preRead + interpolate(b.task, ctx).text,
|
|
752
|
+
}));
|
|
753
|
+
} else {
|
|
754
|
+
const n = Math.max(2, Math.min(TOURNAMENT_HARD_MAX_VARIANTS, Math.floor(phase.variants ?? TOURNAMENT_DEFAULT_VARIANTS)));
|
|
755
|
+
const body = preRead + interpolate(phase.task ?? "", ctx).text;
|
|
756
|
+
competitors = Array.from({ length: n }, () => ({ agent: resolveAgent(phase.agent, deps, state), task: body }));
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
const results = await runFanout(competitors);
|
|
760
|
+
const ran = results.filter((r) => r.stopReason !== "budget-skipped");
|
|
761
|
+
const ok = ran.filter((r) => !isFailed(r));
|
|
762
|
+
const variantUsage = aggregateUsage(results.map((r) => r.usage));
|
|
763
|
+
// Winner numbers are 1-based over `ran` (exactly what the judge is shown).
|
|
764
|
+
// Using indexOf on the stable `ran` array is reference-based and correct even
|
|
765
|
+
// when two variants produce byte-identical output.
|
|
766
|
+
const ranIdx = (r: RunResult) => ran.indexOf(r) + 1;
|
|
767
|
+
|
|
768
|
+
// All competitors failed → the tournament fails (nothing to judge).
|
|
769
|
+
if (ok.length === 0) {
|
|
770
|
+
return {
|
|
771
|
+
id: phase.id,
|
|
772
|
+
status: "failed",
|
|
773
|
+
usage: variantUsage,
|
|
774
|
+
error: `tournament '${phase.id}': all ${competitors.length} variants failed`,
|
|
775
|
+
tournament: { variants: competitors.length, winner: 0, mode },
|
|
776
|
+
inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
|
|
777
|
+
endedAt: Date.now(),
|
|
778
|
+
};
|
|
779
|
+
}
|
|
780
|
+
// Only one competitor survived → no contest; it wins by default (skip judge).
|
|
781
|
+
if (ok.length === 1) {
|
|
782
|
+
return {
|
|
783
|
+
id: phase.id,
|
|
784
|
+
status: "done",
|
|
785
|
+
output: ok[0].output,
|
|
786
|
+
json: parseJson ? safeParse(ok[0].output) : undefined,
|
|
787
|
+
usage: variantUsage,
|
|
788
|
+
model: ok[0].model,
|
|
789
|
+
tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "only surviving variant" },
|
|
790
|
+
inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
|
|
791
|
+
endedAt: Date.now(),
|
|
792
|
+
};
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
// Build the judge prompt: label every variant output, then the rubric.
|
|
796
|
+
const labelled = ran
|
|
797
|
+
.map((r, i) => `### Variant ${i + 1}${isFailed(r) ? " (failed — ineligible)" : ""}\n\n${r.output}`)
|
|
798
|
+
.join("\n\n---\n\n");
|
|
799
|
+
const rubric =
|
|
800
|
+
interpolate(phase.judge ?? "", ctx).text.trim() ||
|
|
801
|
+
"You are judging competing answers to the same task. Pick the single best variant on correctness, completeness, and clarity.";
|
|
802
|
+
const directive =
|
|
803
|
+
mode === "best"
|
|
804
|
+
? `End your reply with a line exactly: WINNER: <number> (1–${ran.length}), choosing the strongest eligible variant.`
|
|
805
|
+
: `Synthesize the strongest possible answer by combining the best parts of the eligible variants. Then end with a line: WINNER: <number> indicating which variant contributed most.`;
|
|
806
|
+
const judgeTask = `${rubric}\n\nThe candidate variants:\n\n${labelled}\n\n${directive}`;
|
|
807
|
+
const judgeAgent = resolveAgent(phase.judgeAgent ?? phase.agent, deps, state);
|
|
808
|
+
const judgeRes = await runOne(judgeAgent, judgeTask, liveSink(state, phase.id, emitProgress));
|
|
809
|
+
const judgeUsage = aggregateUsage([variantUsage, judgeRes.usage]);
|
|
810
|
+
|
|
811
|
+
if (isFailed(judgeRes)) {
|
|
812
|
+
// Judge failed: fall back to the first eligible variant (fail-open, never
|
|
813
|
+
// lose the work). Report the variant we actually used, not a hardcoded 1.
|
|
814
|
+
return {
|
|
815
|
+
id: phase.id,
|
|
816
|
+
status: "done",
|
|
817
|
+
output: ok[0].output,
|
|
818
|
+
json: parseJson ? safeParse(ok[0].output) : undefined,
|
|
819
|
+
usage: judgeUsage,
|
|
820
|
+
model: ok[0].model,
|
|
821
|
+
warnings: [`judge failed (${judgeRes.errorMessage ?? "error"}); used variant ${ranIdx(ok[0])}`],
|
|
822
|
+
tournament: { variants: competitors.length, winner: ranIdx(ok[0]), mode, reason: "judge failed" },
|
|
823
|
+
inputHash: hashInput(phase.id, "tournament", String(competitors.length)),
|
|
824
|
+
endedAt: Date.now(),
|
|
825
|
+
};
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
const { winner, reason } = parseTournamentWinner(judgeRes.output, ran.length);
|
|
829
|
+
const winnerResult = ran[winner - 1];
|
|
830
|
+
const winnerIneligible = !winnerResult || isFailed(winnerResult);
|
|
831
|
+
// In 'best' mode the output is the winning variant verbatim; in 'aggregate'
|
|
832
|
+
// mode it is the judge's synthesized answer.
|
|
833
|
+
const chosen = winnerIneligible ? ok[0] : winnerResult;
|
|
834
|
+
const winnerIdx = ranIdx(chosen);
|
|
835
|
+
const output = mode === "aggregate" ? judgeRes.output : chosen.output;
|
|
836
|
+
return {
|
|
837
|
+
id: phase.id,
|
|
838
|
+
status: "done",
|
|
839
|
+
output,
|
|
840
|
+
json: parseJson ? safeParse(output) : undefined,
|
|
841
|
+
usage: judgeUsage,
|
|
842
|
+
model: mode === "aggregate" ? judgeRes.model : chosen.model,
|
|
843
|
+
warnings: winnerIneligible ? [`judge picked an ineligible variant; used variant ${winnerIdx}`] : undefined,
|
|
844
|
+
tournament: { variants: competitors.length, winner: winnerIdx, mode, reason },
|
|
845
|
+
inputHash: hashInput(phase.id, "tournament", String(competitors.length), mode),
|
|
846
|
+
endedAt: Date.now(),
|
|
847
|
+
};
|
|
622
848
|
}
|
|
623
849
|
|
|
624
850
|
return {
|
|
@@ -657,13 +883,89 @@ function lastCompletedOutput(state: RunState, phase: Phase): string | undefined
|
|
|
657
883
|
return undefined;
|
|
658
884
|
}
|
|
659
885
|
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
886
|
+
/**
|
|
887
|
+
* Per-phase cache policy resolved once at the top of executePhase. Carries the
|
|
888
|
+
* scope, optional TTL, and a pre-resolved fingerprint string so each phase-type
|
|
889
|
+
* branch can fold it into its inputHash and consult the cross-run store uniformly.
|
|
890
|
+
*/
|
|
891
|
+
interface PhaseCacheCtx {
|
|
892
|
+
scope: CacheScope;
|
|
893
|
+
ttlMs?: number;
|
|
894
|
+
fingerprint: string;
|
|
895
|
+
store: CacheStore;
|
|
896
|
+
prior: PhaseState | undefined;
|
|
897
|
+
phaseId: string;
|
|
898
|
+
flowName: string;
|
|
899
|
+
runId: string;
|
|
900
|
+
/** Per-phase execution config that materially affects subagent output and
|
|
901
|
+
* therefore must be part of the cache identity (else a config change could
|
|
902
|
+
* silently serve a stale cross-run hit). */
|
|
903
|
+
thinking?: string;
|
|
904
|
+
tools?: string[];
|
|
905
|
+
}
|
|
906
|
+
|
|
907
|
+
/** Build the final cache key: flow name + base hash parts + thinking/tools config, plus the phase fingerprint when one is set. */
|
|
908
|
+
function cacheKey(cc: PhaseCacheCtx, baseParts: string[]): string {
|
|
909
|
+
// Fold the full cache identity into the hash: flow name (prevents collisions
|
|
910
|
+
// across different flows that share a phase.id + task + model), the per-phase
|
|
911
|
+
// thinking/tools config (changing either changes the subagent's output), and
|
|
912
|
+
// the resolved world-state fingerprint.
|
|
913
|
+
const parts = [`flow:${cc.flowName}`, ...baseParts, `think:${cc.thinking ?? ""}`, `tools:${JSON.stringify(cc.tools ?? [])}`];
|
|
914
|
+
return cc.fingerprint ? hashInput(...parts, cc.fingerprint) : hashInput(...parts);
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
/**
|
|
918
|
+
* Resume/memoization lookup. Honors scope:
|
|
919
|
+
* - "off": never reuse (even within-run).
|
|
920
|
+
* - "run-only": within-run resume only (historical behavior).
|
|
921
|
+
* - "cross-run": within-run first, then the persistent cross-run store.
|
|
922
|
+
* On a cross-run hit, usage is zeroed and `cacheHit` records the source.
|
|
923
|
+
*/
|
|
924
|
+
function cachedPhase(cc: PhaseCacheCtx, inputHash: string): PhaseState | null {
|
|
925
|
+
if (cc.scope === "off") return null;
|
|
926
|
+
|
|
927
|
+
// 1. within-run resume (fastest; always allowed unless scope is off)
|
|
928
|
+
if (cc.prior && cc.prior.status === "done" && cc.prior.inputHash === inputHash) {
|
|
929
|
+
return { ...cc.prior, status: "done" };
|
|
930
|
+
}
|
|
931
|
+
|
|
932
|
+
// 2. cross-run memoization (opt-in)
|
|
933
|
+
if (cc.scope === "cross-run") {
|
|
934
|
+
const e = cc.store.get(inputHash, cc.ttlMs);
|
|
935
|
+
if (e) {
|
|
936
|
+
return {
|
|
937
|
+
id: cc.phaseId,
|
|
938
|
+
status: "done",
|
|
939
|
+
inputHash,
|
|
940
|
+
output: e.output,
|
|
941
|
+
json: e.json,
|
|
942
|
+
model: e.model,
|
|
943
|
+
usage: emptyUsage(),
|
|
944
|
+
cacheHit: "cross-run",
|
|
945
|
+
endedAt: Date.now(),
|
|
946
|
+
};
|
|
947
|
+
}
|
|
663
948
|
}
|
|
664
949
|
return null;
|
|
665
950
|
}
|
|
666
951
|
|
|
952
|
+
/** Persist a freshly-computed phase result to the cross-run store (best-effort). */
|
|
953
|
+
function recordCache(cc: PhaseCacheCtx, ps: PhaseState): void {
|
|
954
|
+
if (cc.scope !== "cross-run") return;
|
|
955
|
+
if (ps.status !== "done" || !ps.inputHash) return;
|
|
956
|
+
if (ps.cacheHit) return; // don't re-store a value we just read from cache
|
|
957
|
+
cc.store.put({
|
|
958
|
+
key: ps.inputHash,
|
|
959
|
+
createdAt: Date.now(),
|
|
960
|
+
output: ps.output,
|
|
961
|
+
json: ps.json,
|
|
962
|
+
model: ps.model,
|
|
963
|
+
flowName: cc.flowName,
|
|
964
|
+
phaseId: cc.phaseId,
|
|
965
|
+
runId: cc.runId,
|
|
966
|
+
});
|
|
967
|
+
}
|
|
968
|
+
|
|
667
969
|
/**
|
|
668
970
|
* Resolve an agent name against available agents. Falls back to the default
|
|
669
971
|
* agent if the requested agent isn't found, logging a warning via safeEmit.
|
|
@@ -722,6 +1024,29 @@ function asReason(v: unknown): string | undefined {
|
|
|
722
1024
|
return typeof v === "string" && v.trim() ? v.trim() : undefined;
|
|
723
1025
|
}
|
|
724
1026
|
|
|
1027
|
+
/**
|
|
1028
|
+
* Parse a judge's pick of the winning variant. Accepts JSON ({"winner":n} or
|
|
1029
|
+
* {"best":n} or {"choice":n}) or a `WINNER: n` line (last match wins). Clamps to [1, count].
|
|
1030
|
+
* Fail-open: an unreadable verdict defaults to variant 1 so the work is never
|
|
1031
|
+
* lost. Returns the 1-based index plus an optional reason.
|
|
1032
|
+
*/
|
|
1033
|
+
export function parseTournamentWinner(output: string, count: number): { winner: number; reason?: string } {
|
|
1034
|
+
const clamp = (n: number) => Math.min(Math.max(1, Math.floor(n)), Math.max(1, count));
|
|
1035
|
+
const json = safeParse(output);
|
|
1036
|
+
if (json && typeof json === "object") {
|
|
1037
|
+
const o = json as Record<string, unknown>;
|
|
1038
|
+
const raw = o.winner ?? o.best ?? o.choice;
|
|
1039
|
+
const n = typeof raw === "number" ? raw : typeof raw === "string" ? Number(raw) : NaN;
|
|
1040
|
+
if (Number.isFinite(n)) return { winner: clamp(n), reason: asReason(o.reason) };
|
|
1041
|
+
}
|
|
1042
|
+
const matches = [...output.matchAll(/WINNER\s*[:=]\s*#?\s*(\d+)/gi)];
|
|
1043
|
+
if (matches.length) {
|
|
1044
|
+
const n = Number(matches[matches.length - 1][1]);
|
|
1045
|
+
if (Number.isFinite(n)) return { winner: clamp(n) };
|
|
1046
|
+
}
|
|
1047
|
+
return { winner: 1, reason: "no parseable winner; defaulted to variant 1" };
|
|
1048
|
+
}
|
|
1049
|
+
|
|
725
1050
|
/**
|
|
726
1051
|
* Best-effort invocation of the user-provided `persist` + `onProgress` callbacks.
|
|
727
1052
|
*
|
package/extensions/schema.ts
CHANGED
|
@@ -13,11 +13,27 @@ import { Type, type Static } from "typebox";
|
|
|
13
13
|
// Phase types
|
|
14
14
|
// ---------------------------------------------------------------------------
|
|
15
15
|
|
|
16
|
-
export const PHASE_TYPES = ["agent", "parallel", "map", "gate", "reduce", "approval", "flow"] as const;
|
|
16
|
+
export const PHASE_TYPES = ["agent", "parallel", "map", "gate", "reduce", "approval", "flow", "loop", "tournament"] as const;
|
|
17
17
|
export type PhaseType = (typeof PHASE_TYPES)[number];
|
|
18
18
|
|
|
19
|
+
/** Loop iteration bounds. Authors may set maxIterations anywhere from 1 up to the hard cap; the hard cap is a runaway guard. */
|
|
20
|
+
export const LOOP_DEFAULT_MAX_ITERATIONS = 10;
|
|
21
|
+
export const LOOP_HARD_MAX_ITERATIONS = 100;
|
|
22
|
+
|
|
23
|
+
/** Tournament competitor bounds. */
|
|
24
|
+
export const TOURNAMENT_DEFAULT_VARIANTS = 3;
|
|
25
|
+
export const TOURNAMENT_HARD_MAX_VARIANTS = 20;
|
|
26
|
+
export const TOURNAMENT_MODES = ["best", "aggregate"] as const;
|
|
27
|
+
export type TournamentMode = (typeof TOURNAMENT_MODES)[number];
|
|
28
|
+
|
|
19
29
|
export const OUTPUT_FORMATS = ["text", "json"] as const;
|
|
20
30
|
export const JOIN_MODES = ["all", "any"] as const;
|
|
31
|
+
export const CACHE_SCOPES = ["run-only", "cross-run", "off"] as const;
|
|
32
|
+
export type CacheScope = (typeof CACHE_SCOPES)[number];
|
|
33
|
+
/** Allowed fingerprint entry prefixes. `glob!:` = content-hash variant of `glob:`. */
|
|
34
|
+
export const CACHE_FINGERPRINT_PREFIXES = ["git:", "glob:", "glob!:", "file:", "env:"] as const;
|
|
35
|
+
/** Phase types that must NOT be cached across runs (a fresh result is required each run). */
|
|
36
|
+
export const CACHE_CROSS_RUN_BLOCKED_TYPES = ["gate", "approval", "loop", "tournament"] as const;
|
|
21
37
|
|
|
22
38
|
const ParallelTaskSchema = Type.Object(
|
|
23
39
|
{
|
|
@@ -39,6 +55,36 @@ const RetrySchema = Type.Object(
|
|
|
39
55
|
{ additionalProperties: false },
|
|
40
56
|
);
|
|
41
57
|
|
|
58
|
+
/**
|
|
59
|
+
* Per-phase cache policy. Defaults to `run-only` which is exactly the historical
|
|
60
|
+
* behavior (within-run resume only). `cross-run` opts a phase into the persistent
|
|
61
|
+
* cross-run memoization store; see docs/rfc-cross-run-memoization.md.
|
|
62
|
+
*/
|
|
63
|
+
const CacheSchema = Type.Object(
|
|
64
|
+
{
|
|
65
|
+
scope: Type.Optional(
|
|
66
|
+
StringEnum(CACHE_SCOPES, {
|
|
67
|
+
description:
|
|
68
|
+
"Cache reuse scope. 'run-only' (default) = within-run resume only (historical behavior); 'cross-run' = reuse identical-input results from any prior run; 'off' = never reuse (even within-run).",
|
|
69
|
+
default: "run-only",
|
|
70
|
+
}),
|
|
71
|
+
),
|
|
72
|
+
ttl: Type.Optional(
|
|
73
|
+
Type.String({
|
|
74
|
+
description:
|
|
75
|
+
"Max cache age before a cross-run hit is treated as a miss, e.g. '30m', '6h', '7d'. Omit for no time bound.",
|
|
76
|
+
}),
|
|
77
|
+
),
|
|
78
|
+
fingerprint: Type.Optional(
|
|
79
|
+
Type.Array(Type.String(), {
|
|
80
|
+
description:
|
|
81
|
+
"Extra freshness inputs folded into the cache key so 'the world changed' becomes a cache miss. Each entry: 'git:HEAD' | 'glob:<pattern>' | 'glob!:<pattern>' (content-hash) | 'file:<path>' | 'env:<NAME>'.",
|
|
82
|
+
}),
|
|
83
|
+
),
|
|
84
|
+
},
|
|
85
|
+
{ additionalProperties: false },
|
|
86
|
+
);
|
|
87
|
+
|
|
42
88
|
/** Run-wide cost / token ceiling. Exceeding it halts the run (remaining phases skipped). */
|
|
43
89
|
const BudgetSchema = Type.Object(
|
|
44
90
|
{
|
|
@@ -79,6 +125,51 @@ const PhaseSchema = Type.Object(
|
|
|
79
125
|
}),
|
|
80
126
|
),
|
|
81
127
|
|
|
128
|
+
// loop-until-done
|
|
129
|
+
until: Type.Optional(
|
|
130
|
+
Type.String({
|
|
131
|
+
description:
|
|
132
|
+
"[loop] Stop condition evaluated after each iteration. The iteration's output is exposed as {steps.<thisId>.output}/.json. Supports the same operators as `when`. The loop stops when this is truthy, on convergence, or at maxIterations. A parse error stops the loop (fail-safe).",
|
|
133
|
+
}),
|
|
134
|
+
),
|
|
135
|
+
maxIterations: Type.Optional(
|
|
136
|
+
Type.Number({
|
|
137
|
+
description: `[loop] Hard cap on iterations (default ${LOOP_DEFAULT_MAX_ITERATIONS}, max ${LOOP_HARD_MAX_ITERATIONS}). The loop always terminates within this bound even if 'until' never becomes truthy.`,
|
|
138
|
+
default: LOOP_DEFAULT_MAX_ITERATIONS,
|
|
139
|
+
}),
|
|
140
|
+
),
|
|
141
|
+
convergence: Type.Optional(
|
|
142
|
+
Type.Boolean({
|
|
143
|
+
description:
|
|
144
|
+
"[loop] When true (default), stop early if an iteration's output is identical to the previous one (a fixed point — further iterations would not change anything).",
|
|
145
|
+
default: true,
|
|
146
|
+
}),
|
|
147
|
+
),
|
|
148
|
+
|
|
149
|
+
// tournament: N variants compete, a judge picks the best (or aggregates)
|
|
150
|
+
variants: Type.Optional(
|
|
151
|
+
Type.Number({
|
|
152
|
+
description: `[tournament] Number of competing variants to spawn from 'task' (default ${TOURNAMENT_DEFAULT_VARIANTS}, max ${TOURNAMENT_HARD_MAX_VARIANTS}). Ignored when 'branches' is provided (those become the variants instead).`,
|
|
153
|
+
default: TOURNAMENT_DEFAULT_VARIANTS,
|
|
154
|
+
}),
|
|
155
|
+
),
|
|
156
|
+
judge: Type.Optional(
|
|
157
|
+
Type.String({
|
|
158
|
+
description:
|
|
159
|
+
"[tournament] Judge prompt. The numbered variant outputs are injected before it. To pick a winner, end with a line like 'WINNER: <n>' or return JSON {\"winner\": <n>}. Defaults to a sensible built-in rubric.",
|
|
160
|
+
}),
|
|
161
|
+
),
|
|
162
|
+
judgeAgent: Type.Optional(
|
|
163
|
+
Type.String({ description: "[tournament] Agent that runs the judge step (defaults to the phase 'agent')." }),
|
|
164
|
+
),
|
|
165
|
+
mode: Type.Optional(
|
|
166
|
+
StringEnum(TOURNAMENT_MODES, {
|
|
167
|
+
description:
|
|
168
|
+
"[tournament] 'best' (default): output is the winning variant verbatim. 'aggregate': output is the judge's synthesized answer combining the variants.",
|
|
169
|
+
default: "best",
|
|
170
|
+
}),
|
|
171
|
+
),
|
|
172
|
+
|
|
82
173
|
dependsOn: Type.Optional(Type.Array(Type.String(), { description: "Phase ids this phase depends on" })),
|
|
83
174
|
join: Type.Optional(
|
|
84
175
|
StringEnum(JOIN_MODES, {
|
|
@@ -115,6 +206,7 @@ const PhaseSchema = Type.Object(
|
|
|
115
206
|
default: 8000,
|
|
116
207
|
}),
|
|
117
208
|
),
|
|
209
|
+
cache: Type.Optional(CacheSchema),
|
|
118
210
|
},
|
|
119
211
|
{ additionalProperties: false },
|
|
120
212
|
);
|
|
@@ -157,6 +249,7 @@ export type Taskflow = Static<typeof TaskflowSchema>;
|
|
|
157
249
|
export type ArgSpec = Static<typeof ArgSpecSchema>;
|
|
158
250
|
export type RetryPolicy = Static<typeof RetrySchema>;
|
|
159
251
|
export type Budget = Static<typeof BudgetSchema>;
|
|
252
|
+
export type CachePolicy = Static<typeof CacheSchema>;
|
|
160
253
|
export type JoinMode = (typeof JOIN_MODES)[number];
|
|
161
254
|
|
|
162
255
|
// ---------------------------------------------------------------------------
|
|
@@ -260,6 +353,21 @@ export interface ValidationResult {
|
|
|
260
353
|
warnings: string[];
|
|
261
354
|
}
|
|
262
355
|
|
|
356
|
+
/**
|
|
357
|
+
* Parse a TTL string like '30m', '6h', '7d', '500ms', '90s' into milliseconds.
|
|
358
|
+
* Returns null for malformed or non-positive values. A bare number (no unit) is read as ms; decimals such as '1.5h' are accepted and units are case-insensitive.
|
|
359
|
+
*/
|
|
360
|
+
export function parseTtlMs(ttl: string): number | null {
|
|
361
|
+
if (typeof ttl !== "string") return null;
|
|
362
|
+
const m = ttl.trim().match(/^(\d+(?:\.\d+)?)\s*(ms|s|m|h|d)?$/i);
|
|
363
|
+
if (!m) return null;
|
|
364
|
+
const n = Number(m[1]);
|
|
365
|
+
if (!Number.isFinite(n) || n <= 0) return null;
|
|
366
|
+
const unit = (m[2] ?? "ms").toLowerCase();
|
|
367
|
+
const mult: Record<string, number> = { ms: 1, s: 1000, m: 60_000, h: 3_600_000, d: 86_400_000 };
|
|
368
|
+
return n * mult[unit];
|
|
369
|
+
}
|
|
370
|
+
|
|
263
371
|
export interface ValidationOptions {
|
|
264
372
|
/** Resolved invocation args, used for runtime checks like missing `{args.X}`. */
|
|
265
373
|
args?: Record<string, unknown>;
|
|
@@ -320,6 +428,36 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
320
428
|
if (type === "flow") {
|
|
321
429
|
if (!p.use) errors.push(`Phase '${p.id}' (flow) requires 'use' (a saved flow name)`);
|
|
322
430
|
}
|
|
431
|
+
if (type === "loop") {
|
|
432
|
+
if (!p.task) errors.push(`Phase '${p.id}' (loop) requires 'task' (the iteration body)`);
|
|
433
|
+
if (!p.until) errors.push(`Phase '${p.id}' (loop) requires 'until' (the stop condition)`);
|
|
434
|
+
if (p.maxIterations !== undefined) {
|
|
435
|
+
if (typeof p.maxIterations !== "number" || !Number.isFinite(p.maxIterations) || p.maxIterations < 1) {
|
|
436
|
+
errors.push(`Phase '${p.id}' (loop): maxIterations must be a number >= 1`);
|
|
437
|
+
} else if (p.maxIterations > LOOP_HARD_MAX_ITERATIONS) {
|
|
438
|
+
errors.push(`Phase '${p.id}' (loop): maxIterations must be <= ${LOOP_HARD_MAX_ITERATIONS}`);
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
if (type === "tournament") {
|
|
443
|
+
const hasBranches = Array.isArray(p.branches) && p.branches.length > 0;
|
|
444
|
+
if (!hasBranches && !p.task) {
|
|
445
|
+
errors.push(`Phase '${p.id}' (tournament) requires 'task' (the competitor prompt) or non-empty 'branches'`);
|
|
446
|
+
}
|
|
447
|
+
if (p.variants !== undefined) {
|
|
448
|
+
if (typeof p.variants !== "number" || !Number.isFinite(p.variants) || p.variants < 2) {
|
|
449
|
+
errors.push(`Phase '${p.id}' (tournament): variants must be a number >= 2`);
|
|
450
|
+
} else if (p.variants > TOURNAMENT_HARD_MAX_VARIANTS) {
|
|
451
|
+
errors.push(`Phase '${p.id}' (tournament): variants must be <= ${TOURNAMENT_HARD_MAX_VARIANTS}`);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
if (hasBranches && p.branches!.length < 2) {
|
|
455
|
+
errors.push(`Phase '${p.id}' (tournament): 'branches' needs at least 2 competitors`);
|
|
456
|
+
}
|
|
457
|
+
if (p.mode && !TOURNAMENT_MODES.includes(p.mode as TournamentMode)) {
|
|
458
|
+
errors.push(`Phase '${p.id}' (tournament): unknown mode '${p.mode}'`);
|
|
459
|
+
}
|
|
460
|
+
}
|
|
323
461
|
if (p.retry) {
|
|
324
462
|
if (typeof p.retry.max !== "number" || p.retry.max < 0) {
|
|
325
463
|
errors.push(`Phase '${p.id}': retry.max must be a number >= 0`);
|
|
@@ -337,6 +475,33 @@ export function validateTaskflow(def: unknown, opts: ValidationOptions = {}): Va
|
|
|
337
475
|
errors.push(`Phase '${p.id}': unknown join mode '${p.join}'`);
|
|
338
476
|
}
|
|
339
477
|
|
|
478
|
+
// Cache policy validation (cross-run memoization).
|
|
479
|
+
if (p.cache) {
|
|
480
|
+
const scope = p.cache.scope ?? "run-only";
|
|
481
|
+
if (!CACHE_SCOPES.includes(scope as CacheScope)) {
|
|
482
|
+
errors.push(`Phase '${p.id}': unknown cache.scope '${scope}' (expected one of ${CACHE_SCOPES.join(", ")})`);
|
|
483
|
+
}
|
|
484
|
+
// Gate B: gate/approval phases must produce a fresh result every run.
|
|
485
|
+
if (scope === "cross-run" && (CACHE_CROSS_RUN_BLOCKED_TYPES as readonly string[]).includes(type)) {
|
|
486
|
+
errors.push(
|
|
487
|
+
`Phase '${p.id}' (${type}): cache.scope 'cross-run' is not allowed for ${CACHE_CROSS_RUN_BLOCKED_TYPES.join("/")} phases — they must produce a fresh result each run. Use 'run-only'.`,
|
|
488
|
+
);
|
|
489
|
+
}
|
|
490
|
+
// Gate C: every fingerprint entry must use a known prefix (fail closed).
|
|
491
|
+
for (const fp of p.cache.fingerprint ?? []) {
|
|
492
|
+
const ok = CACHE_FINGERPRINT_PREFIXES.some((pre) => fp.startsWith(pre) && fp.length > pre.length);
|
|
493
|
+
if (!ok) {
|
|
494
|
+
errors.push(
|
|
495
|
+
`Phase '${p.id}': invalid cache.fingerprint entry '${fp}' (expected '<prefix><value>' with prefix one of ${CACHE_FINGERPRINT_PREFIXES.join(", ")})`,
|
|
496
|
+
);
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
// Gate D: TTL must parse to a positive duration when present.
|
|
500
|
+
if (p.cache.ttl !== undefined && parseTtlMs(p.cache.ttl) === null) {
|
|
501
|
+
errors.push(`Phase '${p.id}': invalid cache.ttl '${p.cache.ttl}' (expected e.g. '30m', '6h', '7d')`);
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
|
|
340
505
|
// Agent name convention: hyphens only (per AGENTS.md naming convention)
|
|
341
506
|
if (p.agent && typeof p.agent === "string" && p.agent.includes("_")) {
|
|
342
507
|
errors.push(`Phase '${p.id}': agent name '${p.agent}' uses underscores — use hyphens (e.g. 'executor-code' not 'executor_code')`);
|