@miller-tech/uap 1.27.0 → 1.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/bin/cli.js +5 -0
- package/dist/bin/cli.js.map +1 -1
- package/dist/cli/deliver.d.ts +5 -0
- package/dist/cli/deliver.d.ts.map +1 -1
- package/dist/cli/deliver.js +91 -10
- package/dist/cli/deliver.js.map +1 -1
- package/dist/delivery/applier.d.ts +11 -0
- package/dist/delivery/applier.d.ts.map +1 -1
- package/dist/delivery/applier.js +189 -6
- package/dist/delivery/applier.js.map +1 -1
- package/dist/delivery/convergence-loop.d.ts +75 -13
- package/dist/delivery/convergence-loop.d.ts.map +1 -1
- package/dist/delivery/convergence-loop.js +209 -58
- package/dist/delivery/convergence-loop.js.map +1 -1
- package/dist/delivery/critic.d.ts +35 -0
- package/dist/delivery/critic.d.ts.map +1 -0
- package/dist/delivery/critic.js +77 -0
- package/dist/delivery/critic.js.map +1 -0
- package/dist/delivery/escalation.d.ts +66 -0
- package/dist/delivery/escalation.d.ts.map +1 -0
- package/dist/delivery/escalation.js +84 -0
- package/dist/delivery/escalation.js.map +1 -0
- package/dist/delivery/explorer.d.ts +77 -0
- package/dist/delivery/explorer.d.ts.map +1 -0
- package/dist/delivery/explorer.js +166 -0
- package/dist/delivery/explorer.js.map +1 -0
- package/dist/delivery/index.d.ts +7 -2
- package/dist/delivery/index.d.ts.map +1 -1
- package/dist/delivery/index.js +6 -1
- package/dist/delivery/index.js.map +1 -1
- package/dist/delivery/judge.d.ts +33 -0
- package/dist/delivery/judge.d.ts.map +1 -0
- package/dist/delivery/judge.js +70 -0
- package/dist/delivery/judge.js.map +1 -0
- package/dist/delivery/practice.d.ts +72 -0
- package/dist/delivery/practice.d.ts.map +1 -0
- package/dist/delivery/practice.js +185 -0
- package/dist/delivery/practice.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escalation Controller (Phase 5)
|
|
3
|
+
*
|
|
4
|
+
* An onIteration handler that watches for stagnation and climbs an escalation
|
|
5
|
+
* ladder: when the best gate score stops improving for N consecutive turns,
|
|
6
|
+
* advance to the next tier. Tiers escalate cost progressively — widen
|
|
7
|
+
* exploration, enable the critic, raise the turn budget, and finally switch to
|
|
8
|
+
* a stronger model — so cheap strategies are exhausted before expensive ones.
|
|
9
|
+
*
|
|
10
|
+
* The controller is pure policy: it inspects IterationRecords and returns
|
|
11
|
+
* directives. The loop owns all mutation. This keeps escalation testable in
|
|
12
|
+
* isolation and swappable without touching the loop.
|
|
13
|
+
*/
|
|
14
|
+
/** Consecutive turns without a meaningful score gain tolerated before the controller advances a tier. */
const DEFAULT_STAGNATION_TURNS = 2;
/** Minimum gain over the best score seen so far for a turn to count as an improvement. */
const DEFAULT_EPSILON = 0.01;
|
|
16
|
+
/**
 * Build the canonical escalation ladder, ordered from the cheapest tier to the
 * most expensive one, so cheap strategies are exhausted first. Library-owned so
 * non-CLI callers get the same policy.
 *
 * Tiers, in order:
 *  1. widen exploration to `max(3, options.candidates)` candidates
 *  2. enable the critic
 *  3. only when `options.escalateExecutor` is set: switch to that executor and
 *     raise the turn budget to `options.maxTurns + 2`
 *
 * @param options - Optional tuning. `candidates` defaults to 3 and `maxTurns`
 *   to 5; both also fall back to their default when they are NaN, so a bad
 *   numeric input can never leak `NaN` into a directive or a tier label.
 *   `escalateModelName` is only used in the label of the third tier.
 * @returns A freshly built array of tiers.
 */
export function defaultEscalationLadder(options = {}) {
    // `NaN ?? 3` stays NaN and `Math.max(3, NaN)` is NaN, so `??` alone does not
    // protect against a NaN input; treat NaN like a missing value.
    const orDefault = (value, fallback) => (value == null || Number.isNaN(value) ? fallback : value);
    const candidates = Math.max(3, orDefault(options.candidates, 3));
    const tiers = [
        { label: `widen exploration (${candidates} candidates)`, setCandidates: candidates },
        { label: 'enable critic', enableCritic: true },
    ];
    // The model switch is the most expensive tier and exists only when the
    // caller supplied a stronger executor to switch to.
    if (options.escalateExecutor) {
        tiers.push({
            label: `escalate model → ${options.escalateModelName ?? 'stronger model'}`,
            switchExecutor: options.escalateExecutor,
            raiseMaxTurns: orDefault(options.maxTurns, 5) + 2,
        });
    }
    return tiers;
}
|
|
37
|
+
/**
 * Create a controller that climbs `config.tiers` whenever the gate score stalls.
 *
 * The returned `onIteration` is meant to be plugged into ConvergenceConfig;
 * `tierIndex` reports the index of the next unused tier, mainly for tests.
 *
 * @param config - `tiers` (the ladder), optional `stagnationTurns` and
 *   `improvementEpsilon` overrides, and an optional `onEscalate(tier, turn)`
 *   observer invoked each time a tier is handed out.
 */
export function createEscalationController(config) {
    const patience = config.stagnationTurns ?? DEFAULT_STAGNATION_TURNS;
    const minGain = config.improvementEpsilon ?? DEFAULT_EPSILON;
    // Mutable controller state, private to this closure.
    const state = { highWater: -1, flatTurns: 0, cursor: 0 };
    /** Inspect one iteration record and return the directive for the loop. */
    function onIteration(record) {
        // The loop ends on a passing turn, so there is nothing to escalate.
        if (record.passed) {
            return {};
        }
        const isRealGain = record.score > state.highWater + minGain;
        if (isRealGain) {
            state.highWater = record.score;
            state.flatTurns = 0;
            return {};
        }
        // Not a meaningful gain: count the turn, but still track the true best
        // score so later tiers are measured against it.
        state.flatTurns += 1;
        if (state.highWater < record.score) {
            state.highWater = record.score;
        }
        if (state.flatTurns < patience) {
            return {};
        }
        // Stalled for the whole window; with no tier left the loop simply runs
        // out its remaining budget.
        if (state.cursor >= config.tiers.length) {
            return {};
        }
        const tier = config.tiers[state.cursor];
        state.cursor += 1;
        // The new tier gets a fresh window to prove itself.
        state.flatTurns = 0;
        config.onEscalate?.(tier, record.turn);
        const { setCandidates, enableCritic, raiseMaxTurns, switchExecutor, label } = tier;
        return {
            setCandidates,
            enableCritic,
            raiseMaxTurns,
            switchExecutor,
            note: `escalate → ${label}`,
        };
    }
    return {
        tierIndex: () => state.cursor,
        onIteration,
    };
}
|
|
84
|
+
//# sourceMappingURL=escalation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"escalation.js","sourceRoot":"","sources":["../../src/delivery/escalation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAoCH,MAAM,wBAAwB,GAAG,CAAC,CAAC;AACnC,MAAM,eAAe,GAAG,IAAI,CAAC;AAa7B;;;;;GAKG;AACH,MAAM,UAAU,uBAAuB,CAAC,UAAgC,EAAE;IACxE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;IACxD,MAAM,KAAK,GAAqB;QAC9B,EAAE,KAAK,EAAE,sBAAsB,UAAU,cAAc,EAAE,aAAa,EAAE,UAAU,EAAE;QACpF,EAAE,KAAK,EAAE,eAAe,EAAE,YAAY,EAAE,IAAI,EAAE;KAC/C,CAAC;IACF,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC;YACT,KAAK,EAAE,oBAAoB,OAAO,CAAC,iBAAiB,IAAI,gBAAgB,EAAE;YAC1E,cAAc,EAAE,OAAO,CAAC,gBAAgB;YACxC,aAAa,EAAE,CAAC,OAAO,CAAC,QAAQ,IAAI,CAAC,CAAC,GAAG,CAAC;SAC3C,CAAC,CAAC;IACL,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,0BAA0B,CAAC,MAAwB;IACjE,MAAM,eAAe,GAAG,MAAM,CAAC,eAAe,IAAI,wBAAwB,CAAC;IAC3E,MAAM,OAAO,GAAG,MAAM,CAAC,kBAAkB,IAAI,eAAe,CAAC;IAE7D,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC;IACnB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,OAAO;QACL,SAAS,EAAE,GAAG,EAAE,CAAC,QAAQ;QACzB,WAAW,EAAE,CAAC,MAAuB,EAAsB,EAAE;YAC3D,gEAAgE;YAChE,IAAI,MAAM,CAAC,MAAM;gBAAE,OAAO,EAAE,CAAC;YAE7B,IAAI,MAAM,CAAC,KAAK,GAAG,SAAS,GAAG,OAAO,EAAE,CAAC;gBACvC,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;gBACzB,QAAQ,GAAG,CAAC,CAAC;gBACb,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,uCAAuC;YACvC,QAAQ,IAAI,CAAC,CAAC;YACd,qEAAqE;YACrE,mDAAmD;YACnD,IAAI,MAAM,CAAC,KAAK,GAAG,SAAS;gBAAE,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;YAEvD,IAAI,QAAQ,GAAG,eAAe;gBAAE,OAAO,EAAE,CAAC;YAE1C,4CAA4C;YAC5C,IAAI,QAAQ,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;gBACpC,OAAO,EAAE,CAAC,CAAC,oDAAoD;YACjE,CAAC;YACD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACpC,QAAQ,IAAI,CAAC,CAAC;YACd,QAAQ,GAAG,CAAC,CAAC,CAAC,mDAAmD;YACjE,MAAM,CAAC,UAAU,EAAE,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;YAEvC,OAAO;gBACL,aAAa,EAAE,IAAI,CAAC,aAAa;gBACjC,YAAY,EAAE,IAAI,CAAC,YAAY;gBAC/B,aAAa,EAAE,IAAI,CAAC,aAAa;gBACjC,cAAc,EAAE,IAAI,CAAC,cAAc;gBACnC,IAAI,EAAE,cAAc,IAAI,CAAC,KAAK,EAAE;aACjC,CAAC;QACJ,CAAC;KACF,CAAC;AAC
J,CAAC"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Explorer — best-of-N candidate exploration (Phase 2)
|
|
3
|
+
*
|
|
4
|
+
* Instead of committing to a single attempt per turn, the explorer:
|
|
5
|
+
* 1. generates N candidates concurrently, each steered by a distinct
|
|
6
|
+
* strategy seed (diversity by prompt, not temperature — small-model
|
|
7
|
+
* profiles pin temperature low for stability)
|
|
8
|
+
* 2. evaluates each candidate against the real gates on the same baseline
|
|
9
|
+
* tree via apply → verify → rollback
|
|
10
|
+
* 3. ranks objectively (gates passed, then score); a judge tie-breaks
|
|
11
|
+
* candidates tied at the top
|
|
12
|
+
* 4. commits only the winner to the tree
|
|
13
|
+
*
|
|
14
|
+
* Candidate verification is sequential by design: gates (npm test, builds)
|
|
15
|
+
* cannot safely run concurrently in one tree. Worktree-isolated parallel
|
|
16
|
+
* verification is a planned optimization (inject via `revertibleApplier` +
|
|
17
|
+
* a per-candidate workspace), not a Phase 2 requirement.
|
|
18
|
+
*
|
|
19
|
+
* Fairness caveat: rollback reverts files the model wrote, but gate commands
|
|
20
|
+
* also mutate the tree (dist/ output, snapshots, caches). Candidate N+1 may
|
|
21
|
+
* therefore see candidate N's gate side effects. The committed winner is
|
|
22
|
+
* re-verified after commit so its reported ladder reflects real on-disk
|
|
23
|
+
* state. Full per-candidate isolation requires the workspace seam above.
|
|
24
|
+
*/
|
|
25
|
+
import type { GateRung, LadderResult, LadderOptions } from './verifier-ladder.js';
|
|
26
|
+
import type { LadderRunner, LoopExecutor } from './convergence-loop.js';
|
|
27
|
+
import type { Applier, ApplyResult, RevertibleApply } from './applier.js';
|
|
28
|
+
import type { Judge } from './judge.js';
|
|
29
|
+
/**
 * Upper bound on the number of candidates explored in a single turn. It guards
 * direct library callers (the CLI applies a lower cap of its own), because each
 * candidate costs one model call plus one full gate run.
 */
export declare const MAX_CANDIDATES = 8;
|
|
32
|
+
/** A named prompt suffix that steers one candidate toward a distinct approach. */
export interface StrategySeed {
    /** Short identifier; reported as the `strategy` of the candidates it produced. */
    id: string;
    /** Text appended to the base prompt, after a blank line, for this candidate. */
    hint: string;
}
|
|
36
|
+
/** Built-in strategy seeds, used when `ExplorerConfig.seeds` is absent or empty. */
export declare const DEFAULT_STRATEGY_SEEDS: StrategySeed[];
|
|
37
|
+
/** Outcome of generating and trial-evaluating a single exploration candidate. */
export interface CandidateResult {
    /** Position-based identifier: `c1`, `c2`, … in generation order. */
    id: string;
    /** `id` of the strategy seed that steered this candidate. */
    strategy: string;
    /** Raw executor output; the empty string when the executor failed. */
    output: string;
    /** Result of the trial apply, or `null` when the executor failed. */
    applyResult: ApplyResult | null;
    /**
     * Gate results of the trial run, or `null` when no gates were run
     * (executor failure, apply error, or no files written).
     */
    ladder: LadderResult | null;
    /** Executor failure for this candidate, if any */
    error?: string;
    /** Whether the trial ladder passed; `false` when no ladder was run. */
    passed: boolean;
    /** Score of the trial ladder; `0` when no ladder was run. */
    score: number;
}
|
|
48
|
+
/** Result of one explore-and-commit turn. */
export interface ExplorationResult {
    /** The committed candidate, or `null` when no candidate was committable. */
    winner: CandidateResult | null;
    /** Every candidate of the turn in generation order, failed ones included. */
    candidates: CandidateResult[];
    /** Judge rationale, set only when the judge selected the winner in a tie-break */
    judgeRationale?: string;
    /**
     * Ladder result for the committed winner: the gate run repeated on the
     * committed tree, or the winner's evaluation run when the commit wrote no
     * files. `null` when there is no winner.
     */
    ladder: LadderResult | null;
}
|
|
56
|
+
/** Settings for a single `exploreAndCommit` turn. */
export interface ExplorerConfig {
    /** Root directory handed to the appliers and to the ladder runner. */
    projectRoot: string;
    /** Gate rungs handed to the ladder runner for every verification run. */
    rungs: GateRung[];
    /** Number of candidates per turn (default 3), clamped to `1..MAX_CANDIDATES`. */
    candidates?: number;
    /**
     * Strategy seeds, cycled when there are fewer seeds than candidates.
     * Defaults to `DEFAULT_STRATEGY_SEEDS` when absent or empty.
     */
    seeds?: StrategySeed[];
    /**
     * Tie-breaker, consulted only when several committable candidates tie with
     * the top-ranked one. If it throws or names an unknown id, the top-ranked
     * candidate wins.
     */
    judge?: Judge;
    /** Options forwarded unchanged to the ladder runner. */
    ladderOptions?: LadderOptions;
    /** Override the gate runner (defaults to runLadder) */
    ladderRunner?: LadderRunner;
    /** Override the commit applier (defaults to applyFileBlocks) */
    applier?: Applier;
    /** Override the per-candidate revertible applier (defaults to applyFileBlocksWithRollback) */
    revertibleApplier?: (output: string, projectRoot: string) => RevertibleApply;
    /** Called once per candidate, right after it was evaluated or failed to generate. */
    onCandidate?: (candidate: CandidateResult) => void;
}
|
|
71
|
+
/**
 * Run one best-of-N exploration turn: generate candidates, evaluate each of
 * them against the gates, and commit the best one.
 *
 * @param task - Task description; forwarded to the judge on a tie-break.
 * @param basePrompt - Prompt from the loop's prompt builder; every candidate
 *   appends its own strategy seed to it.
 * @param executor - Model call used to generate each candidate.
 * @param config - Exploration settings.
 * @returns The committed winner (or `null`), all candidates, and the ladder
 *   result reported for the winner.
 */
export declare function exploreAndCommit(task: string, basePrompt: string, executor: LoopExecutor, config: ExplorerConfig): Promise<ExplorationResult>;
|
|
77
|
+
//# sourceMappingURL=explorer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"explorer.d.ts","sourceRoot":"","sources":["../../src/delivery/explorer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAElF,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAExE,OAAO,KAAK,EAAE,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC1E,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAExC;gFACgF;AAChF,eAAO,MAAM,cAAc,IAAI,CAAC;AAEhC,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;CACd;AAED,eAAO,MAAM,sBAAsB,EAAE,YAAY,EAiBhD,CAAC;AAEF,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,WAAW,GAAG,IAAI,CAAC;IAChC,MAAM,EAAE,YAAY,GAAG,IAAI,CAAC;IAC5B,kDAAkD;IAClD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,eAAe,GAAG,IAAI,CAAC;IAC/B,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,gDAAgD;IAChD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,sEAAsE;IACtE,MAAM,EAAE,YAAY,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,cAAc;IAC7B,gDAAgD;IAChD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,YAAY,EAAE,CAAC;IACvB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,8FAA8F;IAC9F,iBAAiB,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,KAAK,eAAe,CAAC;IAC7E,WAAW,CAAC,EAAE,CAAC,SAAS,EAAE,eAAe,KAAK,IAAI,CAAC;CACpD;AAID;;;;GAIG;AACH,wBAAsB,gBAAgB,CACpC,IAAI,EAAE,MAAM,EACZ,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,YAAY,EACtB,MAAM,EAAE,cAAc,GACrB,OAAO,CAAC,iBAAiB,CAAC,CA6H5B"}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Explorer — best-of-N candidate exploration (Phase 2)
|
|
3
|
+
*
|
|
4
|
+
* Instead of committing to a single attempt per turn, the explorer:
|
|
5
|
+
* 1. generates N candidates concurrently, each steered by a distinct
|
|
6
|
+
* strategy seed (diversity by prompt, not temperature — small-model
|
|
7
|
+
* profiles pin temperature low for stability)
|
|
8
|
+
* 2. evaluates each candidate against the real gates on the same baseline
|
|
9
|
+
* tree via apply → verify → rollback
|
|
10
|
+
* 3. ranks objectively (gates passed, then score); a judge tie-breaks
|
|
11
|
+
* candidates tied at the top
|
|
12
|
+
* 4. commits only the winner to the tree
|
|
13
|
+
*
|
|
14
|
+
* Candidate verification is sequential by design: gates (npm test, builds)
|
|
15
|
+
* cannot safely run concurrently in one tree. Worktree-isolated parallel
|
|
16
|
+
* verification is a planned optimization (inject via `revertibleApplier` +
|
|
17
|
+
* a per-candidate workspace), not a Phase 2 requirement.
|
|
18
|
+
*
|
|
19
|
+
* Fairness caveat: rollback reverts files the model wrote, but gate commands
|
|
20
|
+
* also mutate the tree (dist/ output, snapshots, caches). Candidate N+1 may
|
|
21
|
+
* therefore see candidate N's gate side effects. The committed winner is
|
|
22
|
+
* re-verified after commit so its reported ladder reflects real on-disk
|
|
23
|
+
* state. Full per-candidate isolation requires the workspace seam above.
|
|
24
|
+
*/
|
|
25
|
+
import { runLadder } from './verifier-ladder.js';
|
|
26
|
+
import { applyFileBlocks, applyFileBlocksWithRollback } from './applier.js';
|
|
27
|
+
/**
 * Upper bound on the number of candidates explored in a single turn. It guards
 * direct library callers (the CLI applies a lower cap of its own), because each
 * candidate costs one model call plus one full gate run.
 */
export const MAX_CANDIDATES = 8;
|
|
30
|
+
/**
 * Built-in strategy seeds. Each `[id, hint]` pair becomes one `{ id, hint }`
 * seed; the hint is the text appended to the base prompt for that candidate.
 */
export const DEFAULT_STRATEGY_SEEDS = [
    ['direct', 'STRATEGY: Make the most direct, minimal change that satisfies the task. Touch as few files as possible.'],
    ['test-first', 'STRATEGY: Reason from the failing gates first. Identify exactly what the gates check, then implement precisely that.'],
    ['defensive', 'STRATEGY: Implement with rigorous edge-case handling — empty inputs, wrong types, boundary values.'],
    ['rewrite', 'STRATEGY: Re-derive the solution from scratch rather than patching the previous attempt.'],
].map(([id, hint]) => ({ id, hint }));
|
|
48
|
+
const DEFAULT_CANDIDATES = 3;
|
|
49
|
+
/**
 * Generate, evaluate, and commit the best candidate for one loop turn.
 *
 * Steps:
 *  1. Generate the candidates concurrently. Candidate `i` receives the base
 *     prompt, a blank line, and the hint of seed `i % seeds.length`.
 *  2. Evaluate the candidates one at a time: trial-apply, run the gates, roll
 *     back. The gates run only when the trial apply reported no error and
 *     wrote at least one file.
 *  3. Rank them: committable candidates first, then passing ones, then by
 *     score. When several committable candidates tie with the top one and a
 *     judge is configured, the judge picks among them.
 *  4. Commit the winner with the commit applier and run the gates again on the
 *     committed tree.
 *
 * @param task - Task description; only forwarded to the judge.
 * @param basePrompt - Prompt from the loop's prompt builder.
 * @param executor - Async model call. A rejection is recorded on the candidate
 *   as `error` instead of being thrown.
 * @param config - Exploration settings; `ladderRunner`, `applier` and
 *   `revertibleApplier` override the library defaults.
 * @returns `{ winner: null, ladder: null }` plus all candidates when no
 *   candidate both wrote files and reached a gate run. Otherwise the committed
 *   winner, whose `ladder`, `passed` and `score` all describe the reported
 *   ladder, together with all candidates as they were evaluated.
 * @throws Whatever the ladder runner or the appliers throw; a trial apply is
 *   rolled back before the error propagates.
 */
export async function exploreAndCommit(task, basePrompt, executor, config) {
    // NaN survives `??` and poisons Math.max/Math.min, which would silently
    // produce zero candidates; treat it like a missing value.
    const requested = config.candidates ?? DEFAULT_CANDIDATES;
    const count = Math.min(MAX_CANDIDATES, Math.max(1, Number.isNaN(requested) ? DEFAULT_CANDIDATES : requested));
    const seeds = config.seeds && config.seeds.length > 0 ? config.seeds : DEFAULT_STRATEGY_SEEDS;
    const ladderRunner = config.ladderRunner ?? runLadder;
    const apply = config.applier ?? applyFileBlocks;
    const applyRevertible = config.revertibleApplier ?? applyFileBlocksWithRollback;
    // 1. Generate all candidates concurrently (model calls parallelize fine)
    const generations = await Promise.all(Array.from({ length: count }, async (_, i) => {
        const seed = seeds[i % seeds.length];
        const prompt = `${basePrompt}\n\n${seed.hint}`;
        try {
            return { seed, output: await executor(prompt), error: undefined };
        }
        catch (err) {
            return { seed, output: '', error: err instanceof Error ? err.message : String(err) };
        }
    }));
    // 2. Evaluate sequentially on the same baseline via apply → verify → rollback
    const candidates = [];
    for (let i = 0; i < generations.length; i++) {
        const { seed, output, error } = generations[i];
        const id = `c${i + 1}`;
        // Compare against undefined rather than truthiness: an executor that
        // throws an Error with an empty message is still a failed generation.
        if (error !== undefined) {
            const candidate = {
                id,
                strategy: seed.id,
                output: '',
                applyResult: null,
                ladder: null,
                error,
                passed: false,
                score: 0,
            };
            candidates.push(candidate);
            config.onCandidate?.(candidate);
            continue;
        }
        const { result: applyResult, restore } = applyRevertible(output, config.projectRoot);
        let ladder = null;
        try {
            if (!applyResult.error && applyResult.filesWritten.length > 0) {
                ladder = await ladderRunner(config.rungs, config.projectRoot, config.ladderOptions);
            }
        }
        finally {
            // Always return to the baseline, even when the gate run throws.
            restore();
        }
        const candidate = {
            id,
            strategy: seed.id,
            output,
            applyResult,
            ladder,
            passed: ladder?.passed ?? false,
            score: ladder?.score ?? 0,
        };
        candidates.push(candidate);
        config.onCandidate?.(candidate);
    }
    // Only candidates that wrote files and reached a real ladder run are
    // committable; anything else (executor error, no/rejected blocks) cannot
    // win regardless of its zero score.
    const committable = (c) => c.ladder !== null && (c.applyResult?.filesWritten.length ?? 0) > 0;
    // 3. Rank by evaluation tier, then pass/score. Sorting a copy keeps
    //    `candidates` in generation order for the caller.
    const ranked = [...candidates].sort((a, b) => {
        const ca = committable(a);
        const cb = committable(b);
        if (ca !== cb)
            return ca ? -1 : 1;
        if (a.passed !== b.passed)
            return a.passed ? -1 : 1;
        return b.score - a.score;
    });
    const top = ranked[0];
    if (!top || !committable(top)) {
        return { winner: null, candidates, ladder: null };
    }
    // Judge tie-break among committable candidates tied with the top result
    let winner = top;
    let judgeRationale;
    const tied = ranked.filter((c) => committable(c) && c.passed === top.passed && c.score === top.score);
    if (tied.length > 1 && config.judge) {
        try {
            const verdict = await config.judge(task, tied.map((c) => ({
                id: c.id,
                strategy: c.strategy,
                output: c.output,
                ladderFeedback: c.ladder?.feedback ?? '',
                score: c.score,
            })));
            const chosen = tied.find((c) => c.id === verdict.winnerId);
            if (chosen) {
                winner = chosen;
                judgeRationale = verdict.rationale;
            }
        }
        catch {
            // Judge is a public seam; a throwing judge falls back to objective top.
        }
    }
    // 4. Commit the winner by re-applying. Re-verify the committed tree so the
    //    reported ladder reflects on-disk state (losers' gate side effects may
    //    have perturbed it between the winner's evaluation and now).
    const committed = await apply(winner.output, config.projectRoot);
    // NOTE(review): when the commit writes no files the evaluation ladder is
    // reported unchanged — confirm whether that case should count as a failed commit.
    let finalLadder = winner.ladder;
    if (committed.filesWritten.length > 0) {
        finalLadder = await ladderRunner(config.rungs, config.projectRoot, config.ladderOptions);
    }
    // Keep the winner's verdict in step with the ladder it carries: `passed` and
    // `score` are derived from the reported ladder, exactly as during evaluation.
    const committedWinner = {
        ...winner,
        ladder: finalLadder,
        passed: finalLadder?.passed ?? false,
        score: finalLadder?.score ?? 0,
    };
    return { winner: committedWinner, candidates, judgeRationale, ladder: finalLadder };
}
|
|
166
|
+
//# sourceMappingURL=explorer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"explorer.js","sourceRoot":"","sources":["../../src/delivery/explorer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAGH,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEjD,OAAO,EAAE,eAAe,EAAE,2BAA2B,EAAE,MAAM,cAAc,CAAC;AAI5E;gFACgF;AAChF,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC;AAOhC,MAAM,CAAC,MAAM,sBAAsB,GAAmB;IACpD;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,yGAAyG;KAChH;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,sHAAsH;KAC7H;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,oGAAoG;KAC3G;IACD;QACE,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,0FAA0F;KACjG;CACF,CAAC;AAuCF,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAE7B;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,UAAkB,EAClB,QAAsB,EACtB,MAAsB;IAEtB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,UAAU,IAAI,kBAAkB,CAAC,CAAC,CAAC;IAC7F,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,sBAAsB,CAAC;IAC9F,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,SAAS,CAAC;IACtD,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,IAAI,eAAe,CAAC;IAChD,MAAM,eAAe,GAAG,MAAM,CAAC,iBAAiB,IAAI,2BAA2B,CAAC;IAEhF,yEAAyE;IACzE,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,GAAG,CACnC,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,GAAG,UAAU,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;QAC/C,IAAI,CAAC;YACH,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QACpE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;QACvF,CAAC;IACH,CAAC,CAAC,CACH,CAAC;IAEF,8EAA8E;IAC9E,MAAM,UAAU,GAAsB,EAAE,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC/C,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAEvB,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,SAAS,GAAoB;gBACj
C,EAAE;gBACF,QAAQ,EAAE,IAAI,CAAC,EAAE;gBACjB,MAAM,EAAE,EAAE;gBACV,WAAW,EAAE,IAAI;gBACjB,MAAM,EAAE,IAAI;gBACZ,KAAK;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;aACT,CAAC;YACF,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC3B,MAAM,CAAC,WAAW,EAAE,CAAC,SAAS,CAAC,CAAC;YAChC,SAAS;QACX,CAAC;QAED,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,CAAC;QACrF,IAAI,MAAM,GAAwB,IAAI,CAAC;QACvC,IAAI,CAAC;YACH,IAAI,CAAC,WAAW,CAAC,KAAK,IAAI,WAAW,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9D,MAAM,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC;YACtF,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,SAAS,GAAoB;YACjC,EAAE;YACF,QAAQ,EAAE,IAAI,CAAC,EAAE;YACjB,MAAM;YACN,WAAW;YACX,MAAM;YACN,MAAM,EAAE,MAAM,EAAE,MAAM,IAAI,KAAK;YAC/B,KAAK,EAAE,MAAM,EAAE,KAAK,IAAI,CAAC;SAC1B,CAAC;QACF,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC3B,MAAM,CAAC,WAAW,EAAE,CAAC,SAAS,CAAC,CAAC;IAClC,CAAC;IAED,qEAAqE;IACrE,yEAAyE;IACzE,oCAAoC;IACpC,MAAM,WAAW,GAAG,CAAC,CAAkB,EAAW,EAAE,CAClD,CAAC,CAAC,MAAM,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,WAAW,EAAE,YAAY,CAAC,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IAErE,uEAAuE;IACvE,wEAAwE;IACxE,6DAA6D;IAC7D,MAAM,MAAM,GAAG,CAAC,GAAG,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QAC3C,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,EAAE,KAAK,EAAE;YAAE,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAClC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;YAAE,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;IAC3B,CAAC,CAAC,CAAC;IAEH,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACtB,IAAI,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9B,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;IACpD,CAAC;IAED,wEAAwE;IACxE,IAAI,MAAM,GAAG,GAAG,CAAC;IACjB,IAAI,cAAkC,CAAC;IACvC,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CACxB,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,GAAG,CAAC,M
AAM,IAAI,CAAC,CAAC,KAAK,KAAK,GAAG,CAAC,KAAK,CAC1E,CAAC;IACF,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACpC,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,KAAK,CAChC,IAAI,EACJ,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACf,EAAE,EAAE,CAAC,CAAC,EAAE;gBACR,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,cAAc,EAAE,CAAC,CAAC,MAAM,EAAE,QAAQ,IAAI,EAAE;gBACxC,KAAK,EAAE,CAAC,CAAC,KAAK;aACf,CAAC,CAAC,CACJ,CAAC;YACF,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,QAAQ,CAAC,CAAC;YAC3D,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,GAAG,MAAM,CAAC;gBAChB,cAAc,GAAG,OAAO,CAAC,SAAS,CAAC;YACrC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,wEAAwE;QAC1E,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,2EAA2E;IAC3E,iEAAiE;IACjE,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,WAAW,CAAC,CAAC;IACjE,IAAI,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC;IAChC,IAAI,SAAS,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtC,WAAW,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,aAAa,CAAC,CAAC;IAC3F,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;AACzG,CAAC"}
|
package/dist/delivery/index.d.ts
CHANGED
|
@@ -5,8 +5,13 @@
|
|
|
5
5
|
* iterations against the project's real completion gates until delivery
|
|
6
6
|
* is achieved.
|
|
7
7
|
*/
|
|
8
|
-
export { ConvergenceLoop, defaultPromptBuilder, type ConvergenceConfig, type DeliveryResult, type IterationRecord, type LoopExecutor, type LadderRunner, type PromptBuilder, type PromptContext, } from './convergence-loop.js';
|
|
8
|
+
export { ConvergenceLoop, defaultPromptBuilder, type CandidateSummary, type ConvergenceConfig, type DeliveryResult, type ExplorerSettings, type IterationDirective, type IterationRecord, type LoopExecutor, type LadderRunner, type OnIteration, type PracticeProvider, type PromptBuilder, type PromptContext, } from './convergence-loop.js';
|
|
9
|
+
export { createEscalationController, defaultEscalationLadder, type DefaultLadderOptions, type EscalationConfig, type EscalationController, type EscalationTier, } from './escalation.js';
|
|
10
|
+
export { InMemoryPracticeStore, FilePracticeStore, extractKeywords, distillPractice, defaultPracticePath, type PracticeCard, type PracticeInput, type PracticeStore, } from './practice.js';
|
|
11
|
+
export { exploreAndCommit, DEFAULT_STRATEGY_SEEDS, MAX_CANDIDATES, type CandidateResult, type ExplorationResult, type ExplorerConfig, type StrategySeed, } from './explorer.js';
|
|
12
|
+
export { createModelJudge, extractJson, type Judge, type JudgeCandidate, type JudgeVerdict, } from './judge.js';
|
|
13
|
+
export { createModelCritic, parseFixList, type Critic, type Critique, type CritiqueInput, } from './critic.js';
|
|
9
14
|
export { detectRungs, runLadder, runRung, formatFeedback, type GateRung, type RungResult, type RungFailureReason, type LadderResult, type LadderOptions, } from './verifier-ladder.js';
|
|
10
|
-
export { applyFileBlocks, parseFileBlocks, type Applier, type ApplyResult, type FileBlock, } from './applier.js';
|
|
15
|
+
export { applyFileBlocks, applyFileBlocksWithRollback, parseFileBlocks, type Applier, type ApplyResult, type FileBlock, type RevertibleApply, } from './applier.js';
|
|
11
16
|
export { OpenAICompatClient, type OpenAICompatClientOptions } from '../models/openai-compat-client.js';
|
|
12
17
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/delivery/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,eAAe,EACf,oBAAoB,EACpB,KAAK,iBAAiB,EACtB,KAAK,cAAc,EACnB,KAAK,eAAe,EACpB,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,aAAa,EAClB,KAAK,aAAa,GACnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,WAAW,EACX,SAAS,EACT,OAAO,EACP,cAAc,EACd,KAAK,QAAQ,EACb,KAAK,UAAU,EACf,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,eAAe,EACf,eAAe,EACf,KAAK,OAAO,EACZ,KAAK,WAAW,EAChB,KAAK,SAAS,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/delivery/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,eAAe,EACf,oBAAoB,EACpB,KAAK,gBAAgB,EACrB,KAAK,iBAAiB,EACtB,KAAK,cAAc,EACnB,KAAK,gBAAgB,EACrB,KAAK,kBAAkB,EACvB,KAAK,eAAe,EACpB,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,WAAW,EAChB,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAClB,KAAK,aAAa,GACnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,0BAA0B,EAC1B,uBAAuB,EACvB,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,cAAc,GACpB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,qBAAqB,EACrB,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,mBAAmB,EACnB,KAAK,YAAY,EACjB,KAAK,aAAa,EAClB,KAAK,aAAa,GACnB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,gBAAgB,EAChB,sBAAsB,EACtB,cAAc,EACd,KAAK,eAAe,EACpB,KAAK,iBAAiB,EACtB,KAAK,cAAc,EACnB,KAAK,YAAY,GAClB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,gBAAgB,EAChB,WAAW,EACX,KAAK,KAAK,EACV,KAAK,cAAc,EACnB,KAAK,YAAY,GAClB,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,iBAAiB,EACjB,YAAY,EACZ,KAAK,MAAM,EACX,KAAK,QAAQ,EACb,KAAK,aAAa,GACnB,MAAM,aAAa,CAAC;AAErB,OAAO,EACL,WAAW,EACX,SAAS,EACT,OAAO,EACP,cAAc,EACd,KAAK,QAAQ,EACb,KAAK,UAAU,EACf,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,eAAe,EACf,2BAA2B,EAC3B,eAAe,EACf,KAAK,OAAO,EACZ,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,eAAe,GACrB,MAAM,cAAc,CAAC;AAEtB,OAAO,EAAE,kBAAkB,EAAE,KAAK,yBAAyB,EAAE,MAAM,mCAAmC,CAAC"}
|
package/dist/delivery/index.js
CHANGED
|
@@ -6,7 +6,12 @@
|
|
|
6
6
|
* is achieved.
|
|
7
7
|
*/
|
|
8
8
|
export { ConvergenceLoop, defaultPromptBuilder, } from './convergence-loop.js';
|
|
9
|
+
export { createEscalationController, defaultEscalationLadder, } from './escalation.js';
|
|
10
|
+
export { InMemoryPracticeStore, FilePracticeStore, extractKeywords, distillPractice, defaultPracticePath, } from './practice.js';
|
|
11
|
+
export { exploreAndCommit, DEFAULT_STRATEGY_SEEDS, MAX_CANDIDATES, } from './explorer.js';
|
|
12
|
+
export { createModelJudge, extractJson, } from './judge.js';
|
|
13
|
+
export { createModelCritic, parseFixList, } from './critic.js';
|
|
9
14
|
export { detectRungs, runLadder, runRung, formatFeedback, } from './verifier-ladder.js';
|
|
10
|
-
export { applyFileBlocks, parseFileBlocks, } from './applier.js';
|
|
15
|
+
export { applyFileBlocks, applyFileBlocksWithRollback, parseFileBlocks, } from './applier.js';
|
|
11
16
|
export { OpenAICompatClient } from '../models/openai-compat-client.js';
|
|
12
17
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/delivery/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,eAAe,EACf,oBAAoB,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/delivery/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,eAAe,EACf,oBAAoB,GAarB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,0BAA0B,EAC1B,uBAAuB,GAKxB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,qBAAqB,EACrB,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,mBAAmB,GAIpB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,gBAAgB,EAChB,sBAAsB,EACtB,cAAc,GAKf,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,gBAAgB,EAChB,WAAW,GAIZ,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,iBAAiB,EACjB,YAAY,GAIb,MAAM,aAAa,CAAC;AAErB,OAAO,EACL,WAAW,EACX,SAAS,EACT,OAAO,EACP,cAAc,GAMf,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,eAAe,EACf,2BAA2B,EAC3B,eAAe,GAKhB,MAAM,cAAc,CAAC;AAEtB,OAAO,EAAE,kBAAkB,EAAkC,MAAM,mCAAmC,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Candidate Judge
|
|
3
|
+
*
|
|
4
|
+
* Tie-breaks exploration candidates whose objective (gate) scores are equal.
|
|
5
|
+
* Objective verification always outranks judgment — the judge is consulted
|
|
6
|
+
* only among candidates tied at the top of the ladder ranking.
|
|
7
|
+
*/
|
|
8
|
+
import type { LoopExecutor } from './convergence-loop.js';
|
|
9
|
+
export interface JudgeCandidate {
|
|
10
|
+
/** Stable candidate id, e.g. 'c1' */
|
|
11
|
+
id: string;
|
|
12
|
+
/** Strategy seed that produced this candidate */
|
|
13
|
+
strategy: string;
|
|
14
|
+
/** Model output (the judge prompt truncates it before embedding) */
|
|
15
|
+
output: string;
|
|
16
|
+
/** Gate feedback for this candidate */
|
|
17
|
+
ladderFeedback: string;
|
|
18
|
+
/** Objective gate score */
|
|
19
|
+
score: number;
|
|
20
|
+
}
|
|
21
|
+
/** Result of a judge call: the chosen candidate plus a short explanation. */
export interface JudgeVerdict {
|
|
22
|
+
/** Id of the candidate selected as the winner */
winnerId: string;
|
|
23
|
+
/** Short explanation of the choice (may be an empty string) */
rationale: string;
|
|
24
|
+
}
|
|
25
|
+
/** Async function that takes the task text and the candidates and resolves to a JudgeVerdict. */
export type Judge = (task: string, candidates: JudgeCandidate[]) => Promise<JudgeVerdict>;
|
|
26
|
+
/** Extract the first JSON object from model output (tolerates prose/fences). */
|
|
27
|
+
export declare function extractJson(text: string): Record<string, unknown> | null;
|
|
28
|
+
/**
|
|
29
|
+
* Model-backed judge. Falls back to the first candidate (caller's ranking
|
|
30
|
+
* order) when the verdict is unparseable or names an unknown candidate.
|
|
31
|
+
*/
|
|
32
|
+
export declare function createModelJudge(executor: LoopExecutor): Judge;
|
|
33
|
+
//# sourceMappingURL=judge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../../src/delivery/judge.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAE1D,MAAM,WAAW,cAAc;IAC7B,qCAAqC;IACrC,EAAE,EAAE,MAAM,CAAC;IACX,iDAAiD;IACjD,QAAQ,EAAE,MAAM,CAAC;IACjB,oEAAoE;IACpE,MAAM,EAAE,MAAM,CAAC;IACf,uCAAuC;IACvC,cAAc,EAAE,MAAM,CAAC;IACvB,2BAA2B;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,KAAK,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,KAAK,OAAO,CAAC,YAAY,CAAC,CAAC;AA4B1F,gFAAgF;AAChF,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAQxE;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,YAAY,GAAG,KAAK,CA0B9D"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Candidate Judge
|
|
3
|
+
*
|
|
4
|
+
* Tie-breaks exploration candidates whose objective (gate) scores are equal.
|
|
5
|
+
* Objective verification always outranks judgment — the judge is consulted
|
|
6
|
+
* only among candidates tied at the top of the ladder ranking.
|
|
7
|
+
*/
|
|
8
|
+
/** Maximum number of characters of a candidate's output embedded in the judge prompt. */
const CANDIDATE_OUTPUT_CHARS = 2_000;
/**
 * Render the prompt sent to the judge model.
 *
 * Layout: two instruction lines, the task, one section per candidate
 * (header with id / strategy / gate score as a rounded percentage, the output
 * truncated to CANDIDATE_OUTPUT_CHARS, and — when non-empty — the first 500
 * characters of its gate feedback), then the required JSON response format.
 * All lines are joined with '\n'.
 */
function buildJudgePrompt(task, candidates) {
    const preamble = [
        'You are a strict senior code reviewer judging competing solutions to the same task.',
        'Rate on: correctness, completeness, simplicity, and how well gate feedback was addressed.',
        '',
        `TASK: ${task}`,
        '',
    ];
    const candidateLines = candidates.flatMap((candidate) => {
        const percent = Math.round(candidate.score * 100);
        const block = [
            `=== CANDIDATE ${candidate.id} (strategy: ${candidate.strategy}, gate score: ${percent}%) ===`,
            candidate.output.slice(0, CANDIDATE_OUTPUT_CHARS),
        ];
        // Feedback is optional; an empty string adds no line at all.
        if (candidate.ladderFeedback) {
            block.push(`Gate feedback: ${candidate.ladderFeedback.slice(0, 500)}`);
        }
        // Blank separator after every candidate section.
        block.push('');
        return block;
    });
    const responseFormat = `Respond with ONLY a JSON object: {"winner": "<candidate id>", "rationale": "<one sentence>"}`;
    return [...preamble, ...candidateLines, responseFormat].join('\n');
}
|
|
28
|
+
/**
 * Extract the first JSON object from model output (tolerates prose/fences).
 *
 * Each `{` in the text is tried in order: the span up to its balancing `}`
 * is located with a string-aware brace scan and handed to JSON.parse. The
 * first span that parses to a plain object is returned. This handles nested
 * objects and `}` characters inside string values, and skips brace pairs in
 * surrounding prose that are not valid JSON.
 *
 * @param text Raw model output.
 * @returns The parsed object, or null when no parseable JSON object is found.
 */
export function extractJson(text) {
    let start = text.indexOf('{');
    while (start !== -1) {
        const end = findObjectEnd(text, start);
        if (end !== -1) {
            try {
                const value = JSON.parse(text.slice(start, end + 1));
                if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
                    return value;
                }
            }
            catch {
                // Balanced braces but not valid JSON (e.g. prose like "{x}"); try the next '{'.
            }
        }
        start = text.indexOf('{', start + 1);
    }
    return null;
}
/**
 * Find the index of the `}` that balances the `{` at `start`.
 * Braces inside double-quoted strings (with backslash escapes) are ignored.
 * Returns -1 when the braces never balance before the end of the text.
 */
function findObjectEnd(text, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (escaped) {
                escaped = false;
            }
            else if (ch === '\\') {
                escaped = true;
            }
            else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === '{') {
            depth++;
        }
        else if (ch === '}') {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}
|
|
40
|
+
/**
 * Create a judge that asks a model (via `executor`) to pick a winner.
 *
 * The returned async function keeps the caller's ranking — i.e. names the
 * first candidate — whenever there are fewer than two candidates, the
 * executor call fails, the reply holds no parseable JSON object, or the
 * reply's `winner` is not the id of one of the supplied candidates.
 *
 * @param executor Called with the rendered judge prompt; resolves to the raw model reply.
 * @returns A judge function `(task, candidates) => Promise<{ winnerId, rationale }>`.
 */
export function createModelJudge(executor) {
    return async (task, candidates) => {
        // Built before any length check, so an empty candidate list fails right here.
        const keepRanking = {
            winnerId: candidates[0].id,
            rationale: 'judge fallback: kept objective ranking',
        };
        if (candidates.length >= 2) {
            let reply;
            try {
                reply = await executor(buildJudgePrompt(task, candidates));
            }
            catch {
                // Any failure while prompting the model falls back to the existing order.
                return keepRanking;
            }
            const verdict = extractJson(reply);
            const chosenId = verdict?.winner;
            const isKnownId = typeof chosenId === 'string' &&
                chosenId !== '' &&
                candidates.some(({ id }) => id === chosenId);
            if (isKnownId) {
                const rationale = typeof verdict.rationale === 'string' ? verdict.rationale : '';
                return { winnerId: chosenId, rationale };
            }
        }
        return keepRanking;
    };
}
|
|
70
|
+
//# sourceMappingURL=judge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge.js","sourceRoot":"","sources":["../../src/delivery/judge.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAwBH,MAAM,sBAAsB,GAAG,KAAK,CAAC;AAErC,SAAS,gBAAgB,CAAC,IAAY,EAAE,UAA4B;IAClE,MAAM,QAAQ,GAAG;QACf,qFAAqF;QACrF,2FAA2F;QAC3F,EAAE;QACF,SAAS,IAAI,EAAE;QACf,EAAE;KACH,CAAC;IAEF,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,QAAQ,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC,QAAQ,iBAAiB,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC;QAChH,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,sBAAsB,CAAC,CAAC,CAAC;QACzD,IAAI,CAAC,CAAC,cAAc,EAAE,CAAC;YACrB,QAAQ,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACpE,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACpB,CAAC;IAED,QAAQ,CAAC,IAAI,CACX,8FAA8F,CAC/F,CAAC;IACF,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC;AAED,gFAAgF;AAChF,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACzC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAA4B,CAAC;IACzD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,QAAsB;IACrD,OAAO,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE;QAChC,MAAM,QAAQ,GAAiB;YAC7B,QAAQ,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE;YAC1B,SAAS,EAAE,wCAAwC;SACpD,CAAC;QACF,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,QAAQ,CAAC;QAE3C,IAAI,GAAW,CAAC;QAChB,IAAI,CAAC;YACH,GAAG,GAAG,MAAM,QAAQ,CAAC,gBAAgB,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC;QAC3D,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;QAChC,MAAM,MAAM,GAAG,OAAO,MAAM,EAAE,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;QAC9E,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,MAAM,CAAC,EAAE,CAAC;YACxD,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,OAAO;YACL,QAAQ,EAAE,MAAM;YAChB,SAAS,EAAE,OAAO,MAAM,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;SACzE,CAAC;IACJ,CAA
C,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Practice Store (Phase 4)
|
|
3
|
+
*
|
|
4
|
+
* Best-practice "cards" learned from past deliveries and injected into future
|
|
5
|
+
* prompts for similar tasks. A card records guidance distilled from a
|
|
6
|
+
* *successful* run — the winning strategy seed and how many turns it took —
|
|
7
|
+
* keyed by task keywords.
|
|
8
|
+
*
|
|
9
|
+
* Provenance matters: cards are derived from the harness's own strategy seeds
|
|
10
|
+
* and gate outcomes, never from raw model output. This keeps the long-term
|
|
11
|
+
* store free of model-authored text (a stored-prompt-injection vector).
|
|
12
|
+
*/
|
|
13
|
+
export interface PracticeCard {
|
|
14
|
+
id: string;
|
|
15
|
+
/** Winning strategy seed id (provenance anchor — never model text) */
|
|
16
|
+
strategy: string;
|
|
17
|
+
/** Lowercased keywords that gate retrieval relevance */
|
|
18
|
+
keywords: string[];
|
|
19
|
+
/** Guidance injected into prompts — always regenerated from strategy+bestTurns */
|
|
20
|
+
guidance: string;
|
|
21
|
+
/** Times this practice has been reinforced by a successful delivery */
|
|
22
|
+
successCount: number;
|
|
23
|
+
/** Fewest turns a delivery using this practice took (lower = stronger) */
|
|
24
|
+
bestTurns: number;
|
|
25
|
+
}
|
|
26
|
+
/** Input to record a successful delivery as a practice. */
|
|
27
|
+
export interface PracticeInput {
|
|
28
|
+
strategy: string;
|
|
29
|
+
keywords: string[];
|
|
30
|
+
turns: number;
|
|
31
|
+
}
|
|
32
|
+
export interface PracticeStore {
|
|
33
|
+
/** Retrieve the most relevant cards for a task instruction */
|
|
34
|
+
retrieve(instruction: string, limit?: number): PracticeCard[];
|
|
35
|
+
/** Reinforce or create a card from a successful delivery */
|
|
36
|
+
record(input: PracticeInput): void;
|
|
37
|
+
}
|
|
38
|
+
/** Extract lowercased keyword tokens from a task instruction. */
|
|
39
|
+
export declare function extractKeywords(text: string, max?: number): string[];
|
|
40
|
+
/** In-memory store — the base implementation; the file store persists it. */
|
|
41
|
+
export declare class InMemoryPracticeStore implements PracticeStore {
|
|
42
|
+
protected cards: PracticeCard[];
|
|
43
|
+
constructor(cards?: PracticeCard[]);
|
|
44
|
+
retrieve(instruction: string, limit?: number): PracticeCard[];
|
|
45
|
+
record(input: PracticeInput): void;
|
|
46
|
+
/** Smallest unused pN id — robust to merges and dropped (corrupt) cards. */
|
|
47
|
+
private nextId;
|
|
48
|
+
all(): PracticeCard[];
|
|
49
|
+
}
|
|
50
|
+
/** File-backed practice store (JSON). Self-heals on missing/corrupt files. */
|
|
51
|
+
export declare class FilePracticeStore extends InMemoryPracticeStore {
|
|
52
|
+
private readonly path;
|
|
53
|
+
constructor(path: string);
|
|
54
|
+
/**
|
|
55
|
+
* Load cards from disk, validating every field. The on-disk file is NOT
|
|
56
|
+
* trusted to supply guidance text — guidance is regenerated from the
|
|
57
|
+
* validated strategy + bestTurns via distillPractice, so a tampered file
|
|
58
|
+
* cannot inject prompt text (the read path enforces the same provenance
|
|
59
|
+
* the write path guarantees). Structurally-invalid cards are dropped.
|
|
60
|
+
*/
|
|
61
|
+
private static load;
|
|
62
|
+
record(input: PracticeInput): void;
|
|
63
|
+
private persist;
|
|
64
|
+
}
|
|
65
|
+
/** Default on-disk location for a project's learned practices. */
|
|
66
|
+
export declare function defaultPracticePath(projectRoot: string): string;
|
|
67
|
+
/**
|
|
68
|
+
* Distill a one-line practice from a successful delivery. Provenance-safe:
|
|
69
|
+
* built only from the winning strategy and turn count, not model output.
|
|
70
|
+
*/
|
|
71
|
+
export declare function distillPractice(winningStrategy: string | undefined, turns: number): string;
|
|
72
|
+
//# sourceMappingURL=practice.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"practice.d.ts","sourceRoot":"","sources":["../../src/delivery/practice.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,sEAAsE;IACtE,QAAQ,EAAE,MAAM,CAAC;IACjB,wDAAwD;IACxD,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,kFAAkF;IAClF,QAAQ,EAAE,MAAM,CAAC;IACjB,uEAAuE;IACvE,YAAY,EAAE,MAAM,CAAC;IACrB,0EAA0E;IAC1E,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,2DAA2D;AAC3D,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,aAAa;IAC5B,8DAA8D;IAC9D,QAAQ,CAAC,WAAW,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,YAAY,EAAE,CAAC;IAC9D,4DAA4D;IAC5D,MAAM,CAAC,KAAK,EAAE,aAAa,GAAG,IAAI,CAAC;CACpC;AAYD,iEAAiE;AACjE,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,SAAK,GAAG,MAAM,EAAE,CAUhE;AAYD,6EAA6E;AAC7E,qBAAa,qBAAsB,YAAW,aAAa;IACzD,SAAS,CAAC,KAAK,EAAE,YAAY,EAAE,CAAC;gBAEpB,KAAK,GAAE,YAAY,EAAO;IAItC,QAAQ,CAAC,WAAW,EAAE,MAAM,EAAE,KAAK,SAAI,GAAG,YAAY,EAAE;IAcxD,MAAM,CAAC,KAAK,EAAE,aAAa,GAAG,IAAI;IAqBlC,4EAA4E;IAC5E,OAAO,CAAC,MAAM;IAQd,GAAG,IAAI,YAAY,EAAE;CAGtB;AAED,8EAA8E;AAC9E,qBAAa,iBAAkB,SAAQ,qBAAqB;IAC1D,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;gBAElB,IAAI,EAAE,MAAM;IAKxB;;;;;;OAMG;IACH,OAAO,CAAC,MAAM,CAAC,IAAI;IAgCnB,MAAM,CAAC,KAAK,EAAE,aAAa,GAAG,IAAI;IAKlC,OAAO,CAAC,OAAO;CAYhB;AAED,kEAAkE;AAClE,wBAAgB,mBAAmB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAE/D;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,eAAe,EAAE,MAAM,GAAG,SAAS,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAM1F"}
|