@miller-tech/uap 1.26.5 → 1.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/bin/cli.js +18 -0
- package/dist/bin/cli.js.map +1 -1
- package/dist/cli/deliver.d.ts +19 -0
- package/dist/cli/deliver.d.ts.map +1 -0
- package/dist/cli/deliver.js +167 -0
- package/dist/cli/deliver.js.map +1 -0
- package/dist/delivery/applier.d.ts +39 -0
- package/dist/delivery/applier.d.ts.map +1 -0
- package/dist/delivery/applier.js +75 -0
- package/dist/delivery/applier.js.map +1 -0
- package/dist/delivery/convergence-loop.d.ts +111 -0
- package/dist/delivery/convergence-loop.d.ts.map +1 -0
- package/dist/delivery/convergence-loop.js +199 -0
- package/dist/delivery/convergence-loop.js.map +1 -0
- package/dist/delivery/index.d.ts +12 -0
- package/dist/delivery/index.d.ts.map +1 -0
- package/dist/delivery/index.js +12 -0
- package/dist/delivery/index.js.map +1 -0
- package/dist/delivery/verifier-ladder.d.ts +78 -0
- package/dist/delivery/verifier-ladder.d.ts.map +1 -0
- package/dist/delivery/verifier-ladder.js +213 -0
- package/dist/delivery/verifier-ladder.js.map +1 -0
- package/dist/models/openai-compat-client.d.ts +34 -0
- package/dist/models/openai-compat-client.d.ts.map +1 -0
- package/dist/models/openai-compat-client.js +82 -0
- package/dist/models/openai-compat-client.js.map +1 -0
- package/package.json +1 -1
- package/tools/agents/config/qwen3.5-enhanced.jinja +4 -1
- package/tools/agents/docker-compose.qdrant.yml +7 -1
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Convergence Loop
|
|
3
|
+
*
|
|
4
|
+
* Phase 1 of the Fable-parity delivery harness: drives an underlying model
|
|
5
|
+
* through execute → apply → verify → feedback iterations until the project's
|
|
6
|
+
* completion gates (verifier ladder) pass or the turn budget is exhausted.
|
|
7
|
+
*
|
|
8
|
+
* The loop owns four pluggable seams so later phases extend without breaking
|
|
9
|
+
* changes:
|
|
10
|
+
* - executor: how a prompt becomes model output
|
|
11
|
+
* - applier: how model output is materialized into the project tree
|
|
12
|
+
* - promptBuilder: how instruction/feedback/prior output compose a prompt
|
|
13
|
+
* (Phase 3 structured critique and Phase 4 memory injection plug in here)
|
|
14
|
+
* - ladderRunner: how gates are verified (tests inject stubs; Phase 2 runs
|
|
15
|
+
* candidates in isolated worktrees)
|
|
16
|
+
*/
|
|
17
|
+
import type { GateRung, LadderResult, LadderOptions } from './verifier-ladder.js';
|
|
18
|
+
import type { Applier } from './applier.js';
|
|
19
|
+
export type LoopExecutor = (prompt: string) => Promise<string>;
|
|
20
|
+
/** Pluggable ladder runner — production uses runLadder, tests inject a stub. */
|
|
21
|
+
export type LadderRunner = (rungs: GateRung[], projectRoot: string, options?: LadderOptions) => LadderResult | Promise<LadderResult>;
|
|
22
|
+
export interface PromptContext {
|
|
23
|
+
instruction: string;
|
|
24
|
+
/** 1-based turn about to execute */
|
|
25
|
+
turn: number;
|
|
26
|
+
/** Model output from the previous turn; ConvergenceLoop cuts it to `previousOutputChars` (plus a truncation marker) before passing it here */
|
|
27
|
+
previousOutput?: string;
|
|
28
|
+
/** Gate feedback from the previous turn's ladder run */
|
|
29
|
+
feedback?: string;
|
|
30
|
+
/** Apply-stage error from the previous turn (e.g. no file blocks found) */
|
|
31
|
+
applyError?: string;
|
|
32
|
+
/** Files written by the previous turn */
|
|
33
|
+
previousFiles?: string[];
|
|
34
|
+
}
|
|
35
|
+
export type PromptBuilder = (context: PromptContext) => string;
|
|
36
|
+
export interface IterationRecord {
|
|
37
|
+
/** Real 1-based loop turn (executor-error turns are recorded too) */
|
|
38
|
+
turn: number;
|
|
39
|
+
passed: boolean;
|
|
40
|
+
/** Fraction of gates passed this iteration (0 when the turn never reached verification) */
|
|
41
|
+
score: number;
|
|
42
|
+
gateResults: LadderResult['results'];
|
|
43
|
+
/** Files the applier wrote this turn */
|
|
44
|
+
filesApplied: string[];
|
|
45
|
+
/** Executor failure, if the model call itself errored */
|
|
46
|
+
executorError?: string;
|
|
47
|
+
/** Apply failure, if output could not be materialized */
|
|
48
|
+
applyError?: string;
|
|
49
|
+
durationMs: number;
|
|
50
|
+
}
|
|
51
|
+
export interface DeliveryResult {
|
|
52
|
+
success: boolean;
|
|
53
|
+
/** True when the baseline check found all gates already green (no turns ran) */
|
|
54
|
+
alreadyDelivered: boolean;
|
|
55
|
+
turns: number;
|
|
56
|
+
/** Highest gate score observed across iterations */
|
|
57
|
+
bestScore: number;
|
|
58
|
+
/** Turn that achieved bestScore (0 when no iterations reached verification) */
|
|
59
|
+
bestTurn: number;
|
|
60
|
+
history: IterationRecord[];
|
|
61
|
+
/** Feedback from the final ladder run (or apply/executor error context) */
|
|
62
|
+
finalFeedback: string;
|
|
63
|
+
/** Raw model output from the final turn */
|
|
64
|
+
finalOutput: string;
|
|
65
|
+
totalDurationMs: number;
|
|
66
|
+
}
|
|
67
|
+
export interface ConvergenceConfig {
|
|
68
|
+
/** Maximum execute→apply→verify iterations (default 5) */
|
|
69
|
+
maxTurns?: number;
|
|
70
|
+
/** Project whose gates define "delivered" */
|
|
71
|
+
projectRoot: string;
|
|
72
|
+
/** Override auto-detected gates (e.g. subset via CLI --gates) */
|
|
73
|
+
rungs?: GateRung[];
|
|
74
|
+
/** Ladder options forwarded to the runner */
|
|
75
|
+
ladderOptions?: LadderOptions;
|
|
76
|
+
/**
|
|
77
|
+
* Run the ladder once before turn 1 (default true). When the baseline is
|
|
78
|
+
* already green there is nothing to converge on — the loop returns
|
|
79
|
+
* alreadyDelivered without calling the model, preventing false-success
|
|
80
|
+
* outcomes from polluting adaptive routing.
|
|
81
|
+
*/
|
|
82
|
+
baselineCheck?: boolean;
|
|
83
|
+
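/** Max characters of prior model output included in retry prompts (default 3000). defaultPromptBuilder re-truncates at a fixed 3000 characters, so larger values only take effect with a custom promptBuilder */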
|
|
84
|
+
previousOutputChars?: number;
|
|
85
|
+
/**
|
|
86
|
+
* Called after every iteration. Return 'stop' to abort the loop early
|
|
87
|
+
* (Phase 5 escalation controllers hook in here).
|
|
88
|
+
*/
|
|
89
|
+
onIteration?: (record: IterationRecord) => void | 'stop';
|
|
90
|
+
}
|
|
91
|
+
/** Default prompt strategy: lean contract + structured retry context. */
|
|
92
|
+
export declare const defaultPromptBuilder: PromptBuilder;
|
|
93
|
+
export declare class ConvergenceLoop {
|
|
94
|
+
private readonly config;
|
|
95
|
+
private readonly executor;
|
|
96
|
+
private readonly ladderRunner;
|
|
97
|
+
private readonly applier;
|
|
98
|
+
private readonly promptBuilder;
|
|
99
|
+
constructor(config: ConvergenceConfig, executor: LoopExecutor, seams?: {
|
|
100
|
+
ladderRunner?: LadderRunner;
|
|
101
|
+
applier?: Applier;
|
|
102
|
+
promptBuilder?: PromptBuilder;
|
|
103
|
+
});
|
|
104
|
+
/**
|
|
105
|
+
* Run the loop for an instruction until all required gates pass or the
|
|
106
|
+
* turn budget is exhausted. Returns the full iteration history so callers
|
|
107
|
+
* can record outcomes and inspect convergence behavior.
|
|
108
|
+
*/
|
|
109
|
+
deliver(instruction: string): Promise<DeliveryResult>;
|
|
110
|
+
}
|
|
111
|
+
//# sourceMappingURL=convergence-loop.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"convergence-loop.d.ts","sourceRoot":"","sources":["../../src/delivery/convergence-loop.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAElF,OAAO,KAAK,EAAE,OAAO,EAAe,MAAM,cAAc,CAAC;AAGzD,MAAM,MAAM,YAAY,GAAG,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;AAE/D,gFAAgF;AAChF,MAAM,MAAM,YAAY,GAAG,CACzB,KAAK,EAAE,QAAQ,EAAE,EACjB,WAAW,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE,aAAa,KACpB,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;AAE1C,MAAM,WAAW,aAAa;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,oCAAoC;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,8DAA8D;IAC9D,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,wDAAwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2EAA2E;IAC3E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,yCAAyC;IACzC,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG,CAAC,OAAO,EAAE,aAAa,KAAK,MAAM,CAAC;AAE/D,MAAM,WAAW,eAAe;IAC9B,qEAAqE;IACrE,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,2FAA2F;IAC3F,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,YAAY,CAAC,SAAS,CAAC,CAAC;IACrC,wCAAwC;IACxC,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,yDAAyD;IACzD,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,yDAAyD;IACzD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,gFAAgF;IAChF,gBAAgB,EAAE,OAAO,CAAC;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,oDAAoD;IACpD,SAAS,EAAE,MAAM,CAAC;IAClB,+EAA+E;IAC/E,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,2EAA2E;IAC3E,aAAa,EAAE,MAAM,CAAC;IACtB,2CAA2C;IAC3C,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,iBAAiB;IAChC,0DAA0D;IAC1D,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,6CAA6C;IAC7C,WAAW,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,KAAK,CAAC,EAAE,QAAQ,EAAE,CAAC;IACnB,6CAA6C;IAC7C,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B;;;;;OAKG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,oFAAoF;IACpF,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;;OAGG;IACH,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,eAAe,KAAK,IAAI,GAAG,MAAM,CAAC;CAC1D;AAsBD,yEAAyE;AACzE,eAAO,MAAM,oBAAoB,EAAE,aA0BlC,CAAC;AAEF,qBAAa,eAAe;IAC1B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAoB;IAC3C,OAAO,CAAC,QAAQ,CAAC,QA
AQ,CAAe;IACxC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAe;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAgB;gBAG5C,MAAM,EAAE,iBAAiB,EACzB,QAAQ,EAAE,YAAY,EACtB,KAAK,GAAE;QACL,YAAY,CAAC,EAAE,YAAY,CAAC;QAC5B,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,aAAa,CAAC,EAAE,aAAa,CAAC;KAC1B;IASR;;;;OAIG;IACG,OAAO,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC;CAoI5D"}
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Convergence Loop
|
|
3
|
+
*
|
|
4
|
+
* Phase 1 of the Fable-parity delivery harness: drives an underlying model
|
|
5
|
+
* through execute → apply → verify → feedback iterations until the project's
|
|
6
|
+
* completion gates (verifier ladder) pass or the turn budget is exhausted.
|
|
7
|
+
*
|
|
8
|
+
* The loop owns four pluggable seams so later phases extend without breaking
|
|
9
|
+
* changes:
|
|
10
|
+
* - executor: how a prompt becomes model output
|
|
11
|
+
* - applier: how model output is materialized into the project tree
|
|
12
|
+
* - promptBuilder: how instruction/feedback/prior output compose a prompt
|
|
13
|
+
* (Phase 3 structured critique and Phase 4 memory injection plug in here)
|
|
14
|
+
* - ladderRunner: how gates are verified (tests inject stubs; Phase 2 runs
|
|
15
|
+
* candidates in isolated worktrees)
|
|
16
|
+
*/
|
|
17
|
+
import { detectRungs, runLadder } from './verifier-ladder.js';
|
|
18
|
+
import { applyFileBlocks } from './applier.js';
|
|
19
|
+
/** Turn budget used by ConvergenceLoop.deliver when `config.maxTurns` is not set. */
const DEFAULT_MAX_TURNS = 5;
/**
 * Character cap for prior model output echoed back in retry prompts.
 * deliver() uses it when `config.previousOutputChars` is not set, and
 * defaultPromptBuilder always applies it as a fixed limit.
 */
const DEFAULT_PREVIOUS_OUTPUT_CHARS = 3_000;
/**
 * Preamble placed at the top of every prompt built by defaultPromptBuilder.
 * It tells the model to answer with fenced `file:<path>` blocks holding
 * complete file contents. The lines are joined with '\n'.
 */
const OUTPUT_CONTRACT = [
    'You are an autonomous software delivery agent. Complete the task by emitting complete file contents.',
    '',
    'OUTPUT FORMAT — emit every file you create or modify as a fenced block:',
    '```file:relative/path/from/project/root',
    '<entire file content>',
    '```',
    'Use a longer fence (````file:path) when the file itself contains ``` sequences.',
    'Files are written to disk verbatim, then real gates (build, type-check, tests) run.',
    'Emit only file blocks plus brief reasoning.',
].join('\n');
|
|
32
|
+
/**
 * Keep at most `maxChars` leading characters of `text`.
 *
 * Text that already fits is returned unchanged. Longer text is cut at
 * `maxChars` and followed by a truncation marker on its own line, so the
 * result can be longer than `maxChars` by the length of that marker.
 *
 * @param text     String to shorten.
 * @param maxChars Number of leading characters to keep.
 * @returns The original text, or its head plus the marker.
 */
function truncateHead(text, maxChars) {
    const fits = text.length <= maxChars;
    return fits ? text : text.slice(0, maxChars) + '\n…(truncated)…';
}
|
|
37
|
+
/**
 * Default prompt strategy: lean contract + structured retry context.
 *
 * Turn 1 produces the output contract followed by the task. Later turns
 * append a "PREVIOUS ATTEMPT" section holding whichever of these the
 * context carries: the files emitted, the apply error, the gate feedback,
 * and the previous output.
 *
 * The previous output is always cut to DEFAULT_PREVIOUS_OUTPUT_CHARS here,
 * independent of any limit the caller applied beforehand.
 *
 * @param ctx Prompt context for the turn about to execute.
 * @returns The full prompt text, lines joined with '\n'.
 */
export const defaultPromptBuilder = (ctx) => {
    const header = [OUTPUT_CONTRACT, '', `TASK: ${ctx.instruction}`];
    if (ctx.turn === 1) {
        return header.join('\n');
    }
    const priorFiles = ctx.previousFiles ?? [];
    const retry = [
        '',
        `PREVIOUS ATTEMPT (turn ${ctx.turn - 1}):`,
        ...(priorFiles.length > 0 ? [`Files you emitted: ${priorFiles.join(', ')}`] : []),
        ...(ctx.applyError ? [`Your output could not be applied: ${ctx.applyError}`] : []),
        ...(ctx.feedback ? [ctx.feedback] : []),
        ...(ctx.previousOutput
            ? [
                '',
                'Your previous output (truncated):',
                truncateHead(ctx.previousOutput, DEFAULT_PREVIOUS_OUTPUT_CHARS),
            ]
            : []),
        '',
        'Fix the issues and emit corrected file blocks.',
    ];
    return [...header, ...retry].join('\n');
};
|
|
62
|
+
/**
 * Drives a model through execute → apply → verify iterations against a
 * project's gates and records every turn.
 */
export class ConvergenceLoop {
    config;
    executor;
    ladderRunner;
    applier;
    promptBuilder;
    /**
     * @param config   Loop settings: project root, turn budget, gates, hooks.
     * @param executor Turns a prompt into raw model output. A rejection is
     *                 recorded as that turn's `executorError`, not rethrown.
     * @param seams    Optional overrides. Each falls back to the production
     *                 implementation (runLadder, applyFileBlocks,
     *                 defaultPromptBuilder).
     */
    constructor(config, executor, seams = {}) {
        this.config = config;
        this.executor = executor;
        this.ladderRunner = seams.ladderRunner ?? runLadder;
        this.applier = seams.applier ?? applyFileBlocks;
        this.promptBuilder = seams.promptBuilder ?? defaultPromptBuilder;
    }
    /**
     * Run the loop for an instruction until all required gates pass, the
     * `onIteration` hook returns 'stop', or the turn budget is exhausted.
     *
     * The result's `finalFeedback` describes the most recent turn: the ladder
     * feedback when gates ran, otherwise the executor or apply failure.
     * `bestTurn` is 0 only when no turn reached verification; ties on score
     * keep the earliest turn.
     *
     * Exceptions thrown by the applier or the ladder runner are not caught
     * here and reject the returned promise.
     *
     * @param instruction Task description handed to the prompt builder.
     * @returns The delivery outcome including the full iteration history.
     * @throws Error when no gates are available, or when `maxTurns` is not a
     *         positive integer.
     */
    async deliver(instruction) {
        const start = Date.now();
        const maxTurns = this.config.maxTurns ?? DEFAULT_MAX_TURNS;
        const rungs = this.config.rungs && this.config.rungs.length > 0
            ? this.config.rungs
            : detectRungs(this.config.projectRoot);
        if (rungs.length === 0) {
            throw new Error(`No verifiable gates for ${this.config.projectRoot} — pass explicit rungs or add package.json scripts.`);
        }
        if (!Number.isInteger(maxTurns) || maxTurns < 1) {
            throw new Error(`maxTurns must be a positive integer, got ${String(this.config.maxTurns)}`);
        }
        const history = [];
        const previousOutputChars = this.config.previousOutputChars ?? DEFAULT_PREVIOUS_OUTPUT_CHARS;
        // Baseline: a green tree means there is nothing for the loop to deliver.
        if (this.config.baselineCheck ?? true) {
            const baseline = await this.ladderRunner(rungs, this.config.projectRoot, this.config.ladderOptions);
            if (baseline.passed) {
                return {
                    success: true,
                    alreadyDelivered: true,
                    turns: 0,
                    bestScore: baseline.score,
                    bestTurn: 0,
                    history,
                    finalFeedback: baseline.feedback,
                    finalOutput: '',
                    totalDurationMs: Date.now() - start,
                };
            }
        }
        let success = false;
        let finalOutput = '';
        let finalFeedback = '';
        let bestScore = 0;
        let bestTurn = 0;
        let prevContext = {};
        for (let turn = 1; turn <= maxTurns; turn++) {
            const turnStart = Date.now();
            const prompt = this.promptBuilder({ instruction, turn, ...prevContext });
            // Execute — a failed model call is recorded, not rethrown.
            let output = '';
            let executorError;
            try {
                output = await this.executor(prompt);
            }
            catch (err) {
                executorError = err instanceof Error ? err.message : String(err);
            }
            // Keep the last non-empty output so a failed final turn does not erase it.
            finalOutput = output || finalOutput;
            // Apply
            let applyResult = null;
            let applyError;
            if (!executorError) {
                applyResult = await this.applier(output, this.config.projectRoot);
                // Tolerate custom appliers that omit `rejected`.
                const rejected = applyResult.rejected ?? [];
                if (applyResult.error) {
                    applyError = applyResult.error;
                }
                else if (rejected.length > 0) {
                    applyError = `Rejected blocks: ${rejected
                        .map((r) => `${r.path} (${r.reason})`)
                        .join('; ')}`;
                }
            }
            // Verify — only when something was applied; otherwise the tree is
            // unchanged and re-running gates would waste minutes for no signal.
            const filesApplied = applyResult?.filesWritten ?? [];
            let ladder = null;
            if (!executorError && filesApplied.length > 0) {
                ladder = await this.ladderRunner(rungs, this.config.projectRoot, this.config.ladderOptions);
            }
            if (ladder) {
                finalFeedback = ladder.feedback;
                // The first verified turn always counts, even with score 0, so
                // bestTurn is 0 only when no turn reached verification.
                if (bestTurn === 0 || ladder.score > bestScore) {
                    bestScore = ladder.score;
                    bestTurn = turn;
                }
            }
            else if (executorError) {
                // No gates ran: report why this turn produced nothing.
                finalFeedback = `Model call failed: ${executorError}`;
            }
            else if (applyError) {
                finalFeedback = `Your output could not be applied: ${applyError}`;
            }
            const record = {
                turn,
                passed: ladder?.passed ?? false,
                score: ladder?.score ?? 0,
                gateResults: ladder?.results ?? [],
                filesApplied,
                executorError,
                applyError,
                durationMs: Date.now() - turnStart,
            };
            history.push(record);
            // The hook sees every turn, including the passing one.
            const directive = this.config.onIteration?.(record);
            if (ladder?.passed) {
                success = true;
                break;
            }
            if (directive === 'stop') {
                break;
            }
            // Context handed to the prompt builder on the next turn.
            prevContext = {
                previousOutput: executorError
                    ? undefined
                    : truncateHead(output, previousOutputChars),
                feedback: executorError ? `Model call failed: ${executorError}` : ladder?.feedback,
                applyError,
                previousFiles: filesApplied.length > 0 ? filesApplied : undefined,
            };
        }
        return {
            success,
            alreadyDelivered: false,
            turns: history.length,
            bestScore,
            bestTurn,
            history,
            finalFeedback,
            finalOutput,
            totalDurationMs: Date.now() - start,
        };
    }
}
|
|
199
|
+
//# sourceMappingURL=convergence-loop.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"convergence-loop.js","sourceRoot":"","sources":["../../src/delivery/convergence-loop.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAGH,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAE9D,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAqF/C,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAC5B,MAAM,6BAA6B,GAAG,KAAK,CAAC;AAE5C,MAAM,eAAe,GAAG;IACtB,sGAAsG;IACtG,EAAE;IACF,yEAAyE;IACzE,yCAAyC;IACzC,uBAAuB;IACvB,KAAK;IACL,iFAAiF;IACjF,qFAAqF;IACrF,6CAA6C;CAC9C,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,SAAS,YAAY,CAAC,IAAY,EAAE,QAAgB;IAClD,IAAI,IAAI,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,IAAI,CAAC;IACzC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,iBAAiB,CAAC;AACrD,CAAC;AAED,yEAAyE;AACzE,MAAM,CAAC,MAAM,oBAAoB,GAAkB,CAAC,GAAG,EAAE,EAAE;IACzD,IAAI,GAAG,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACnB,OAAO,CAAC,eAAe,EAAE,EAAE,EAAE,SAAS,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtE,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,eAAe,EAAE,EAAE,EAAE,SAAS,GAAG,CAAC,WAAW,EAAE,EAAE,EAAE,CAAC,CAAC;IACvE,QAAQ,CAAC,IAAI,CAAC,0BAA0B,GAAG,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC;IAE1D,IAAI,GAAG,CAAC,aAAa,IAAI,GAAG,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,QAAQ,CAAC,IAAI,CAAC,sBAAsB,GAAG,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC;QACnB,QAAQ,CAAC,IAAI,CAAC,qCAAqC,GAAG,CAAC,UAAU,EAAE,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QACjB,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC9B,CAAC;IACD,IAAI,GAAG,CAAC,cAAc,EAAE,CAAC;QACvB,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,QAAQ,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;QACnD,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,cAAc,EAAE,6BAA6B,CAAC,CAAC,CAAC;IACjF,CAAC;IAED,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAClB,QAAQ,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;IAChE,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC7B,CAAC,CAAC;AAEF,MAAM,OAAO,eAAe;IACT,MAAM,CAAoB;IAC1B,QAAQ,CAAe;IACvB,YAAY,CAAe;IAC3B,OAAO,CAAU;IACjB,aAAa,CAAgB;IAE9C,YACE,MAAyB,EACzB,QAAsB,EACtB,QAII,EAAE;QAEN,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI
,CAAC,YAAY,GAAG,KAAK,CAAC,YAAY,IAAI,SAAS,CAAC;QACpD,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC,OAAO,IAAI,eAAe,CAAC;QAChD,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC,aAAa,IAAI,oBAAoB,CAAC;IACnE,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,OAAO,CAAC,WAAmB;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,iBAAiB,CAAC;QAC3D,MAAM,KAAK,GACT,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAC/C,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK;YACnB,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAE3C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CACb,2BAA2B,IAAI,CAAC,MAAM,CAAC,WAAW,qDAAqD,CACxG,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YAChD,MAAM,IAAI,KAAK,CAAC,4CAA4C,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAC9F,CAAC;QAED,MAAM,OAAO,GAAsB,EAAE,CAAC;QACtC,MAAM,mBAAmB,GAAG,IAAI,CAAC,MAAM,CAAC,mBAAmB,IAAI,6BAA6B,CAAC;QAE7F,yEAAyE;QACzE,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,IAAI,IAAI,EAAE,CAAC;YACtC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;YACpG,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;gBACpB,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,gBAAgB,EAAE,IAAI;oBACtB,KAAK,EAAE,CAAC;oBACR,SAAS,EAAE,QAAQ,CAAC,KAAK;oBACzB,QAAQ,EAAE,CAAC;oBACX,OAAO;oBACP,aAAa,EAAE,QAAQ,CAAC,QAAQ;oBAChC,WAAW,EAAE,EAAE;oBACf,eAAe,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;iBACpC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,IAAI,WAAW,GAAG,EAAE,CAAC;QACrB,IAAI,aAAa,GAAG,EAAE,CAAC;QACvB,IAAI,WAAW,GAAgD,EAAE,CAAC;QAElE,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,IAAI,QAAQ,EAAE,IAAI,EAAE,EAAE,CAAC;YAC5C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,GAAG,WAAW,EAAE,CAAC,CAAC;YAEzE,UAAU;YACV,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,aAAiC,CAAC;YACtC,IAAI,CAAC;gBACH,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACvC,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,aAAa,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,C
AAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACnE,CAAC;YACD,WAAW,GAAG,MAAM,IAAI,WAAW,CAAC;YAEpC,QAAQ;YACR,IAAI,WAAW,GAAuB,IAAI,CAAC;YAC3C,IAAI,UAA8B,CAAC;YACnC,IAAI,CAAC,aAAa,EAAE,CAAC;gBACnB,WAAW,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;gBAClE,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;oBACtB,UAAU,GAAG,WAAW,CAAC,KAAK,CAAC;gBACjC,CAAC;qBAAM,IAAI,WAAW,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC3C,UAAU,GAAG,oBAAoB,WAAW,CAAC,QAAQ;yBAClD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,MAAM,GAAG,CAAC;yBACrC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAClB,CAAC;YACH,CAAC;YAED,kEAAkE;YAClE,oEAAoE;YACpE,MAAM,YAAY,GAAG,WAAW,EAAE,YAAY,IAAI,EAAE,CAAC;YACrD,IAAI,MAAM,GAAwB,IAAI,CAAC;YACvC,IAAI,CAAC,aAAa,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9C,MAAM,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;gBAC5F,aAAa,GAAG,MAAM,CAAC,QAAQ,CAAC;YAClC,CAAC;YAED,MAAM,MAAM,GAAoB;gBAC9B,IAAI;gBACJ,MAAM,EAAE,MAAM,EAAE,MAAM,IAAI,KAAK;gBAC/B,KAAK,EAAE,MAAM,EAAE,KAAK,IAAI,CAAC;gBACzB,WAAW,EAAE,MAAM,EAAE,OAAO,IAAI,EAAE;gBAClC,YAAY;gBACZ,aAAa;gBACb,UAAU;gBACV,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;aACnC,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,MAAM,CAAC,CAAC;YAEpD,IAAI,MAAM,EAAE,MAAM,EAAE,CAAC;gBACnB,OAAO,GAAG,IAAI,CAAC;gBACf,MAAM;YACR,CAAC;YACD,IAAI,SAAS,KAAK,MAAM,EAAE,CAAC;gBACzB,MAAM;YACR,CAAC;YAED,WAAW,GAAG;gBACZ,cAAc,EAAE,aAAa;oBAC3B,CAAC,CAAC,SAAS;oBACX,CAAC,CAAC,YAAY,CAAC,MAAM,EAAE,mBAAmB,CAAC;gBAC7C,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC,sBAAsB,aAAa,EAAE,CAAC,CAAC,CAAC,MAAM,EAAE,QAAQ;gBAClF,UAAU;gBACV,aAAa,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS;aAClE,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,MAAM,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;gBAC7B,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;gBACzB,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC;YACzB,CAAC;QACH,CAAC;QAED,OAAO;YACL,OAAO;YACP
,gBAAgB,EAAE,KAAK;YACvB,KAAK,EAAE,OAAO,CAAC,MAAM;YACrB,SAAS;YACT,QAAQ;YACR,OAAO;YACP,aAAa;YACb,WAAW;YACX,eAAe,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SACpC,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Delivery harness — Fable-parity convergence loop.
|
|
3
|
+
*
|
|
4
|
+
* Drives underlying models through execute → apply → verify → feedback
|
|
5
|
+
* iterations against the project's real completion gates until delivery
|
|
6
|
+
* is achieved.
|
|
7
|
+
*/
|
|
8
|
+
export { ConvergenceLoop, defaultPromptBuilder, type ConvergenceConfig, type DeliveryResult, type IterationRecord, type LoopExecutor, type LadderRunner, type PromptBuilder, type PromptContext, } from './convergence-loop.js';
|
|
9
|
+
export { detectRungs, runLadder, runRung, formatFeedback, type GateRung, type RungResult, type RungFailureReason, type LadderResult, type LadderOptions, } from './verifier-ladder.js';
|
|
10
|
+
export { applyFileBlocks, parseFileBlocks, type Applier, type ApplyResult, type FileBlock, } from './applier.js';
|
|
11
|
+
export { OpenAICompatClient, type OpenAICompatClientOptions } from '../models/openai-compat-client.js';
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/delivery/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,eAAe,EACf,oBAAoB,EACpB,KAAK,iBAAiB,EACtB,KAAK,cAAc,EACnB,KAAK,eAAe,EACpB,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,aAAa,EAClB,KAAK,aAAa,GACnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,WAAW,EACX,SAAS,EACT,OAAO,EACP,cAAc,EACd,KAAK,QAAQ,EACb,KAAK,UAAU,EACf,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,aAAa,GACnB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,eAAe,EACf,eAAe,EACf,KAAK,OAAO,EACZ,KAAK,WAAW,EAChB,KAAK,SAAS,GACf,MAAM,cAAc,CAAC;AAEtB,OAAO,EAAE,kBAAkB,EAAE,KAAK,yBAAyB,EAAE,MAAM,mCAAmC,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Delivery harness — Fable-parity convergence loop.
|
|
3
|
+
*
|
|
4
|
+
* Drives underlying models through execute → apply → verify → feedback
|
|
5
|
+
* iterations against the project's real completion gates until delivery
|
|
6
|
+
* is achieved.
|
|
7
|
+
*/
|
|
8
|
+
export { ConvergenceLoop, defaultPromptBuilder, } from './convergence-loop.js';
|
|
9
|
+
export { detectRungs, runLadder, runRung, formatFeedback, } from './verifier-ladder.js';
|
|
10
|
+
export { applyFileBlocks, parseFileBlocks, } from './applier.js';
|
|
11
|
+
export { OpenAICompatClient } from '../models/openai-compat-client.js';
|
|
12
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/delivery/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,eAAe,EACf,oBAAoB,GAQrB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,WAAW,EACX,SAAS,EACT,OAAO,EACP,cAAc,GAMf,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,eAAe,EACf,eAAe,GAIhB,MAAM,cAAc,CAAC;AAEtB,OAAO,EAAE,kBAAkB,EAAkC,MAAM,mCAAmC,CAAC"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verifier Ladder
|
|
3
|
+
*
|
|
4
|
+
* Turns the repository's completion gates (build, type-check, test, lint)
|
|
5
|
+
* into a programmatic verifier that the convergence loop can call after each
|
|
6
|
+
* model iteration. Each rung runs a real command in the project root; the
|
|
7
|
+
* ladder reports a pass/fail per rung, an aggregate score (fraction of rungs
|
|
8
|
+
* passed), and structured feedback sized for small-model context budgets.
|
|
9
|
+
*
|
|
10
|
+
* Detection is npm-centric (package.json scripts); callers targeting other
|
|
11
|
+
* ecosystems can pass explicit rungs.
|
|
12
|
+
*/
|
|
13
|
+
export interface GateRung {
|
|
14
|
+
/** Stable identifier, e.g. 'build', 'typecheck', 'test', 'lint' */
|
|
15
|
+
id: string;
|
|
16
|
+
/** Human-readable name shown in feedback */
|
|
17
|
+
name: string;
|
|
18
|
+
/** Executable to run (no shell interpolation) */
|
|
19
|
+
command: string;
|
|
20
|
+
/** Arguments passed verbatim */
|
|
21
|
+
args: string[];
|
|
22
|
+
/**
|
|
23
|
+
* Required rungs gate delivery: the ladder only passes when all required
|
|
24
|
+
* rungs pass, and a required failure stops later rungs (fail-fast).
|
|
25
|
+
* Optional rungs are reported but never block delivery.
|
|
26
|
+
*/
|
|
27
|
+
required: boolean;
|
|
28
|
+
/** Per-rung timeout in milliseconds */
|
|
29
|
+
timeoutMs: number;
|
|
30
|
+
}
|
|
31
|
+
export type RungFailureReason = 'exit' | 'timeout' | 'signal' | 'spawn-error';
|
|
32
|
+
export interface RungResult {
|
|
33
|
+
id: string;
|
|
34
|
+
name: string;
|
|
35
|
+
passed: boolean;
|
|
36
|
+
/** True when the rung never ran because an earlier required rung failed */
|
|
37
|
+
skipped: boolean;
|
|
38
|
+
exitCode: number | null;
|
|
39
|
+
/** Why the rung failed; undefined when it passed or was skipped */
|
|
40
|
+
failureReason?: RungFailureReason;
|
|
41
|
+
durationMs: number;
|
|
42
|
+
/** Tail of combined stdout+stderr, truncated for prompt injection */
|
|
43
|
+
outputTail: string;
|
|
44
|
+
}
|
|
45
|
+
export interface LadderResult {
|
|
46
|
+
/** True when every required rung passed */
|
|
47
|
+
passed: boolean;
|
|
48
|
+
/** Fraction of all rungs that passed (skipped rungs count as not passed) */
|
|
49
|
+
score: number;
|
|
50
|
+
results: RungResult[];
|
|
51
|
+
/** Structured feedback for the next loop iteration */
|
|
52
|
+
feedback: string;
|
|
53
|
+
}
|
|
54
|
+
export interface LadderOptions {
|
|
55
|
+
/** Stop at the first failing required rung (default true — cheaper feedback) */
|
|
56
|
+
failFast?: boolean;
|
|
57
|
+
/** Max characters of command output included per failing rung (default 2000) */
|
|
58
|
+
outputTailChars?: number;
|
|
59
|
+
/** Default per-rung timeout in ms (default 300000) */
|
|
60
|
+
timeoutMs?: number;
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Detect the gate rungs available in a project from its package.json scripts.
|
|
64
|
+
* Order matters: cheap/structural gates run before expensive ones so failure
|
|
65
|
+
* feedback arrives fast.
|
|
66
|
+
*/
|
|
67
|
+
export declare function detectRungs(projectRoot: string, timeoutMs?: number): GateRung[];
|
|
68
|
+
/** Run a single rung synchronously in the project root. */
|
|
69
|
+
export declare function runRung(rung: GateRung, projectRoot: string, tailChars?: number): RungResult;
|
|
70
|
+
/**
|
|
71
|
+
* Build feedback text from rung results. Only the first failing required
|
|
72
|
+
* rung's output is included in detail — small models do better with one
|
|
73
|
+
* concrete problem at a time than with a wall of every failure.
|
|
74
|
+
*/
|
|
75
|
+
export declare function formatFeedback(results: RungResult[], rungs: GateRung[]): string;
|
|
76
|
+
/** Run the full ladder, honoring fail-fast for required rungs. */
|
|
77
|
+
export declare function runLadder(rungs: GateRung[], projectRoot: string, options?: LadderOptions): LadderResult;
|
|
78
|
+
//# sourceMappingURL=verifier-ladder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"verifier-ladder.d.ts","sourceRoot":"","sources":["../../src/delivery/verifier-ladder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAMH,MAAM,WAAW,QAAQ;IACvB,mEAAmE;IACnE,EAAE,EAAE,MAAM,CAAC;IACX,4CAA4C;IAC5C,IAAI,EAAE,MAAM,CAAC;IACb,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,IAAI,EAAE,MAAM,EAAE,CAAC;IACf;;;;OAIG;IACH,QAAQ,EAAE,OAAO,CAAC;IAClB,uCAAuC;IACvC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,iBAAiB,GAAG,MAAM,GAAG,SAAS,GAAG,QAAQ,GAAG,aAAa,CAAC;AAE9E,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,2EAA2E;IAC3E,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,mEAAmE;IACnE,aAAa,CAAC,EAAE,iBAAiB,CAAC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,2CAA2C;IAC3C,MAAM,EAAE,OAAO,CAAC;IAChB,4EAA4E;IAC5E,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,sDAAsD;IACtD,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,gFAAgF;IAChF,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,gFAAgF;IAChF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAmBD;;;;GAIG;AACH,wBAAgB,WAAW,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,GAAE,MAA2B,GAAG,QAAQ,EAAE,CAmEnG;AAQD,2DAA2D;AAC3D,wBAAgB,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,EAAE,SAAS,GAAE,MAA2B,GAAG,UAAU,CA+C/G;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,UAAU,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,MAAM,CAqB/E;AAED,kEAAkE;AAClE,wBAAgB,SAAS,CACvB,KAAK,EAAE,QAAQ,EAAE,EACjB,WAAW,EAAE,MAAM,EACnB,OAAO,GAAE,aAAkB,GAC1B,YAAY,CA8Cd"}
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verifier Ladder
|
|
3
|
+
*
|
|
4
|
+
* Turns the repository's completion gates (build, type-check, test, lint)
|
|
5
|
+
* into a programmatic verifier that the convergence loop can call after each
|
|
6
|
+
* model iteration. Each rung runs a real command in the project root; the
|
|
7
|
+
* ladder reports a pass/fail per rung, an aggregate score (fraction of rungs
|
|
8
|
+
* passed), and structured feedback sized for small-model context budgets.
|
|
9
|
+
*
|
|
10
|
+
* Detection is npm-centric (package.json scripts); callers targeting other
|
|
11
|
+
* ecosystems can pass explicit rungs.
|
|
12
|
+
*/
|
|
13
|
+
import { spawnSync } from 'child_process';
|
|
14
|
+
import { existsSync, readFileSync } from 'fs';
|
|
15
|
+
import { join } from 'path';
|
|
16
|
+
/** Default per-rung timeout in milliseconds (300 000 ms = 5 minutes); used by detectRungs when no timeoutMs is supplied. */
const DEFAULT_TIMEOUT_MS = 300_000;
|
|
17
|
+
/** Default cap, in characters, on the failing-gate output tail; used by runRung and runLadder when no explicit budget is supplied. */
const DEFAULT_TAIL_CHARS = 2_000;
|
|
18
|
+
/** Env vars matching these patterns are stripped before running gate
|
|
19
|
+
* commands — project scripts (and npm lifecycle hooks) in arbitrary
|
|
20
|
+
* --project-root checkouts must not inherit provider credentials. */
|
|
21
|
+
const SECRET_ENV_RE = /(API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL)/i;
|
|
22
|
+
function sanitizedEnv() {
|
|
23
|
+
const env = { CI: 'true' };
|
|
24
|
+
for (const [key, value] of Object.entries(process.env)) {
|
|
25
|
+
if (SECRET_ENV_RE.test(key))
|
|
26
|
+
continue;
|
|
27
|
+
env[key] = value;
|
|
28
|
+
}
|
|
29
|
+
return env;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Detect the gate rungs available in a project from its package.json scripts.
|
|
33
|
+
* Order matters: cheap/structural gates run before expensive ones so failure
|
|
34
|
+
* feedback arrives fast.
|
|
35
|
+
*/
|
|
36
|
+
export function detectRungs(projectRoot, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
37
|
+
const pkgPath = join(projectRoot, 'package.json');
|
|
38
|
+
if (!existsSync(pkgPath)) {
|
|
39
|
+
return [];
|
|
40
|
+
}
|
|
41
|
+
let scripts = {};
|
|
42
|
+
try {
|
|
43
|
+
const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
|
|
44
|
+
scripts = pkg.scripts ?? {};
|
|
45
|
+
}
|
|
46
|
+
catch {
|
|
47
|
+
return [];
|
|
48
|
+
}
|
|
49
|
+
const rungs = [];
|
|
50
|
+
if (scripts['build']) {
|
|
51
|
+
rungs.push({
|
|
52
|
+
id: 'build',
|
|
53
|
+
name: 'Build (npm run build)',
|
|
54
|
+
command: 'npm',
|
|
55
|
+
args: ['run', 'build'],
|
|
56
|
+
required: true,
|
|
57
|
+
timeoutMs,
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
// Type-check is meaningful for TypeScript projects even when build exists
|
|
61
|
+
// (build may use a bundler that skips type errors). --no-install fails
|
|
62
|
+
// closed instead of letting npx fetch the registry package named 'tsc'.
|
|
63
|
+
if (existsSync(join(projectRoot, 'tsconfig.json')) &&
|
|
64
|
+
existsSync(join(projectRoot, 'node_modules', '.bin', 'tsc'))) {
|
|
65
|
+
rungs.push({
|
|
66
|
+
id: 'typecheck',
|
|
67
|
+
name: 'Type-check (tsc --noEmit)',
|
|
68
|
+
command: 'npx',
|
|
69
|
+
args: ['--no-install', 'tsc', '--noEmit'],
|
|
70
|
+
required: true,
|
|
71
|
+
timeoutMs,
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
if (scripts['test']) {
|
|
75
|
+
rungs.push({
|
|
76
|
+
id: 'test',
|
|
77
|
+
name: 'Tests (npm test)',
|
|
78
|
+
command: 'npm',
|
|
79
|
+
args: ['test'],
|
|
80
|
+
required: true,
|
|
81
|
+
timeoutMs,
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
if (scripts['lint']) {
|
|
85
|
+
rungs.push({
|
|
86
|
+
id: 'lint',
|
|
87
|
+
name: 'Lint (npm run lint)',
|
|
88
|
+
command: 'npm',
|
|
89
|
+
args: ['run', 'lint'],
|
|
90
|
+
required: false,
|
|
91
|
+
timeoutMs,
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
return rungs;
|
|
95
|
+
}
|
|
96
|
+
/**
 * Trim `text` and keep at most its last `maxChars` characters.
 *
 * Text that fits the budget is returned trimmed and otherwise unchanged.
 * Longer text is cut to its tail and prefixed with a truncation marker so the
 * reader knows earlier output was dropped.
 *
 * @param text     Raw command output.
 * @param maxChars Character budget for the kept tail. Zero, negative or NaN
 *                 budgets keep nothing and yield only the marker.
 * @returns The trimmed text, or the marker followed by the kept tail.
 */
function truncateTail(text, maxChars) {
    const trimmed = text.trim();
    if (trimmed.length <= maxChars)
        return trimmed;
    // slice(-0) is slice(0), i.e. the WHOLE string, and a negative budget
    // would turn into a positive start index. Normalise the budget so a
    // non-positive (or NaN) value really means "keep nothing".
    const budget = maxChars > 0 ? Math.floor(maxChars) : 0;
    if (budget === 0)
        return '…(truncated)…';
    return `…(truncated)…\n${trimmed.slice(-budget)}`;
}
|
|
102
|
+
/**
 * Run a single rung synchronously in the project root.
 *
 * The command runs with a sanitized environment, the rung's own timeout and a
 * 16 MiB output capture buffer. A rung passes only when the process exits
 * with status 0.
 *
 * @param rung        Gate to run (`command`, `args`, `timeoutMs`, `id`, `name`).
 * @param projectRoot Working directory for the command.
 * @param tailChars   Character budget for the captured output tail.
 * @returns A result with `passed`, `exitCode` (null when the process did not
 *          exit normally), `failureReason` (undefined on success),
 *          `durationMs`, and `outputTail` (empty on success).
 */
export function runRung(rung, projectRoot, tailChars = DEFAULT_TAIL_CHARS) {
    const maxBuffer = 16 * 1024 * 1024;
    const start = Date.now();
    const res = spawnSync(rung.command, rung.args, {
        cwd: projectRoot,
        encoding: 'utf-8',
        timeout: rung.timeoutMs,
        maxBuffer,
        env: sanitizedEnv(),
    });
    const durationMs = Date.now() - start;
    const exitCode = res.status;
    const passed = exitCode === 0;
    // spawnSync reports timeouts/missing binaries via res.error (+ res.signal),
    // with status null and empty output — without this the model would get a
    // bare "FAIL" and burn its turn budget against an unexplained gate.
    let failureReason;
    let diagnostic = '';
    if (!passed) {
        const errCode = res.error?.code;
        if (errCode === 'ENOBUFS') {
            // Overflowing maxBuffer also kills the child with SIGTERM, so it
            // must be recognised before the SIGTERM-based timeout heuristic
            // below, otherwise it is misreported as a timeout.
            failureReason = 'spawn-error';
            diagnostic = `Gate output exceeded the ${maxBuffer}-byte capture buffer and the process was killed.`;
        }
        else if (errCode === 'ETIMEDOUT' || (res.error && res.signal === 'SIGTERM')) {
            failureReason = 'timeout';
            diagnostic = `Gate timed out after ${rung.timeoutMs}ms.`;
        }
        else if (res.error) {
            failureReason = 'spawn-error';
            diagnostic = `Gate could not run: ${res.error.message}`;
        }
        else if (res.signal) {
            failureReason = 'signal';
            diagnostic = `Gate was killed by signal ${res.signal}.`;
        }
        else {
            failureReason = 'exit';
        }
    }
    let outputTail = '';
    if (!passed) {
        // Truncate the process output only, then put the diagnostic in front:
        // tail-truncating the combined text would cut the diagnostic off
        // whenever the gate printed more than the budget.
        const output = truncateTail(`${res.stdout ?? ''}\n${res.stderr ?? ''}`, tailChars);
        outputTail = [diagnostic, output].filter((part) => part !== '').join('\n');
    }
    return {
        id: rung.id,
        name: rung.name,
        passed,
        skipped: false,
        exitCode,
        failureReason,
        durationMs,
        outputTail,
    };
}
|
|
150
|
+
/**
|
|
151
|
+
* Build feedback text from rung results. Only the first failing required
|
|
152
|
+
* rung's output is included in detail — small models do better with one
|
|
153
|
+
* concrete problem at a time than with a wall of every failure.
|
|
154
|
+
*/
|
|
155
|
+
export function formatFeedback(results, rungs) {
|
|
156
|
+
const requiredIds = new Set(rungs.filter((r) => r.required).map((r) => r.id));
|
|
157
|
+
const lines = ['Gate results:'];
|
|
158
|
+
for (const r of results) {
|
|
159
|
+
const status = r.skipped ? 'SKIPPED (earlier gate failed)' : r.passed ? 'PASS' : 'FAIL';
|
|
160
|
+
const optional = requiredIds.has(r.id) ? '' : ' (optional)';
|
|
161
|
+
lines.push(`- ${r.name}${optional}: ${status}`);
|
|
162
|
+
}
|
|
163
|
+
const firstFailure = results.find((r) => !r.passed && !r.skipped && requiredIds.has(r.id)) ??
|
|
164
|
+
results.find((r) => !r.passed && !r.skipped);
|
|
165
|
+
if (firstFailure && firstFailure.outputTail) {
|
|
166
|
+
lines.push('');
|
|
167
|
+
lines.push(`Fix this gate first — ${firstFailure.name} output:`);
|
|
168
|
+
lines.push('```');
|
|
169
|
+
lines.push(firstFailure.outputTail);
|
|
170
|
+
lines.push('```');
|
|
171
|
+
}
|
|
172
|
+
return lines.join('\n');
|
|
173
|
+
}
|
|
174
|
+
/**
 * Execute every rung of the ladder in order and summarise the outcome.
 *
 * With fail-fast on (the default), the first failing required rung halts
 * execution; each remaining rung is recorded as skipped rather than run.
 *
 * @param rungs       Rungs to run, in order.
 * @param projectRoot Directory the rung commands run in.
 * @param options     `failFast` (default true) and `outputTailChars`
 *                    (default DEFAULT_TAIL_CHARS).
 * @returns `passed` (all required rungs passed), `score` (fraction of all
 *          rungs that passed, 1 for an empty ladder), per-rung `results`
 *          and the formatted `feedback` text.
 */
export function runLadder(rungs, projectRoot, options = {}) {
    const haltOnRequiredFailure = options.failFast ?? true;
    const tailBudget = options.outputTailChars ?? DEFAULT_TAIL_CHARS;
    let halted = false;
    const results = rungs.map((rung) => {
        if (halted) {
            // Placeholder for a rung that never ran.
            return {
                id: rung.id,
                name: rung.name,
                passed: false,
                skipped: true,
                exitCode: null,
                durationMs: 0,
                outputTail: '',
            };
        }
        const outcome = runRung(rung, projectRoot, tailBudget);
        if (haltOnRequiredFailure && rung.required && !outcome.passed) {
            halted = true;
        }
        return outcome;
    });
    const passedCount = results.reduce((count, r) => (r.passed ? count + 1 : count), 0);
    const score = rungs.length > 0 ? passedCount / rungs.length : 1;
    // Delivery is gated on required rungs only; optional gates (lint) are
    // reported but never block convergence.
    const requiredRungs = rungs.filter((rung) => rung.required);
    const isRequiredId = (id) => requiredRungs.some((rung) => rung.id === id);
    const requiredPassed = results.filter((r) => r.passed && isRequiredId(r.id)).length;
    return {
        passed: requiredPassed === requiredRungs.length,
        score,
        results,
        feedback: formatFeedback(results, rungs),
    };
}
|
|
213
|
+
//# sourceMappingURL=verifier-ladder.js.map
|